├── .eslintignore ├── .eslintrc.json ├── .gitattributes ├── .github └── workflows │ └── test.yml ├── .gitignore ├── .jsdoc.conf.js ├── LICENSE ├── README.md ├── package-lock.json ├── package.json ├── rollup.config.js ├── scripts ├── lib │ └── tsdConvertTupleArrays.js └── tsd-postprocess.js ├── src ├── MatchingContext.js ├── ReplacementStringBuilder.js ├── UnionReplacer.js ├── UnionReplacerElement.js ├── typedefs-tscompat.js └── typedefs.js ├── test ├── .eslintrc.json ├── jasmine.json ├── markdown-doc-behavior.js ├── matching-context.spec.js ├── readme.spec.js ├── union-replacer-matching.spec.js └── union-replacer.spec.js └── types ├── index.d.ts ├── test.ts ├── tsconfig.json └── tslint.json /.eslintignore: -------------------------------------------------------------------------------- 1 | dist 2 | -------------------------------------------------------------------------------- /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": ["airbnb-base", "plugin:jsdoc/recommended"], 3 | "plugins": ["jsdoc"], 4 | "rules": { 5 | "no-plusplus": "off", 6 | "no-cond-assign": ["error", "except-parens"], 7 | "no-unused-expressions": ["error", { "allowShortCircuit": true }], 8 | "jsdoc/require-jsdoc": ["warn", { 9 | "publicOnly": true 10 | }], 11 | "jsdoc/no-undefined-types": ["warn", { 12 | "definedTypes": [ 13 | "UnionReplacer", 14 | "UnionReplacer.ReplacementBuilder", 15 | "RegExpExecArray", 16 | "true", 17 | "false", 18 | "T" 19 | ] 20 | }], 21 | "jsdoc/check-tag-names": ["warn", { 22 | "definedTags": ["template"] 23 | }], 24 | "jsdoc/check-examples": 1, 25 | "jsdoc/check-syntax": 1, 26 | "jsdoc/match-description": 1, 27 | "jsdoc/require-description": ["warn", { 28 | "exemptedBy": ["inheritdoc", "private", "deprecated", "hideconstructor"] 29 | }], 30 | "jsdoc/require-description-complete-sentence": 1, 31 | "jsdoc/require-param": ["warn", { 32 | "exemptedBy": ["inheritdoc", "hideconstructor"] 33 | }], 34 | 
"jsdoc/require-example": ["warn", { 35 | "contexts" : [ 36 | "ClassDeclaration[id.name='UnionReplacer'] > ClassBody > MethodDefinition" 37 | ], 38 | "exemptedBy": ["inheritdoc", "private", "deprecated"] 39 | }], 40 | "jsdoc/require-hyphen-before-param-description": 1 41 | }, 42 | "settings": { 43 | "jsdoc": { 44 | "mode": "jsdoc", 45 | "tagNamePreference": { 46 | "function": "method" 47 | } 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto eol=lf 2 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: test-matrix 2 | on: push 3 | jobs: 4 | build: 5 | runs-on: ${{ matrix.os }} 6 | strategy: 7 | matrix: 8 | os: [ macos-latest, ubuntu-latest, windows-latest ] 9 | steps: 10 | - uses: actions/checkout@v1 11 | - uses: actions/setup-node@v1 12 | - run: npm install 13 | - run: npm run build --if-present 14 | - run: npm test 15 | env: 16 | CI: true 17 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | 8 | # Runtime data 9 | pids 10 | *.pid 11 | *.seed 12 | *.pid.lock 13 | 14 | # Directory for instrumented libs generated by jscoverage/JSCover 15 | lib-cov 16 | 17 | # Coverage directory used by tools like istanbul 18 | coverage 19 | 20 | # nyc test coverage 21 | .nyc_output 22 | 23 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) 24 | .grunt 25 | 26 | # Bower dependency directory (https://bower.io/) 27 | bower_components 28 | 29 | # node-waf configuration 30 | .lock-wscript 31 | 32 | # Compiled binary addons 
(https://nodejs.org/api/addons.html) 33 | build/Release 34 | 35 | # Dependency directories 36 | node_modules/ 37 | jspm_packages/ 38 | 39 | # TypeScript v1 declaration files 40 | typings/ 41 | 42 | # Optional npm cache directory 43 | .npm 44 | 45 | # Optional eslint cache 46 | .eslintcache 47 | 48 | # Optional REPL history 49 | .node_repl_history 50 | 51 | # Output of 'npm pack' 52 | *.tgz 53 | 54 | # Yarn Integrity file 55 | .yarn-integrity 56 | 57 | # dotenv environment variables file 58 | .env 59 | 60 | # next.js build output 61 | .next 62 | 63 | # Build directory 64 | dist 65 | -------------------------------------------------------------------------------- /.jsdoc.conf.js: -------------------------------------------------------------------------------- 1 | const template = env.opts.template || ''; 2 | const targetTs = template.includes('tsd-jsdoc'); 3 | 4 | const sourceIncludes = [ 5 | 'src/UnionReplacer.js', 6 | 'src/MatchingContext.js', 7 | 'src/typedefs.js', 8 | ]; 9 | 10 | if (!targetTs) { 11 | sourceIncludes.push('src/typedefs-tscompat.js'); 12 | } 13 | 14 | module.exports = { 15 | source: { include: sourceIncludes }, 16 | plugins: [ 17 | './node_modules/tsd-jsdoc/dist/plugin', 18 | 'plugins/markdown', 19 | ], 20 | opts: { 21 | package: 'package.json', 22 | readme: 'README.md', 23 | }, 24 | }; 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Orchitech Solutions, s.r.o. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # UnionReplacer 2 | 3 |

4 | test-matrix 5 |

6 | 7 | UnionReplacer provides one-pass global search and replace functionality 8 | using multiple regular expressions and corresponging replacements. 9 | Otherwise the behavior matches `String.prototype.replace(regexp, newSubstr|function)`. 10 | 11 | ## Outline 12 | 13 | ### Installation and usage 14 | 15 | In browsers: 16 | ```html 17 | '; 80 | console.log(htmlEscaper.replace(toBeHtmlEscaped)); 81 | ``` 82 | Output: 83 | ``` 84 | <script>alert("inject & control")</script> 85 | ``` 86 | 87 | ### Simple Markdown highlighter 88 | 89 | Highlighting Markdown special characters while preserving code blocks and spans. 90 | Only a subset of Markdown syntax is supported for simplicity. 91 | ~~~js 92 | const mdHighlighter = new UnionReplacer([ 93 | 94 | // opening fence = at least three backticks 95 | // closing fence = opening fence or longer 96 | // regexp backreferences are ideal to match this 97 | [/^(`{3,}).*\n([\s\S]*?)(^\1`*\s*?$|\Z)/, (match, fence1, pre, fence2) => { 98 | let block = `${fence1}
\n` 99 | block += `
${htmlEscaper.replace(pre)}

\n` 100 | block += `${fence2}` 101 | return block; 102 | }], 103 | 104 | // Code spans are delimited by two same-length backtick strings. 105 | // Note that backreferences within the regexp are numbered as usual, 106 | // i.e. \1 still means first capturing group. 107 | // Union replacer renumbers them when composing the final internal regexp. 108 | [/(^|[^`])(`+)(?!`)(.*?[^`]\2)(?!`)/, (match, lead, delim, code) => { 109 | return `${htmlEscaper.replace(lead)}${htmlEscaper.replace(code)}` 110 | }], 111 | 112 | // Subsequent replaces are performed only outside code blocks and spans. 113 | [/[*~=+_-`]+/, '$&'], 114 | [/\n/, '
\n'] 115 | 116 | // HTML entity-like strings would be interpreted too 117 | ].concat(htmlEscapes)); 118 | 119 | const toBeMarkdownHighlighted = '\ 120 | **Markdown** code to be "highlighted"\n\ 121 | with special care to fenced code blocks:\n\ 122 | ````\n\ 123 | _Markdown_ within fenced code blocks is not *processed*:\n\ 124 | ```\n\ 125 | Even embedded "fence strings" work well with **UnionEscaper**\n\ 126 | ```\n\ 127 | ````\n\ 128 | *CommonMark is sweet & cool.*'; 129 | 130 | console.log(mdHighlighter.replace(toBeMarkdownHighlighted)); 131 | ~~~ 132 | Produces: 133 | ~~~ 134 | **Markdown** code to be "highlighted"
135 | with special care to fenced code blocks:
136 | ````
137 |
_Markdown_ within fenced code blocks is not *processed*:
138 | ```
139 | Even embedded "fence strings" work well with **UnionEscaper**
140 | ```
141 | 

142 | ````
143 | *CommonMark is sweet & cool.* 144 | ~~~ 145 | 146 | ### Conservative markdown escaping 147 | 148 | The code below escapes text, so that special Markdown sequences are 149 | protected from interpreting. Two considerations are applied: 150 | 1. Avoid messing the output with too many unnecessary escapings. 151 | 2. GFM autolinks are a special case, as escaping the special chars in them 152 | would cripple the result of rendering. We need to detect them and keep 153 | them untouched. 154 | 155 | ```js 156 | const mdEscaper = new UnionReplacer([ 157 | 158 | // Keep urls untouched (simplified for demonstration purposes). 159 | // The same should apply for GFM email autolinks. 160 | [/\bhttps?:\/\/(?!\.)(?:\.?[\w-]+)+(?:[^\s<]*?)(?=[?!.,:*~]*(?:\s|$))/, '$&'], 161 | 162 | // global backslash escapes 163 | [/[\\*_[\]`&<>]/, '\\$&'], 164 | 165 | // backslash-escape at line start 166 | [/^(?:~~~|=+)/, '\\$&'], 167 | 168 | // strike-through w/o lookbehinds 169 | [/~+/, m => m.length == 2 ? `\\${m}` : m], 170 | 171 | // backslash-escape at line start if followed by space 172 | [/^(?:[-+]|#{1,6})(?=\s)/, '\\$&'], 173 | 174 | // backslash-escape the dot to supress ordered list 175 | [/^(\d+)\.(?=\s)/, '$1\\. '] 176 | ]); 177 | 178 | const toBeMarkdownEscaped = '\ 179 | A five-*starred* escaper:\n\ 180 | 1. Would preserve _underscored_ in the http://example.com/_underscored_/ URL.\n\ 181 | 2. Would also preserve backspaces (\\) in http://example.com/\\_underscored\\_/.'; 182 | 183 | console.log(mdEscaper.replace(toBeMarkdownEscaped)); 184 | ``` 185 | Produces: 186 | ``` 187 | A five-\*starred\* escaper: 188 | 1\. Would preserve \_underscored\_ in the http://example.com/_underscored_/ URL. 189 | 2\. Would also preserve backspaces (\\) in http://example.com/\_underscored\_/. 190 | ``` 191 | 192 | ## Background 193 | 194 | The library has been created to support complex text processing in situations 195 | when certain configurability is desired. 
196 | The initial need occurred when using the [Turndown](https://github.com/domchristie/turndown) 197 | project. It is an excellent and flexible tool, but we faced several hard-to-solve 198 | difficulties with escaping special sequences. 199 | 200 | ### Without `UnionReplacer` 201 | 202 | When text processing with several patterns is required, there are two approaches: 203 | 1. Iterative processing of the full text, such as 204 | ```js 205 | // No UnionEscaper 206 | return unsafe 207 | .replace(/&/g, '&amp;') 208 | .replace(/</g, '&lt;') 209 | .replace(/>/g, '&gt;') 210 | .replace(/"/g, '&quot;') 211 | ``` 212 | The issue is not only the performance. Since the subsequent replacements are 213 | performed on a partially-processed result, the developer has to ensure that 214 | no intermediate steps affect the processing. E.g.: 215 | ```js 216 | // No UnionEscaper 217 | return 'a "tricky" task' 218 | .replace(/"/g, '&quot;') 219 | .replace(/&/g, '&amp;') 220 | // desired: 'a &quot;tricky&quot; task' 221 | // actual: 'a &amp;quot;tricky&amp;quot; task' 222 | ``` 223 | So _'a &quot;tricky&quot; task'_ became _'a &amp;quot;tricky&amp;quot; task'_. This 224 | particular task is manageable by carefully choosing the processing order. 225 | But when the processing is context-dependent, iterative processing becomes 226 | impossible. 227 | 2. One-pass processing using regexp with alternations, which is correct, but 228 | it might easily become overly complex, hard to read and hard to manage. As 229 | one can see, the result seems pretty static and very fragile in terms of 230 | keeping track of all the individual capture groups: 231 | ```js 232 | // No UnionEscaper 233 | const mdHighlightRe = /(^(`{3,}).*\n([\s\S]*?)(^\2`*\s*?$|\Z))|((^|[^`])(`+)(?!`)(.*?[^`]\7)(?!`))|([*~=+_-`]+)|(\n)|(<)|(>)|(")|(&)/gm 234 | return md.replace(mdHighlightRe, 235 | (match, fenced, fence1, pre, fence2, codespan, lead, delim, code, special, nl, lt, gt, quot, amp) => { 236 | if (fenced) { 237 | let block = `${fence1}
\n` 238 | block += `
${htmlEscaper.replace(pre)}

\n` 239 | block += `${fence2}` 240 | return block; 241 | } else if (codespan) { 242 | return `${myHtmlEscape(lead)}${myHtmlEscape.replace(code)}` 243 | } else if (special) { 244 | return `${special}` 245 | } else if (nl) { 246 | return '
\n' 247 | } // else etc. 248 | }); 249 | ``` 250 | 251 | ### Introducing `UnionReplacer` 252 | 253 | Iterative processing is simple and well-readable, though it is very limited. 254 | Developers are often trading simplicity for bugs. 255 | 256 | While regexp with alternations is the way to go, we wanted to provide an easy 257 | way to build it, use it and even allow its variable composition in runtime. 258 | 259 | Instead of using a single long regexp, developers can use an array 260 | of individual smaller regexps, which will be merged together by the 261 | `UnionReplacer` class. Its usage is as simple as in the iterative processing 262 | approach. 263 | 264 | ## Features 265 | 266 | - Fast. The processing is one-pass and native regexps are used. There might 267 | be a tiny resource penalty when initially constructing the internal 268 | compound regexp. 269 | - Supports regexp backreferences. Backreferences in the compound regexp are 270 | renumbered, so that the user does not have to care about it. 271 | - Also supports ES2018 named capture groups. See limitations. 272 | - You can reuse everything used with `String.prototype.replace()`, namely: 273 | - String replacements work exactly the same. 274 | - Function replacements work the same with just a subtle difference for 275 | ES2018 named capture groups. 276 | - Standard regexp alternation semantics. The first replace that matches 277 | consumes the match from input, no matter how long the match is. An example 278 | follows. 
279 | 280 | ### Alternation semantics 281 | 282 | ```js 283 | // The order of replaces is important 284 | const replacer1 = new UnionReplacer([ 285 | [/foo/, '(FOO)'], // when foo is matched, subsequent parts are not examined 286 | [/.+/, '(nonfoo)'] // no matter that this also matches foo 287 | ]); 288 | // replacer1 still eats the rest of the input when foo is not matched 289 | const replacer2 = new UnionReplacer([ 290 | [/foo/, '(FOO)'], 291 | [/.+?(?=foo|$)/, '(nonfoo)'] // non-greedy match up to next foo or line end 292 | ]); 293 | const text = 'foobarfoobaz' 294 | replacer1.replace(text); // (FOO)(nonfoo) 295 | replacer2.replace(text); // (FOO)(nonfoo)(FOO)(nonfoo) 296 | ``` 297 | 298 | ### Performance 299 | 300 | Most important, the code was written with performance in mind. 301 | 302 | In runtime, `UnionReplacer` performs one-pass processing driven by 303 | a single native regexp. 304 | The replacements are always done as an arrow function internally, even for 305 | string replacements. The eventual performance impact of this would be 306 | engine-dependent. 307 | 308 | Feel free to benchmark the library and please share the results. 309 | 310 | ## Limitations 311 | 312 | ### Named capture groups 313 | 314 | ES2018 named capture groups work with the following limitations: 315 | - Replacement functions are always provided with all the named captures, i.e. not limited to the matched rule. 316 | - Capture group names must be unique amongst all capture rules. 317 | 318 | ### Octal escapes 319 | 320 | Not supported. The syntax is the same as backreferences (`\1`) and 321 | their interpretation is input-dependent even in native regexps. 322 | It is better to avoid them completely and use hex escapes instead (`\xNN`). 323 | 324 | ### Regexp flags 325 | 326 | Any flags in particular search regexps are ignored. 327 | The resulting replacement always has the flags from the constructor call, 328 | which default to global (`g`) and multiline (`m`). 
329 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "union-replacer", 3 | "version": "2.0.1", 4 | "description": "One-pass String.prototype.replace-like processor with multiple regexps and replacements", 5 | "main": "dist/union-replacer.cjs.js", 6 | "module": "dist/union-replacer.esm.js", 7 | "browser": "dist/union-replacer.umd.js", 8 | "types": "types", 9 | "scripts": { 10 | "lint": "eslint . && dtslint types", 11 | "build": "rollup -c", 12 | "postbuild": "npm run update-types", 13 | "pretest": "npm run build && npm run lint", 14 | "test": "jasmine JASMINE_CONFIG_PATH=test/jasmine.json", 15 | "prepare": "npm test", 16 | "update-types": "jsdoc -d types -t node_modules/tsd-jsdoc/dist -c .jsdoc.conf.js", 17 | "postupdate-types": "node scripts/tsd-postprocess.js", 18 | "docs": "jsdoc -d dist/docs -c .jsdoc.conf.js", 19 | "version": "npm run build && git add types/index.d.ts" 20 | }, 21 | "repository": { 22 | "type": "git", 23 | "url": "git+https://github.com/orchitech/union-replacer.git" 24 | }, 25 | "devDependencies": { 26 | "@babel/core": "^7.11.6", 27 | "@babel/plugin-transform-runtime": "^7.11.5", 28 | "@babel/preset-env": "^7.11.5", 29 | "dtslint": "^4.0.4", 30 | "eslint": "^7.10.0", 31 | "eslint-config-airbnb-base": "^14.2.0", 32 | "eslint-plugin-import": "^2.22.1", 33 | "eslint-plugin-jsdoc": "^30.6.1", 34 | "jasmine": "^3.6.1", 35 | "jasmine-diff": "^0.1.3", 36 | "jsdoc": "^3.6.6", 37 | "rollup": "^2.28.2", 38 | "rollup-plugin-babel": "^4.4.0", 39 | "rollup-plugin-node-resolve": "^5.2.0", 40 | "tsd-jsdoc": "^2.5.0" 41 | }, 42 | "keywords": [ 43 | "regexp", 44 | "replace", 45 | "union", 46 | "alternation" 47 | ], 48 | "files": [ 49 | "dist", 50 | "types/index.d.ts" 51 | ], 52 | "author": "Martin Cizek ", 53 | "license": "MIT", 54 | "bugs": { 55 | "url": "https://github.com/orchitech/union-replacer/issues" 56 | }, 
57 | "homepage": "https://github.com/orchitech/union-replacer#readme" 58 | } 59 | -------------------------------------------------------------------------------- /rollup.config.js: -------------------------------------------------------------------------------- 1 | import resolve from 'rollup-plugin-node-resolve'; 2 | import babel from 'rollup-plugin-babel'; 3 | import pkg from './package.json'; 4 | 5 | export default [ 6 | { 7 | input: 'src/UnionReplacer.js', 8 | plugins: [ 9 | resolve(), 10 | babel({ 11 | exclude: 'node_modules/**', 12 | comments: false, 13 | presets: [ 14 | [ 15 | '@babel/env', { 16 | modules: false, 17 | targets: { 18 | browsers: '> 1%, IE 11, not op_mini all, not dead', 19 | node: 8, 20 | esmodules: false, 21 | }, 22 | useBuiltIns: false, 23 | loose: true, 24 | }, 25 | ], 26 | ], 27 | }), 28 | ], 29 | output: [ 30 | { file: pkg.main, format: 'cjs' }, 31 | { file: pkg.module, format: 'es' }, 32 | { file: pkg.browser, name: 'UnionReplacer', format: 'umd' }, 33 | ], 34 | }, 35 | ]; 36 | -------------------------------------------------------------------------------- /scripts/lib/tsdConvertTupleArrays.js: -------------------------------------------------------------------------------- 1 | // While TypesSript compiler might be a tool-of-choice for this task, 2 | // we've indeed decided to let UnionReplacer show off its power. 
3 | 4 | const UnionReplacer = require('../../dist/union-replacer.cjs'); 5 | 6 | const TUPLE_ARRAY_TYPE_RE = /^([ \t]*(?:declare\s+)?type\s+(\w+)\s*=\s*)\(([^;\r\n]*)\)\[\];/gm; 7 | const OP_BRACKETS = ['[', '{', '<', '(']; 8 | const CL_BRACKETS = [']', '}', '>', ')']; 9 | const BRACKET_MAP = new Map( 10 | CL_BRACKETS.map((bracket, index) => [bracket, OP_BRACKETS[index]]), 11 | ); 12 | const [OP_BRACKETS_RE, CL_BRACKETS_RE] = [OP_BRACKETS, CL_BRACKETS].map( 13 | (list) => new RegExp(`[${list.map((br) => `\\${br}`).join('')}]`), 14 | ); 15 | const unionToListReplacer = new UnionReplacer([ 16 | [/=>/, '$&'], 17 | [OP_BRACKETS_RE, function opBracket(m) { this.open(m); return m; }], 18 | [CL_BRACKETS_RE, function clBracket(m) { this.close(m); return m; }], 19 | [/\s*\|\s*/, function separator(m) { return this.atRoot() ? ', ' : m; }], 20 | ]); 21 | 22 | /* eslint-disable lines-between-class-members */ 23 | class UnionToListConverter { 24 | constructor() { this.nestLevels = {}; } 25 | open(bracket) { this.nestLevels[bracket] += 1; } 26 | close(bracket) { this.nestLevels[BRACKET_MAP[bracket]] -= 1; } 27 | atRoot() { return Object.values(this.nestLevels).every((count) => count === 0); } 28 | convert(unionTypeDef) { 29 | OP_BRACKETS.forEach((bracket) => { this.nestLevels[bracket] = 0; }); 30 | return unionToListReplacer.replace(unionTypeDef, this); 31 | } 32 | } 33 | 34 | /** 35 | * Convert tuple-like arrays to tuples. 36 | * 37 | * @param {string} tsd - Typescript declarations to perform conversion on. 38 | * @param {RegExp} nameRe - Pattern determining tuple type names to convert. 39 | * @returns {string} Converted typescript declarations. 
40 | */ 41 | function tsdConvertTupleArrays(tsd, nameRe) { 42 | const unionToListConverter = new UnionToListConverter(); 43 | return tsd.replace(TUPLE_ARRAY_TYPE_RE, (m, declarator, name, unionTypeDef) => { 44 | if (!nameRe.test(name)) { 45 | return m; 46 | } 47 | const typeList = unionToListConverter.convert(unionTypeDef); 48 | if (!unionToListConverter.atRoot()) { 49 | throw new SyntaxError(`Unbalanced brackets in union type definition '${unionTypeDef}'`); 50 | } 51 | return `${declarator}[${typeList}];`; 52 | }); 53 | } 54 | 55 | module.exports = tsdConvertTupleArrays; 56 | -------------------------------------------------------------------------------- /scripts/tsd-postprocess.js: -------------------------------------------------------------------------------- 1 | // Postprocess generated type definitions: 2 | // - introduce correctly typed tuples 3 | // - add header and correct whitespace 4 | // - rename to index.d.ts 5 | 6 | const fs = require('fs'); 7 | const path = require('path'); 8 | const pkg = require('../package.json'); 9 | const UnionReplacer = require('../dist/union-replacer.cjs'); 10 | const tsdConvertTupleArrays = require('./lib/tsdConvertTupleArrays'); 11 | 12 | const origFileName = `types/${pkg.name}.d.ts`; 13 | const tsd = fs.readFileSync(origFileName, 'utf8'); 14 | const converted = tsdConvertTupleArrays(tsd, /^ReplaceWith/).replace(/\s*$/, '\n'); 15 | 16 | const intro = [ 17 | `// Generated type definitions for ${pkg.name} ${pkg.version}`, 18 | `// File created by tsd-jsdoc and ${path.relative(process.cwd(), __filename)}.`, 19 | '// Do not modify directly.', 20 | '', 21 | `export = ${UnionReplacer.name};`, 22 | `export as namespace ${UnionReplacer.name};`, 23 | ]; 24 | const output = `${intro.join('\n')}\n\n${converted}`; 25 | fs.writeFileSync('types/index.d.ts', output, 'utf8'); 26 | fs.unlinkSync(origFileName); 27 | -------------------------------------------------------------------------------- /src/MatchingContext.js: 
-------------------------------------------------------------------------------- 1 | const emptyMatchAdvance = (input, index, unicode) => { 2 | if (!unicode || index < 0 || index + 1 >= input.length) { 3 | return 1; 4 | } 5 | const c1 = input.charCodeAt(index); 6 | if (c1 < 0xD800 || c1 > 0xDBFF) { 7 | return 1; 8 | } 9 | const c2 = input.charCodeAt(index + 1); 10 | return c2 < 0xDC00 || c2 > 0xDFFF ? 1 : 2; 11 | }; 12 | 13 | /** 14 | * Encapsulation of matcher variables. 15 | * 16 | * @interface 17 | * @memberof UnionReplacer 18 | */ 19 | class MatchingContext { 20 | /** 21 | * @interface 22 | * @hideconstructor 23 | */ 24 | constructor(replacer) { 25 | /** 26 | * The {@link UnionReplacer} instance being used. 27 | * 28 | * @name UnionReplacer.MatchingContext#replacer 29 | * @type {UnionReplacer} 30 | * @readonly 31 | */ 32 | this.replacer = replacer; 33 | /** 34 | * Last match, as returned by {@link RegExp#exec}. 35 | * 36 | * @name UnionReplacer.MatchingContext#match 37 | * @type {RegExpExecArray|null} 38 | * @readonly 39 | * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/exec#Return_value 40 | */ 41 | this.match = null; 42 | /** @private */ 43 | this.lastIndex = 0; 44 | } 45 | 46 | /** 47 | * Advance matching position `n` characters after the match end position. 48 | * 49 | * @param {number} n - Number of characters to skip. Zero and negative values 50 | * are valid, but introduce risk of infinite processing. It is then user 51 | * responsibility to prevent it. 52 | */ 53 | skip(n) { 54 | this.lastIndex = this.match.index + this.match[0].length + n; 55 | } 56 | 57 | /** 58 | * Set matching position to `n` characters from match start. 59 | * 60 | * @param {number} n - Number of characters jump. Values less than or equal 61 | * to match length are valid, but introduce risk of infinite processing. 62 | * It is then user responsibility to prevent it. 
63 | */ 64 | jump(n) { 65 | this.lastIndex = this.match.index + n; 66 | } 67 | 68 | /** 69 | * Reset matching position according to standard regexp match position advancing. 70 | */ 71 | reset() { 72 | const { index } = this.match; 73 | const mlen = this.match[0].length; 74 | this.lastIndex = index + (mlen > 0 75 | ? mlen 76 | : emptyMatchAdvance(this.match.input, index, this.replacer.regexp.unicode)); 77 | } 78 | 79 | /** 80 | * Determine whether the current match is at the input start. 81 | * 82 | * @returns {boolean} `true` if current match is at input start, `false` otherwise. 83 | */ 84 | atStart() { 85 | return this.match && this.match.index === 0; 86 | } 87 | 88 | /** 89 | * Determine whether the current match is at the input end. 90 | * 91 | * @returns {boolean} `true` if current match is at input end, `false` otherwise. 92 | */ 93 | atEnd() { 94 | const { match } = this; 95 | return match && (match.index + match[0].length >= match.input.length); 96 | } 97 | } 98 | 99 | export default MatchingContext; 100 | -------------------------------------------------------------------------------- /src/ReplacementStringBuilder.js: -------------------------------------------------------------------------------- 1 | /** 2 | * String processing builder that builds a string output in the same way 3 | * how String.prototype.replace implementation does it. 
4 | * 5 | * @implements {UnionReplacer.ReplacementBuilder} 6 | * @private 7 | */ 8 | class ReplacementStringBuilder { 9 | constructor() { 10 | this.output = ''; 11 | } 12 | 13 | addSubjectSlice(subject, start, end) { 14 | this.output += subject.slice(start, end); 15 | } 16 | 17 | addReplacedString(string) { 18 | this.output += string; 19 | } 20 | 21 | build() { 22 | return this.output; 23 | } 24 | } 25 | 26 | export default ReplacementStringBuilder; 27 | -------------------------------------------------------------------------------- /src/UnionReplacer.js: -------------------------------------------------------------------------------- 1 | /*! UnionReplacer 1.0 | orchi.tech | (c) 2019 Orchitech Solutions, s.r.o. | MIT License */ 2 | 3 | /** 4 | * UnionReplacer provides a simple and easy-to-use alternative for more complex lexers. 5 | * From the user perspective, it's just a natural extension of {@link String#replace} 6 | * functionality. The processing is driven by a single native regular expression 7 | * combined from user-supplied patterns, which makes it very efficient. 
8 | */ 9 | 10 | import UnionReplacerElement from './UnionReplacerElement'; 11 | import ReplacementStringBuilder from './ReplacementStringBuilder'; 12 | import MatchingContext from './MatchingContext'; 13 | 14 | // Sum number of capture groups within the provided elements 15 | const countCaptureGroups = (elements) => elements 16 | .reduce((num, element) => num + element.captureCount, 0); 17 | 18 | // Performance-critical 19 | const findMatchingElementEs6 = (elements, fullMatch) => elements 20 | .find((element) => fullMatch[element.captureNum] !== undefined); 21 | // ...but avoid polyfill 22 | const findMatchingElementEs5 = (elements, fullMatch) => { 23 | for (let i = 0; i < elements.length; i++) { 24 | const element = elements[i]; 25 | if (fullMatch[element.captureNum] !== undefined) { 26 | return element; 27 | } 28 | } 29 | return undefined; 30 | }; 31 | const findMatchingElement = Array.prototype.find 32 | ? findMatchingElementEs6 33 | : findMatchingElementEs5; 34 | 35 | function compile() { 36 | this.totalCaptureGroups = countCaptureGroups(this.elements); 37 | const regexpStr = this.elements.length > 0 38 | ? this.elements.map((element) => element.capturePatternStr).join('|') 39 | : '^[^\\s\\S]'; 40 | this.regexp = new RegExp(regexpStr, this.flags); 41 | } 42 | 43 | /** 44 | * Class encapsulating several {@link String#replace}-like replacements 45 | * combined into a single one-pass text processor. 46 | */ 47 | class UnionReplacer { 48 | /** 49 | * Create a UnionReplacer instance performing the specified replaces. 50 | * 51 | * @param {Array} replaces - Replaces to be performed 52 | * specified as an array of {@link UnionReplacer.ReplaceTuple} array tuples. 53 | * The order of elements in `replaces` is important: if any pattern is matched, 54 | * the corresponding amount of input is consumed and subsequent patterns 55 | * will not match on such part of the input. 56 | * @param {string} [flags=gm] - Flags for replacement, defaults to 'gm'. 
57 | * @throws {SyntaxError} Invalid regular expression pattern encountered. This 58 | * currently occurs when named capture groups of the same name are supplied 59 | * in different replacement patterns. 60 | * @throws {SyntaxError} Octal escapes are not allowed in patterns. 61 | * @see {@link https://github.com/orchitech/union-replacer/blob/master/README.md#alternation-semantics|Alternation semantics} 62 | * @example replacer = new UnionReplacer([[/\$foo\b/, 'bar'], [/\\(.)/, '$1']]); 63 | * @example 64 | * // Simple URI encoder 65 | * replacer = new UnionReplacer([ 66 | * [/ /, '+'], 67 | * [/[^\w.,-]/, (m) => `%${m.charCodeAt(0).toString(16)}`], 68 | * ]); 69 | * @example replacer = new UnionReplacer([[/\$foo\b/, 'bar'], [/\\(.)/, '$1']], 'gi'); 70 | * @see RegExp#flags 71 | */ 72 | constructor(replaces, flags = 'gm') { 73 | /** 74 | * @readonly 75 | * @type {string} 76 | */ 77 | this.flags = flags; 78 | /** @private */ 79 | this.elements = []; 80 | replaces.forEach((replace) => { 81 | const element = new UnionReplacerElement(...replace); 82 | element.compile(countCaptureGroups(this.elements) + 1); 83 | this.elements.push(element); 84 | }); 85 | compile.call(this); 86 | } 87 | 88 | /** 89 | * Build the underlying combined regular expression. This method has no effect 90 | * since v2.0, as the builder-like functionality has been removed and underlying 91 | * data structures are prepared in the constructor. 92 | * 93 | * @deprecated Since v2.0. 94 | */ 95 | // eslint-disable-next-line class-methods-use-this 96 | compile() { 97 | } 98 | 99 | /** 100 | * Perform search and replace with the combined patterns and use corresponding 101 | * replacements for the particularly matched patterns. 102 | * 103 | * @method UnionReplacer#replace 104 | * @variation 1 105 | * @param {string} subject - Input to search and process. 
106 | * @param {object} [userCtx={}] - User-provided context to be passed as `this` 107 | * when calling replacement functions and as a parameter of the builder calls. 108 | * @returns {string} New string with the matches replaced. Or any type when a 109 | * custom builder is provided. 110 | */ 111 | 112 | /** 113 | * Perform search and replace with the combined patterns and use corresponding 114 | * replacements for the particularly matched patterns. Pass the resulting chunks 115 | * to an user-provided {@link UnionReplacer.ReplacementBuilder} instead of 116 | * concatenating them into one string. 117 | * 118 | * @variation 2 119 | * @template T 120 | * @param {string} subject - Input to search and process. 121 | * @param {object} userCtx - User-provided context to be passed as `this` when 122 | * calling replacement functions and as a parameter of the builder calls. 123 | * @param {UnionReplacer.ReplacementBuilder} builder - Collects and builds 124 | * the result from unmatched subject slices and replaced matches. A custom 125 | * builder allows for creating arbitrary structures based on matching or 126 | * streaming these chunks without building any output. 127 | * @returns {T} Result built by the builder. 
128 | * @example replacer.replace('foo'); 129 | */ 130 | replace(subject, userCtx = {}, builder = new ReplacementStringBuilder()) { 131 | const ctx = new MatchingContext(this); 132 | // Allow for reentrancy 133 | const savedLastIndex = this.regexp.lastIndex; 134 | try { 135 | this.regexp.lastIndex = 0; 136 | let prevLastIndex = 0; 137 | while ((ctx.match = this.regexp.exec(subject)) !== null) { 138 | const element = findMatchingElement(this.elements, ctx.match); 139 | element.narrowMatch(ctx, this.totalCaptureGroups); 140 | ctx.reset(); 141 | builder.addSubjectSlice(subject, prevLastIndex, ctx.match.index, ctx, userCtx); 142 | const replaced = element.replacementFn.call(userCtx, ctx); 143 | builder.addReplacedString(replaced, ctx, userCtx); 144 | prevLastIndex = Math.min(ctx.match.index + ctx.match[0].length, ctx.lastIndex); 145 | // Also would solve eventual reentrant calls, but needed anyway 146 | this.regexp.lastIndex = ctx.lastIndex; 147 | if (!this.regexp.global) { 148 | break; 149 | } 150 | } 151 | builder.addSubjectSlice(subject, prevLastIndex, subject.length, ctx, userCtx); 152 | return builder.build(); 153 | } finally { 154 | this.regexp.lastIndex = savedLastIndex; 155 | } 156 | } 157 | } 158 | 159 | export default UnionReplacer; 160 | -------------------------------------------------------------------------------- /src/UnionReplacerElement.js: -------------------------------------------------------------------------------- 1 | const createStringReplacer = (replacementStr) => function stringReplacer(ctx) { 2 | const m = ctx.match; 3 | const groups = m.groups || {}; 4 | return replacementStr.replace(/\$([1-9]\d*)|\$([&`'$])|\$<([^\d\s>][^\s>]*)>/g, 5 | (s, capture, special, namedCapture) => { 6 | if (capture && +capture < m.length) { 7 | return m[+capture]; 8 | } 9 | if (special) { 10 | switch (special) { 11 | case '$': return '$'; 12 | case '&': return m[0]; 13 | case '`': return m.input.slice(0, m.index); 14 | case "'": return m.input.slice(m.index + 
m[0].length); 15 | default: throw new Error(); 16 | } 17 | } 18 | if (namedCapture && namedCapture in groups) { 19 | return groups[namedCapture]; 20 | } 21 | return s; 22 | }); 23 | }; 24 | 25 | const wrapStringReplaceFn = (replacementFn) => function callStringReplaceFn(ctx) { 26 | const m = ctx.match; 27 | const info = m.groups ? [m.index, m.input, m.groups] : [m.index, m.input]; 28 | return replacementFn.apply(this, [...m, ...info]); 29 | }; 30 | 31 | /** 32 | * Single pattern and replacement encapsulation. 33 | * 34 | * @private 35 | */ 36 | class UnionReplacerElement { 37 | constructor(pattern, replacement, extended) { 38 | if (pattern.constructor !== RegExp) { 39 | throw new TypeError(`Replacement pattern ${pattern} is not a RegExp.`); 40 | } 41 | this.pattern = pattern; 42 | if (typeof replacement === 'function') { 43 | this.replacementFn = extended 44 | ? replacement 45 | : wrapStringReplaceFn(replacement); 46 | } else { 47 | const replacementStr = String(replacement); 48 | this.replacementFn = createStringReplacer(replacementStr); 49 | } 50 | } 51 | 52 | compile(captureNum) { 53 | let captureCount = 0; 54 | // regexp adapted from https://github.com/slevithan/xregexp 55 | const parts = /(\(\?<)(?=[^!=])|(\()(?!\?)|\\([1-9]\d*)|\\[\s\S]|\[(?:[^\\\]]|\\[\s\S])*\]/g; 56 | const patternStr = this.pattern.source.replace(parts, 57 | (match, parenNamed, paren, backref) => { 58 | if (paren || parenNamed) { 59 | captureCount++; 60 | } else if (backref) { 61 | if (+backref > captureCount) { 62 | throw new SyntaxError(`Octal or backreference to undefined capture group ${backref} in ${this.pattern}`); 63 | } 64 | // renumber backreference 65 | return `\\${+backref + captureNum}`; 66 | } 67 | return match; 68 | }); 69 | this.captureNum = captureNum; 70 | this.capturePatternStr = `(${patternStr})`; 71 | this.captureCount = captureCount + 1; 72 | } 73 | 74 | /* eslint-disable no-unused-vars */ 75 | narrowMatch(ctx, totalCaptures) { // eslint-disable-line no-unused-vars 76 
| // Much faster than modifying the match whit `splice()` on V8 77 | const m0 = ctx.match; 78 | const m1 = m0.slice(this.captureNum, this.captureNum + this.captureCount); 79 | m1.index = m0.index; 80 | m1.input = m0.input; 81 | m1.groups = m0.groups; 82 | ctx.match = m1; 83 | } 84 | } 85 | 86 | export default UnionReplacerElement; 87 | -------------------------------------------------------------------------------- /src/typedefs-tscompat.js: -------------------------------------------------------------------------------- 1 | /** 2 | * [`RegExp.prototype.exec`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/exec#Return_value) return value. 3 | * 4 | * @typedef {Array} RegExpExecArray 5 | * @property {number} index Index of the match in the string. 6 | * @property {string} input The string that was matched against. 7 | * @property {?object} groups Named capture groups. 8 | */ 9 | 10 | /** 11 | * Generic type. 12 | * 13 | * @typedef {*} T 14 | */ 15 | -------------------------------------------------------------------------------- /src/typedefs.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Replacement callback function, as [defined for `String.prototype.replace`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace#Specifying_a_function_as_a_parameter). 3 | * 4 | * @callback UnionReplacer.StringReplaceCb 5 | * @param {string} substring 6 | * @param {...*} args 7 | * @returns {string} 8 | */ 9 | 10 | /** 11 | * Extended replacement callback function that provides more options during processing. 12 | * 13 | * @callback UnionReplacer.ExtendedReplaceCb 14 | * @param {UnionReplacer.MatchingContext} ctx 15 | * @returns {string} 16 | */ 17 | 18 | /** 19 | * Particular replace with ECMAScript string replacement. 
20 | * 21 | * @typedef UnionReplacer.ReplaceWithString 22 | * @type {Array} 23 | * @property {RegExp} 0 - Particular regexp to match. 24 | * @property {string} 1 - Replacement string, as [defined for `String.prototype.replace`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace#Specifying_a_string_as_a_parameter). 25 | */ 26 | 27 | /** 28 | * Particular replace with ECMAScript callback replacement. 29 | * 30 | * @typedef UnionReplacer.ReplaceWithCb 31 | * @type {Array} 32 | * @property {RegExp} 0 - Particular regexp to match. 33 | * @property {UnionReplacer.StringReplaceCb} 1 - Replacement callback, as [defined for `String.prototype.replace`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace#Specifying_a_function_as_a_parameter). 34 | */ 35 | 36 | /** 37 | * Particular replace with extended callback replacement (UnionReplacer specific). 38 | * 39 | * @typedef UnionReplacer.ReplaceWithExtendedCb 40 | * @type {Array} 41 | * @property {RegExp} 0 - Particular regexp to match. 42 | * @property {UnionReplacer.ExtendedReplaceCb} 1 - Replacement callback accepting 43 | * {@link UnionReplacer.MatchingContext}. 44 | * @property {true} 2 - Flag `true` marking the callback as {@link UnionReplacer.ExtendedReplaceCb}. 45 | */ 46 | 47 | /** 48 | * Particular replace with explicitly set ECMAScript callback replacement. 49 | * Leads to the same behavior as {@link UnionReplacer.ReplaceWithCb}. 50 | * 51 | * @typedef UnionReplacer.ReplaceWithUnextendedCb 52 | * @type {Array} 53 | * @property {RegExp} 0 - Particular regexp to match. 54 | * @property {UnionReplacer.StringReplaceCb} 1 - Replacement callback accepting 55 | * {@link UnionReplacer.MatchingContext}. 56 | * @property {false} 2 - Flag `false` marking the callback as {@link UnionReplacer.StringReplaceCb}. 
57 | */ 58 | 59 | /** 60 | * Particular replace definition similar to {@link String#replace} arguments specified 61 | * as an array (tuple) with the following items: 62 | * 1. RegExp to match. The flags are ignored. 63 | * 2. Replacement string or function to be applied if the pattern matches. 64 | * Replacement strings: 65 | * - Syntax is the same as for {@link String#replace}: 66 | * {@link https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace#Specifying_a_string_as_a_parameter|Specifying a string as a parameter} 67 | * - ES2018 named capture groups follow the proposal syntax `$<name>`. 68 | * Replacement function is by default the {@link String#replace}-style callback: 69 | * - The same as for {@link String#replace}: 70 | * {@link https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace#Specifying_a_function_as_a_parameter|Specifying a function as a parameter} 71 | * - If ES2018 named capture groups are used, their values are passed 72 | * as the last argument just like in the standard JavaScript replacements: 73 | * `(match, p1, ..., pn, offset, string, namedCaptures) => { ... }`. 74 | * Unlike numbered captures that are narrowed for the particular match, 75 | * this extra `namedCaptures` parameter would contain keys for all the named 76 | * capture groups within the replacer and the values of "foreign" named captures 77 | * would be always `undefined`. 78 | * Replacement callback can also be specified as `extended`. Then only one 79 | * parameter is passed, an instance of {@link UnionReplacer.MatchingContext}. 80 | * This variant is more powerful. 81 | * 3. Optional `extended` flag - if true, the {@link UnionReplacer.MatchingContext} 82 | * will be passed to the replacement function instead of {@link String#replace}-ish 83 | * parameters. 
84 | * 85 | * @typedef UnionReplacer.ReplaceTuple 86 | * @type {UnionReplacer.ReplaceWithString| 87 | * UnionReplacer.ReplaceWithCb| 88 | * UnionReplacer.ReplaceWithExtendedCb| 89 | * UnionReplacer.ReplaceWithUnextendedCb} 90 | */ 91 | 92 | /** 93 | * Interface for processors of string chunks during replacement process. 94 | * 95 | * @interface UnionReplacer.ReplacementBuilder 96 | * @template T 97 | */ 98 | 99 | /** 100 | * Process unmatched slice of the input string. 101 | * 102 | * @method UnionReplacer.ReplacementBuilder#addSubjectSlice 103 | * @param {string} subject - String to be processed. 104 | * @param {number} start - Zero-based index at which to begin extraction. 105 | * @param {number} end - Zero-based index before which to end extraction. 106 | * The character at this index will not be included. 107 | * @example builder.addSubjectSlice('example', 1, 2); 108 | */ 109 | 110 | /** 111 | * Process replaced match. 112 | * 113 | * @method UnionReplacer.ReplacementBuilder#addReplacedString 114 | * @param {string} string - String to be processed. 115 | * @example builder.addReplacedString('example'); 116 | */ 117 | 118 | /** 119 | * Build output to be returned by {@link UnionReplacer#replace(2)}. 120 | * 121 | * @method UnionReplacer.ReplacementBuilder#build 122 | * @returns {T} Output to be returned by {@link UnionReplacer#replace(2)}. 
123 | * @example const x = builder.build(); 124 | */ 125 | -------------------------------------------------------------------------------- /test/.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": { 3 | "jasmine": true 4 | }, 5 | "rules": { 6 | "import/no-unresolved": "off", 7 | "no-cond-assign": ["error", "except-parens"] 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /test/jasmine.json: -------------------------------------------------------------------------------- 1 | { 2 | "spec_dir": "test", 3 | "spec_files": [ 4 | "**/*[sS]pec.js", 5 | "!**/*nospec.js" 6 | ], 7 | "stopSpecOnExpectationFailure": false, 8 | "random": false 9 | } 10 | -------------------------------------------------------------------------------- /test/markdown-doc-behavior.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | const jasmineDiff = require('jasmine-diff'); 3 | const UnionReplacer = require('../dist/union-replacer.cjs'); 4 | 5 | const cleanupFencedCodeBlockContents = (contents, indent) => { 6 | let code = contents.trim(); 7 | if (indent.length > 0) { 8 | code = code.replace(new RegExp(`^ {1,${indent.length}}`, 'gm'), ''); 9 | } 10 | return code; 11 | }; 12 | 13 | const itProducesTheClaimedOutputs = (md, opts) => { 14 | let asserted = false; 15 | const docBlockRe = /^( *)(([`~])\3{2,})[ \t]*(.*?)\s+([\s\S]*?)(?:^ *\2\3*[ \t]*$|(?![\s\S]))/gm; 16 | const testify = new UnionReplacer([ 17 | [/^(?:[ \t\w{}.]+=)?\s*\brequire\b.*$/, '// $&'], 18 | [/^[ \t]*(var|const|let)[ \t]*\b/, '/* $1 */ this.'], 19 | [/^[ \t]*console\.log\b(.*?)\s*(?![\s\S])/, (m, logExpr) => { 20 | asserted = true; 21 | return `return ${logExpr};\n`; 22 | }], 23 | ]); 24 | const input = md.replace(/\r?\n/, '\n'); 25 | let m; 26 | let lastRunnableSnippet = false; 27 | let number = 0; 28 | while ((m = docBlockRe.exec(input)) !== null) { 29 
| const indent = m[1]; 30 | const infoStr = m[4]; 31 | const snippet = cleanupFencedCodeBlockContents(m[5], indent); 32 | const snippetToRun = lastRunnableSnippet; 33 | lastRunnableSnippet = false; 34 | 35 | if (opts.snippetExcludeRe.test(snippet)) { 36 | // ignore 37 | } else if (infoStr === 'js') { 38 | lastRunnableSnippet = testify.replace(snippet); 39 | lastRunnableSnippet = asserted && lastRunnableSnippet; 40 | } else if (infoStr === '' && snippetToRun) { 41 | number += 1; 42 | it(`produces the claimed output from snippet number ${number}`, function runSnippet() { 43 | /* eslint-disable-next-line no-new-func */ 44 | const runCode = new Function(`with (this) {\n ${snippetToRun}\n}\n`); 45 | expect(runCode.call(this.subject)).toBe(snippet); 46 | }); 47 | } 48 | } 49 | }; 50 | 51 | const markdownDocBehavior = (file, context, options) => { 52 | let docContext = { ...context }; 53 | const opts = { 54 | stackSnippets: false, 55 | snippetExcludeRe: /^\s*(?:\/\/|\/\*)\s*no\b/i, 56 | ...options, 57 | }; 58 | const refreshDocContext = () => { 59 | if (!opts.stackSnippets) { 60 | docContext = { ...context }; 61 | } 62 | return docContext; 63 | }; 64 | 65 | describe(file, () => { 66 | beforeEach(function prepareForSnippetRun() { 67 | this.subject = refreshDocContext(); 68 | jasmine.addMatchers(jasmineDiff(jasmine, { 69 | colors: true, 70 | inline: true, 71 | })); 72 | }); 73 | const md = fs.readFileSync(file, 'utf8'); 74 | itProducesTheClaimedOutputs(md, opts); 75 | }); 76 | }; 77 | 78 | module.exports = markdownDocBehavior; 79 | -------------------------------------------------------------------------------- /test/matching-context.spec.js: -------------------------------------------------------------------------------- 1 | const UnionReplacer = require('../dist/union-replacer.cjs'); 2 | 3 | describe('MatchingContext', () => { 4 | /** 5 | * Replace match. 6 | * 7 | * @param {object} mctx - Matching context. 8 | * @returns {Array} Matching result. 
9 | * @example const r = new UnionReplacer([[/^x|y|z$/, startEndReplace, true]]); 10 | */ 11 | function startEndReplace(mctx) { 12 | const l = mctx.match[0].length; 13 | const s = mctx.atStart() ? 'S' : '_'; 14 | const e = mctx.atEnd() ? 'E' : '_'; 15 | return `[${s}${e}:${l}]`; 16 | } 17 | it('recognizes start and end', () => { 18 | const r = new UnionReplacer([[/^x|y|z$/, startEndReplace, true]]); 19 | expect(r.replace('xyz')).toBe('[S_:1][__:1][_E:1]'); 20 | }); 21 | it('recognizes start = end match and empty end', () => { 22 | const r = new UnionReplacer([[/x|$/, startEndReplace, true]]); 23 | expect(r.replace('x')).toBe('[SE:1][_E:0]'); 24 | }); 25 | it('recognizes empty start and empty end', () => { 26 | const r = new UnionReplacer([[/^|$/, startEndReplace, true]]); 27 | expect(r.replace('x')).toBe('[S_:0]x[_E:0]'); 28 | }); 29 | it('recognizes empty start and empty on empty string', () => { 30 | const r = new UnionReplacer([[/^|$/, startEndReplace, true]]); 31 | expect(r.replace('')).toBe('[SE:0]'); 32 | }); 33 | }); 34 | -------------------------------------------------------------------------------- /test/readme.spec.js: -------------------------------------------------------------------------------- 1 | const path = require('path'); 2 | const markdownDocBehavior = require('./markdown-doc-behavior'); 3 | const UnionReplacer = require('../dist/union-replacer.cjs'); 4 | 5 | const readme = path.join(__dirname, '..', 'README.md'); 6 | markdownDocBehavior(readme, { UnionReplacer }, { stackSnippets: true }); 7 | -------------------------------------------------------------------------------- /test/union-replacer-matching.spec.js: -------------------------------------------------------------------------------- 1 | const UnionReplacer = require('../dist/union-replacer.cjs'); 2 | 3 | const itMatchesLikeStringReplace = (cases) => { 4 | cases.forEach((c) => { 5 | const name = c.shift(); 6 | it(`${name} like String.prototype.replace`, () => { 7 | const input = 
c.shift(); 8 | expect(new UnionReplacer([c], c[0].flags).replace(input)) 9 | .toBe(input.replace(c[0], c[1])); 10 | }); 11 | }); 12 | }; 13 | 14 | describe('UnionReplacer.prototype.replace matching', () => { 15 | itMatchesLikeStringReplace([ 16 | ['ignores unicode surrogate pairs by default', '=\uD83D\uDC362=', /\b/g, '_'], 17 | ['respects unicode surrogate pairs with the u flag', '=\uD83D\uDC362=', /\b/gu, '_'], 18 | ['works with surrogate pairs at the beggining and end', '\uD83D\uDC362', /\b/gu, '_'], 19 | ['treats leading match', 'abc', /^./g, '_'], 20 | ['treats trailing match', 'abc', /^./g, '_'], 21 | ['treats consecutive matches', 'abc', /./g, '$`'], 22 | ['works when nothing matched', 'abc', /[^\s\S]/g, '_'], 23 | ]); 24 | }); 25 | -------------------------------------------------------------------------------- /test/union-replacer.spec.js: -------------------------------------------------------------------------------- 1 | const UnionReplacer = require('../dist/union-replacer.cjs'); 2 | 3 | const RULES = [ 4 | [/function(withGroup)/, (match, group) => `matched ${group}`], 5 | [/anotherfunc(foo)(?bar)/, (...args) => { 6 | if (args.length !== 6 7 | || args[0] !== 'anotherfuncfoobar' 8 | || args[1] !== 'foo' 9 | || args[2] !== 'bar' 10 | || args[3] !== 0 11 | || args[4] !== 'anotherfuncfoobar' 12 | || typeof (args[5]) !== 'object' 13 | || args[5].named !== 'bar') { 14 | return `Replacement function received incorrect arguments: ${args.toString()}`; 15 | } 16 | return 'ok'; 17 | }], 18 | [/1cd/, 'c$`'], // these two will be replaced to abc*, 19 | [/2cd/, "c$'"], // which will not be matched by the following rule 20 | [/abc/, 'bar'], 21 | [/(def)/, '$1 $2 $'], 22 | [/(#{1,6})/, '\\$1'], 23 | [/(={1,6})-\1/, '\\$1'], 24 | [/(?named)/, '$capture> groups'], 25 | [/(?)empty/, '($) was empty'], 26 | [/(?:noncapture) (groups)/, '$1 are allowed'], 27 | [/(lookahead)(?!foo)/, '$1 is allowed'], 28 | [/(?<=so is )(lookbehind)/, '$1 allowed'], 29 | [/^$/, 'empty string'], 
30 | [/(multiple) (groups)/, '$2 $1 $$1 $&'], 31 | [/^(\d+)\.(?=\s)/, '$1\\.'], 32 | ]; 33 | 34 | const CASES = [ 35 | ['functionwithGroup', 'matched withGroup'], 36 | ['anotherfuncfoobar', 'ok'], 37 | ['ab1cdef', 'abcabef'], // would be 'barabef' if rules were sequential 38 | ['ab2cdef', 'abcefef'], // would be 'barefef' if rules were sequential 39 | ['abc', 'bar'], 40 | ['def', 'def $2 $'], 41 | ['##', '\\##'], 42 | ['==-==', '\\=='], 43 | ['named', 'namedcapture> groups'], 44 | ['empty', '() was empty'], 45 | ['lookahead', 'lookahead is allowed'], 46 | ['noncapture groups', 'groups are allowed'], 47 | ['so is lookbehind', 'so is lookbehind allowed'], 48 | ['', 'empty string'], 49 | ['multiple groups', 'groups multiple $1 multiple groups'], 50 | ['1. before 2. item\n2. after 1. item', '1\\. before 2. item\n2\\. after 1. item'], 51 | ]; 52 | 53 | const FAILS = [ 54 | [[/(ab)\2/, 'Unmatched backreference']], 55 | [[/\123/, 'Octals are not allowed']], 56 | [ 57 | [/(?foo)/, 'Multiple named capture groups'], 58 | [/(?foo)/, 'with the same name are not allowed'], 59 | ], 60 | ]; 61 | 62 | describe('UnionReplacer.js', () => { 63 | CASES.forEach((value) => { 64 | it(`should produce '${value[1]}' for '${value[0]}'`, () => { 65 | const replacer = new UnionReplacer(RULES); 66 | expect(replacer.replace(value[0])).toBe(value[1]); 67 | }); 68 | }); 69 | 70 | FAILS.forEach((value) => { 71 | it(`should fail for '${value}'`, () => { 72 | expect(() => { 73 | const replacer = new UnionReplacer(value); 74 | replacer.replace(''); 75 | }).toThrowError(SyntaxError); 76 | }); 77 | }); 78 | 79 | it('passes through the input when no replaces are set', () => { 80 | const replacer = new UnionReplacer([], 'g'); 81 | expect(replacer.replace('foo')).toBe('foo'); 82 | }); 83 | }); 84 | -------------------------------------------------------------------------------- /types/index.d.ts: -------------------------------------------------------------------------------- 1 | // Generated type 
definitions for union-replacer 2.0.1 2 | // File created by tsd-jsdoc and scripts/tsd-postprocess.js. 3 | // Do not modify directly. 4 | 5 | export = UnionReplacer; 6 | export as namespace UnionReplacer; 7 | 8 | /** 9 | *

Create a UnionReplacer instance performing the specified replaces.

10 | * @example 11 | * replacer = new UnionReplacer([[/\$foo\b/, 'bar'], [/\\(.)/, '$1']]); 12 | * @example 13 | * // Simple URI encoder 14 | * replacer = new UnionReplacer([ 15 | * [/ /, '+'], 16 | * [/[^\w.,-]/, (m) => `%${m.charCodeAt(0).toString(16)}`], 17 | * ]); 18 | * @example 19 | * replacer = new UnionReplacer([[/\$foo\b/, 'bar'], [/\\(.)/, '$1']], 'gi'); 20 | * @param replaces -

Replaces to be performed 21 | * specified as an array of {@link UnionReplacer.ReplaceTuple} array tuples. 22 | * The order of elements in replaces is important: if any pattern is matched, 23 | * the corresponding amount of input is consumed and subsequent patterns 24 | * will not match on such part of the input.

25 | * @param [flags = gm] -

Flags for replacement, defaults to 'gm'.

26 | */ 27 | declare class UnionReplacer { 28 | constructor(replaces: UnionReplacer.ReplaceTuple[], flags?: string); 29 | readonly flags: string; 30 | /** 31 | *

Build the underlying combined regular expression. This method has no effect 32 | * since v2.0, as the builder-like functionality has been removed and underlying 33 | * data structures are prepared in the constructor.

34 | */ 35 | compile(): void; 36 | /** 37 | *

Perform search and replace with the combined patterns and use corresponding 38 | * replacements for the particularly matched patterns.

39 | * @param subject -

Input to search and process.

40 | * @param [userCtx = {}] -

User-provided context to be passed as this 41 | * when calling replacement functions and as a parameter of the builder calls.

42 | * @returns

New string with the matches replaced. Or any type when a 43 | * custom builder is provided.

44 | */ 45 | replace(subject: string, userCtx?: any): string; 46 | /** 47 | *

Perform search and replace with the combined patterns and use corresponding 48 | * replacements for the particularly matched patterns. Pass the resulting chunks 49 | * to a user-provided {@link UnionReplacer.ReplacementBuilder} instead of 50 | * concatenating them into one string.

51 | * @example 52 | * replacer.replace('foo'); 53 | * @param subject -

Input to search and process.

54 | * @param userCtx -

User-provided context to be passed as this when 55 | * calling replacement functions and as a parameter of the builder calls.

56 | * @param builder -

Collects and builds 57 | * the result from unmatched subject slices and replaced matches. A custom 58 | * builder allows for creating arbitrary structures based on matching or 59 | * streaming these chunks without building any output.

60 | * @returns

Result built by the builder.

61 | */ 62 | replace(subject: string, userCtx: any, builder: UnionReplacer.ReplacementBuilder): T; 63 | } 64 | 65 | declare namespace UnionReplacer { 66 | /** 67 | *

Encapsulation of matcher variables.

68 | */ 69 | interface MatchingContext { 70 | /** 71 | *

The {@link UnionReplacer} instance being used.

72 | */ 73 | replacer: UnionReplacer; 74 | /** 75 | *

Last match, as returned by {@link RegExp#exec}.

76 | */ 77 | match: RegExpExecArray | null; 78 | /** 79 | *

Advance matching position n characters after the match end position.

80 | * @param n -

Number of characters to skip. Zero and negative values 81 | * are valid, but introduce risk of infinite processing. It is then the user's 82 | * responsibility to prevent it.

83 | */ 84 | skip(n: number): void; 85 | /** 86 | *

Set matching position to n characters from match start.

87 | * @param n -

Number of characters to jump. Values less than or equal 88 | * to match length are valid, but introduce risk of infinite processing. 89 | * It is then the user's responsibility to prevent it.

90 | */ 91 | jump(n: number): void; 92 | /** 93 | *

Reset matching position according to standard regexp match position advancing.

94 | */ 95 | reset(): void; 96 | /** 97 | *

Determine whether the current match is at the input start.

98 | * @returns

true if current match is at input start, false otherwise.

99 | */ 100 | atStart(): boolean; 101 | /** 102 | *

Determine whether the current match is at the input end.

103 | * @returns

true if current match is at input end, false otherwise.

104 | */ 105 | atEnd(): boolean; 106 | } 107 | /** 108 | *

Replacement callback function, as defined for String.prototype.replace.

109 | */ 110 | type StringReplaceCb = (substring: string, ...args: any[]) => string; 111 | /** 112 | *

Extended replacement callback function that provides more options during processing.

113 | */ 114 | type ExtendedReplaceCb = (ctx: UnionReplacer.MatchingContext) => string; 115 | /** 116 | *

Particular replace with ECMAScript string replacement.

117 | * @property 0 -

Particular regexp to match.

118 | * @property 1 -

Replacement string, as defined for String.prototype.replace.

119 | */ 120 | type ReplaceWithString = [RegExp, string]; 121 | /** 122 | *

Particular replace with ECMAScript callback replacement.

123 | * @property 0 -

Particular regexp to match.

124 | * @property 1 -

Replacement callback, as defined for String.prototype.replace.

125 | */ 126 | type ReplaceWithCb = [RegExp, UnionReplacer.StringReplaceCb]; 127 | /** 128 | *

Particular replace with extended callback replacement (UnionReplacer specific).

129 | * @property 0 -

Particular regexp to match.

130 | * @property 1 -

Replacement callback accepting 131 | * {@link UnionReplacer.MatchingContext}.

132 | * @property 2 -

Flag true marking the callback as {@link UnionReplacer.ExtendedReplaceCb}.

133 | */ 134 | type ReplaceWithExtendedCb = [RegExp, UnionReplacer.ExtendedReplaceCb, true]; 135 | /** 136 | *

Particular replace with explicitly set ECMAScript callback replacement. 137 | * Leads to the same behavior as {@link UnionReplacer.ReplaceWithCb}.

138 | * @property 0 -

Particular regexp to match.

139 | * @property 1 -

Replacement callback, as defined for 140 | * String.prototype.replace.

141 | * @property 2 -

Flag false marking the callback as {@link UnionReplacer.StringReplaceCb}.

142 | */ 143 | type ReplaceWithUnextendedCb = [RegExp, UnionReplacer.StringReplaceCb, false]; 144 | /** 145 | *

Particular replace definition similar to {@link String#replace} arguments specified 146 | * as an array (tuple) with the following items:

147 | *
    148 | *
  1. RegExp to match. The flags are ignored.
  2. 149 | *
  3. Replacement string or function to be applied if the pattern matches. 150 | * Replacement strings: 151 | *
      152 | *
    • Syntax is the same as for {@link String#replace}: 153 | * {@link https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace#Specifying_a_string_as_a_parameter|Specifying a string as a parameter}
    • 154 | *
    • ES2018 named capture groups follow the proposal syntax $<name>. 155 | * Replacement function is by default the {@link String#replace}-style callback:
    • 156 | *
    • The same as for {@link String#replace}: 157 | * {@link https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace#Specifying_a_function_as_a_parameter|Specifying a function as a parameter}
    • 158 | *
    • If ES2018 named capture groups are used, their values are passed 159 | * as the last argument just like in the standard JavaScript replacements: 160 | * (match, p1, ..., pn, offset, string, namedCaptures) => { ... }. 161 | * Unlike numbered captures that are narrowed for the particular match, 162 | * this extra namedCaptures parameter would contain keys for all the named 163 | * capture groups within the replacer and the values of "foreign" named captures 164 | * would be always undefined. 165 | * Replacement callback can also be specified as extended. Then only one 166 | * parameter is passed, an instance of {@link UnionReplacer.MatchingContext}. 167 | * This variant is more powerful.
    • 168 | *
    169 | *
  4. 170 | *
  5. Optional extended flag - if true, the {@link UnionReplacer.MatchingContext} 171 | * will be passed to the replacement function instead of {@link String#replace}-ish 172 | * parameters.
  6. 173 | *
174 | */ 175 | type ReplaceTuple = UnionReplacer.ReplaceWithString | UnionReplacer.ReplaceWithCb | UnionReplacer.ReplaceWithExtendedCb | UnionReplacer.ReplaceWithUnextendedCb; 176 | /** 177 | *

Interface for processors of string chunks during replacement process.

178 | */ 179 | interface ReplacementBuilder { 180 | /** 181 | *

Process unmatched slice of the input string.

182 | * @example 183 | * builder.addSubjectSlice('example', 1, 2); 184 | * @param subject -

String to be processed.

185 | * @param start -

Zero-based index at which to begin extraction.

186 | * @param end -

Zero-based index before which to end extraction. 187 | * The character at this index will not be included.

188 | */ 189 | addSubjectSlice(subject: string, start: number, end: number): void; 190 | /** 191 | *

Process replaced match.

192 | * @example 193 | * builder.addReplacedString('example'); 194 | * @param string -

String to be processed.

195 | */ 196 | addReplacedString(string: string): void; 197 | /** 198 | *

Build output to be returned by {@link UnionReplacer#replace(2)}.

199 | * @example 200 | * const x = builder.build(); 201 | * @returns

Output to be returned by {@link UnionReplacer#replace(2)}.

202 | */ 203 | build(): T; 204 | } 205 | } 206 | -------------------------------------------------------------------------------- /types/test.ts: -------------------------------------------------------------------------------- 1 | import UnionReplacer = require('union-replacer'); 2 | 3 | new UnionReplacer([]); 4 | new UnionReplacer(); // $ExpectError 5 | new UnionReplacer([/foo/, 'bar']); // $ExpectError 6 | new UnionReplacer([['foo', 'bar']]); // $ExpectError 7 | 8 | new UnionReplacer([[/foo/, 'bar']]); 9 | new UnionReplacer([[/foo/, 'bar', true]]); // $ExpectError 10 | new UnionReplacer([[/foo/, 'bar', false]]); // $ExpectError 11 | 12 | new UnionReplacer([[/foo/, (m: string, index: number): string => '']]); 13 | new UnionReplacer([[/foo/, (m: string, index: number): number => 1]]); // $ExpectError 14 | new UnionReplacer([[/foo/, (m: string): string => m, false]]); 15 | new UnionReplacer([[/foo/, (m: string): string => m, true]]); // $ExpectError 16 | 17 | new UnionReplacer([[/foo/, (ctx: UnionReplacer.MatchingContext): string => '', true]]); 18 | new UnionReplacer([[/foo/, (ctx: UnionReplacer.MatchingContext): number => 1, true]]); // $ExpectError 19 | new UnionReplacer([[/foo/, (ctx: UnionReplacer.MatchingContext): string => '', false]]); // $ExpectError 20 | 21 | const replacer: UnionReplacer = new UnionReplacer([ 22 | [/foo/, (m, index) => { 23 | m; // $ExpectType string 24 | return ''; 25 | }], 26 | [/bar/, (ctx: UnionReplacer.MatchingContext) => { 27 | ctx; // $ExpectType MatchingContext 28 | ctx.match; // $ExpectType RegExpExecArray | null 29 | return ''; 30 | }, true], 31 | ]); 32 | 33 | replacer.replace('foobar'); // $ExpectType string 34 | 35 | class MyBuilder implements UnionReplacer.ReplacementBuilder { 36 | addSubjectSlice(subject: string, start: number, end: number) {} 37 | addReplacedString(string: string) {} 38 | build() { return 1; } 39 | } 40 | replacer.replace('foobar', {}, new MyBuilder()); // $ExpectType number 41 | 
-------------------------------------------------------------------------------- /types/tsconfig.json: -------------------------------------------------------------------------------- 1 | 2 | { 3 | "compilerOptions": { 4 | "module": "commonjs", 5 | "lib": [ 6 | "es6" 7 | ], 8 | "noImplicitAny": true, 9 | "noImplicitThis": true, 10 | "strictNullChecks": true, 11 | "strictFunctionTypes": true, 12 | "noEmit": true, 13 | "forceConsistentCasingInFileNames": true, 14 | "baseUrl": ".", 15 | "types": [], 16 | "paths": { "union-replacer": ["."] } 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /types/tslint.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "dtslint/dtslint.json", 3 | "rules": { 4 | "strict-export-declare-modifiers": false, 5 | "no-unnecessary-qualifier": false, 6 | "max-line-length": false, 7 | "no-redundant-jsdoc": false 8 | } 9 | } 10 | --------------------------------------------------------------------------------