├── .travis.yml ├── externs.js ├── test ├── all.js ├── index.html └── tests.js ├── check.sh ├── package.json ├── unresolved.md ├── README.md ├── LICENSE └── RegExp.make.js /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - "stable" 4 | -------------------------------------------------------------------------------- /externs.js: -------------------------------------------------------------------------------- 1 | /** @type{string} @const */ 2 | RegExp.prototype.flags; 3 | 4 | RegExp.make; 5 | -------------------------------------------------------------------------------- /test/all.js: -------------------------------------------------------------------------------- 1 | require('babel/register'); 2 | require('../RegExp.make.js'); 3 | require('./tests.js'); 4 | -------------------------------------------------------------------------------- /check.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | java -jar tools/compiler.jar --language_in=ECMASCRIPT6 --warning_level=VERBOSE --jscomp_error="*" --compilation_level=ADVANCED --js RegExp.make.js --externs externs.js 4 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "regexp-make-js", 3 | "version": "1.0.0", 4 | "description": "An ES6 string template tag for dynamically creating regular expressions.", 5 | "main": "RegExp.make.js", 6 | "directories": { 7 | "test": "test" 8 | }, 9 | "scripts": { 10 | "test": "node test/all.js" 11 | }, 12 | "repository": { 13 | "type": "git", 14 | "url": "git+https://github.com/mikesamuel/regexp-make-js.git" 15 | }, 16 | "author": "Various", 17 | "license": "Apache-2.0", 18 | "bugs": { 19 | "url": "https://github.com/mikesamuel/regexp-make-js/issues" 20 | }, 21 | "homepage": 
"https://github.com/mikesamuel/regexp-make-js#readme", 22 | "dependencies": { 23 | "babel": "^5.8.29" 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /unresolved.md: -------------------------------------------------------------------------------- 1 | # Unresolved Issues 2 | 3 | ## [How should flags be specified](https://github.com/mikesamuel/regexp-make-js/issues/19) 4 | 5 | Syntax | Example 6 | ------ | ------- 7 | Current | `RegExp.make('i')`foo` 8 | Alternate | `RegExp.make`/foo/i` 9 | 10 | ## [Group Indexes when a RegExp with groups is interpolated](https://github.com/mikesamuel/regexp-make-js/issues/1) 11 | 12 | Right now 13 | 14 | ```js 15 | var litRegex = /f(o)o/; 16 | 17 | var regexWithInterpolation = RegExp.make`(bar) ${litRegex} (baz)`; 18 | 19 | var match = regexWithInterpolation.exec('bar foo baz'); 20 | 21 | // How can I reliably extract "baz" from match? 22 | ``` 23 | 24 | Approach | Example 25 | -------- | ------- 26 | Current | match[regexWithInterpolation.templateGroups[2]] 27 | Alternate | ??? -------------------------------------------------------------------------------- /test/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | RegExp.make Tests 4 | 12 | 13 |

RegExp.make Tests

14 |

15 | This library uses ES6 features. It runs in modern Firefoxen. 16 |

17 | 18 | 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # regexp-make-js 2 | `RegExp.make` is an ES6 string template tag for dynamically creating regular expressions. 3 | 4 | ## Usage 5 | 6 | ```javascript 7 | RegExp.make`^${foo},${bar}$` 8 | ``` 9 | 10 | is a `RegExp` instance that matches the whole string (`^...$`) 11 | consisting of a substring matching the value of the expression `foo` 12 | followed by the literal substring `","` followed by a substring 13 | matching the value of the expression `bar`. 14 | 15 | Interpolated expressions like `foo` and `bar` can be strings, or `RegExp` 16 | instances, or other values that are coerced to strings. 17 | 18 | `RegExp` instances are treated like the set of substrings they match 19 | -- their source is not used as a literal string. 20 | 21 | ```javascript 22 | RegExp.make`^${ /fo+/ }$` 23 | ``` 24 | 25 | matches the entire string consisting of `'f'` followed by one or more 26 | `'o'`s; the Kleene + is not treated literally. 27 | 28 | 29 | ## Goals 30 | 31 | This currently uses the subset of EcmaScript 2015 (ES6) that is 32 | implemented on FF >= 39. To see the test visit the 33 | [test page](https://rawgit.com/mikesamuel/regexp-make-js/master/test/) 34 | in your browser using Firefox. 35 | 36 | This is a proposed alternative to 37 | [RegExp.escape](https://github.com/benjamingr/RegExp.escape). 38 | To get simply the equivalent functionality of `RegExp.escape`, 39 | anywhere you would have said 40 | 41 | ```javascript 42 | RegExp.escape(str) 43 | ``` 44 | 45 | you can say instead 46 | 47 | ```javascript 48 | RegExp.make`${str}`.source 49 | ``` 50 | 51 | However, if you do only that you have not gained anything. The 52 | advantage of using the tag is that it can do reliable 53 | context-dependent escaping of the string as interpolated into RegExp 54 | source text. 
Where you might have said, for example, 55 | 56 | ```javascript 57 | const re = new RegExp('^(' + RegExp.escape(str) + ')$'); 58 | ``` 59 | 60 | with `RegExp.make` you can say instead 61 | 62 | ```javascript 63 | const re = RegExp.make`^(${str})$`; 64 | ``` 65 | 66 | ## Expressions 67 | 68 | | Context | Example | String | Numeric | RegExp | 69 | | ------- | ------- | ------ | ------- | ------ | 70 | | Block | `/${...}/` | Treated literally | Treated Literally | With back-references adjusted | 71 | | Charset | `/[^${...}]/` | Individual chars | Individual Chars | All chars in any string matched by the RegExp | 72 | | Count | `/x{1,${...}}/` | Inlined without wrapping | Inlined without wrapping | Inlined without wrapping | 73 | 74 | Interpolated values are treated as atoms so 75 | 76 | ```javascript 77 | RegExp.make`${foo}*` 78 | ``` 79 | 80 | matches any number of the pattern specified by `foo`; it's not just 81 | the last character in that pattern that the Kleene star applies to. 82 | 83 | 84 | ## Flags 85 | 86 | ```javascript 87 | RegExp.make('i')`^${foo}$` 88 | ``` 89 | 90 | applies the `i` flag (case-insensitive) to the RegExp after interpolation happens, 91 | so substrings matched by the expression `foo` are matched case-insensitively. 92 | 93 | 94 | When a case-insensitive `RegExp` is interpolated into a case-sensitive one, the 95 | interpolated one still matches case insensitively. 96 | 97 | ```javascript 98 | RegExp.make`foo-${ /bar/i }` 99 | ``` 100 | 101 | matches `"foo-BAR"` but not `"FOO-BAR"`. 102 | 103 | 104 | 105 | ## Groups 106 | 107 | `RegExp`s produced have the `templateGroups` property set so that if 108 | values specify groups, you can figure out the group index of a group 109 | specified by the template. 
110 | 111 | ```javascript 112 | var re = RegExp.make`${ /(foo)/ }(\d+)`; 113 | // value group ^ ^ template group 1 114 | var match = "foo123".match(re); 115 | match[1] === 'foo'; // Because of /(foo)/ 116 | match[re.templateGroups[1]] === '123'; 117 | ``` 118 | 119 | 120 | ## TODO 121 | 122 | * [The `u` flag](https://mathiasbynens.be/notes/es6-unicode-regex) is not recognized and it should affect how we do case-folding and treat `.`, `\w` character classes, `\u{...}` escapes, etc. 123 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /test/tests.js: -------------------------------------------------------------------------------- 1 | // Using the subset of ES6 currently supported by FF Nightly 42.0a1 (2015-08-01) 2 | // For full ES6: 3 | // * replace "var" below with "let" 4 | 5 | (function () { 6 | "use strict"; 7 | 8 | if (typeof RegExp.make !== 'function') { 9 | return; 10 | } 11 | 12 | // Set up a RegExp subclass so that we can test subclass creation. 13 | function SubRegExp(source, flags) { 14 | const re = new RegExp(source, flags); 15 | Object.setPrototypeOf(re, SubRegExp.prototype); 16 | return re; 17 | } 18 | SubRegExp.prototype = Object.create(RegExp.prototype, { 19 | constructor: { 20 | value: SubRegExp 21 | }, 22 | toString: { 23 | value: function () { 24 | return 'SubRegExp:/' + this.source + '/' + this.flags; 25 | } 26 | } 27 | }); 28 | SubRegExp.make = RegExp.make; 29 | 30 | const test = ( 31 | function testMaker(ctor, flags, x, ...values) { 32 | if ('object' === typeof x && 'raw' in x) { 33 | // Produce a test record if called as a string template. 
34 | const template = x; 35 | return { 36 | ctor: ctor, 37 | flags: flags, 38 | template: template, 39 | values: values.slice() 40 | }; 41 | } else { 42 | var makerCtor = ctor; 43 | var makerFlags = flags; 44 | const args = [x, ...values]; 45 | for (var i = 0, n = args.length; i < n; ++i) { 46 | const arg = args[i]; 47 | switch (typeof arg) { 48 | case 'function': makerCtor = arg; break; 49 | case 'string': makerFlags = arg; break; 50 | } 51 | } 52 | return testMaker.bind(this, makerCtor, makerFlags); 53 | } 54 | } 55 | ).bind(null, RegExp, null); 56 | 57 | /** Python style raw strings. */ 58 | function r(template, ...values) { 59 | if (values.length !== 0) { 60 | throw new Error( 61 | 'Interpolation not allowed into r`...` style raw strings'); 62 | } 63 | return template.raw[0]; 64 | } 65 | 66 | const tests = [ 67 | // No interpolations 68 | [test`^foo$bar$;\n$`, 69 | r`/^foo$bar$;\n$/`], 70 | 71 | // No interpolations but flags 72 | [test('gi')`^foo$bar$;\n$`, 73 | r`/^foo$bar$;\n$/gi`], 74 | // A single string into a block context. 75 | [test`^${ 'foo' }$`, 76 | r`/^(?:foo)$/`], 77 | // Testing transitions between contexts. 78 | [test('i')`^([${ '\\' }${ /[a-z]/ }]{${ 42 }})${ /$/ }`, 79 | r`/^([\\a-z]{42})(?:$)/i`, [0, 1]], 80 | 81 | // We allow numbers in counts and don't wrap with (?:...) since those 82 | // are unnecessary. 83 | // Simply coercing to string will allow [1,2] as a count value here to 84 | // have the intuitive meaning. 85 | // We want to treat empty strings differently here since 86 | [test`x{3,${''}}`, 87 | // can be reasonably 88 | r`/x{3,}/`], 89 | // while if we allowed the empty string to be interpolated as the 90 | // empty string, then we would screw up the way postfix operators 91 | // associate as in 92 | [test`x${''}*`, 93 | // where it would be unintuitive for the * to associate with x. 
94 | r`/x(?:)*/`], 95 | 96 | // Back-reference not scoped to containing RegExp 97 | [test`^(#+)([^#\r\n]*)${ /\1/ }`, 98 | // Can't use r`...` since \1 triggers an octal-escape strict parse error. 99 | '/^(#+)([^#\\r\\n]*)(?:\\1)/', [0, 1, 2]], 100 | // Negated charset into a charset 101 | [test`[${ /[^A-Z]/ }]`, 102 | r`/[\u0000-@\[-\uffff]/`], 103 | // String into a charset 104 | [test`[${ "A-Z" }]`, 105 | r`/[A\-Z]/`], 106 | // String into a negated charset 107 | [test`[^${ "A-Z" }]`, 108 | r`/[^A\-Z]/`], 109 | // Multiple elements into a charset: individual chars, charsets, 110 | // and special groups. 111 | [test`[${ /[a]|([c]|b)|d|_/ }]`, 112 | r`/[_a-d]/`], 113 | // Multiple case-insensitive elements into a charset: individual chars, 114 | // charsets, and special groups. 115 | [test`[${ /[a]|(?:[c]|b)|d|_/i }]`, 116 | r`/[A-D_a-d]/`], 117 | // {1,2} does not contribute chars. 118 | [test`[${ /x{1,2}/ }]`, 119 | r`/[x]/`], 120 | // . does contribute chars. 121 | [test`[${ /.|\r|\n/ }]`, 122 | r`/[\u0000-\u2027\u202a-\uffff]/`], 123 | // Rewrite group indices. 124 | [ 125 | //test`(fo(o))${ /(x)\1(?:\2)/ }bar${ /\1/ }(baz)`, // Octal esc error 126 | { 127 | ctor: RegExp, 128 | template: { raw: ['(fo(o))', 'bar', '(baz)'] }, 129 | flags: '', 130 | values: [/(x)\1(?:\2)/, /\1/] 131 | }, 132 | '/(fo(o))(?:(x)\\3(?:\\2))bar(?:\\1)(baz)/', 133 | // Group 3 -^ comes from an interpolated group. 134 | [0, 1, 2, 4] 135 | ], 136 | // Rewrite template back-references when interrupted. 137 | [ 138 | //test`^(${ /(.*)/ }\n(#+)\n${ /(.*)/ }\n\2)\n`, 139 | { 140 | ctor: RegExp, 141 | flags: '', 142 | template: { raw: ['^(', '\\n(#+)\\n', '\\n\\2)\\n'] }, 143 | values: [ /(.*)/, /(.*)/] 144 | // 0 1 2 <- Template groups 145 | // 0 1 2 3 4 <- Output groups 146 | }, 147 | '/^((?:(.*))\\n(#+)\\n(?:(.*))\\n\\3)\\n/', 148 | [0, 1, 3] 149 | ], 150 | // Test that interpolations break tokens. 151 | // ($x?:x) should not run together into (?:x) when x is empty. 
152 | [test`(${""}?:x)`, 153 | '/((?:)?:x)/', [0, 1]], 154 | [test`(${new RegExp('')}?:x)`, 155 | '/((?:(?:))?:x)/', [0, 1]], 156 | 157 | // Test that interpolation of case-insensitive into case-sensitive 158 | // expands letters. 159 | [test`${ //i }[a-z0-9_]*${ /<\/foo>/ }`, 160 | r`/(?:<[Ff][Oo][Oo]>)[a-z0-9_]*(?:<\/foo>)/`], 161 | 162 | // Test that \b means different things in different contexts. 163 | [test`[${ /[\b\t\n]/ }],[${ /\b|\t|\n/ }]`, 164 | r`/[\u0008-\u000a],[\u0009\u000a]/`], 165 | 166 | // Treat null and undefined like the empty string 167 | [test`${null},${undefined},${NaN},${false},${0}`, 168 | r`/(?:),(?:),(?:NaN),(?:false),(?:0)/`], 169 | 170 | // Test un-bindable back-reference 171 | [test`${ /\1/ }`, r`/(?:(?:))/`], 172 | 173 | // Subclassing of RegExp 174 | [test(SubRegExp)`foo`, 'SubRegExp:/foo/'], 175 | [test(SubRegExp, 'i')`foo`, 'SubRegExp:/foo/i'], 176 | 177 | // TODO: Handle case-folding properly when u flag is present 178 | // TODO: Test interpolation in middle of charset start. 
`[${...}^]` 179 | ]; 180 | 181 | function tableMaker() { 182 | if (typeof document !== 'undefined') { 183 | const el = function (name, parent, opt_text) { 184 | const elem = document.createElement(name); 185 | parent.appendChild(elem); 186 | if (opt_text) { 187 | elem.appendChild(document.createTextNode(opt_text)); 188 | } 189 | return elem; 190 | }; 191 | 192 | const table = el('table', document.body); 193 | const tbody = el('tbody', table); 194 | var hasBodyData = false; 195 | el('tr', tbody); 196 | 197 | const addCell = function (cellTag, text, passFailOpt) { 198 | var row = tbody.lastChild; 199 | var lastCell = row.lastChild; 200 | if (text === null && lastCell) { 201 | lastCell.setAttribute( 202 | 'colspan', 203 | (+lastCell.getAttribute('colspan') || 1) + 1); 204 | } else { 205 | el(cellTag, row, text); 206 | } 207 | }; 208 | 209 | return { 210 | endRow: function (passFailOpt) { 211 | var row = tbody.lastChild; 212 | if (passFailOpt !== undefined) { 213 | row.className += passFailOpt ? ' pass' : ' fail'; 214 | } 215 | el('tr', tbody); 216 | }, 217 | header: addCell.bind(null, 'th'), 218 | cell: addCell.bind(null, 'td'), 219 | endTable: function () { 220 | var row = tbody.lastChild; 221 | if (!row.firstChild) { 222 | tbody.removeChild(row); 223 | } 224 | } 225 | }; 226 | } else { 227 | const tableData = [[]]; 228 | const addCellData = function (header, text) { 229 | tableData[tableData.length - 1].push({ text: text || '', header: header }); 230 | }; 231 | return { 232 | endRow: function () { 233 | tableData.push([]); 234 | }, 235 | header: addCellData.bind(null, true), 236 | cell: addCellData.bind(null, false), 237 | endTable: function () { 238 | if (tableData.length && tableData[tableData.length - 1].length === 0) { 239 | // Drop any empty last row. 
240 | --tableData.length; 241 | } 242 | 243 | var colLengths = []; 244 | tableData.forEach(function (rowData) { 245 | for (var i = 0, n = rowData.length; i < n; ++i) { 246 | colLengths[i] = Math.max(colLengths[i] || 0, rowData[i].text.length); 247 | } 248 | }); 249 | var padding = colLengths.map(function (n) { 250 | var space = ' '; 251 | while (space.length < n) { space += space; } 252 | return space.substring(0, n); 253 | }); 254 | 255 | var rowTexts = tableData.map(function (rowData) { 256 | var cellTexts = rowData.map(function (cellData, cellIndex) { 257 | var cellText = cellData.text; 258 | var isHeader = cellData.header; 259 | var cellPadding = padding[cellIndex]; 260 | var nLeftPadding = isHeader ? (cellPadding.length - cellText.length) >> 1 : 0; 261 | return ( 262 | cellPadding.substring(0, nLeftPadding) + 263 | cellText + 264 | cellPadding.substring(cellText.length + nLeftPadding) 265 | ); 266 | }); 267 | return cellTexts.join(' | '); 268 | }); 269 | 270 | var tableText = rowTexts.join('\n'); 271 | console.log(tableText); 272 | } 273 | }; 274 | } 275 | } 276 | 277 | function stringify(arr) { 278 | var s = ''; 279 | s += '['; 280 | for (var i = 0, n = arr.length; i < n; ++i) { 281 | if (i) { s += ', '; } 282 | const x = arr[i]; 283 | if (x && 'object' === typeof x) { 284 | s += x; 285 | } else { 286 | s += JSON.stringify(x); 287 | } 288 | } 289 | s += ']'; 290 | return s; 291 | } 292 | 293 | const testSummary = tableMaker(); 294 | testSummary.header('string parts'); 295 | testSummary.header('values'); 296 | testSummary.header('expected pattern'); 297 | testSummary.header('expected groups'); 298 | testSummary.endRow(); 299 | testSummary.header(''); 300 | testSummary.header(''); 301 | testSummary.header('actual pattern'); 302 | testSummary.header('actual groups'); 303 | testSummary.endRow(); 304 | var nPassing = 0, nFailing = 0; 305 | const failing = []; 306 | for (var i = 0, n = tests.length; i < n; ++i) { 307 | const [ 308 | { template, values, ctor: 
RegExpCtor, flags }, 309 | expectedPattern, 310 | expectedGroupsOpt 311 | ] = tests[i]; 312 | const expectedGroups = expectedGroupsOpt || [0]; 313 | 314 | const maker = function (template, ...values) { 315 | if (flags != null) { 316 | return RegExpCtor.make(flags)(template, ...values); 317 | } else { 318 | return RegExpCtor.make(template, ...values); 319 | } 320 | }; 321 | var actualPattern, actualGroups; 322 | try { 323 | const re = maker(template, ...values); 324 | actualPattern = re.toString(); 325 | actualGroups = re.templateGroups; 326 | } catch (e) { 327 | actualPattern = '###Error:' + e + '###'; 328 | actualGroups = ['###Error###']; 329 | console.error(e); 330 | } 331 | 332 | var message = '#' + i; 333 | var checkEqual = function (expected, actual) { 334 | if (expected === actual) { return true; } 335 | expected = String(expected); 336 | actual = String(actual); 337 | if (/[^\w ]/.test(expected)) { 338 | expected = JSON.stringify(expected); 339 | } 340 | if (/[^\w ]/.test(actual)) { 341 | actual = JSON.stringify(actual); 342 | } 343 | message += ' : ' + expected + ' != ' + actual; 344 | return false; 345 | }; 346 | 347 | const passPattern = checkEqual(expectedPattern, actualPattern); 348 | const passGroups = checkEqual(actualGroups.join(' '), expectedGroups.join(' ')); 349 | const passAll = passPattern && passGroups; 350 | 351 | testSummary.cell(JSON.stringify(template.raw)); 352 | testSummary.cell(stringify(values)); 353 | testSummary.cell(expectedPattern); 354 | testSummary.cell(expectedGroups.join(' ')); 355 | testSummary.endRow(passAll); 356 | 357 | // Position the actual values below the wanted for easy scanning. 
358 | testSummary.cell(null); 359 | testSummary.cell(null); 360 | testSummary.cell(actualPattern, passPattern); 361 | testSummary.cell(actualGroups.join(' '), passGroups); 362 | 363 | testSummary.endRow(passAll); 364 | 365 | if (passAll) { 366 | ++nPassing; 367 | } else { 368 | ++nFailing; 369 | failing.push(message); 370 | } 371 | } 372 | testSummary.endTable(); 373 | 374 | if (typeof document !== 'undefined') { 375 | document.getElementById('warning').style.display = 'none'; 376 | document.title = (nFailing === 0 ? 'PASS' : 'FAIL') + ' : ' + document.title; 377 | } else { 378 | console.log('PASS:', nPassing); 379 | console.log('FAIL:', nFailing); 380 | failing.forEach(function (message) { 381 | console.error(message); 382 | }); 383 | if (nFailing) { 384 | throw new Error(nFailing + ' test' + (nFailing === 1 ? '' : 's') + ' failed'); 385 | } 386 | } 387 | }()); 388 | -------------------------------------------------------------------------------- /RegExp.make.js: -------------------------------------------------------------------------------- 1 | // Using the subset of ES6 currently supported by FF Nightly 42.0a1 (2015-08-01) 2 | // For full ES6: 3 | // * replace "var" below with "let" 4 | 5 | RegExp.make = (function () { 6 | "use strict"; 7 | 8 | /** @enum{number} */ 9 | const Context = { 10 | /** A context in which any top-level RegExp operator can appear. */ 11 | BLOCK: 0, 12 | /** A context inside a charset. {@code /[HERE]/} */ 13 | CHARSET: 1, 14 | /** A context inside the braces of a repetition count. /x{HERE}/ */ 15 | COUNT: 2 16 | }; 17 | 18 | 19 | /** 20 | * Matches characters that have special meaning at 21 | * the top-level of a RegExp. 22 | */ 23 | const UNSAFE_CHARS_BLOCK = /[\\(){}\[\]\|\?\*\+\^\$\/.]/g; 24 | /** 25 | * Matches characters that have special meaning within 26 | * a RegExp charset. 27 | */ 28 | const UNSAFE_CHARS_CHARSET = /[\[\]\-\\]/g; 29 | 30 | /** 31 | * Encodes the end-point of a character range in a RegExp charset. 
/**
 * Encodes the end-point of a character range in a RegExp charset.
 *
 * @param {number} n a UTF-16 code-unit.
 * @return {string} regexp source suitable for embedding in a charset.
 */
function encodeRangeEndPoint(n) {
  if (0x20 <= n && n <= 0x7e) {
    var ch = String.fromCharCode(n);
    // '^' is not matched by UNSAFE_CHARS_CHARSET, but it negates the whole set
    // when it happens to be the first character emitted, so always escape it.
    if (ch === '^') { return '\\^'; }
    return ch.replace(UNSAFE_CHARS_CHARSET, '\\$&');
  }
  var hex = n.toString(16);
  return '\\u0000'.substring(0, 6 - hex.length) + hex;
}

/**
 * Max code-unit is the maximum UTF-16 code-unit since
 *   /^[\ud800\udc00]$/.test('\ud800\udc00') is false
 * and
 *   /^[\ud800\udc00]$/.test('\ud800') is true.
 * TODO: Take into account 'u' flag.
 */
const MAX_CHAR_IN_RANGE = 0xFFFF;

/**
 * Packs a range into one non-negative number: left end-point in the high
 * 16 bits, span (right - left) in the low 16 bits.
 *
 * The trailing `>>> 0` matters: `left << 16` is a signed 32-bit value, so
 * without it every range starting at 0x8000 or above would be negative,
 * sort before all other ranges and decode to a negative left end-point.
 *
 * @param {number} left inclusive code-unit.
 * @param {number} span right - left.
 * @return {number}
 */
function packRange(left, span) {
  return ((left << 16) | (span & 0xFFFF)) >>> 0;
}

/**
 * A set of UTF-16 code-units stored as a list of ranges.
 * @param {!Array.<number>=} opt_ranges packed ranges (see packRange).
 * @constructor
 */
function CharRanges(opt_ranges) {
  /**
   * A series of ints bit-packed with the minimum in the high 16 bits and
   * the difference between the max and the min in the low 16 bits.
   *
   * The range consisting of the letter 'A' is then [0x00410000] which has
   * the char code for 'A' (65 == 0x41) in the top half, and the difference
   * between the min and max (0) in the lower 16 bits.
   *
   * The range [a-z] is represented as [0x00610019] which has the char code
   * for 'a' (97 == 0x61) in the upper 16 bits, and the difference between
   * min and max (25 == 0x19) in the lower 16 bits.
   *
   * Entries are always stored unsigned (0 .. 0xFFFFFFFF).
   *
   * @private
   * @type {!Array.<number>}
   */
  this.ranges = opt_ranges
    ? opt_ranges.map(function (packed) { return packed >>> 0; })
    : [];
}
/**
 * @this {!CharRanges}
 * @return {boolean} true iff no code-unit is in the set.
 */
CharRanges.prototype.isEmpty = function () {
  return !this.ranges.length;
};
/**
 * Produces a string that has the same meaning in a RegExp charset.
 * Without enclosing square brackets.
 * @override
 * @this {!CharRanges}
 * @return {string}
 */
CharRanges.prototype.toString = function () {
  var s = '';
  const ranges = this.ranges;
  const n = ranges.length;
  for (var i = 0; i < n; ++i) {
    const leftAndSpan = ranges[i];
    const left = leftAndSpan >>> 16;
    const span = leftAndSpan & 0xffff;
    s += encodeRangeEndPoint(left);
    if (span) {
      // Two adjacent code-units are written "ab" rather than "a-b".
      if (span !== 1) { s += '-'; }
      s += encodeRangeEndPoint(left + span);
    }
  }
  return s;
};
/**
 * The minimum code-unit matched, or undefined when the set is empty.
 * Canonicalizes this set in place as a side effect.
 * @this {!CharRanges}
 * @return {number|undefined}
 */
CharRanges.prototype.getMin = function () {
  this.canonicalize();
  const ranges = this.ranges;
  return ranges.length ? (ranges[0] >>> 16) : undefined;
};
/**
 * Adds a range starting at left and going to right, inclusive.
 *
 * @this {!CharRanges}
 * @param {number} left inclusive code-unit
 * @param {number=} opt_right inclusive code-unit.  left is assumed if absent.
 * @return {!CharRanges} this to allow chaining.
 * @throws {RangeError} if an end-point is not an integer in
 *     [0, MAX_CHAR_IN_RANGE] or the range is reversed.
 */
CharRanges.prototype.addRange = function (left, opt_right) {
  // Only a missing right end-point defaults to left; an explicit 0 is kept so
  // that a reversed range such as (5, 0) is rejected below.
  var right = opt_right == null ? left : opt_right;
  left = +left;
  right = +right;
  // The negated comparisons also reject NaN, which compares false to
  // everything and has a falsy `% 1`.
  if (!(left >= 0) || !(right <= MAX_CHAR_IN_RANGE) || !(left <= right)
      || left % 1 !== 0 || right % 1 !== 0) {
    throw new RangeError('Invalid character range ' + left + '-' + right);
  }
  this.ranges.push(packRange(left, right - left));
  return this;
};
/**
 * Adds the given ranges to this.
 * Modifies this in place making it the union of its prior value and ranges.
 *
 * @this {!CharRanges}
 * @param {CharRanges} ranges
 * @return {!CharRanges} this to allow chaining.
 */
CharRanges.prototype.addAll = function (ranges) {
  if (ranges && ranges !== this) {
    Array.prototype.push.apply(this.ranges, ranges.ranges);
  }
  return this;
};
/**
 * @this {!CharRanges}
 * @return {!CharRanges} [\u0000-\uFFFF] - this.
 *    Allocates a new output, but canonicalizes this set in place first.
 */
CharRanges.prototype.inverse = function () {
  this.canonicalize();
  const ranges = this.ranges;
  const n = ranges.length;
  // One past the right end of the last range seen; start of the next gap.
  var pastLastRight = 0;
  const invertedRanges = [];
  for (var i = 0; i < n; ++i) {
    const leftAndSpan = ranges[i];
    const left = leftAndSpan >>> 16;
    const span = leftAndSpan & 0xFFFF;
    if (pastLastRight < left) {
      invertedRanges.push(packRange(pastLastRight, left - pastLastRight - 1));
    }
    pastLastRight = left + span + 1;
  }
  if (pastLastRight <= MAX_CHAR_IN_RANGE) {
    invertedRanges.push(
      packRange(pastLastRight, MAX_CHAR_IN_RANGE - pastLastRight));
  }
  return new CharRanges(invertedRanges);
};
/**
 * Orders ranges and merges overlapping or adjacent ranges.
 * @this {!CharRanges}
 * @return {!CharRanges} this to allow chaining.
 */
CharRanges.prototype.canonicalize = function () {
  const ranges = this.ranges;
  const n = ranges.length;
  if (!n) { return this; }
  // Packed values are unsigned, so numeric order is order by left end-point.
  ranges.sort(function (a, b) { return a - b; });
  var j = 1;  // Index into ranges past last merged item.
  var lastRight = (ranges[0] >>> 16) + (ranges[0] & 0xFFFF);
  for (var i = 1; i < n; ++i) {
    const leftAndSpan = ranges[i];
    const left = leftAndSpan >>> 16;
    const right = left + (leftAndSpan & 0xFFFF);
    if (lastRight + 1 >= left) {
      // Overlapping or touching: widen the previously emitted range.
      const lastLeft = ranges[j - 1] >>> 16;
      lastRight = Math.max(lastRight, right);
      ranges[j - 1] = packRange(lastLeft, lastRight - lastLeft);
      // Do not increment j.
    } else {
      ranges[j] = leftAndSpan;
      lastRight = right;
      ++j;
    }
  }
  ranges.length = j;
  return this;
};
/**
 * A newly allocated set with those elements in this that fall inside
 * {@code new CharRanges().addRange(min, max)}.
 * @this {!CharRanges}
 * @param {number} min inclusive
 * @param {number} max inclusive
 * @return {!CharRanges} a newly allocated output.  Not modified in place.
 */
CharRanges.prototype.intersectionWithRange = function (min, max) {
  const ranges = this.ranges;
  const intersection = new CharRanges();
  const n = ranges.length;
  for (var i = 0; i < n; ++i) {
    const leftAndSpan = ranges[i];
    const left = leftAndSpan >>> 16;
    const right = left + (leftAndSpan & 0xFFFF);
    if (!(left > max || right < min)) {
      intersection.addRange(Math.max(min, left), Math.min(max, right));
    }
  }
  return intersection;
};
/**
 * The ranges but with each range's left end-point shifted by delta.
 * No bounds check is done: the caller must keep every shifted range inside
 * [0, MAX_CHAR_IN_RANGE].
 * @this {!CharRanges}
 * @param {number} delta
 * @return {!CharRanges} a newly allocated output.  Not modified in place.
 */
CharRanges.prototype.shifted = function (delta) {
  return new CharRanges(
    this.ranges.map(function (x) { return x + (delta << 16); })
  );
};
/**
 * Applies callback to each range.
 * @param {function(number, number)} callback receives left and right inclusive.
 * @this {!CharRanges}
 */
CharRanges.prototype.forEachRange = function (callback) {
  const ranges = this.ranges;
  const n = ranges.length;
  for (var i = 0; i < n; ++i) {
    const leftAndSpan = ranges[i];
    const left = leftAndSpan >>> 16;
    callback(left, left + (leftAndSpan & 0xFFFF));
  }
};
/**
 * Removes every range, leaving the empty set.
 * @this {!CharRanges}
 */
CharRanges.prototype.clear = function () {
  this.ranges.length = 0;
};


/** Compiled tokenizer RegExps keyed by handler signature. */
const TOKENIZERS = new Map();
/**
 * Returns a function that invokes the event handlers below on tokens found in
 * RegExp source.
 *
 * Handlers that are absent have their tokens folded into runs of "other"
 * text where possible.  Back-reference tokens are passed as source text
 * (e.g. '\\2'); the count handler receives (min, max) with max === null for an
 * open-ended count like {2,}.  When parsing starts inside an unclosed count,
 * the continuation text is passed raw to count (or other).
 *
 * @param {{
 *   wholeInput: boolean,
 *   startCharset: (function(string) | undefined),
 *   range: (function(number, number) | undefined),
 *   endCharset: (function(string) | undefined),
 *   bracket: (function(string) | undefined),
 *   operators: (function(string) | undefined),
 *   count: (function(?number, ?number) | undefined),
 *   escape: (function(string) | undefined),
 *   backref: (function(string) | undefined),
 *   other: (function(string) | undefined)
 * }} eventHandler
 * @return {!function(!Context, string):!Context} a function that takes
 *     a start context, and RegExp source, and returns an end context.
 */
function parseRegExpSource(eventHandler) {
  var {
    wholeInput,  // Is the input whole.
    startCharset,
    range,
    endCharset,
    bracket,
    operators,
    count,
    escape,
    backref,
    other: otherOpt
  } = eventHandler;
  /** @type {function(string)} */
  const other = otherOpt || function () {};

  // We compile an efficient regular expression that groups as many things as
  // we don't care about as possible into runs of "other stuff".
  // Every handler that changes the token list must contribute a bit, or two
  // different tokenizers would share one cache slot.
  const signature = 0
    | (wholeInput ? 1 : 0)
    | ((startCharset || endCharset || range) ? 2 : 0)
    | (bracket ? 4 : 0)
    | (operators ? 8 : 0)
    | (escape ? 16 : 0)
    | (backref ? 32 : 0)
    | (count ? 64 : 0);

  var tokenizer = TOKENIZERS.get(signature);
  if (!tokenizer) {
    const tokens = [];
    const careChars = new CharRanges();
    const dontCareTokens = [];
    if (escape || backref) {
      // Without a backref handler, \1..\9 still have to tokenize; they are
      // folded into "other" text instead of failing the whole parse.
      (backref ? tokens : dontCareTokens).push('\\\\[1-9][0-9]*');
      if (escape) {
        // Only lower-case \x and \u introduce hex escapes.  The final
        // alternative keeps \x, \u, \X, \U without hex digits as identity
        // escapes.
        tokens.push('\\\\(?:x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|[^1-9])');
      } else {
        dontCareTokens.push('\\\\[^1-9]');
      }
    } else {
      dontCareTokens.push('\\\\[\\s\\S]');
    }
    careChars.addRange('\\'.charCodeAt(0));

    // If we have the whole input, and don't need to report charsets, then we
    // can include them in dontCareTokens.
    (
      (startCharset || endCharset || range || !wholeInput)
        ? tokens : dontCareTokens
    ).push(
      '\\[(?:[^\\]\\\\]|\\\\[\\S\\s])*\\]?'
    );
    careChars.addRange('['.charCodeAt(0));

    // Reasoning is similar to charset above.
    (
      (count || !wholeInput)
        ? tokens : dontCareTokens
    ).push(
      '[{]\\d*(?:,\\d*)?[}]?'
    );
    careChars.addRange('{'.charCodeAt(0));

    if (bracket) {
      // "(?<=" and "(?<!" are lookbehinds, not capturing groups, so they are
      // tokenized together with their prefix like the other non-capturing
      // forms.  A named group "(?<name>" still yields a bare "(".
      tokens.push('[(](?:[?](?:[:=!]|<[=!]))?|[)]');
      careChars.addRange('('.charCodeAt(0))
        .addRange(')'.charCodeAt(0));
    }

    const operatorChars = '$^*+?|.';
    if (operators) {
      tokens.push(
        '[' + operatorChars.replace(UNSAFE_CHARS_CHARSET, '\\$&') + ']');
      for (var k = 0, nOpChars = operatorChars.length; k < nOpChars; ++k) {
        careChars.addRange(operatorChars.charCodeAt(k));
      }
    }

    // I really wish we had a nice way of composing regular expressions.
    dontCareTokens.push('[' + careChars.inverse() + ']');
    tokens.push('(?:' + dontCareTokens.join('|') + ')+');
    tokenizer = new RegExp(tokens.join('|'), 'g');
    TOKENIZERS.set(signature, tokenizer);
  }

  return function (startContext, source) {
    var blockSource = String(source);
    var outputContext = startContext;
    switch (startContext) {
      case Context.CHARSET: {
        // Strip off the rest of the unclosed CHARSET, dispatch it,
        // and switch to block context if it is closed here.
        /** @type {?Array.<string>} */
        const match = blockSource.match(/^(?:[^\]\\]|\\[\S\s])*?\]/);
        var consumed;     // Everything that belongs to the charset.
        var charsetBody;  // The same without the closing bracket.
        if (match) {
          outputContext = Context.BLOCK;
          consumed = match[0];
          charsetBody = consumed.substring(0, consumed.length - 1);
        } else {
          consumed = blockSource;
          charsetBody = blockSource;
        }
        blockSource = blockSource.substring(consumed.length);
        if (range) {
          parseCharsetRanges(range, charsetBody);
        } else if (!endCharset) {
          other(consumed);
        }
        if (match && endCharset) {
          // Same argument as for a charset closed in block context.
          endCharset(']');
        }
        break;
      }
      case Context.COUNT: {
        /** @type {number} */
        const rcurly = blockSource.indexOf('}');
        const hasCurly = rcurly >= 0;
        /** @type {number} */
        const end = hasCurly ? rcurly + 1 : blockSource.length;
        (count || other)(blockSource.substring(0, end));
        blockSource = blockSource.substring(end);
        if (hasCurly) {
          outputContext = Context.BLOCK;
        }
        break;
      }
    }

    /** @type {!Array.<string>} */
    const sourceTokens = blockSource.match(tokenizer) || [];
    /** @type {number} */
    const nSourceTokens = sourceTokens.length;

    // Assert that our tokenizer matched the whole input.
    var totalSourceTokenLength = 0;
    var i;
    for (i = 0; i < nSourceTokens; ++i) {
      totalSourceTokenLength += sourceTokens[i].length;
    }
    if (blockSource.length !== totalSourceTokenLength) {
      throw new Error(
        'Failed to tokenize ' + blockSource + ' with ' + tokenizer + '. Got '
        + JSON.stringify(sourceTokens) + ' which have a length delta of '
        + (blockSource.length - totalSourceTokenLength));
    }

    for (i = 0; i < nSourceTokens; ++i) {
      /** @type {string} */
      const sourceToken = sourceTokens[i];
      switch (sourceToken[0]) {
        case '[': {
          // A charset token can only be unclosed when it is the last token.
          /** @type {boolean} */
          const isClosed = (
            i + 1 < nSourceTokens || /(?:^|[^\\])(?:\\\\)*\]$/.test(sourceToken)
          );
          if (!isClosed) {
            outputContext = Context.CHARSET;
          }
          if (startCharset || range) {
            const start = sourceToken[1] === '^' ? '[^' : '[';
            if (startCharset) {
              startCharset(start);
            }
            if (range) {
              /** @type {number} */
              const endPos = sourceToken.length + (isClosed ? -1 : 0);
              parseCharsetRanges(
                range, sourceToken.substring(start.length, endPos));
            }
          } else if (!endCharset) {
            other(sourceToken);
          }
          if (isClosed && endCharset) {
            endCharset(']');
          }
          break;
        }
        case '\\': {
          /** @type {string} */
          const ch1 = sourceToken[1];
          (('1' <= ch1 && ch1 <= '9' ? backref : escape) || other)(sourceToken);
          break;
        }
        case '(': case ')':
          (bracket || other)(sourceToken);
          break;
        case '+': case '*': case '?': case '.': case '|': case '^': case '$':
          (operators || other)(sourceToken);
          break;
        case '{': {
          if (count) {
            /** @type {?Array.<string>} */
            const minMaxMatch = /^\{(\d*)(?:,(\d*))?/.exec(sourceToken);
            const min = minMaxMatch ? +minMaxMatch[1] : 0;
            var max;
            if (!minMaxMatch || minMaxMatch[2] === undefined) {
              max = min;              // {n}
            } else if (minMaxMatch[2] === '') {
              max = null;             // {n,} has no upper bound.
            } else {
              max = +minMaxMatch[2];  // {n,m}
            }
            count(min, max);
          } else {
            other(sourceToken);
          }
          if (i + 1 === nSourceTokens
              && sourceToken[sourceToken.length - 1] !== '}') {
            outputContext = Context.COUNT;
          }
          break;
        }
        default:
          other(sourceToken);
      }
    }

    return outputContext;
  };
}

/** Maps template literals to information derived from them. */
const STATIC_INFO_CACHE = new WeakMap();
/**
 * Derives, once per template literal, the information about its literal
 * parts that does not depend on the interpolated values:
 *   {
 *     contexts: [...],
 *     templateGroupCounts: [...],
 *     splitLiterals: [...],
 *   }
 *
 * contexts[i] is the context in which values[i] is interpolated, i.e. the
 * context reached after parsing raw[0..i].
 *
 * templateGroupCounts[i] is the number of capturing groups opened in raw[i].
 *
 * splitLiterals[i] is raw[i] split around back-references, with each
 * back-reference replaced by the index it refers to, so the literal chunk
 * 'foo\2bar' becomes ['foo', 2, 'bar'].
 *
 * Results are memoized in STATIC_INFO_CACHE keyed by the raw array.
 *
 * @param {!Array.<string>} raw template literal parts.
 * @return {!{contexts : !Array.<Context>,
 *            templateGroupCounts : !Array.<number>,
 *            splitLiterals : !Array.<!Array.<(string|number)>>}}
 */
function getStaticInfo(raw) {
  const cached = STATIC_INFO_CACHE.get(raw);
  if (cached) { return cached; }

  const info = {
    contexts: [],
    templateGroupCounts: [],
    splitLiterals: []
  };

  // Accumulators for the literal part currently being parsed.
  var groupsInPart = 0;
  var pieces = [];

  // Appends text, merging it into a preceding text piece if there is one.
  const appendText = function (text) {
    const last = pieces.length - 1;
    if (last >= 0 && typeof pieces[last] === 'string') {
      pieces[last] += text;
    } else {
      pieces.push(text);
    }
  };

  /** @type {function(!Context, string):!Context} */
  const parse = parseRegExpSource({
    wholeInput: false,
    bracket: function (tok) {
      if (tok === '(') { groupsInPart += 1; }
      appendText(tok);
    },
    backref: function (tok) {
      // Drop the leading backslash and keep the group index as a number.
      pieces.push(+tok.substring(1));
    },
    other: appendText
  });

  var context = Context.BLOCK;
  for (var i = 0, n = raw.length; i < n; ++i) {
    context = parse(context, raw[i]);
    info.contexts.push(context);
    info.templateGroupCounts.push(groupsInPart);
    info.splitLiterals.push(pieces);

    groupsInPart = 0;
    pieces = [];
  }

  // No value follows the last literal part, so its end context is unused.
  info.contexts.length--;

  STATIC_INFO_CACHE.set(raw, info);
  return info;
}
/**
 * The characters matched by {@code /./} without the 's' (dotAll) flag.
 * @type {CharRanges}
 */
const DOT_RANGES = new CharRanges()
  .addRange(0xA).addRange(0xD).addRange(0x2028, 0x2029)
  .inverse();

/**
 * @param {string} source the source of a RegExp.
 * @param {string} flags the flags of a RegExp.
 * @return {string} the text of a charset that matches all code-units that
 *     could appear in any string in the language matched by the input.
 *     This is liberal.  For example {@code /ab{0}/} can match the string "a",
 *     but cannot match the string "ab" because of the zero-count.
 *     Lookaheads could similarly contribute characters unnecessarily.
 */
function toCharRanges(source, flags) {
  // We parse the source and try to find all character sets
  // and literal characters, union them.

  // Accumulate all ranges onto charRanges.
  const charRanges = new CharRanges();
  // Non-null only while inside a negated charset; collects what is excluded.
  var negCharRanges = null;
  // With the dotAll flag '.' also matches line terminators, so the liberal
  // answer must cover every code-unit.
  const dotMatchesAll = flags.indexOf('s') >= 0;

  parseRegExpSource(
    {
      wholeInput: true,
      escape: function (esc) {
        addEscapeValueTo(esc, false, charRanges);
      },
      operators: function (s) {
        if (s.indexOf('.') >= 0) {
          if (dotMatchesAll) {
            charRanges.addRange(0, MAX_CHAR_IN_RANGE);
          } else {
            charRanges.addAll(DOT_RANGES);
          }
        }
      },
      // Counts and brackets contribute no characters; the handlers exist so
      // that their punctuation is not reported as literal text.
      count: function (_) {},
      bracket: function (_) {},
      startCharset: function (start) {
        if (start[1] === '^') {
          negCharRanges = new CharRanges();
        }
      },
      endCharset: function (_) {
        if (negCharRanges) {
          charRanges.addAll(negCharRanges.inverse());
          negCharRanges = null;
        }
      },
      range: function (left, right) {
        (negCharRanges || charRanges).addRange(left, right);
      },
      other: function (s) {
        for (var i = 0, n = s.length; i < n; ++i) {
          charRanges.addRange(s.charCodeAt(i));
        }
      }
    })(
      Context.BLOCK,
      source);

  if (flags.indexOf('i') >= 0) {
    // Fold letters.
    caseFold(charRanges);
  }
  charRanges.canonicalize();
  return charRanges.toString();
}


/**
 * Adds other-case forms of any ASCII letters in charRanges.
 * Modifies charRanges in place.
 * @param {CharRanges} charRanges
 */
function caseFold(charRanges) {
  charRanges.canonicalize();
  // TODO: Read spec and figure out what to do with non-ASCII characters.
  // Maybe take flags and look for the 'u' flag.
  /** @type {CharRanges} */
  const upperLetters = charRanges.intersectionWithRange(
    'A'.charCodeAt(0), 'Z'.charCodeAt(0));
  /** @type {CharRanges} */
  const lowerLetters = charRanges.intersectionWithRange(
    'a'.charCodeAt(0), 'z'.charCodeAt(0));
  // ASCII upper- and lower-case letters are exactly 32 code-units apart.
  charRanges.addAll(upperLetters.shifted(+32));
  charRanges.addAll(lowerLetters.shifted(-32));
}

/** An escape sequence that is definitely not a back-reference. */
const ESCAPE_SEQUENCE_PATTERN =
  '\\\\(?:u[\\da-fA-F]{4}|x[\\da-fA-F]{2}|[^1-9]?)';

/**
 * Pattern for the start or end of a character range.
 */
const CHARSET_END_POINT_PATTERN = (
  '(?:'
  + '[^\\\\]'  // Not an escape
  + '|' + ESCAPE_SEQUENCE_PATTERN  // A full normal escape
  + '|\\\\[1-9]'  // Back-references cannot appear in charsets.
  + ')'
);
/**
 * Matches all the atomic parts of a charset: individual characters, groups,
 * and single ranges.
 */
const CHARSET_PARTS_RE = new RegExp(
  '\\\\[DdSsWw]'  // A charset abbreviation
  + '|' + CHARSET_END_POINT_PATTERN
  + '(?:-' + CHARSET_END_POINT_PATTERN + ')?',
  'g'
);
/**
 * Matches a range putting the left of the range in group 1,
 * and the right in group 2.
 * If group 2 is not present, then it is implicitly the same as the left.
 */
const CHARSET_RANGE_RE = new RegExp(
  '(' + CHARSET_END_POINT_PATTERN + ')'
  + '(?:-(' + CHARSET_END_POINT_PATTERN + '))?'
);

/**
 * Space characters that match \s: the ECMAScript WhiteSpace and
 * LineTerminator code-units.
 * U+180E is deliberately absent: it has been a format character rather than
 * a space separator since Unicode 6.3, so current engines do not match it
 * with \s, and listing it would wrongly remove it from \S below.
 * @type {CharRanges}
 */
const SPACE_CHARS = new CharRanges()
  .addRange(0x9, 0xd)
  .addRange(0x20)
  .addRange(0xa0)
  .addRange(0x1680)
  .addRange(0x2000, 0x200a)
  .addRange(0x2028, 0x2029)
  .addRange(0x202f)
  .addRange(0x205f)
  .addRange(0x3000)
  .addRange(0xfeff);
/**
 * Word chars that match \w
 * @type {CharRanges}
 */
const WORD_CHARS = new CharRanges()
  .addRange('A'.charCodeAt(0), 'Z'.charCodeAt(0))
  .addRange('0'.charCodeAt(0), '9'.charCodeAt(0))
  .addRange('a'.charCodeAt(0), 'z'.charCodeAt(0))
  .addRange('_'.charCodeAt(0));
/**
 * Digit chars that match \d
 * @type {CharRanges}
 */
const DIGIT_CHARS = new CharRanges()
  .addRange('0'.charCodeAt(0), '9'.charCodeAt(0));
/**
 * Maps escape sequences whose meaning is not "the character after the
 * backslash" to the code-units they match.
 * @type {!Map.<string, !CharRanges>}
 */
const ESCAPE_SEQ_MAP = new Map([
  ['\\s', SPACE_CHARS],
  ['\\S', SPACE_CHARS.inverse()],
  ['\\w', WORD_CHARS],
  ['\\W', WORD_CHARS.inverse()],
  ['\\d', DIGIT_CHARS],
  ['\\D', DIGIT_CHARS.inverse()],
  // \0 is NUL, not the digit "0".
  ['\\0', new CharRanges().addRange(0x0)],
  ['\\t', new CharRanges().addRange(0x9)],
  ['\\n', new CharRanges().addRange(0xA)],
  ['\\v', new CharRanges().addRange(0xB)],
  ['\\f', new CharRanges().addRange(0xC)],
  ['\\r', new CharRanges().addRange(0xD)],
  // b doesn't appear here since its meaning depends on context.
  // \B is an assertion and matches no characters.
  ['\\B', new CharRanges()]
]);
/**
 * The code-unit corresponding to the end-point of a range.
 * For an escape that denotes several code-units the smallest one is used.
 *
 * @param {string} endPoint a character, escape sequence, or named charset.
 * @return {number|undefined} undefined if the escape matches no characters.
 */
function rangeEndPointToCodeUnit(endPoint) {
  return (
    (endPoint[0] === '\\')
      ? addEscapeValueTo(endPoint, true, new CharRanges()).getMin()
      : endPoint.charCodeAt(0)
  );
}

/** @type {number} */
const SLASH_B_CHAR_CODE = '\b'.charCodeAt(0);
/** Matches exactly one character class escape such as \d or \W. */
const CLASS_ESCAPE_RE = /^\\[DdSsWw]$/;
/**
 * Decodes an escape sequence and adds any ranges it specifies to the given
 * ranges.
 *
 * @param {string} esc an escape sequence.
 * @param {boolean} inCharSet true iff esc appears inside a [...] charset.
 * @param {CharRanges} ranges the output to add to.  Modified in place.
 * @return {CharRanges} ranges, to allow chaining.
 */
function addEscapeValueTo(esc, inCharSet, ranges) {
  var chars = ESCAPE_SEQ_MAP.get(esc);
  if (chars !== undefined) {
    ranges.addAll(chars);
    return ranges;
  }
  var ch1 = esc.charAt(1);
  switch (ch1) {
    case 'u': case 'x': {
      /** @type {number} */
      const cu = parseInt(esc.substring(2 /* strip \x or \u */), 16);
      // \x or \u without hex digits is an identity escape for the letter
      // itself; parseInt yields NaN for it, which must not reach addRange.
      ranges.addRange(cu === cu ? cu : ch1.charCodeAt(0));
      break;
    }
    case 'b':
      // \b is a backspace inside a charset and a zero-width assertion
      // outside of one.
      if (inCharSet) {
        ranges.addRange(SLASH_B_CHAR_CODE);
      }
      break;
    default:
      // Skip a lone backslash (no following character) and \1..\9, which do
      // not stand for the character after the backslash.
      if (ch1 && !('1' <= ch1 && ch1 <= '9')) {
        ranges.addRange(ch1.charCodeAt(0));
      }
  }
  return ranges;
}

/**
 * Applies the given handler to the left and right end-points (inclusive)
 * of the ranges in rangeText.
 *
 * @param {function(number, number)} handler receives 2 code-units.
 * @param {string} rangeText text of a RegExp charSet body.
 */
function parseCharsetRanges(handler, rangeText) {
  /** @type {?Array.<string>} */
  const tokens = rangeText.match(CHARSET_PARTS_RE);
  /** @type {number} */
  const n = tokens ? tokens.length : 0;

  // Reports a single character or escape that is not part of a range.
  function emitAtom(atom) {
    if (atom[0] === '\\') {
      addEscapeValueTo(atom, true, new CharRanges()).forEachRange(handler);
    } else {
      const cu = atom.charCodeAt(0);
      handler(cu, cu);
    }
  }

  const dash = '-'.charCodeAt(0);
  for (var i = 0; i < n; ++i) {
    /** @type {string} */
    const token = tokens[i];
    /** @type {?Array.<string>} */
    const m = CHARSET_RANGE_RE.exec(token);
    if (m && m[2]) {
      if (CLASS_ESCAPE_RE.test(m[1]) || CLASS_ESCAPE_RE.test(m[2])) {
        // A class escape cannot bound a range: in non-unicode patterns
        // [a-\w] is the union of "a", "-" and \w.
        emitAtom(m[1]);
        handler(dash, dash);
        emitAtom(m[2]);
      } else {
        handler(
          rangeEndPointToCodeUnit(m[1]),
          rangeEndPointToCodeUnit(m[2]));
      }
    } else {
      emitAtom(token);
    }
  }
}
/**
 * Adjusts an interpolated RegExp so that it can be interpolated in
 * the context of the template while preserving the meaning of
 * back-references and character sets.
 *
 * @param {string} containerFlags the flags of the RegExp into which source
 *     is being interpolated.
 * @param {string} source the source of a RegExp being interpolated.
 * @param {string} flags associated with source.
 * @param {number} regexGroupCount The number of capturing groups that are
 *     opened before source is interpolated, counting the implicit group 0.
 * @param {!Array.<number>} templateGroups see the documentation for make for
 *     the contract.
 *     It only contains entries for capturing groups opened before the
 *     insertion point.
 *
 * @return {{fixedSource: string, countOfCapturingGroupsInFixedSource: number}}
 */
function fixUpInterpolatedRegExp(
  containerFlags, source, flags, regexGroupCount, templateGroups) {
  // Count capturing groups, and use that to identify and
  // renumber back-references that are in scope.
  var sourceGroupCount = 0;
  var hasBackRef = false;
  // Output pieces: strings, plus numbers standing for back-references that
  // are renumbered once the total group count is known.
  const fixedSource = [];

  function append(tok) { fixedSource.push(tok); }

  const parseHandler = {
    wholeInput: true,
    bracket: function (tok) {
      if (tok === '(') {
        ++sourceGroupCount;
      }
      fixedSource.push(tok);
    },
    other: append
  };

  // Convert back-refs to numbers so we can renumber them below.
  if (regexGroupCount || templateGroups.length) {
    parseHandler.backref = function (tok) {
      hasBackRef = true;
      fixedSource.push(+tok.substring(1));
    };
  }

  const isCaseInsensitive = flags.indexOf('i') >= 0;
  if (isCaseInsensitive && containerFlags.indexOf('i') < 0) {
    // The container is case-sensitive, so spell out both cases:
    // expand literal letters and letters in charsets.
    parseHandler.startCharset = append;
    const ranges = new CharRanges();
    parseHandler.range = function (left, right) {
      ranges.addRange(left, right);
    };
    parseHandler.endCharset = function (s) {
      caseFold(ranges);
      fixedSource.push(ranges.toString(), s);
      ranges.clear();
    };
    parseHandler.other = function (tok) {
      // The first alternative consumes whole escape sequences so that the
      // letters and hex digits inside them (\w, \u00e9, \x4a, \cJ) are left
      // alone; only bare letters are expanded.
      fixedSource.push(tok.replace(
        /\\(?:u[\da-fA-F]{4}|x[\da-fA-F]{2}|c[A-Za-z]|[\s\S])|[A-Za-z]/g,
        function (s) {
          if (s.length === 1) {
            const cu = s.charCodeAt(0) & ~32;
            if (65 <= cu && cu <= 90) {
              return '[' + String.fromCharCode(cu, cu | 32) + ']';
            }
          }
          return s;
        }));
    };
  }

  parseRegExpSource(parseHandler)(Context.BLOCK, source);

  // Rewrite back-references that are out of scope to refer
  // to the template group.
  if (hasBackRef) {
    for (var i = 0, n = fixedSource.length; i < n; ++i) {
      var el = fixedSource[i];
      if ('number' === typeof el) {
        /** @type {number} */
        const backRefIndex = el;
        if (backRefIndex <= sourceGroupCount) {
          // A local reference.  regexGroupCount counts group 0, so the
          // first local group becomes group regexGroupCount of the result.
          el = '\\' + (backRefIndex + regexGroupCount - 1);
        } else if (backRefIndex < templateGroups.length) {
          // A reference to a template group that is in scope.
          el = '\\' + templateGroups[backRefIndex];
        } else {
          // An out of scope back-reference matches the empty string.
          el = '(?:)';
        }
        fixedSource[i] = el;
      }
    }
  }

  return {
    fixedSource: fixedSource.join(''),
    countOfCapturingGroupsInFixedSource: sourceGroupCount
  };
}
/**
 * Builds a RegExp from a template and values to fill the template
 * holes.
 *
 * The result carries a templateGroups array: for each capturing group
 * written in the template itself (index 0 being the whole match), the index
 * of the corresponding group in the produced RegExp.
 *
 * @param {!function(new:RegExp, string, string)} ctor
 *     A constructor that takes a string pattern and flags.
 * @param {string} flags RegExp flags
 * @param {!{raw: !Array.<string>}} template raw is n+1 RegExp parts.
 * @param {...*} values an array of n parts to interpolate between
 *     the end of the corresponding raw part and the start of its follower.
 * @return {!RegExp} an instance of ctor with a templateGroups property.
 */
function make(ctor, flags, template, ...values) {
  /** @type {!Array.<string>} */
  const raw = template.raw;
  var { contexts, templateGroupCounts, splitLiterals } = getStaticInfo(raw);

  /** @type {number} */
  const n = contexts.length;

  var pattern = raw[0];
  // For each group specified in the template, the index of the corresponding
  // group in pattern.
  const templateGroups = [
    0  // Map implicit group 0, the whole match, to itself
  ];
  // The number of groups in the RegExp on pattern so far.
  var regexGroupCount = 1;  // Count group 0.

  // Records the groups opened by literal part i.
  function addTemplateGroups(i) {
    /** @type {number} */
    const count = templateGroupCounts[i];
    for (var j = 0; j < count; ++j) {
      templateGroups.push(regexGroupCount++);
    }
  }
  addTemplateGroups(0);

  for (var i = 0; i < n; ++i) {
    /** @type {Context} */
    const context = contexts[i];
    var value = values[i];
    if (value == null) {
      value = '';
    }
    var subst;
    switch (context) {
      case Context.BLOCK:
        if (value instanceof RegExp) {
          var {
            fixedSource: valueSource,
            countOfCapturingGroupsInFixedSource: valueGroupCount
          } = fixUpInterpolatedRegExp(
            flags, String(value.source), value.flags,
            regexGroupCount, templateGroups);
          subst = '(?:' + valueSource + ')';
          regexGroupCount += valueGroupCount;
        } else {
          subst =
            '(?:' + String(value).replace(UNSAFE_CHARS_BLOCK, '\\$&') + ')';
        }
        break;
      case Context.CHARSET:
        // TODO: We need to keep track of whether we're interpolating
        // into an inverted charset or not.
        subst =
          (value instanceof RegExp)
          ? toCharRanges(String(value.source), String(value.flags))
          : String(value).replace(UNSAFE_CHARS_CHARSET, '\\$&');
        break;
      case Context.COUNT:
        subst = String(value instanceof RegExp ? value.source : value);
    }

    // Register the groups opened by the following literal part before
    // rewriting its back-references, so that `(b)\1` written after an
    // interpolated RegExp still refers to `(b)` instead of being treated as
    // out of scope.  Both counters grow by the same amount, so the
    // "numbering is still the identity" test below is unaffected.
    addTemplateGroups(i + 1);

    var rawLiteralPart = raw[i + 1];
    var splitLiteral = splitLiterals[i + 1];
    // Rewriting is only needed once interpolated groups have shifted the
    // numbering and the part actually contains a back-reference.
    if (regexGroupCount !== templateGroups.length
        && (splitLiteral.length !== 1
            || 'string' !== typeof splitLiteral[0])) {
      /** @type {!Array.<(string|number)>} */
      const splitCopy = splitLiteral.slice(0);
      for (var j = 0, splitLength = splitCopy.length; j < splitLength; ++j) {
        /** @type {string|number} */
        const splitElement = splitCopy[j];
        if ('number' === typeof splitElement) {
          if (splitElement < templateGroups.length) {
            // A reference to a template group that is in scope.
            splitCopy[j] = '\\' + templateGroups[splitElement];
          } else {
            // An out of scope back-reference matches the empty string.
            // We can't just use the empty string, because returning nothing
            // would change the way that postfix operators like * attach.
            splitCopy[j] = '(?:)';
          }
        }
      }
      rawLiteralPart = splitCopy.join('');
    }

    pattern += subst;
    pattern += rawLiteralPart;
  }
  var output = new ctor(pattern, flags);
  output.templateGroups = templateGroups;
  return output;
}
1090 | if ('object' === typeof x && Array.isArray(x.raw)) { 1091 | return make(this, '', x, ...values); 1092 | } 1093 | if ('string' === typeof x && values.length === 0) { 1094 | return make.bind(null, this, x); 1095 | } 1096 | throw new Error('Unexpected arguments ' + JSON.stringify([x, ...values])); 1097 | }; 1098 | })(); 1099 | 1100 | // TODO: Figure out interpolation of charset after - as in `[a-${...}]` 1101 | --------------------------------------------------------------------------------