├── .travis.yml
├── externs.js
├── test
├── all.js
├── index.html
└── tests.js
├── check.sh
├── package.json
├── unresolved.md
├── README.md
├── LICENSE
└── RegExp.make.js
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: node_js
2 | node_js:
3 | - "stable"
4 |
--------------------------------------------------------------------------------
/externs.js:
--------------------------------------------------------------------------------
1 | /** @type{string} @const */
2 | RegExp.prototype.flags;
3 |
4 | RegExp.make;
5 |
--------------------------------------------------------------------------------
/test/all.js:
--------------------------------------------------------------------------------
1 | require('babel/register');
2 | require('../RegExp.make.js');
3 | require('./tests.js');
4 |
--------------------------------------------------------------------------------
/check.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | java -jar tools/compiler.jar --language_in=ECMASCRIPT6 --warning_level=VERBOSE --jscomp_error="*" --compilation_level=ADVANCED --js RegExp.make.js --externs externs.js
4 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "regexp-make-js",
3 | "version": "1.0.0",
4 | "description": "An ES6 string template tag for dynamically creating regular expressions.",
5 | "main": "RegExp.make.js",
6 | "directories": {
7 | "test": "test"
8 | },
9 | "scripts": {
10 | "test": "node test/all.js"
11 | },
12 | "repository": {
13 | "type": "git",
14 | "url": "git+https://github.com/mikesamuel/regexp-make-js.git"
15 | },
16 | "author": "Various",
17 | "license": "Apache-2.0",
18 | "bugs": {
19 | "url": "https://github.com/mikesamuel/regexp-make-js/issues"
20 | },
21 | "homepage": "https://github.com/mikesamuel/regexp-make-js#readme",
22 | "dependencies": {
23 | "babel": "^5.8.29"
24 | }
25 | }
26 |
--------------------------------------------------------------------------------
/unresolved.md:
--------------------------------------------------------------------------------
1 | # Unresolved Issues
2 |
3 | ## [How should flags be specified](https://github.com/mikesamuel/regexp-make-js/issues/19)
4 |
5 | Syntax | Example
6 | ------ | -------
7 | Current | `RegExp.make('i')`foo`
8 | Alternate | `RegExp.make`/foo/i`
9 |
10 | ## [Group Indexes when a RegExp with groups is interpolated](https://github.com/mikesamuel/regexp-make-js/issues/1)
11 |
12 | Right now
13 |
14 | ```js
15 | var litRegex = /f(o)o/;
16 |
17 | var regexWithInterpolation = RegExp.make`(bar) ${litRegex} (baz)`;
18 |
19 | var match = regexWithInterpolation.exec('bar foo baz');
20 |
21 | // How can I reliably extract "baz" from match?
22 | ```
23 |
24 | Approach | Example
25 | -------- | -------
26 | Current | match[regexWithInterpolation.templateGroups[2]]
27 | Alternate | ???
--------------------------------------------------------------------------------
/test/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
RegExp.make Tests
4 |
12 |
13 | RegExp.make Tests
14 |
15 | This library uses ES6 features. It runs in modern Firefoxen.
16 |
17 |
18 |
26 |
27 |
28 |
29 |
30 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # regexp-make-js
2 | `RegExp.make` is an ES6 string template tag for dynamically creating regular expressions.
3 |
4 | ## Usage
5 |
6 | ```javascript
7 | RegExp.make`^${foo},${bar}$`
8 | ```
9 |
10 | is a `RegExp` instance that matches the whole string (`^...$`)
11 | consisting of a substring matching the value of the expression `foo`
12 | followed by the literal substring `","` followed by a substring
13 | matching the value of the expression `bar`.
14 |
15 | Interpolated expressions like `foo` and `bar` can be strings, or `RegExp`
16 | instances, or other values that are coerced to strings.
17 |
18 | `RegExp` instances are treated like the set of substrings they match
19 | -- their source is not used as a literal string.
20 |
21 | ```javascript
22 | RegExp.make`^${ /fo+/ }$`
23 | ```
24 |
25 | matches the entire string consisting of `'f'` followed by one or more
26 | `'o'`s; the Kleene + is not treated literally.
27 |
28 |
29 | ## Goals
30 |
31 | This currently uses the subset of EcmaScript 2015 (ES6) that is
32 | implemented on FF >= 39. To see the tests, visit the
33 | [test page](https://rawgit.com/mikesamuel/regexp-make-js/master/test/)
34 | in your browser using Firefox.
35 |
36 | This is a proposed alternative to
37 | [RegExp.escape](https://github.com/benjamingr/RegExp.escape).
38 | To get simply the equivalent functionality of `RegExp.escape`,
39 | anywhere you would have said
40 |
41 | ```javascript
42 | RegExp.escape(str)
43 | ```
44 |
45 | you can say instead
46 |
47 | ```javascript
48 | RegExp.make`${str}`.source
49 | ```
50 |
51 | However, if you do only that you have not gained anything. The
52 | advantage of using the tag is that it can do reliable
53 | context-dependent escaping of the string as interpolated into RegExp
54 | source text. Where you might have said, for example,
55 |
56 | ```javascript
57 | const re = new RegExp('^(' + RegExp.escape(str) + ')$');
58 | ```
59 |
60 | with `RegExp.make` you can say instead
61 |
62 | ```javascript
63 | const re = RegExp.make`^(${str})$`;
64 | ```
65 |
66 | ## Expressions
67 |
68 | | Context | Example | String | Numeric | RegExp |
69 | | ------- | ------- | ------ | ------- | ------ |
70 | | Block | `/${...}/` | Treated literally | Treated Literally | With back-references adjusted |
71 | | Charset | `/[^${...}]/` | Individual chars | Individual Chars | All chars in any string matched by the RegExp |
72 | | Count | `/x{1,${...}}/` | Inlined without wrapping | Inlined without wrapping | Inlined without wrapping |
73 |
74 | Interpolated values are treated as atoms so
75 |
76 | ```javascript
77 | RegExp.make`${foo}*`
78 | ```
79 |
80 | matches any number of the pattern specified by `foo`; it's not just
81 | the last character in that pattern that the Kleene star applies to.
82 |
83 |
84 | ## Flags
85 |
86 | ```javascript
87 | RegExp.make('i')`^${foo}$`
88 | ```
89 |
90 | applies the `i` flag (case-insensitive) to the RegExp after interpolation happens,
91 | so substrings matched by the expression `foo` are matched case-insensitively.
92 |
93 |
94 | When a case-insensitive `RegExp` is interpolated into a case-sensitive one, the
95 | interpolated one still matches case insensitively.
96 |
97 | ```javascript
98 | RegExp.make`foo-${ /bar/i }`
99 | ```
100 |
101 | matches `"foo-BAR"` but not `"FOO-BAR"`.
102 |
103 |
104 |
105 | ## Groups
106 |
107 | `RegExp`s produced have the `templateGroups` property set so that if
108 | values specify groups, you can figure out the group index of a group
109 | specified by the template.
110 |
111 | ```javascript
112 | var re = RegExp.make`${ /(foo)/ }(\d+)`;
113 | // value group ^ ^ template group 1
114 | var match = "foo123".match(re);
115 | match[1] === 'foo'; // Because of /(foo)/
116 | match[re.templateGroups[1]] === '123';
117 | ```
118 |
119 |
120 | ## TODO
121 |
122 | * [The `u` flag](https://mathiasbynens.be/notes/es6-unicode-regex) is not recognized and it should affect how we do case-folding and treat `.`, `\w` character classes, `\u{...}` escapes, etc.
123 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright {yyyy} {name of copyright owner}
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
203 |
--------------------------------------------------------------------------------
/test/tests.js:
--------------------------------------------------------------------------------
1 | // Using the subset of ES6 currently supported by FF Nightly 42.0a1 (2015-08-01)
2 | // For full ES6:
3 | // * replace "var" below with "let"
4 |
5 | (function () {
6 | "use strict";
7 |
8 | if (typeof RegExp.make !== 'function') {
9 | return;
10 | }
11 |
// A RegExp "subclass" constructor used to exercise subclass creation.
// It builds an ordinary RegExp from (source, flags) and re-parents that
// instance onto SubRegExp.prototype before handing it back, so the result
// is returned whether or not the function is invoked with `new`.
function SubRegExp(source, flags) {
  // Object.setPrototypeOf returns the object it was given.
  return Object.setPrototypeOf(new RegExp(source, flags), SubRegExp.prototype);
}
18 | SubRegExp.prototype = Object.create(RegExp.prototype, {
19 | constructor: {
20 | value: SubRegExp
21 | },
22 | toString: {
23 | value: function () {
24 | return 'SubRegExp:/' + this.source + '/' + this.flags;
25 | }
26 | }
27 | });
28 | SubRegExp.make = RegExp.make;
29 |
// Builds test records.  `test` is `testMaker` pre-bound to the defaults
// (RegExp constructor, null flags).
//
// * Used as a template tag (third argument is an object with a `raw`
//   property) it returns a record { ctor, flags, template, values }.
// * Called with ordinary arguments it returns a new maker in which every
//   function argument replaces the constructor and every string argument
//   replaces the flags (later arguments win); other argument types are
//   ignored.  The result can again be used as a tag or re-configured.
const test = (
  function testMaker(ctor, flags, x, ...values) {
    const calledAsTag = 'object' === typeof x && 'raw' in x;
    if (calledAsTag) {
      // Copy the substitutions so the record owns its own array.
      return {
        ctor: ctor,
        flags: flags,
        template: x,
        values: values.slice()
      };
    }

    var nextCtor = ctor;
    var nextFlags = flags;
    [x, ...values].forEach(function (arg) {
      if (typeof arg === 'function') {
        nextCtor = arg;
      } else if (typeof arg === 'string') {
        nextFlags = arg;
      }
    });
    return testMaker.bind(this, nextCtor, nextFlags);
  }
).bind(null, RegExp, null);
56 |
/**
 * Python style raw strings: a template tag that yields the literal text
 * exactly as written, with backslash sequences left unprocessed.
 * Throws an Error if the template contains any interpolation.
 */
function r(template, ...values) {
  if (values.length === 0) {
    return template.raw[0];
  }
  throw new Error(
    'Interpolation not allowed into r`...` style raw strings');
}
65 |
66 | const tests = [
67 | // No interpolations
68 | [test`^foo\(bar\);\n$`,
69 | r`/^foo\(bar\);\n$/`],
70 |
71 | // No interpolations but flags
72 | [test('gi')`^foo\(bar\);\n$`,
73 | r`/^foo\(bar\);\n$/gi`],
74 | // A single string into a block context.
75 | [test`^${ 'foo' }$`,
76 | r`/^(?:foo)$/`],
77 | // Testing transitions between contexts.
78 | [test('i')`^([${ '\\' }${ /[a-z]/ }]{${ 42 }})${ /$/ }`,
79 | r`/^([\\a-z]{42})(?:$)/i`, [0, 1]],
80 |
81 | // We allow numbers in counts and don't wrap with (?:...) since those
82 | // are unnecessary.
83 | // Simply coercing to string will allow [1,2] as a count value here to
84 | // have the intuitive meaning.
85 | // We want to treat empty strings differently here since
86 | [test`x{3,${''}}`,
87 | // can be reasonably
88 | r`/x{3,}/`],
89 | // while if we allowed the empty string to be interpolated as the
90 | // empty string, then we would screw up the way postfix operators
91 | // associate as in
92 | [test`x${''}*`,
93 | // where it would be unintuitive for the * to associate with x.
94 | r`/x(?:)*/`],
95 |
96 | // Back-reference not scoped to containing RegExp
97 | [test`^(#+)([^#\r\n]*)${ /\1/ }`,
98 | // Can't use r`...` since \1 triggers an octal-escape strict parse error.
99 | '/^(#+)([^#\\r\\n]*)(?:\\1)/', [0, 1, 2]],
100 | // Negated charset into a charset
101 | [test`[${ /[^A-Z]/ }]`,
102 | r`/[\u0000-@\[-\uffff]/`],
103 | // String into a charset
104 | [test`[${ "A-Z" }]`,
105 | r`/[A\-Z]/`],
106 | // String into a negated charset
107 | [test`[^${ "A-Z" }]`,
108 | r`/[^A\-Z]/`],
109 | // Multiple elements into a charset: individual chars, charsets,
110 | // and special groups.
111 | [test`[${ /[a]|([c]|b)|d|_/ }]`,
112 | r`/[_a-d]/`],
113 | // Multiple case-insensitive elements into a charset: individual chars,
114 | // charsets, and special groups.
115 | [test`[${ /[a]|(?:[c]|b)|d|_/i }]`,
116 | r`/[A-D_a-d]/`],
117 | // {1,2} does not contribute chars.
118 | [test`[${ /x{1,2}/ }]`,
119 | r`/[x]/`],
120 | // . does contribute chars.
121 | [test`[${ /.|\r|\n/ }]`,
122 | r`/[\u0000-\u2027\u202a-\uffff]/`],
123 | // Rewrite group indices.
124 | [
125 | //test`(fo(o))${ /(x)\1(?:\2)/ }bar${ /\1/ }(baz)`, // Octal esc error
126 | {
127 | ctor: RegExp,
128 | template: { raw: ['(fo(o))', 'bar', '(baz)'] },
129 | flags: '',
130 | values: [/(x)\1(?:\2)/, /\1/]
131 | },
132 | '/(fo(o))(?:(x)\\3(?:\\2))bar(?:\\1)(baz)/',
133 | // Group 3 -^ comes from an interpolated group.
134 | [0, 1, 2, 4]
135 | ],
136 | // Rewrite template back-references when interrupted.
137 | [
138 | //test`^(${ /(.*)/ }\n(#+)\n${ /(.*)/ }\n\2)\n`,
139 | {
140 | ctor: RegExp,
141 | flags: '',
142 | template: { raw: ['^(', '\\n(#+)\\n', '\\n\\2)\\n'] },
143 | values: [ /(.*)/, /(.*)/]
144 | // 0 1 2 <- Template groups
145 | // 0 1 2 3 4 <- Output groups
146 | },
147 | '/^((?:(.*))\\n(#+)\\n(?:(.*))\\n\\3)\\n/',
148 | [0, 1, 3]
149 | ],
150 | // Test that interpolations break tokens.
151 | // ($x?:x) should not run together into (?:x) when x is empty.
152 | [test`(${""}?:x)`,
153 | '/((?:)?:x)/', [0, 1]],
154 | [test`(${new RegExp('')}?:x)`,
155 | '/((?:(?:))?:x)/', [0, 1]],
156 |
157 | // Test that interpolation of case-insensitive into case-sensitive
158 | // expands letters.
159 | [test`${ /<foo>/i }[a-z0-9_]*${ /<\/foo>/ }`,
160 | r`/(?:<[Ff][Oo][Oo]>)[a-z0-9_]*(?:<\/foo>)/`],
161 |
162 | // Test that \b means different things in different contexts.
163 | [test`[${ /[\b\t\n]/ }],[${ /\b|\t|\n/ }]`,
164 | r`/[\u0008-\u000a],[\u0009\u000a]/`],
165 |
166 | // Treat null and undefined like the empty string
167 | [test`${null},${undefined},${NaN},${false},${0}`,
168 | r`/(?:),(?:),(?:NaN),(?:false),(?:0)/`],
169 |
170 | // Test un-bindable back-reference
171 | [test`${ /\1/ }`, r`/(?:(?:))/`],
172 |
173 | // Subclassing of RegExp
174 | [test(SubRegExp)`foo`, 'SubRegExp:/foo/'],
175 | [test(SubRegExp, 'i')`foo`, 'SubRegExp:/foo/i'],
176 |
177 | // TODO: Handle case-folding properly when u flag is present
178 | // TODO: Test interpolation in middle of charset start. `[${...}^]`
179 | ];
180 |
// Creates a small table-building object with four methods:
//   header(text)  - append a header cell to the current row
//   cell(text)    - append a data cell to the current row
//   endRow(...)   - finish the current row and start a new one
//   endTable()    - finish the table
//
// When a global `document` exists the table is built as DOM nodes appended
// to document.body; otherwise the cells are buffered and printed as
// space-padded text via console.log when endTable() is called.
function tableMaker() {
  // DOM flavour: rows and cells are real elements.
  function makeDomTable() {
    const append = function (tagName, parent, opt_text) {
      const node = parent.appendChild(document.createElement(tagName));
      if (opt_text) {
        node.appendChild(document.createTextNode(opt_text));
      }
      return node;
    };

    const tbody = append('tbody', append('table', document.body));
    // There is always an open (possibly empty) last row to add cells to.
    append('tr', tbody);

    const addCell = function (cellTag, text) {
      const currentRow = tbody.lastChild;
      const previousCell = currentRow.lastChild;
      if (text !== null || !previousCell) {
        append(cellTag, currentRow, text);
        return;
      }
      // A null text widens the previous cell instead of adding a new one.
      const currentSpan = +previousCell.getAttribute('colspan') || 1;
      previousCell.setAttribute('colspan', currentSpan + 1);
    };

    return {
      endRow: function (passFailOpt) {
        const finishedRow = tbody.lastChild;
        if (passFailOpt !== undefined) {
          finishedRow.className += passFailOpt ? ' pass' : ' fail';
        }
        append('tr', tbody);
      },
      header: addCell.bind(null, 'th'),
      cell: addCell.bind(null, 'td'),
      endTable: function () {
        // Remove the trailing row if nothing was ever added to it.
        const trailingRow = tbody.lastChild;
        if (!trailingRow.firstChild) {
          tbody.removeChild(trailingRow);
        }
      }
    };
  }

  // Text flavour: cells are buffered, then rendered in aligned columns.
  function makeTextTable() {
    const rows = [[]];

    const pushCell = function (isHeader, text) {
      rows[rows.length - 1].push({ text: text || '', header: isHeader });
    };

    const render = function () {
      if (rows.length && rows[rows.length - 1].length === 0) {
        // Drop any empty last row.
        rows.length -= 1;
      }

      // Widest text seen in each column.
      const widths = [];
      rows.forEach(function (row) {
        row.forEach(function (cell, column) {
          widths[column] = Math.max(widths[column] || 0, cell.text.length);
        });
      });
      // One run of spaces per column, as wide as that column.
      const blanks = widths.map(function (width) {
        return ' '.repeat(width);
      });

      const lines = rows.map(function (row) {
        return row.map(function (cell, column) {
          const blank = blanks[column];
          const text = cell.text;
          // Header cells are centred; data cells are left-aligned.
          var lead = 0;
          if (cell.header) {
            lead = (blank.length - text.length) >> 1;
          }
          return (
            blank.substring(0, lead) +
            text +
            blank.substring(text.length + lead)
          );
        }).join(' | ');
      });

      console.log(lines.join('\n'));
    };

    return {
      endRow: function () {
        rows.push([]);
      },
      header: pushCell.bind(null, true),
      cell: pushCell.bind(null, false),
      endTable: render
    };
  }

  if (typeof document === 'undefined') {
    return makeTextTable();
  }
  return makeDomTable();
}
276 |
// Renders an array as "[a, b, c]" for display in the test summary.
// Truthy object elements (RegExps, arrays, ...) are rendered through their
// own string conversion; everything else goes through JSON.stringify, so
// strings appear quoted and null appears as "null".
function stringify(arr) {
  const rendered = [];
  for (var index = 0, count = arr.length; index < count; ++index) {
    const item = arr[index];
    const isObject = Boolean(item) && 'object' === typeof item;
    // Concatenating with '' keeps an `undefined` result from
    // JSON.stringify visible as the text "undefined".
    rendered.push(isObject ? '' + item : '' + JSON.stringify(item));
  }
  return '[' + rendered.join(', ') + ']';
}
292 |
293 | const testSummary = tableMaker();
294 | testSummary.header('string parts');
295 | testSummary.header('values');
296 | testSummary.header('expected pattern');
297 | testSummary.header('expected groups');
298 | testSummary.endRow();
299 | testSummary.header('');
300 | testSummary.header('');
301 | testSummary.header('actual pattern');
302 | testSummary.header('actual groups');
303 | testSummary.endRow();
304 | var nPassing = 0, nFailing = 0;
305 | const failing = [];
306 | for (var i = 0, n = tests.length; i < n; ++i) {
307 | const [
308 | { template, values, ctor: RegExpCtor, flags },
309 | expectedPattern,
310 | expectedGroupsOpt
311 | ] = tests[i];
312 | const expectedGroups = expectedGroupsOpt || [0];
313 |
314 | const maker = function (template, ...values) {
315 | if (flags != null) {
316 | return RegExpCtor.make(flags)(template, ...values);
317 | } else {
318 | return RegExpCtor.make(template, ...values);
319 | }
320 | };
321 | var actualPattern, actualGroups;
322 | try {
323 | const re = maker(template, ...values);
324 | actualPattern = re.toString();
325 | actualGroups = re.templateGroups;
326 | } catch (e) {
327 | actualPattern = '###Error:' + e + '###';
328 | actualGroups = ['###Error###'];
329 | console.error(e);
330 | }
331 |
332 | var message = '#' + i;
333 | var checkEqual = function (expected, actual) {
334 | if (expected === actual) { return true; }
335 | expected = String(expected);
336 | actual = String(actual);
337 | if (/[^\w ]/.test(expected)) {
338 | expected = JSON.stringify(expected);
339 | }
340 | if (/[^\w ]/.test(actual)) {
341 | actual = JSON.stringify(actual);
342 | }
343 | message += ' : ' + expected + ' != ' + actual;
344 | return false;
345 | };
346 |
347 | const passPattern = checkEqual(expectedPattern, actualPattern);
348 | const passGroups = checkEqual(actualGroups.join(' '), expectedGroups.join(' '));
349 | const passAll = passPattern && passGroups;
350 |
351 | testSummary.cell(JSON.stringify(template.raw));
352 | testSummary.cell(stringify(values));
353 | testSummary.cell(expectedPattern);
354 | testSummary.cell(expectedGroups.join(' '));
355 | testSummary.endRow(passAll);
356 |
357 | // Position the actual values below the wanted for easy scanning.
358 | testSummary.cell(null);
359 | testSummary.cell(null);
360 | testSummary.cell(actualPattern, passPattern);
361 | testSummary.cell(actualGroups.join(' '), passGroups);
362 |
363 | testSummary.endRow(passAll);
364 |
365 | if (passAll) {
366 | ++nPassing;
367 | } else {
368 | ++nFailing;
369 | failing.push(message);
370 | }
371 | }
372 | testSummary.endTable();
373 |
374 | if (typeof document !== 'undefined') {
375 | document.getElementById('warning').style.display = 'none';
376 | document.title = (nFailing === 0 ? 'PASS' : 'FAIL') + ' : ' + document.title;
377 | } else {
378 | console.log('PASS:', nPassing);
379 | console.log('FAIL:', nFailing);
380 | failing.forEach(function (message) {
381 | console.error(message);
382 | });
383 | if (nFailing) {
384 | throw new Error(nFailing + ' test' + (nFailing === 1 ? '' : 's') + ' failed');
385 | }
386 | }
387 | }());
388 |
--------------------------------------------------------------------------------
/RegExp.make.js:
--------------------------------------------------------------------------------
1 | // Using the subset of ES6 currently supported by FF Nightly 42.0a1 (2015-08-01)
2 | // For full ES6:
3 | // * replace "var" below with "let"
4 |
5 | RegExp.make = (function () {
6 | "use strict";
7 |
/**
 * The syntactic position inside RegExp source at which parsing of a chunk
 * starts or ends.
 * @enum{number}
 */
const Context = {
  /** A context in which any top-level RegExp operator can appear. */
  BLOCK: 0,
  /** A context inside a charset. {@code /[HERE]/} */
  CHARSET: 1,
  /** A context inside a repetition count. {@code /x{HERE}/} */
  COUNT: 2
};
17 |
18 |
/**
 * Matches characters that have special meaning at
 * the top-level of a RegExp.
 * Global so that String.prototype.replace with '\\$&' escapes every match.
 */
const UNSAFE_CHARS_BLOCK = /[\\(){}\[\]\|\?\*\+\^\$\/.]/g;
/**
 * Matches characters that have special meaning within
 * a RegExp charset.
 * Global so that String.prototype.replace with '\\$&' escapes every match.
 * NOTE(review): '^' is not in this set; confirm that a value starting with
 * '^' can never be placed directly after an opening '['.
 */
const UNSAFE_CHARS_CHARSET = /[\[\]\-\\]/g;
29 |
/**
 * Renders one end-point of a character range as RegExp charset source.
 *
 * Code-units in the range 0x20..0x7e are emitted as themselves (with a
 * backslash when they are special inside a charset); everything else is
 * emitted as a four digit {@code \uXXXX} escape.
 *
 * @param {number} n a UTF-16 code-unit.
 * @return {string} of regexp suitable for embedding in a charset.
 */
function encodeRangeEndPoint(n) {
  const isPrintableAscii = n >= 0x20 && n <= 0x7e;
  if (!isPrintableAscii) {
    const digits = n.toString(16);
    // Keep as much of the zero-padded prefix as the hex digits leave room for.
    const prefix = '\\u0000'.substring(0, 6 - digits.length);
    return prefix + digits;
  }
  return String.fromCharCode(n).replace(UNSAFE_CHARS_CHARSET, '\\$&');
}
43 |
/**
 * The largest value allowed as a range end-point.
 * Max code-unit is the maximum UTF-16 code-unit since
 *   /^[\ud800\udc00]$/.test('\ud800\udc00') is false
 * and
 *   /^[\ud800\udc00]$/.test('\ud800') is true.
 * TODO: Take into account 'u' flag.
 */
const MAX_CHAR_IN_RANGE = 0xFFFF;
52 |
/**
 * A set of UTF-16 code-units stored as a list of inclusive ranges.
 * @param {!Array.<number>=} opt_ranges packed ranges to copy, in the format
 *     described on {@code this.ranges}.
 * @constructor
 */
function CharRanges(opt_ranges) {
  /**
   * A series of ints bit-packed with the minimum in the high 16 bits and
   * the difference between the max and the min in the low 16 bits.
   *
   * The range consisting of the letter 'A' is then [0x00410000] which has
   * the char code for 'A' (65 == 0x41) in the top half, and the difference
   * between the min and max (0) in the lower 16 bits.
   *
   * The range [a-z] is represented as [0x00610019] which has the char code
   * for 'a' (97 == 0x61) in the upper 16 bits, and the difference between
   * min and max (25 == 0x19) in the lower 16 bits.
   *
   * Entries whose minimum is 0x8000 or more are negative when viewed as
   * signed 32 bit ints, so the minimum must always be extracted with the
   * unsigned shift {@code >>> 16}, never with {@code >> 16}.
   *
   * @private
   * @type {!Array.<number>}
   */
  this.ranges = opt_ranges ? opt_ranges.slice() : [];
}
/**
 * @this {!CharRanges}
 * @return {boolean} true iff no ranges have been added.
 */
CharRanges.prototype.isEmpty = function () {
  return !this.ranges.length;
};
/**
 * Produces a string that has the same meaning in a RegExp charset.
 * Without enclosing square brackets.
 * Ranges are emitted in their stored order; call canonicalize first for
 * sorted, merged output.
 * @override
 * @this {!CharRanges}
 */
CharRanges.prototype.toString = function () {
  var s = '';
  /** @type {!Array.<number>} */
  const ranges = this.ranges;
  /** @type {number} */
  const n = ranges.length;
  for (var i = 0; i < n; ++i) {
    /** @type {number} */
    const leftAndSpan = ranges[i];
    const left = leftAndSpan >>> 16;
    const span = leftAndSpan & 0xffff;
    s += encodeRangeEndPoint(left);
    if (span) {
      // Two adjacent code-units need no dash: "ab" means the same as "a-b".
      if (span !== 1) { s += '-'; }
      s += encodeRangeEndPoint(left + span);
    }
  }
  return s;
};
/**
 * The minimum code-unit matched, or undefined if this is empty.
 * Canonicalizes this as a side effect.
 * @this {!CharRanges}
 * @return {number|undefined}
 */
CharRanges.prototype.getMin = function () {
  this.canonicalize();
  /** @type {!Array.<number>} */
  const ranges = this.ranges;
  return ranges.length ? (ranges[0] >>> 16) : undefined;
};
/**
 * Adds a range starting at left and going to right, inclusive.
 *
 * NOTE(review): NaN end-points are not rejected by the checks below because
 * every comparison with NaN is false; callers should pass real code-units.
 *
 * @this {!CharRanges}
 * @param {number} left inclusive code-unit
 * @param {number=} opt_right inclusive code-unit.  left is assumed if absent.
 * @return {!CharRanges} this to allow chaining.
 * @throws {Error} if the end-points are negative, exceed MAX_CHAR_IN_RANGE,
 *     are out of order, or are not integral.
 */
CharRanges.prototype.addRange = function (left, opt_right) {
  // Only default when absent so that an explicit 0 is honoured.
  var right = opt_right == null ? left : opt_right;
  left = +left;
  right = +right;
  if (left < 0 || right > MAX_CHAR_IN_RANGE || left > right
      || left % 1 || right % 1) {
    throw new Error('Invalid char range [' + left + ', ' + right + ']');
  }
  this.ranges.push((left << 16) | ((right - left) & 0xFFFF));
  return this;
};
/**
 * Adds the given ranges to this.
 * Modifies this in place making it the union of its prior value and ranges.
 *
 * @this {!CharRanges}
 * @param {CharRanges} ranges
 * @return {!CharRanges} this to allow chaining.
 */
CharRanges.prototype.addAll = function (ranges) {
  if (ranges !== this) {
    Array.prototype.push.apply(this.ranges, ranges.ranges);
  }
  return this;
};
/**
 * Canonicalizes this as a side effect.
 * @this {!CharRanges}
 * @return {!CharRanges} [\u0000-\uFFFF] - this.
 *     Allocates a new output.  Does not otherwise modify this.
 */
CharRanges.prototype.inverse = function () {
  this.canonicalize();
  /** @type {!Array.<number>} */
  const ranges = this.ranges;
  /** @type {number} */
  const n = ranges.length;
  // The first code-unit not yet known to be covered by this.
  var pastLastRight = 0;
  const invertedRanges = [];
  for (var i = 0; i < n; ++i) {
    /** @type {number} */
    const leftAndSpan = ranges[i];
    const left = leftAndSpan >>> 16;
    const span = leftAndSpan & 0xFFFF;
    if (pastLastRight < left) {
      // The gap before this range belongs to the inverse.
      invertedRanges.push(
        (pastLastRight << 16)
        | (left - pastLastRight - 1)
      );
    }
    pastLastRight = left + span + 1;
  }
  if (pastLastRight <= MAX_CHAR_IN_RANGE) {
    invertedRanges.push(
      (pastLastRight << 16)
      | (MAX_CHAR_IN_RANGE - pastLastRight));
  }
  return new CharRanges(invertedRanges);
};
/**
 * Orders ranges and merges overlapping or adjacent ranges.
 * @this {!CharRanges}
 * @return {!CharRanges} this to allow chaining.
 */
CharRanges.prototype.canonicalize = function () {
  /** @type {!Array.<number>} */
  const ranges = this.ranges;
  /** @type {number} */
  const n = ranges.length;
  if (!n) { return this; }
  // Sort ranges so that they are ordered by left.  Compare as unsigned so
  // that ranges starting at 0x8000 or above sort last instead of first.
  ranges.sort(function (a, b) { return (a >>> 0) - (b >>> 0); });
  // Merge overlapping ranges.
  var j = 1;  // Index into ranges past last merged item.
  var lastRight = (ranges[0] >>> 16) + (ranges[0] & 0xFFFF);
  for (var i = 1; i < n; ++i) {
    /** @type {number} */
    const leftAndSpan = ranges[i];
    const left = leftAndSpan >>> 16;
    const span = leftAndSpan & 0xFFFF;
    if (lastRight + 1 >= left) {
      // We can merge the two.
      const lastLeft = ranges[j - 1] >>> 16;
      lastRight = Math.max(lastRight, left + span);
      const merged = (lastLeft << 16) | (lastRight - lastLeft);
      ranges[j - 1] = merged;
      // Do not increment j.
    } else {
      ranges[j] = leftAndSpan;
      lastRight = left + span;
      ++j;
    }
  }
  ranges.length = j;
  return this;
};
/**
 * A newly allocated set with those elements in this that fall inside
 * {@code new CharRanges().addRange(min, max)}.
 * @this {!CharRanges}
 * @param {number} min inclusive
 * @param {number} max inclusive
 * @return {!CharRanges} a newly allocated output.  Not modified in place.
 */
CharRanges.prototype.intersectionWithRange = function (min, max) {
  /** @type {!Array.<number>} */
  const ranges = this.ranges;
  const intersection = new CharRanges();
  /** @type {number} */
  const n = ranges.length;
  for (var i = 0; i < n; ++i) {
    /** @type {number} */
    const leftAndSpan = ranges[i];
    const left = leftAndSpan >>> 16;
    const span = leftAndSpan & 0xFFFF;
    /** @type {number} */
    const right = left + span;

    if (!(left > max || right < min)) {
      intersection.addRange(Math.max(min, left), Math.min(max, right));
    }
  }
  return intersection;
};
/**
 * The ranges but with each ranges left-end-point shifted by delta.
 * The span of each range is unchanged, so the right end-point moves too.
 * @this {!CharRanges}
 * @param {number} delta
 * @return {!CharRanges} a newly allocated output.  Not modified in place.
 */
CharRanges.prototype.shifted = function (delta) {
  return new CharRanges(
    // | 0 keeps the packed value a 32 bit int like every other entry.
    this.ranges.map(function (x) { return (x + (delta << 16)) | 0; })
  );
};
/**
 * Applies callback to each range in stored order.
 * @param {function(number, number)} callback receives left and right inclusive.
 * @this {!CharRanges}
 */
CharRanges.prototype.forEachRange = function (callback) {
  /** @type {!Array.<number>} */
  const ranges = this.ranges;
  /** @type {number} */
  const n = ranges.length;
  for (var i = 0; i < n; ++i) {
    /** @type {number} */
    const leftAndSpan = ranges[i];
    const left = leftAndSpan >>> 16;
    const span = leftAndSpan & 0xFFFF;
    /** @type {number} */
    const right = left + span;
    callback(left, right);
  }
};
/**
 * Removes all ranges from this in place.
 * @this {!CharRanges}
 */
CharRanges.prototype.clear = function () {
  this.ranges.length = 0;
};
285 |
286 |
/**
 * Caches compiled tokenizer RegExps keyed by the bit-set of handler kinds
 * that parseRegExpSource derives from its event handler.
 * @type {!Map.<number, !RegExp>}
 */
const TOKENIZERS = new Map();

/**
 * Returns a function that invokes the event handler below on tokens found in
 * RegExp source.
 *
 * Handlers that are absent fall back to {@code other}, except that charset
 * text is not forwarded to {@code other} when an {@code endCharset} handler
 * is present.
 *
 * @param {{
 *   wholeInput: boolean,
 *   startCharset: (function(string) | undefined),
 *   range: (function(number, number) | undefined),
 *   endCharset: (function(string) | undefined),
 *   bracket: (function(string) | undefined),
 *   operators: (function(string) | undefined),
 *   count: (function(?number, ?number) | undefined),
 *   escape: (function(string) | undefined),
 *   backref: (function(string) | undefined),
 *   other: (function(string) | undefined)
 * }} eventHandler
 *   {@code backref} receives the whole token text, e.g. '\\2'.
 *   {@code count} receives (min, max) for a count found in block context,
 *   but receives the raw text when the chunk starts inside an unclosed count.
 * @return {!function(!Context, string):!Context} a function that takes
 *     a start context, and RegExp source, and returns an end context.
 * @throws {Error} from the returned function when the tokenizer cannot
 *     consume the whole of the source.
 */
function parseRegExpSource(eventHandler) {
  var {
    wholeInput,  // Is the input whole.
    startCharset,
    range,
    endCharset,
    bracket,
    operators,
    count,
    escape,
    backref,
    other: otherOpt
  } = eventHandler;
  /** @type {function(string)} */
  const other = otherOpt || function () {};

  // We compile an efficient regular expression that groups as many things as
  // we don't care about as possible into runs of "other stuff".
  // Every input that changes the tokenizer built below must contribute a bit
  // here, otherwise the cache could hand back a tokenizer built for a
  // different set of handlers.
  const signature = 0
      | (wholeInput ? 1 : 0)
      | ((startCharset || endCharset || range) ? 2 : 0)
      | (bracket ? 4 : 0)
      | (operators ? 8 : 0)
      | (escape ? 16 : 0)
      | (backref ? 32 : 0)
      | (count ? 64 : 0);

  var tokenizer = TOKENIZERS.get(signature);
  if (!tokenizer) {
    const tokens = [];
    const careChars = new CharRanges();
    const dontCareTokens = [];
    if (escape || backref) {
      if (backref) {
        tokens.push('\\\\[1-9][0-9]*');
      }
      if (escape) {
        tokens.push(
          '\\\\(?:[xX][0-9a-fA-F]{2}|[uU][0-9a-fA-F]{4}|[^1-9xXuU])');
      } else {
        dontCareTokens.push('\\\\[^1-9]');
      }
    } else {
      dontCareTokens.push('\\\\[\\s\\S]');
    }
    careChars.addRange('\\'.charCodeAt(0));

    // If we have the whole input, and don't need to report charsets, then we
    // can include them in dontCareTokens.
    (
      (startCharset || endCharset || range || !wholeInput)
        ? tokens : dontCareTokens
    ).push(
      '\\[(?:[^\\]\\\\]|\\\\[\\S\\s])*\\]?'
    );
    careChars.addRange('['.charCodeAt(0));

    // Reasoning is similar to charset above.
    (
      (count || !wholeInput)
        ? tokens : dontCareTokens
    ).push(
      '[{]\\d*(?:,\\d*)?[}]?'
    );
    careChars.addRange('{'.charCodeAt(0));

    if (bracket) {
      tokens.push('[(](?:[?][:=!])?|[)]');
      careChars.addRange('('.charCodeAt(0))
        .addRange(')'.charCodeAt(0));
    }

    const operatorChars = '$^*+?|.';
    if (operators) {
      tokens.push(
        '[' + operatorChars.replace(UNSAFE_CHARS_CHARSET, '\\$&') + ']');
      for (var i = 0, nOpChars = operatorChars.length; i < nOpChars; ++i) {
        careChars.addRange(operatorChars.charCodeAt(i));
      }
    }

    // I really wish we had a nice way of composing regular expressions.
    dontCareTokens.push('[' + careChars.inverse() + ']');
    tokens.push('(?:' + dontCareTokens.join('|') + ')+');
    tokenizer = new RegExp(tokens.join('|'), 'g');
    TOKENIZERS.set(signature, tokenizer);
  }

  return function(startContext, source) {
    /** @type {?Array.<string>} */
    var match;
    var blockSource = String(source);
    var outputContext = startContext;
    switch (startContext) {
      case Context.CHARSET:
        // Strip off the unclosed CHARSET, dispatch it,
        // and switch to block context.
        match = blockSource.match(/^(?:[^\]\\]|\\[\S\s])*?\]/);
        var ranges;
        if (match) {
          outputContext = Context.BLOCK;
          blockSource = blockSource.substring(match[0].length);
          ranges = match[0];
          // Drop the closing ']' so only the charset body is parsed.
          ranges = ranges.substring(0, ranges.length - 1);
        } else {
          // The charset is still open at the end of this chunk.
          ranges = blockSource;
          blockSource = '';
        }
        if (range) {
          parseCharsetRanges(range, ranges);
        } else if (!endCharset) {
          // blockSource has already been consumed above, so report the
          // saved charset text rather than the emptied remainder.
          other(match ? match[0] : ranges);
        }
        if (endCharset && outputContext !== Context.CHARSET) {
          endCharset(match[0]);
        }
        break;
      case Context.COUNT:
        /** @type {number} */
        const rcurly = blockSource.indexOf('}');
        const hasCurly = rcurly >= 0;
        /** @type {number} */
        const end = hasCurly ? rcurly + 1 : blockSource.length;
        (count || other)(blockSource.substring(0, end));
        blockSource = blockSource.substring(end);
        if (hasCurly) {
          outputContext = Context.BLOCK;
        }
        break;
    }

    /** @type {!Array.<string>} */
    const sourceTokens = blockSource.match(tokenizer) || [];
    /** @type {number} */
    const nSourceTokens = sourceTokens.length;

    // Assert that our tokenizer matched the whole input.
    var totalSourceTokenLength = 0;
    for (var i = 0; i < nSourceTokens; ++i) {
      totalSourceTokenLength += sourceTokens[i].length;
    }
    if (blockSource.length !== totalSourceTokenLength) {
      throw new Error(
        'Failed to tokenize ' + blockSource + ' with ' + tokenizer + '. Got '
          + JSON.stringify(sourceTokens) + ' which have a length delta of '
          + (blockSource.length - totalSourceTokenLength));
    }

    for (var i = 0; i < nSourceTokens; ++i) {
      /** @type {string} */
      const sourceToken = sourceTokens[i];
      switch (sourceToken[0]) {
        case '[':
          // A charset can only be left open by the last token of the chunk.
          /** @type {boolean} */
          const isClosed = (
            i + 1 < nSourceTokens || /(?:^|[^\\])(?:\\\\)*\]$/.test(sourceToken)
          );
          if (!isClosed) {
            outputContext = Context.CHARSET;
          }
          if (startCharset || range) {
            const start = sourceToken[1] === '^' ? '[^' : '[';
            if (startCharset) {
              startCharset(start);
            }
            if (range) {
              /** @type {number} */
              const endPos = sourceToken.length + (isClosed ? -1 : 0);
              parseCharsetRanges(
                range, sourceToken.substring(start.length, endPos));
            }
          } else if (!endCharset) {
            other(sourceToken);
          }
          if (isClosed && endCharset) {
            endCharset(']');
          }
          break;
        case '\\':
          /** @type {string} */
          const ch1 = sourceToken[1];
          (('1' <= ch1 && ch1 <= '9' ? backref : escape) || other)(sourceToken);
          break;
        case '(': case ')':
          (bracket || other)(sourceToken);
          break;
        case '+': case '*': case '?': case '.': case '|': case '^': case '$':
          (operators || other)(sourceToken);
          break;
        case '{':
          if (count) {
            /** @type {?Array.<string>} */
            const minMaxMatch = /^\{(\d*)(?:,(\d*))?/.exec(sourceToken);
            const min = minMaxMatch ? +minMaxMatch[1] : 0;
            const max = +(minMaxMatch && minMaxMatch[2] || min);
            count(min, max);
          } else {
            other(sourceToken);
          }
          if (i + 1 == nSourceTokens
              && sourceToken[sourceToken.length - 1] !== '}') {
            outputContext = Context.COUNT;
          }
          break;
        default:
          other(sourceToken);
      }
    }

    return outputContext;
  };
}
519 |
/**
 * Maps template literals (their raw part arrays) to the record that
 * getStaticInfo computed for them.
 */
const STATIC_INFO_CACHE = new WeakMap();

/**
 * Computes, once per template literal, the facts about it that do not
 * depend on the interpolated values.  The result has the form
 * {
 *   contexts: [...],
 *   templateGroupCounts: [...],
 *   splitLiterals: [...],
 * }
 *
 * contexts[i] is the context in which value[i] is interpolated, i.e. the
 * context reached after parsing raw[0..i].
 *
 * templateGroupCounts[i] is the number of capturing groups opened
 * in raw[i].
 *
 * splitLiterals[i] is raw[i] split around back-references, with each
 * back-reference replaced by the index it refers to, so the literal chunk
 * 'foo\2bar' would split to ['foo', 2, 'bar'].
 *
 * @param {!Array.<string>} raw template literal parts.
 * @return {!{contexts : !Array.<Context>,
 *            templateGroupCounts : !Array.<number>,
 *            splitLiterals : !Array.<!Array.<(string|number)>>}}
 */
function getStaticInfo(raw) {
  const cached = STATIC_INFO_CACHE.get(raw);
  if (cached) { return cached; }

  const contexts = [];
  const templateGroupCounts = [];
  const splitLiterals = [];

  // Accumulators for the literal part currently being parsed.
  var groupsInPart = 0;
  var piecesOfPart = [];

  // Appends text to the current part, extending a trailing string piece
  // instead of starting a new one.
  function appendText(text) {
    const lastIndex = piecesOfPart.length - 1;
    if (lastIndex >= 0 && typeof piecesOfPart[lastIndex] === 'string') {
      piecesOfPart[lastIndex] += text;
    } else {
      piecesOfPart.push(text);
    }
  }

  /** @type {function(!Context, string):!Context} */
  const parse = parseRegExpSource({
    wholeInput: false,
    bracket: function (tok) {
      // Only a bare '(' opens a capturing group.
      if (tok === '(') {
        groupsInPart += 1;
      }
      appendText(tok);
    },
    backref: function (tok) {
      piecesOfPart.push(Number(tok.substring(1)));
    },
    other: function (tok) {
      appendText(tok);
    }
  });

  var context = Context.BLOCK;
  for (var part = 0, nParts = raw.length; part < nParts; ++part) {
    context = parse(context, raw[part]);
    contexts.push(context);
    templateGroupCounts.push(groupsInPart);
    splitLiterals.push(piecesOfPart);

    // Start fresh accumulators for the next part.
    groupsInPart = 0;
    piecesOfPart = [];
  }

  // We don't need the context after the last part
  // since no value is interpolated there.
  contexts.length--;

  const staticInfo = {
    contexts: contexts,
    templateGroupCounts: templateGroupCounts,
    splitLiterals: splitLiterals
  };
  STATIC_INFO_CACHE.set(raw, staticInfo);
  return staticInfo;
}
612 |
/**
 * The characters matched by {@code /./}: every code-unit except the
 * line terminators U+000A, U+000D, U+2028 and U+2029.
 * @type {CharRanges}
 */
const DOT_RANGES = new CharRanges()
    .addRange(0xA).addRange(0xD).addRange(0x2028, 0x2029)
    .inverse();
620 |
/**
 * Computes the body of a charset that covers every code-unit that the given
 * RegExp could consume.
 *
 * @param {string} source the source of a RegExp.
 * @param {string} flags the flags of a RegExp.
 * @return {string} the text of a charset that matches all code-units that
 *     could appear in any string in the language matched by the input.
 *     This is liberal.  For example {@code /ab{0}/} can match the string "a",
 *     but cannot match the string "ab" because of the zero-count.
 *     Lookaheads could similarly contribute characters unnecessarily.
 */
function toCharRanges(source, flags) {
  // Union of every literal character and charset found in source.
  const charRanges = new CharRanges();
  // Non-null only while inside a negated charset; collects the ranges that
  // the charset excludes so that they can be inverted when it closes.
  var negCharRanges = null;

  const unionHandler = {
    wholeInput: true,
    escape: function (esc) {
      addEscapeValueTo(esc, false, charRanges);
    },
    operators: function (s) {
      if (s.indexOf('.') >= 0) {
        charRanges.addAll(DOT_RANGES);
      }
    },
    // Counts and brackets contribute no characters.
    count: function(_) {},
    bracket: function (_) {},
    startCharset: function (start) {
      if (start.charAt(1) === '^') {
        negCharRanges = new CharRanges();
      }
    },
    endCharset: function (_) {
      if (negCharRanges) {
        charRanges.addAll(negCharRanges.inverse());
        negCharRanges = null;
      }
    },
    range: function (left, right) {
      const target = negCharRanges || charRanges;
      target.addRange(left, right);
    },
    other: function (s) {
      const n = s.length;
      var i = 0;
      while (i < n) {
        charRanges.addRange(s.charCodeAt(i));
        ++i;
      }
    }
  };

  const parse = parseRegExpSource(unionHandler);
  parse(Context.BLOCK, source);

  if (flags.indexOf('i') >= 0) {
    // Fold letters.
    caseFold(charRanges);
  }
  charRanges.canonicalize();
  return charRanges.toString();
}
681 |
682 |
/**
 * Extends charRanges in place so that every ASCII letter it contains is
 * present in both upper and lower case.
 * @param {CharRanges} charRanges
 */
function caseFold(charRanges) {
  // ASCII upper and lower case letters are exactly 32 code-units apart.
  const CASE_DELTA = 32;
  const upperA = 'A'.charCodeAt(0);
  const upperZ = 'Z'.charCodeAt(0);
  const lowerA = 'a'.charCodeAt(0);
  const lowerZ = 'z'.charCodeAt(0);

  charRanges.canonicalize();
  // TODO: Read spec and figure out what to do with non-ASCII characters.
  // Maybe take flags and look for the 'u' flag.
  /** @type {CharRanges} */
  const upperPresent = charRanges.intersectionWithRange(upperA, upperZ);
  /** @type {CharRanges} */
  const lowerPresent = charRanges.intersectionWithRange(lowerA, lowerZ);
  charRanges.addAll(upperPresent.shifted(CASE_DELTA));
  charRanges.addAll(lowerPresent.shifted(-CASE_DELTA));
}
700 |
/**
 * RegExp source text for an escape sequence that is definitely not a
 * back-reference: a \uXXXX escape, a \xXX escape, or a backslash optionally
 * followed by one character other than the digits 1-9.
 */
const ESCAPE_SEQUENCE_PATTERN =
    '\\\\(?:u[\\da-fA-F]{4}|x[\\da-fA-F]{2}|[^1-9]?)';

/**
 * Pattern for the start or end of a character range.
 * This is RegExp source text wrapped in a non-capturing group so that it
 * can be concatenated into the larger patterns below.
 */
const CHARSET_END_POINT_PATTERN = (
    '(?:'
    + '[^\\\\]'  // Not an escape
    + '|' + ESCAPE_SEQUENCE_PATTERN  // A full normal escape
    + '|\\\\[1-9]'  // Back-references cannot appear in charsets.
    + ')'
);
/**
 * Matches all the atomic parts of a charset: individual characters, groups,
 * and single ranges.
 * Has the 'g' flag so that String.prototype.match returns every part.
 */
const CHARSET_PARTS_RE = new RegExp(
    '\\\\[DdSsWw]'  // A charset abbreviation
    + '|' + CHARSET_END_POINT_PATTERN
    + '(?:-' + CHARSET_END_POINT_PATTERN + ')?',
    'g'
);
/**
 * Matches a range putting the left of the range in group 1,
 * and the right in group 2.
 * If group 2 is not present, then it is implicitly the same as the left.
 * Not global, so exec always starts matching at the beginning of its input.
 */
const CHARSET_RANGE_RE = new RegExp(
    '(' + CHARSET_END_POINT_PATTERN + ')'
    + '(?:-(' + CHARSET_END_POINT_PATTERN + '))?'
);
734 |
/**
 * Space characters that match \s
 * @type {CharRanges}
 */
const SPACE_CHARS = new CharRanges()
    .addRange(0x9, 0xd)
    .addRange(0x20)
    .addRange(0xa0)
    .addRange(0x1680)
    .addRange(0x180e)
    .addRange(0x2000, 0x200a)
    .addRange(0x2028, 0x2029)
    .addRange(0x202f)
    .addRange(0x205f)
    .addRange(0x3000)
    .addRange(0xfeff);
/**
 * Word chars that match \w
 * @type {CharRanges}
 */
const WORD_CHARS = new CharRanges()
    .addRange('A'.charCodeAt(0), 'Z'.charCodeAt(0))
    .addRange('0'.charCodeAt(0), '9'.charCodeAt(0))
    .addRange('a'.charCodeAt(0), 'z'.charCodeAt(0))
    .addRange('_'.charCodeAt(0));
/**
 * Digit chars that match \d
 * @type {CharRanges}
 */
const DIGIT_CHARS = new CharRanges()
    .addRange('0'.charCodeAt(0), '9'.charCodeAt(0));
/**
 * Maps escape sequences (including the leading backslash) that are special
 * in RegExps to the characters they can match.
 * The upper-case class escapes map to the inverse of their lower-case forms.
 * @type {!Map.<string, !CharRanges>}
 */
const ESCAPE_SEQ_MAP = new Map([
  ['\\s', SPACE_CHARS],
  ['\\S', SPACE_CHARS.inverse()],
  ['\\w', WORD_CHARS],
  ['\\W', WORD_CHARS.inverse()],
  ['\\d', DIGIT_CHARS],
  ['\\D', DIGIT_CHARS.inverse()],
  ['\\t', new CharRanges().addRange(0x9)],
  ['\\n', new CharRanges().addRange(0xA)],
  ['\\v', new CharRanges().addRange(0xB)],
  ['\\f', new CharRanges().addRange(0xC)],
  ['\\r', new CharRanges().addRange(0xD)],
  // b doesn't appear here since its meaning depends on context.
  // \B is mapped to an empty set, so it contributes no characters.
  ['\\B', new CharRanges()]
]);
785 |
/**
 * The code-unit corresponding to the end-point of a range.
 * For an escape sequence this is the smallest code-unit that the escape
 * contributes, which is undefined when it contributes none.
 * TODO; What does [\s-\w] mean?
 * @param {string} endPoint a character, escape sequence, or named charset.
 * @return {number|undefined}
 */
function rangeEndPointToCodeUnit(endPoint) {
  if (endPoint[0] === '\\') {
    const decoded = addEscapeValueTo(endPoint, true, new CharRanges());
    return decoded.getMin();
  }
  return endPoint.charCodeAt(0);
}
799 |
/**
 * The code-unit that \b denotes inside a charset (backspace).
 * @type {number}
 */
const SLASH_B_CHAR_CODE = '\b'.charCodeAt(0);
/**
 * Decodes an escape sequence and adds any ranges it specifies to the given
 * ranges.
 *
 * Back-references (\1 through \9) and, outside a charset, \b add nothing.
 *
 * @param {string} esc an escape sequence including the leading backslash.
 * @param {boolean} inCharSet true iff esc appears inside a [...] charset.
 * @param {CharRanges} ranges the output to add to.  Modified in place.
 * @return {CharRanges} ranges, to allow chaining.
 */
function addEscapeValueTo(esc, inCharSet, ranges) {
  const chars = ESCAPE_SEQ_MAP.get(esc);
  if (chars !== undefined) {
    ranges.addAll(chars);
    return ranges;
  }
  const ch1 = esc.charAt(1);
  switch (ch1) {
    case 'u': case 'x': {
      /** @type {number} */
      const cu = parseInt(esc.substring(2 /* strip \x or \u */), 16);
      // Without hex digits the escape just stands for the letter itself,
      // so fall back to the letter instead of passing NaN along.
      ranges.addRange(Number.isNaN(cu) ? ch1.charCodeAt(0) : cu);
      break;
    }
    case '0':
      // \0 denotes the NUL character, not the digit zero.
      ranges.addRange(0);
      break;
    case 'b':
      // Outside a charset \b is a zero-width word boundary assertion.
      if (inCharSet) {
        ranges.addRange(SLASH_B_CHAR_CODE);
      }
      break;
    default:
      // A lone backslash has no escaped character to contribute, and
      // \1 through \9 are back-references rather than characters.
      if (ch1 && !('1' <= ch1 && ch1 <= '9')) {
        ranges.addRange(ch1.charCodeAt(0));
      }
  }
  return ranges;
}
835 |
/**
 * Splits the body of a charset into its atomic parts and reports each one
 * to handler as a pair of inclusive end-points.
 *
 * An explicit range such as {@code a-z} is reported once; an escape sequence
 * is reported once per range that it expands to; any other character is
 * reported as a range of one code-unit.
 *
 * @param {function(number, number)} handler receives 2 code-units.
 * @param {string} rangeText text of a RegExp charSet body.
 */
function parseCharsetRanges(handler, rangeText) {
  /** @type {!Array.<string>} */
  const parts = rangeText.match(CHARSET_PARTS_RE) || [];
  parts.forEach(function (part) {
    /** @type {?Array.<string>} */
    const endPoints = CHARSET_RANGE_RE.exec(part);
    if (endPoints && endPoints[2]) {
      // An explicit left-right range.
      handler(
        rangeEndPointToCodeUnit(endPoints[1]),
        rangeEndPointToCodeUnit(endPoints[2]));
      return;
    }
    if (part[0] === '\\') {
      // An escape may stand for many ranges, e.g. \w.
      const expanded = new CharRanges();
      addEscapeValueTo(part, true, expanded);
      expanded.forEachRange(handler);
      return;
    }
    /** @type {number} */
    const codeUnit = part.charCodeAt(0);
    handler(codeUnit, codeUnit);
  });
}
868 |
869 |
/**
 * Adjusts an interpolated RegExp so that it can be interpolated in
 * the context of the template while preserving the meaning of
 * back-references and character sets.
 *
 * When source is case-insensitive but the container is not, ASCII letters
 * outside escape sequences are rewritten as two-letter charsets and charsets
 * are case-folded, so that the interpolated text still matches both cases.
 *
 * @param {string} containerFlags the flags of the RegExp into which source
 *     is being interpolated.
 * @param {string} source the source of a RegExp being interpolated.
 * @param {string} flags associated with source.
 * @param {number} regexGroupCount The number of capturing groups that are
 *     opened before source is interpolated.
 * @param {!Array.<number>} templateGroups see the documentation for make for
 *     the contract.
 *     It only contains entries for capturing groups opened before the
 *     insertion point.
 *
 * @return {{fixedSource: string, countOfCapturingGroupsInFixedSource: number}}
 */
function fixUpInterpolatedRegExp(
  containerFlags, source, flags, regexGroupCount, templateGroups) {
  // Count capturing groups, and use that to identify and
  // renumber back-references that are in scope.
  var sourceGroupCount = 0;
  var hasBackRef = false;
  // Pieces of the output.  Back-references are held as numbers until they
  // are renumbered below; everything else is a string.
  const fixedSource = [];

  function append(tok) { fixedSource.push(tok); }

  const parseHandler = {
    wholeInput: true,
    bracket: function (tok) {
      if (tok === '(') {
        ++sourceGroupCount;
      }
      fixedSource.push(tok);
    },
    other: append
  };

  // Convert back-refs to numbers so we can renumber them below.
  if (regexGroupCount || templateGroups.length) {
    parseHandler.backref = function (tok) {
      hasBackRef = true;
      fixedSource.push(+tok.substring(1));
    };
  }

  const isCaseInsensitive = flags.indexOf('i') >= 0;
  if (isCaseInsensitive && containerFlags.indexOf('i') < 0) {
    // Expand literal letters and letters in charsets.
    parseHandler.startCharset = append;
    const ranges = new CharRanges();
    parseHandler.range = function (left, right) {
      ranges.addRange(left, right);
    };
    parseHandler.endCharset = function (s) {
      caseFold(ranges);
      fixedSource.push(ranges.toString(), s);
      ranges.clear();
    };
    parseHandler.other = function (tok) {
      fixedSource.push(tok.replace(
        // The first alternative consumes a whole escape sequence (one
        // backslash plus the escaped character) so that the letter in an
        // escape like \n is never mistaken for a literal letter.
        /\\[\s\S]|[A-Za-z]/g,
        function (s) {
          if (s.length === 1) {
            const cu = s.charCodeAt(0) & ~32;
            if (65 <= cu && cu <= 90) {
              return '[' + String.fromCharCode(cu, cu | 32) + ']';
            }
          }
          return s;
        }));
    };
  }

  parseRegExpSource(parseHandler)(Context.BLOCK, source);

  // Rewrite back-references that are out of scope to refer
  // to the template group.
  if (hasBackRef) {
    for (var i = 0, n = fixedSource.length; i < n; ++i) {
      var el = fixedSource[i];
      if ('number' === typeof el) {
        /** @type {number} */
        const backRefIndex = el;
        if (backRefIndex <= sourceGroupCount) {
          // A local reference.
          el = '\\' + (backRefIndex + regexGroupCount - 1);
        } else if (backRefIndex < templateGroups.length) {
          // A reference to a template group that is in scope.
          el = '\\' + templateGroups[backRefIndex];
        } else {
          // An out of scope back-reference matches the empty string.
          el = '(?:)';
        }
        fixedSource[i] = el;
      }
    }
  }

  return {
    fixedSource: fixedSource.join(''),
    countOfCapturingGroupsInFixedSource: sourceGroupCount
  };
}
975 |
976 |
/**
 * Builds a RegExp from a template and values to fill the template
 * holes.
 *
 * Each value is spliced into the pattern according to the syntactic context
 * of its hole (as reported by getStaticInfo): a block, the inside of a
 * charset, or a repetition count.  Capturing groups contributed by
 * interpolated RegExps shift the numbering of the groups written in the
 * template, so numeric back-references in the template literal parts are
 * rewritten to point at the renumbered groups.
 *
 * @param {!function(new:RegExp, string, string)} ctor
 *     A constructor that takes a string pattern and a string of flags.
 * @param {string} flags RegExp flags
 * @param {!{raw: !Array.<string>}} template raw is n+1 RegExp parts.
 * @param {...*} values an array of n parts to interpolate between
 *     the end of the corresponding raw part and the start of its follower.
 *     null and undefined are treated as the empty string.
 * @return {!RegExp} An instance of ctor.  It carries an extra
 *     `templateGroups` array property that maps each group index as written
 *     in the template to the index of that group in the built pattern.
 */
function make(ctor, flags, template, ...values) {
  /** @type {!Array.<string>} */
  const raw = template.raw;
  const { contexts, templateGroupCounts, splitLiterals } = getStaticInfo(raw);

  /** @type {number} */
  const n = contexts.length;

  let pattern = raw[0];
  // For each group specified in the template, the index of the corresponding
  // group in pattern.
  const templateGroups = [
    0  // Map implicit group 0, the whole match, to itself
  ];
  // The number of groups in the RegExp on pattern so far.
  let regexGroupCount = 1;  // Count group 0.

  // Records the capturing groups declared by the partIndex-th literal part,
  // assigning each the next free group index in the output pattern.
  function addTemplateGroups(partIndex) {
    /** @type {number} */
    const groupCount = templateGroupCounts[partIndex];
    for (let j = 0; j < groupCount; ++j) {
      templateGroups.push(regexGroupCount++);
    }
  }
  addTemplateGroups(0);

  for (let i = 0; i < n; ++i) {
    /** @type {Context} */
    const context = contexts[i];
    const value = values[i] == null ? '' : values[i];

    // NOTE(review): there is no default case; this presumes Context has only
    // the three members handled below -- confirm against its definition.
    let subst;
    switch (context) {
      case Context.BLOCK:
        if (value instanceof RegExp) {
          const {
            fixedSource: valueSource,
            countOfCapturingGroupsInFixedSource: valueGroupCount
          } = fixUpInterpolatedRegExp(
              flags, String(value.source), value.flags,
              regexGroupCount, templateGroups);
          subst = '(?:' + valueSource + ')';
          // Groups inside the interpolated RegExp occupy indices in the
          // output but are not template groups.
          regexGroupCount += valueGroupCount;
        } else {
          subst =
            '(?:' + String(value).replace(UNSAFE_CHARS_BLOCK, '\\$&') + ')';
        }
        break;
      case Context.CHARSET:
        // TODO: We need to keep track of whether we're interpolating
        // into an inverted charset or not.
        subst =
          (value instanceof RegExp)
          ? toCharRanges(String(value.source), String(value.flags))
          : String(value).replace(UNSAFE_CHARS_CHARSET, '\\$&');
        break;
      case Context.COUNT:
        subst = String(value instanceof RegExp ? value.source : value);
        break;
    }

    const splitLiteral = splitLiterals[i + 1];
    // Renumbering is only needed when interpolated groups have shifted the
    // numbering and the literal part is not a single plain string.
    // addTemplateGroups grows both sides of the first comparison by the same
    // amount, so evaluating it before the call below is equivalent.
    const needsRenumbering =
        regexGroupCount !== templateGroups.length
        && (splitLiteral.length !== 1
            || 'string' !== typeof splitLiteral[0]);

    // Register the groups declared by this literal part BEFORE rewriting its
    // back-references.  Otherwise a reference to a group declared in the same
    // part, as in `${/(a)/}(b)\1`, would be treated as out of scope and
    // silently replaced with the empty match.
    addTemplateGroups(i + 1);

    let rawLiteralPart = raw[i + 1];
    if (needsRenumbering) {
      rawLiteralPart = splitLiteral.map(
        function (splitElement) {
          if ('number' !== typeof splitElement) {
            return splitElement;
          }
          if (splitElement < templateGroups.length) {
            // A reference to a template group that is in scope.
            return '\\' + templateGroups[splitElement];
          }
          // An out of scope back-reference matches the empty string.
          // We can't just use the empty string, because returning nothing
          // would change the way that postfix operators like * attach.
          return '(?:)';
        }).join('');
    }

    pattern += subst;
    pattern += rawLiteralPart;
  }

  const output = new ctor(pattern, flags);
  output.templateGroups = templateGroups;
  return output;
}
1083 |
1084 | return function(x, ...values) {
1085 | // RegExp.make can be called in several modes.
1086 | // 1. RegExp.make`...undifferentiated RegExp stuff...`
1087 | // 2. RegExp.make('gi')`....` to specify flags
1088 | // 3. RegExp.make.bind(RegExpSubClass)`...` with a this value that specifies
1089 | // a different constructor.
1090 | if ('object' === typeof x && Array.isArray(x.raw)) {
1091 | return make(this, '', x, ...values);
1092 | }
1093 | if ('string' === typeof x && values.length === 0) {
1094 | return make.bind(null, this, x);
1095 | }
1096 | throw new Error('Unexpected arguments ' + JSON.stringify([x, ...values]));
1097 | };
1098 | })();
1099 |
1100 | // TODO: Figure out interpolation of charset after - as in `[a-${...}]`
1101 |
--------------------------------------------------------------------------------