├── .gitignore
├── .vscode
│   ├── extensions.json
│   ├── tasks.json
│   └── settings.json
├── .gitattributes
├── .github
│   └── workflows
│       ├── build.yml
│       └── pr.yml
├── .yo-rc.json
├── package.json
├── table-nonbinary-unicode-properties.html
├── gulpfile.js
├── LICENSE
├── table-unicode-general-category-values.html
├── README.md
├── table-binary-unicode-properties.html
├── table-unicode-script-values.html
└── spec.emu
/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules
2 | coverage
3 | docs
--------------------------------------------------------------------------------
/.vscode/extensions.json:
--------------------------------------------------------------------------------
1 | {
2 | "recommendations": [
3 | "rbuckton.ecmarkup-vscode"
4 | ]
5 | }
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | index.html -diff merge=ours
2 | spec.js -diff merge=ours
3 | spec.css -diff merge=ours
--------------------------------------------------------------------------------
/.vscode/tasks.json:
--------------------------------------------------------------------------------
1 | {
2 | // See https://go.microsoft.com/fwlink/?LinkId=733558
3 | // for the documentation about the tasks.json format
4 | "version": "2.0.0",
5 | "tasks": [
6 | {
7 | "type": "gulp",
8 | "task": "build",
9 | "group": {
10 | "kind": "build",
11 | "isDefault": true
12 | }
13 | }
14 | ]
15 | }
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "[markdown]": {
3 | "files.trimTrailingWhitespace": false
4 | },
5 | "[html]": {
6 | "editor.insertSpaces": true,
7 | "editor.tabSize": 2,
8 | },
9 | "[ecmarkup]": {
10 | "editor.insertSpaces": true,
11 | "editor.tabSize": 2,
12 | },
13 | "files.associations": {
14 | "*.html": "ecmarkup",
15 | "*.emu": "ecmarkup"
16 | }
17 | }
--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
1 | name: Publish Spec to gh-pages
2 | on:
3 | push:
4 | branches: [ main ]
5 | jobs:
6 | build:
7 | runs-on: ubuntu-latest
8 | steps:
9 | - uses: actions/checkout@v2
10 | - run: npm install --legacy-peer-deps
11 | - run: npm run compile
12 | - name: Deploy
13 | uses: JamesIves/github-pages-deploy-action@4.1.4
14 | with:
15 | branch: gh-pages
16 | folder: docs
17 | clean-exclude: |
18 | pr
19 |
--------------------------------------------------------------------------------
/.yo-rc.json:
--------------------------------------------------------------------------------
1 | {
2 | "generator-ecmascript-proposal": {
3 | "promptValues": {
4 | "hasChampion": true,
5 | "championName": "Ron Buckton",
6 | "championGithub": "rbuckton",
7 | "spec": "https://rbuckton.github.io/proposal-regexp-modifiers",
8 | "stage": "0",
9 | "sections": [
10 | "prior-art",
11 | "syntax",
12 | "semantics",
13 | "examples",
14 | "grammar",
15 | "references",
16 | "prior-discussion"
17 | ],
18 | "vscode": true,
19 | "build": "gulp",
20 | "githubWorkflowCI": true,
21 | "githubWorkflowPR": true
22 | }
23 | }
24 | }
25 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "proposal-regexp-modifiers",
3 | "version": "0.0.0",
4 | "private": true,
5 | "description": "Regular Expression Pattern Modifiers for ECMAScript",
6 | "homepage": "https://github.com/rbuckton/proposal-regexp-modifiers#readme",
7 | "author": {
8 | "name": "Ron Buckton",
9 | "email": "ron.buckton@microsoft.com"
10 | },
11 | "keywords": [
12 | "javascript",
13 | "ecmascript"
14 | ],
15 | "scripts": {
16 | "compile": "gulp build",
17 | "start": "gulp start"
18 | },
19 | "license": "SEE LICENSE IN https://tc39.github.io/ecma262/#sec-copyright-and-software-license",
20 | "devDependencies": {
21 | "@tc39/ecma262-biblio": "^2.0.2322",
22 | "del": "^6.0.0",
23 | "ecmarkup": "^12.1.0",
24 | "gulp": "^4.0.2",
25 | "gulp-emu": "^2.1.0",
26 | "gulp-live-server": "0.0.31",
27 | "gulp-rename": "^2.0.0"
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/table-nonbinary-unicode-properties.html:
--------------------------------------------------------------------------------
1 |
4 |
5 |
32 |
6 |
9 |
10 | Property name and aliases
7 | Canonical property name
8 |
11 |
14 | `General_Category`
12 | `General_Category`
13 |
15 |
17 | `gc`
16 |
18 |
21 | `Script`
19 | `Script`
20 |
22 |
24 | `sc`
23 |
25 |
28 | `Script_Extensions`
26 | `Script_Extensions`
27 |
29 |
31 | `scx`
30 |
4 |
5 |
289 |
6 |
9 |
10 | Property value and aliases
7 | Canonical property value
8 |
11 |
14 | `Cased_Letter`
12 | `Cased_Letter`
13 |
15 |
17 | `LC`
16 |
18 |
21 | `Close_Punctuation`
19 | `Close_Punctuation`
20 |
22 |
24 | `Pe`
23 |
25 |
28 | `Connector_Punctuation`
26 | `Connector_Punctuation`
27 |
29 |
31 | `Pc`
30 |
32 |
35 | `Control`
33 | `Control`
34 |
36 |
38 | `Cc`
37 |
39 |
41 | `cntrl`
40 |
42 |
45 | `Currency_Symbol`
43 | `Currency_Symbol`
44 |
46 |
48 | `Sc`
47 |
49 |
52 | `Dash_Punctuation`
50 | `Dash_Punctuation`
51 |
53 |
55 | `Pd`
54 |
56 |
59 | `Decimal_Number`
57 | `Decimal_Number`
58 |
60 |
62 | `Nd`
61 |
63 |
65 | `digit`
64 |
66 |
69 | `Enclosing_Mark`
67 | `Enclosing_Mark`
68 |
70 |
72 | `Me`
71 |
73 |
76 | `Final_Punctuation`
74 | `Final_Punctuation`
75 |
77 |
79 | `Pf`
78 |
80 |
83 | `Format`
81 | `Format`
82 |
84 |
86 | `Cf`
85 |
87 |
90 | `Initial_Punctuation`
88 | `Initial_Punctuation`
89 |
91 |
93 | `Pi`
92 |
94 |
97 | `Letter`
95 | `Letter`
96 |
98 |
100 | `L`
99 |
101 |
104 | `Letter_Number`
102 | `Letter_Number`
103 |
105 |
107 | `Nl`
106 |
108 |
111 | `Line_Separator`
109 | `Line_Separator`
110 |
112 |
114 | `Zl`
113 |
115 |
118 | `Lowercase_Letter`
116 | `Lowercase_Letter`
117 |
119 |
121 | `Ll`
120 |
122 |
125 | `Mark`
123 | `Mark`
124 |
126 |
128 | `M`
127 |
129 |
131 | `Combining_Mark`
130 |
132 |
135 | `Math_Symbol`
133 | `Math_Symbol`
134 |
136 |
138 | `Sm`
137 |
139 |
142 | `Modifier_Letter`
140 | `Modifier_Letter`
141 |
143 |
145 | `Lm`
144 |
146 |
149 | `Modifier_Symbol`
147 | `Modifier_Symbol`
148 |
150 |
152 | `Sk`
151 |
153 |
156 | `Nonspacing_Mark`
154 | `Nonspacing_Mark`
155 |
157 |
159 | `Mn`
158 |
160 |
163 | `Number`
161 | `Number`
162 |
164 |
166 | `N`
165 |
167 |
170 | `Open_Punctuation`
168 | `Open_Punctuation`
169 |
171 |
173 | `Ps`
172 |
174 |
177 | `Other`
175 | `Other`
176 |
178 |
180 | `C`
179 |
181 |
184 | `Other_Letter`
182 | `Other_Letter`
183 |
185 |
187 | `Lo`
186 |
188 |
191 | `Other_Number`
189 | `Other_Number`
190 |
192 |
194 | `No`
193 |
195 |
198 | `Other_Punctuation`
196 | `Other_Punctuation`
197 |
199 |
201 | `Po`
200 |
202 |
205 | `Other_Symbol`
203 | `Other_Symbol`
204 |
206 |
208 | `So`
207 |
209 |
212 | `Paragraph_Separator`
210 | `Paragraph_Separator`
211 |
213 |
215 | `Zp`
214 |
216 |
219 | `Private_Use`
217 | `Private_Use`
218 |
220 |
222 | `Co`
221 |
223 |
226 | `Punctuation`
224 | `Punctuation`
225 |
227 |
229 | `P`
228 |
230 |
232 | `punct`
231 |
233 |
236 | `Separator`
234 | `Separator`
235 |
237 |
239 | `Z`
238 |
240 |
243 | `Space_Separator`
241 | `Space_Separator`
242 |
244 |
246 | `Zs`
245 |
247 |
250 | `Spacing_Mark`
248 | `Spacing_Mark`
249 |
251 |
253 | `Mc`
252 |
254 |
257 | `Surrogate`
255 | `Surrogate`
256 |
258 |
260 | `Cs`
259 |
261 |
264 | `Symbol`
262 | `Symbol`
263 |
265 |
267 | `S`
266 |
268 |
271 | `Titlecase_Letter`
269 | `Titlecase_Letter`
270 |
272 |
274 | `Lt`
273 |
275 |
278 | `Unassigned`
276 | `Unassigned`
277 |
279 |
281 | `Cn`
280 |
282 |
285 | `Uppercase_Letter`
283 | `Uppercase_Letter`
284 |
286 |
288 | `Lu`
287 |
4 |
5 |
358 |
6 |
9 |
10 | Property name and aliases
7 | Canonical property name
8 |
11 |
14 | `ASCII`
12 | `ASCII`
13 |
15 |
18 | `ASCII_Hex_Digit`
16 | `ASCII_Hex_Digit`
17 |
19 |
21 | `AHex`
20 |
22 |
25 | `Alphabetic`
23 | `Alphabetic`
24 |
26 |
28 | `Alpha`
27 |
29 |
32 | `Any`
30 | `Any`
31 |
33 |
36 | `Assigned`
34 | `Assigned`
35 |
37 |
40 | `Bidi_Control`
38 | `Bidi_Control`
39 |
41 |
43 | `Bidi_C`
42 |
44 |
47 | `Bidi_Mirrored`
45 | `Bidi_Mirrored`
46 |
48 |
50 | `Bidi_M`
49 |
51 |
54 | `Case_Ignorable`
52 | `Case_Ignorable`
53 |
55 |
57 | `CI`
56 |
58 |
61 | `Cased`
59 | `Cased`
60 |
62 |
65 | `Changes_When_Casefolded`
63 | `Changes_When_Casefolded`
64 |
66 |
68 | `CWCF`
67 |
69 |
72 | `Changes_When_Casemapped`
70 | `Changes_When_Casemapped`
71 |
73 |
75 | `CWCM`
74 |
76 |
79 | `Changes_When_Lowercased`
77 | `Changes_When_Lowercased`
78 |
80 |
82 | `CWL`
81 |
83 |
86 | `Changes_When_NFKC_Casefolded`
84 | `Changes_When_NFKC_Casefolded`
85 |
87 |
89 | `CWKCF`
88 |
90 |
93 | `Changes_When_Titlecased`
91 | `Changes_When_Titlecased`
92 |
94 |
96 | `CWT`
95 |
97 |
100 | `Changes_When_Uppercased`
98 | `Changes_When_Uppercased`
99 |
101 |
103 | `CWU`
102 |
104 |
107 | `Dash`
105 | `Dash`
106 |
108 |
111 | `Default_Ignorable_Code_Point`
109 | `Default_Ignorable_Code_Point`
110 |
112 |
114 | `DI`
113 |
115 |
118 | `Deprecated`
116 | `Deprecated`
117 |
119 |
121 | `Dep`
120 |
122 |
125 | `Diacritic`
123 | `Diacritic`
124 |
126 |
128 | `Dia`
127 |
129 |
132 | `Emoji`
130 | `Emoji`
131 |
133 |
136 | `Emoji_Component`
134 | `Emoji_Component`
135 |
137 |
139 | `EComp`
138 |
140 |
143 | `Emoji_Modifier`
141 | `Emoji_Modifier`
142 |
144 |
146 | `EMod`
145 |
147 |
150 | `Emoji_Modifier_Base`
148 | `Emoji_Modifier_Base`
149 |
151 |
153 | `EBase`
152 |
154 |
157 | `Emoji_Presentation`
155 | `Emoji_Presentation`
156 |
158 |
160 | `EPres`
159 |
161 |
164 | `Extended_Pictographic`
162 | `Extended_Pictographic`
163 |
165 |
167 | `ExtPict`
166 |
168 |
171 | `Extender`
169 | `Extender`
170 |
172 |
174 | `Ext`
173 |
175 |
178 | `Grapheme_Base`
176 | `Grapheme_Base`
177 |
179 |
181 | `Gr_Base`
180 |
182 |
185 | `Grapheme_Extend`
183 | `Grapheme_Extend`
184 |
186 |
188 | `Gr_Ext`
187 |
189 |
192 | `Hex_Digit`
190 | `Hex_Digit`
191 |
193 |
195 | `Hex`
194 |
196 |
199 | `IDS_Binary_Operator`
197 | `IDS_Binary_Operator`
198 |
200 |
202 | `IDSB`
201 |
203 |
206 | `IDS_Trinary_Operator`
204 | `IDS_Trinary_Operator`
205 |
207 |
209 | `IDST`
208 |
210 |
213 | `ID_Continue`
211 | `ID_Continue`
212 |
214 |
216 | `IDC`
215 |
217 |
220 | `ID_Start`
218 | `ID_Start`
219 |
221 |
223 | `IDS`
222 |
224 |
227 | `Ideographic`
225 | `Ideographic`
226 |
228 |
230 | `Ideo`
229 |
231 |
234 | `Join_Control`
232 | `Join_Control`
233 |
235 |
237 | `Join_C`
236 |
238 |
241 | `Logical_Order_Exception`
239 | `Logical_Order_Exception`
240 |
242 |
244 | `LOE`
243 |
245 |
248 | `Lowercase`
246 | `Lowercase`
247 |
249 |
251 | `Lower`
250 |
252 |
255 | `Math`
253 | `Math`
254 |
256 |
259 | `Noncharacter_Code_Point`
257 | `Noncharacter_Code_Point`
258 |
260 |
262 | `NChar`
261 |
263 |
266 | `Pattern_Syntax`
264 | `Pattern_Syntax`
265 |
267 |
269 | `Pat_Syn`
268 |
270 |
273 | `Pattern_White_Space`
271 | `Pattern_White_Space`
272 |
274 |
276 | `Pat_WS`
275 |
277 |
280 | `Quotation_Mark`
278 | `Quotation_Mark`
279 |
281 |
283 | `QMark`
282 |
284 |
287 | `Radical`
285 | `Radical`
286 |
288 |
291 | `Regional_Indicator`
289 | `Regional_Indicator`
290 |
292 |
294 | `RI`
293 |
295 |
298 | `Sentence_Terminal`
296 | `Sentence_Terminal`
297 |
299 |
301 | `STerm`
300 |
302 |
305 | `Soft_Dotted`
303 | `Soft_Dotted`
304 |
306 |
308 | `SD`
307 |
309 |
312 | `Terminal_Punctuation`
310 | `Terminal_Punctuation`
311 |
313 |
315 | `Term`
314 |
316 |
319 | `Unified_Ideograph`
317 | `Unified_Ideograph`
318 |
320 |
322 | `UIdeo`
321 |
323 |
326 | `Uppercase`
324 | `Uppercase`
325 |
327 |
329 | `Upper`
328 |
330 |
333 | `Variation_Selector`
331 | `Variation_Selector`
332 |
334 |
336 | `VS`
335 |
337 |
340 | `White_Space`
338 | `White_Space`
339 |
341 |
343 | `space`
342 |
344 |
347 | `XID_Continue`
345 | `XID_Continue`
346 |
348 |
350 | `XIDC`
349 |
351 |
354 | `XID_Start`
352 | `XID_Start`
353 |
355 |
357 | `XIDS`
356 |
4 |
5 |
1091 |
6 |
9 |
10 | Property value and aliases
7 | Canonical property value
8 |
11 |
14 | `Adlam`
12 | `Adlam`
13 |
15 |
17 | `Adlm`
16 |
18 |
21 | `Ahom`
19 | `Ahom`
20 |
22 |
25 | `Anatolian_Hieroglyphs`
23 | `Anatolian_Hieroglyphs`
24 |
26 |
28 | `Hluw`
27 |
29 |
32 | `Arabic`
30 | `Arabic`
31 |
33 |
35 | `Arab`
34 |
36 |
39 | `Armenian`
37 | `Armenian`
38 |
40 |
42 | `Armn`
41 |
43 |
46 | `Avestan`
44 | `Avestan`
45 |
47 |
49 | `Avst`
48 |
50 |
53 | `Balinese`
51 | `Balinese`
52 |
54 |
56 | `Bali`
55 |
57 |
60 | `Bamum`
58 | `Bamum`
59 |
61 |
63 | `Bamu`
62 |
64 |
67 | `Bassa_Vah`
65 | `Bassa_Vah`
66 |
68 |
70 | `Bass`
69 |
71 |
74 | `Batak`
72 | `Batak`
73 |
75 |
77 | `Batk`
76 |
78 |
81 | `Bengali`
79 | `Bengali`
80 |
82 |
84 | `Beng`
83 |
85 |
88 | `Bhaiksuki`
86 | `Bhaiksuki`
87 |
89 |
91 | `Bhks`
90 |
92 |
95 | `Bopomofo`
93 | `Bopomofo`
94 |
96 |
98 | `Bopo`
97 |
99 |
102 | `Brahmi`
100 | `Brahmi`
101 |
103 |
105 | `Brah`
104 |
106 |
109 | `Braille`
107 | `Braille`
108 |
110 |
112 | `Brai`
111 |
113 |
116 | `Buginese`
114 | `Buginese`
115 |
117 |
119 | `Bugi`
118 |
120 |
123 | `Buhid`
121 | `Buhid`
122 |
124 |
126 | `Buhd`
125 |
127 |
130 | `Canadian_Aboriginal`
128 | `Canadian_Aboriginal`
129 |
131 |
133 | `Cans`
132 |
134 |
137 | `Carian`
135 | `Carian`
136 |
138 |
140 | `Cari`
139 |
141 |
144 | `Caucasian_Albanian`
142 | `Caucasian_Albanian`
143 |
145 |
147 | `Aghb`
146 |
148 |
151 | `Chakma`
149 | `Chakma`
150 |
152 |
154 | `Cakm`
153 |
155 |
158 | `Cham`
156 | `Cham`
157 |
159 |
162 | `Chorasmian`
160 | `Chorasmian`
161 |
163 |
165 | `Chrs`
164 |
166 |
169 | `Cherokee`
167 | `Cherokee`
168 |
170 |
172 | `Cher`
171 |
173 |
176 | `Common`
174 | `Common`
175 |
177 |
179 | `Zyyy`
178 |
180 |
183 | `Coptic`
181 | `Coptic`
182 |
184 |
186 | `Copt`
185 |
187 |
189 | `Qaac`
188 |
190 |
193 | `Cuneiform`
191 | `Cuneiform`
192 |
194 |
196 | `Xsux`
195 |
197 |
200 | `Cypriot`
198 | `Cypriot`
199 |
201 |
203 | `Cprt`
202 |
204 |
207 | `Cyrillic`
205 | `Cyrillic`
206 |
208 |
210 | `Cyrl`
209 |
211 |
214 | `Deseret`
212 | `Deseret`
213 |
215 |
217 | `Dsrt`
216 |
218 |
221 | `Devanagari`
219 | `Devanagari`
220 |
222 |
224 | `Deva`
223 |
225 |
228 | `Dives_Akuru`
226 | `Dives_Akuru`
227 |
229 |
231 | `Diak`
230 |
232 |
235 | `Dogra`
233 | `Dogra`
234 |
236 |
238 | `Dogr`
237 |
239 |
242 | `Duployan`
240 | `Duployan`
241 |
243 |
245 | `Dupl`
244 |
246 |
249 | `Egyptian_Hieroglyphs`
247 | `Egyptian_Hieroglyphs`
248 |
250 |
252 | `Egyp`
251 |
253 |
256 | `Elbasan`
254 | `Elbasan`
255 |
257 |
259 | `Elba`
258 |
260 |
263 | `Elymaic`
261 | `Elymaic`
262 |
264 |
266 | `Elym`
265 |
267 |
270 | `Ethiopic`
268 | `Ethiopic`
269 |
271 |
273 | `Ethi`
272 |
274 |
277 | `Georgian`
275 | `Georgian`
276 |
278 |
280 | `Geor`
279 |
281 |
284 | `Glagolitic`
282 | `Glagolitic`
283 |
285 |
287 | `Glag`
286 |
288 |
291 | `Gothic`
289 | `Gothic`
290 |
292 |
294 | `Goth`
293 |
295 |
298 | `Grantha`
296 | `Grantha`
297 |
299 |
301 | `Gran`
300 |
302 |
305 | `Greek`
303 | `Greek`
304 |
306 |
308 | `Grek`
307 |
309 |
312 | `Gujarati`
310 | `Gujarati`
311 |
313 |
315 | `Gujr`
314 |
316 |
319 | `Gunjala_Gondi`
317 | `Gunjala_Gondi`
318 |
320 |
322 | `Gong`
321 |
323 |
326 | `Gurmukhi`
324 | `Gurmukhi`
325 |
327 |
329 | `Guru`
328 |
330 |
333 | `Han`
331 | `Han`
332 |
334 |
336 | `Hani`
335 |
337 |
340 | `Hangul`
338 | `Hangul`
339 |
341 |
343 | `Hang`
342 |
344 |
347 | `Hanifi_Rohingya`
345 | `Hanifi_Rohingya`
346 |
348 |
350 | `Rohg`
349 |
351 |
354 | `Hanunoo`
352 | `Hanunoo`
353 |
355 |
357 | `Hano`
356 |
358 |
361 | `Hatran`
359 | `Hatran`
360 |
362 |
364 | `Hatr`
363 |
365 |
368 | `Hebrew`
366 | `Hebrew`
367 |
369 |
371 | `Hebr`
370 |
372 |
375 | `Hiragana`
373 | `Hiragana`
374 |
376 |
378 | `Hira`
377 |
379 |
382 | `Imperial_Aramaic`
380 | `Imperial_Aramaic`
381 |
383 |
385 | `Armi`
384 |
386 |
389 | `Inherited`
387 | `Inherited`
388 |
390 |
392 | `Zinh`
391 |
393 |
395 | `Qaai`
394 |
396 |
399 | `Inscriptional_Pahlavi`
397 | `Inscriptional_Pahlavi`
398 |
400 |
402 | `Phli`
401 |
403 |
406 | `Inscriptional_Parthian`
404 | `Inscriptional_Parthian`
405 |
407 |
409 | `Prti`
408 |
410 |
413 | `Javanese`
411 | `Javanese`
412 |
414 |
416 | `Java`
415 |
417 |
420 | `Kaithi`
418 | `Kaithi`
419 |
421 |
423 | `Kthi`
422 |
424 |
427 | `Kannada`
425 | `Kannada`
426 |
428 |
430 | `Knda`
429 |
431 |
434 | `Katakana`
432 | `Katakana`
433 |
435 |
437 | `Kana`
436 |
438 |
441 | `Kayah_Li`
439 | `Kayah_Li`
440 |
442 |
444 | `Kali`
443 |
445 |
448 | `Kharoshthi`
446 | `Kharoshthi`
447 |
449 |
451 | `Khar`
450 |
452 |
455 | `Khitan_Small_Script`
453 | `Khitan_Small_Script`
454 |
456 |
458 | `Kits`
457 |
459 |
462 | `Khmer`
460 | `Khmer`
461 |
463 |
465 | `Khmr`
464 |
466 |
469 | `Khojki`
467 | `Khojki`
468 |
470 |
472 | `Khoj`
471 |
473 |
476 | `Khudawadi`
474 | `Khudawadi`
475 |
477 |
479 | `Sind`
478 |
480 |
483 | `Lao`
481 | `Lao`
482 |
484 |
486 | `Laoo`
485 |
487 |
490 | `Latin`
488 | `Latin`
489 |
491 |
493 | `Latn`
492 |
494 |
497 | `Lepcha`
495 | `Lepcha`
496 |
498 |
500 | `Lepc`
499 |
501 |
504 | `Limbu`
502 | `Limbu`
503 |
505 |
507 | `Limb`
506 |
508 |
511 | `Linear_A`
509 | `Linear_A`
510 |
512 |
514 | `Lina`
513 |
515 |
518 | `Linear_B`
516 | `Linear_B`
517 |
519 |
521 | `Linb`
520 |
522 |
525 | `Lisu`
523 | `Lisu`
524 |
526 |
529 | `Lycian`
527 | `Lycian`
528 |
530 |
532 | `Lyci`
531 |
533 |
536 | `Lydian`
534 | `Lydian`
535 |
537 |
539 | `Lydi`
538 |
540 |
543 | `Mahajani`
541 | `Mahajani`
542 |
544 |
546 | `Mahj`
545 |
547 |
550 | `Makasar`
548 | `Makasar`
549 |
551 |
553 | `Maka`
552 |
554 |
557 | `Malayalam`
555 | `Malayalam`
556 |
558 |
560 | `Mlym`
559 |
561 |
564 | `Mandaic`
562 | `Mandaic`
563 |
565 |
567 | `Mand`
566 |
568 |
571 | `Manichaean`
569 | `Manichaean`
570 |
572 |
574 | `Mani`
573 |
575 |
578 | `Marchen`
576 | `Marchen`
577 |
579 |
581 | `Marc`
580 |
582 |
585 | `Medefaidrin`
583 | `Medefaidrin`
584 |
586 |
588 | `Medf`
587 |
589 |
592 | `Masaram_Gondi`
590 | `Masaram_Gondi`
591 |
593 |
595 | `Gonm`
594 |
596 |
599 | `Meetei_Mayek`
597 | `Meetei_Mayek`
598 |
600 |
602 | `Mtei`
601 |
603 |
606 | `Mende_Kikakui`
604 | `Mende_Kikakui`
605 |
607 |
609 | `Mend`
608 |
610 |
613 | `Meroitic_Cursive`
611 | `Meroitic_Cursive`
612 |
614 |
616 | `Merc`
615 |
617 |
620 | `Meroitic_Hieroglyphs`
618 | `Meroitic_Hieroglyphs`
619 |
621 |
623 | `Mero`
622 |
624 |
627 | `Miao`
625 | `Miao`
626 |
628 |
630 | `Plrd`
629 |
631 |
634 | `Modi`
632 | `Modi`
633 |
635 |
638 | `Mongolian`
636 | `Mongolian`
637 |
639 |
641 | `Mong`
640 |
642 |
645 | `Mro`
643 | `Mro`
644 |
646 |
648 | `Mroo`
647 |
649 |
652 | `Multani`
650 | `Multani`
651 |
653 |
655 | `Mult`
654 |
656 |
659 | `Myanmar`
657 | `Myanmar`
658 |
660 |
662 | `Mymr`
661 |
663 |
666 | `Nabataean`
664 | `Nabataean`
665 |
667 |
669 | `Nbat`
668 |
670 |
673 | `Nandinagari`
671 | `Nandinagari`
672 |
674 |
676 | `Nand`
675 |
677 |
680 | `New_Tai_Lue`
678 | `New_Tai_Lue`
679 |
681 |
683 | `Talu`
682 |
684 |
687 | `Newa`
685 | `Newa`
686 |
688 |
691 | `Nko`
689 | `Nko`
690 |
692 |
694 | `Nkoo`
693 |
695 |
698 | `Nushu`
696 | `Nushu`
697 |
699 |
701 | `Nshu`
700 |
702 |
705 | `Nyiakeng_Puachue_Hmong`
703 | `Nyiakeng_Puachue_Hmong`
704 |
706 |
708 | `Hmnp`
707 |
709 |
712 | `Ogham`
710 | `Ogham`
711 |
713 |
715 | `Ogam`
714 |
716 |
719 | `Ol_Chiki`
717 | `Ol_Chiki`
718 |
720 |
722 | `Olck`
721 |
723 |
726 | `Old_Hungarian`
724 | `Old_Hungarian`
725 |
727 |
729 | `Hung`
728 |
730 |
733 | `Old_Italic`
731 | `Old_Italic`
732 |
734 |
736 | `Ital`
735 |
737 |
740 | `Old_North_Arabian`
738 | `Old_North_Arabian`
739 |
741 |
743 | `Narb`
742 |
744 |
747 | `Old_Permic`
745 | `Old_Permic`
746 |
748 |
750 | `Perm`
749 |
751 |
754 | `Old_Persian`
752 | `Old_Persian`
753 |
755 |
757 | `Xpeo`
756 |
758 |
761 | `Old_Sogdian`
759 | `Old_Sogdian`
760 |
762 |
764 | `Sogo`
763 |
765 |
768 | `Old_South_Arabian`
766 | `Old_South_Arabian`
767 |
769 |
771 | `Sarb`
770 |
772 |
775 | `Old_Turkic`
773 | `Old_Turkic`
774 |
776 |
778 | `Orkh`
777 |
779 |
782 | `Oriya`
780 | `Oriya`
781 |
783 |
785 | `Orya`
784 |
786 |
789 | `Osage`
787 | `Osage`
788 |
790 |
792 | `Osge`
791 |
793 |
796 | `Osmanya`
794 | `Osmanya`
795 |
797 |
799 | `Osma`
798 |
800 |
803 | `Pahawh_Hmong`
801 | `Pahawh_Hmong`
802 |
804 |
806 | `Hmng`
805 |
807 |
810 | `Palmyrene`
808 | `Palmyrene`
809 |
811 |
813 | `Palm`
812 |
814 |
817 | `Pau_Cin_Hau`
815 | `Pau_Cin_Hau`
816 |
818 |
820 | `Pauc`
819 |
821 |
824 | `Phags_Pa`
822 | `Phags_Pa`
823 |
825 |
827 | `Phag`
826 |
828 |
831 | `Phoenician`
829 | `Phoenician`
830 |
832 |
834 | `Phnx`
833 |
835 |
838 | `Psalter_Pahlavi`
836 | `Psalter_Pahlavi`
837 |
839 |
841 | `Phlp`
840 |
842 |
845 | `Rejang`
843 | `Rejang`
844 |
846 |
848 | `Rjng`
847 |
849 |
852 | `Runic`
850 | `Runic`
851 |
853 |
855 | `Runr`
854 |
856 |
859 | `Samaritan`
857 | `Samaritan`
858 |
860 |
862 | `Samr`
861 |
863 |
866 | `Saurashtra`
864 | `Saurashtra`
865 |
867 |
869 | `Saur`
868 |
870 |
873 | `Sharada`
871 | `Sharada`
872 |
874 |
876 | `Shrd`
875 |
877 |
880 | `Shavian`
878 | `Shavian`
879 |
881 |
883 | `Shaw`
882 |
884 |
887 | `Siddham`
885 | `Siddham`
886 |
888 |
890 | `Sidd`
889 |
891 |
894 | `SignWriting`
892 | `SignWriting`
893 |
895 |
897 | `Sgnw`
896 |
898 |
901 | `Sinhala`
899 | `Sinhala`
900 |
902 |
904 | `Sinh`
903 |
905 |
908 | `Sogdian`
906 | `Sogdian`
907 |
909 |
911 | `Sogd`
910 |
912 |
915 | `Sora_Sompeng`
913 | `Sora_Sompeng`
914 |
916 |
918 | `Sora`
917 |
919 |
922 | `Soyombo`
920 | `Soyombo`
921 |
923 |
925 | `Soyo`
924 |
926 |
929 | `Sundanese`
927 | `Sundanese`
928 |
930 |
932 | `Sund`
931 |
933 |
936 | `Syloti_Nagri`
934 | `Syloti_Nagri`
935 |
937 |
939 | `Sylo`
938 |
940 |
943 | `Syriac`
941 | `Syriac`
942 |
944 |
946 | `Syrc`
945 |
947 |
950 | `Tagalog`
948 | `Tagalog`
949 |
951 |
953 | `Tglg`
952 |
954 |
957 | `Tagbanwa`
955 | `Tagbanwa`
956 |
958 |
960 | `Tagb`
959 |
961 |
964 | `Tai_Le`
962 | `Tai_Le`
963 |
965 |
967 | `Tale`
966 |
968 |
971 | `Tai_Tham`
969 | `Tai_Tham`
970 |
972 |
974 | `Lana`
973 |
975 |
978 | `Tai_Viet`
976 | `Tai_Viet`
977 |
979 |
981 | `Tavt`
980 |
982 |
985 | `Takri`
983 | `Takri`
984 |
986 |
988 | `Takr`
987 |
989 |
992 | `Tamil`
990 | `Tamil`
991 |
993 |
995 | `Taml`
994 |
996 |
999 | `Tangut`
997 | `Tangut`
998 |
1000 |
1002 | `Tang`
1001 |
1003 |
1006 | `Telugu`
1004 | `Telugu`
1005 |
1007 |
1009 | `Telu`
1008 |
1010 |
1013 | `Thaana`
1011 | `Thaana`
1012 |
1014 |
1016 | `Thaa`
1015 |
1017 |
1020 | `Thai`
1018 | `Thai`
1019 |
1021 |
1024 | `Tibetan`
1022 | `Tibetan`
1023 |
1025 |
1027 | `Tibt`
1026 |
1028 |
1031 | `Tifinagh`
1029 | `Tifinagh`
1030 |
1032 |
1034 | `Tfng`
1033 |
1035 |
1038 | `Tirhuta`
1036 | `Tirhuta`
1037 |
1039 |
1041 | `Tirh`
1040 |
1042 |
1045 | `Ugaritic`
1043 | `Ugaritic`
1044 |
1046 |
1048 | `Ugar`
1047 |
1049 |
1052 | `Vai`
1050 | `Vai`
1051 |
1053 |
1055 | `Vaii`
1054 |
1056 |
1059 | `Wancho`
1057 | `Wancho`
1058 |
1060 |
1062 | `Wcho`
1061 |
1063 |
1066 | `Warang_Citi`
1064 | `Warang_Citi`
1065 |
1067 |
1069 | `Wara`
1068 |
1070 |
1073 | `Yezidi`
1071 | `Yezidi`
1072 |
1074 |
1076 | `Yezi`
1075 |
1077 |
1080 | `Yi`
1078 | `Yi`
1079 |
1081 |
1083 | `Yiii`
1082 |
1084 |
1087 | `Zanabazar_Square`
1085 | `Zanabazar_Square`
1086 |
1088 |
1090 | `Zanb`
1089 |
6 | title: Regular Expression Pattern Modifiers for ECMAScript
7 | stage: 3
8 | contributors: Ron Buckton, Ecma International
9 |
10 |
11 |
See the proposal repository for background material and discussion.
16 |A RegExp object contains a regular expression and the associated flags.
24 |The form and functionality of regular expressions is modelled after the regular expression facility in the Perl 5 programming language.
26 |The RegExp constructor applies the following grammar to the input pattern String. An error occurs if the grammar cannot interpret the String as an expansion of |Pattern|.
31 |Each `\\u` |HexTrailSurrogate| for which the choice of associated `u` |HexLeadSurrogate| is ambiguous shall be associated with the nearest possible `u` |HexLeadSurrogate| that would otherwise have no corresponding `\\u` |HexTrailSurrogate|.
144 |A number of productions in this section are given alternative definitions in section
The descriptions below use the following aliases:
242 |Furthermore, the descriptions below use the following internal data structures:
269 |A Modifiers Record is a Record value used to encapsulate information about the regular expression flags that apply to a subpattern.
307 |Modifiers Records have the fields listed in
| Field Name | Value | Meaning |
|---|---|---|
| [[DotAll]] | a Boolean | Indicates whether the *"s"* flag is currently enabled. |
| [[IgnoreCase]] | a Boolean | Indicates whether the *"i"* flag is currently enabled. |
| [[Multiline]] | a Boolean | Indicates whether the *"m"* flag is currently enabled. |
A Pattern compiles to an Abstract Closure value. RegExpBuiltinExec can then apply this procedure to a String and an offset within the String to determine whether the pattern would match starting at exactly that offset within the String, and, if it does match, what the values of the capturing parentheses would be. The algorithms in
This section is amended in B.1.2.4.
374 |The `|` regular expression operator separates two alternatives. The pattern first tries to match the left |Alternative| (followed by the sequel of the regular expression); if it fails, it tries to match the right |Disjunction| (followed by the sequel of the regular expression). If the left |Alternative|, the right |Disjunction|, and the sequel all have choice points, all choices in the sequel are tried before moving on to the next choice in the left |Alternative|. If choices in the left |Alternative| are exhausted, the right |Disjunction| is tried instead of the left |Alternative|. Any capturing parentheses inside a portion of the pattern skipped by `|` produce *undefined* values instead of Strings. Thus, for example,
390 |/a|ab/.exec("abc")
391 | returns the result *"a"* and not *"ab"*. Moreover,
392 |/((a)|(ab))((c)|(bc))/.exec("abc")
393 | returns the array
394 |["abc", "a", "a", undefined, "bc", undefined, "bc"]
395 | and not
396 |["abc", "ab", undefined, "ab", "c", "c", undefined]
397 | The order in which the two alternatives are tried is independent of the value of _direction_.
398 |Consecutive |Term|s try to simultaneously match consecutive portions of _Input_. When _direction_ is ~forward~, if the left |Alternative|, the right |Term|, and the sequel of the regular expression all have choice points, all choices in the sequel are tried before moving on to the next choice in the right |Term|, and all choices in the right |Term| are tried before moving on to the next choice in the left |Alternative|. When _direction_ is ~backward~, the evaluation order of |Alternative| and |Term| are reversed.
432 |The resulting Matcher is independent of _direction_.
441 |This section is amended in B.1.2.5.
470 |Even when the `y` flag is used with a pattern, `^` always matches only at the beginning of _Input_, or (if _modifiers_.[[Multiline]] is *true*) at the beginning of a line.
This section is amended in B.1.2.6.
607 |An escape sequence of the form `\\` followed by a non-zero decimal number _n_ matches the result of the _n_th set of capturing parentheses (
Parentheses of the form `(` |Disjunction| `)` serve both to group the components of the |Disjunction| pattern together and to save the result of the match. The result can be used either in a backreference (`\\` followed by a non-zero decimal number), referenced in a replace String, or returned as part of an array from the regular expression matching Abstract Closure. To inhibit the capturing behaviour of parentheses, use the form `(?:` |Disjunction| `)` instead.
793 |The form `(?=` |Disjunction| `)` specifies a zero-width positive lookahead. In order for it to succeed, the pattern inside |Disjunction| must match at the current position, but the current position is not advanced before matching the sequel. If |Disjunction| can match at the current position in several ways, only the first one is tried. Unlike other regular expression operators, there is no backtracking into a `(?=` form (this unusual behaviour is inherited from Perl). This only matters when the |Disjunction| contains capturing parentheses and the sequel of the pattern contains backreferences to those captures.
796 |For example,
797 |/(?=(a+))/.exec("baaabac")
798 | matches the empty String immediately after the first `b` and therefore returns the array:
799 |["", "aaa"]
800 | To illustrate the lack of backtracking into the lookahead, consider:
801 |/(?=(a+))a*b\1/.exec("baaabac")
802 | This expression returns
803 |["aba", "a"]
804 | and not:
805 |["aaaba", "a"]
806 | The form `(?!` |Disjunction| `)` specifies a zero-width negative lookahead. In order for it to succeed, the pattern inside |Disjunction| must fail to match at the current position. The current position is not advanced before matching the sequel. |Disjunction| can contain capturing parentheses, but backreferences to them only make sense from within |Disjunction| itself. Backreferences to these capturing parentheses from elsewhere in the pattern always return *undefined* because the negative lookahead must fail for the pattern to succeed. For example,
809 |/(.*?)a(?!(a+)b\2c)\2(.*)/.exec("baaabaac")
810 | looks for an `a` not immediately followed by some positive number n of `a`'s, a `b`, another n `a`'s (specified by the first `\\2`) and a `c`. The second `\\2` is outside the negative lookahead, so it matches against *undefined* and therefore always succeeds. The whole expression returns the array:
811 |["baaabaac", "ba", undefined, "abaac"]
812 | In case-insignificant matches when _Unicode_ is *true*, all characters are implicitly case-folded using the simple mapping provided by the Unicode standard immediately before they are compared. The simple mapping always maps to a single code point, so it does not map, for example, `ß` (U+00DF) to `SS`. It may however map a code point outside the Basic Latin range to a character within, for example, `ſ` (U+017F) to `s`. Such characters are not mapped if _Unicode_ is *false*. This prevents Unicode code points such as U+017F and U+212A from matching regular expressions such as `/[a-z]/i`, but they will match `/[a-z]/ui`.
815 |This section is amended in
|ClassRanges| can expand into a single |ClassAtom| and/or ranges of two |ClassAtom| separated by dashes. In the latter case the |ClassRanges| includes all characters between the first |ClassAtom| and the second |ClassAtom|, inclusive; an error occurs if either |ClassAtom| does not represent a single character (for example, if one is \w) or if the first |ClassAtom|'s character value is greater than the second |ClassAtom|'s character value.
866 |Even if the pattern ignores case, the case of the two ends of a range is significant in determining which characters belong to the range. Thus, for example, the pattern `/[E-F]/i` matches only the letters `E`, `F`, `e`, and `f`, while the pattern `/[E-f]/i` matches all upper and lower-case letters in the Unicode Basic Latin block as well as the symbols `[`, `\\`, `]`, `^`, `_`, and <code>\`</code>.
A `-` character can be treated literally or it can denote a range. It is treated literally if it is the first or last character of |ClassRanges|, the beginning or end limit of a range specification, or immediately follows a range specification.
872 |A |ClassAtom| can use any of the escape sequences that are allowed in the rest of the regular expression except for `\\b`, `\\B`, and backreferences. Inside a |CharacterClass|, `\\b` means the backspace character, while `\\B` and backreferences raise errors. Using a backreference inside a |ClassAtom| causes an error.
901 |The syntax of
This alternative pattern grammar and semantics only changes the syntax and semantics of BMP patterns. The following grammar extensions include productions parameterized with the [UnicodeMode] parameter. However, none of these extensions change the syntax of Unicode patterns recognized when parsing with the [UnicodeMode] parameter present on the goal symbol.
1016 |When the same left-hand side occurs with both [+UnicodeMode] and [\~UnicodeMode] guards, it is to control the disambiguation priority.
1105 |