├── .gitignore
├── tests
├── runtests
├── RegExpTest.js
├── NFATest.js
├── parseTest.js
├── KitTest.js
└── testData.js
├── README.md
├── LICENSE
├── src
├── NFA.js
├── Kit.js
├── RegExp.js
├── visualize.js
└── parse.js
└── index.html
/.gitignore:
--------------------------------------------------------------------------------
1 | *.swp
2 |
--------------------------------------------------------------------------------
/tests/runtests:
--------------------------------------------------------------------------------
#!/bin/bash
# Run every JavaScript file that sits next to this script with node.
# Exits non-zero when at least one of them fails, so a failing test file
# is no longer masked by a later one that succeeds.

# Resolve the glob relative to the script, not the caller's working directory.
cd "$(dirname "$0")" || exit 1

status=0
for i in *.js;do
  node "$i" || status=1
done;
exit $status
6 |
--------------------------------------------------------------------------------
/tests/RegExpTest.js:
--------------------------------------------------------------------------------
1 | if (typeof define !== 'function') var define = require('amdefine')(module);
define(['../src/Kit','../src/RegExp','./testData','assert'],function (K,MyRegExp,testData,assert) {
  // Every case is [nativeRegExp, input | [inputs]]. The mock RegExp must
  // produce the same exec() result as the native one for each input.
  var cases=testData.reMatchCases;

  cases.forEach(function (testCase) {
    var nativeRe=testCase[0];
    var inputs=testCase[1];
    if (typeof inputs==='string') inputs=[inputs];
    var mockRe=new MyRegExp(nativeRe.source,nativeRe);

    inputs.forEach(function (input) {
      var expected=nativeRe.exec(input);
      var actual=mockRe.exec(input);
      try {
        assert.deepEqual(actual,expected,nativeRe);
      } catch (err) {
        // Re-run the failing case with the debug option set, log both
        // results, then let the original assertion error propagate.
        nativeRe.debug=true;
        mockRe=new MyRegExp(nativeRe.source,nativeRe);
        actual=mockRe.exec(input);
        K.log(nativeRe,actual,expected);
        throw err;
      }
    });
  });

  console.log('RegExp Test OK');

});
27 |
28 |
--------------------------------------------------------------------------------
/tests/NFATest.js:
--------------------------------------------------------------------------------
1 | if (typeof define !== 'function') var define = require('amdefine')(module);
2 | define(['../src/Kit','../src/NFA','assert'],function (K,NFA,assert) {
3 |
4 | testNFA();
5 |
6 | console.log('NFA Test OK');
/**
Exercise the NFA with the compact "multiple of three" automaton:
decimal strings whose digit sum is divisible by 3 must be accepted,
and the same strings with a trailing '1' or '2' must be rejected.
*/
function testNFA() {
  var a=NFA({
    compact:true,accepts:'start',
    trans:[
      ['start>start','0369'],['start>q1','147'],['start>q2','258'],
      ['q1>q1','0369'],['q1>q2','147'],['q1>start','258'],
      ['q2>q2','0369'],['q2>q1','258'],['q2>start','147']
    ]
  });
  var i,n;
  ['','0','00','000','012','03','3','6','9','12'].forEach(function (s) {
    assert.ok(a.input(s).acceptable);
  });

  i=500;
  while (i--) {
    n=Math.ceil(Math.random()*1E15)*3;
    n=K.repeats(n+"",10); // n is a string from here on
    assert.ok(a.input(n).acceptable,n);
    // assert.ifError throws for any value other than null/undefined on
    // current Node versions, so a `false` result must be checked with ok().
    assert.ok(!a.input(n+'1').acceptable,n+'1');
    assert.ok(!a.input(n+'2').acceptable,n+'2');
  }

}
31 |
32 |
33 |
34 |
35 | });
36 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Regulex
2 | =======
3 |
4 | JavaScript Regular Expression Parser & Visualizer.
5 |
6 | Visualizer : http://jex.im/regulex/
7 |
8 | ### Features:
9 | - Written in pure JavaScript. No backend needed.
10 | - You can embed the graph in your own site through an HTML iframe element.
11 | - Detailed error message. In most cases it can point out the precise syntax error position.
12 | - No support for octal escapes. Yes, this is a feature. ECMAScript strict mode doesn't support octal escapes in strings, but many browsers still support octal escapes in regexes. Regulex keeps things simple: a DecimalEscape is always treated as a back reference. If the back reference is invalid (e.g. `/\1/`, `/(\1)/`, `/(a)\2/`), or a DecimalEscape appears in a charset (where it can't be interpreted as a back reference, e.g. `/(ab)[\1]/`), Regulex will always throw an error.
13 |
14 |
15 |
16 |
17 | API:
18 | ```javascript
19 | var parse = require('regulex/parse');
20 | var re = /var\s+([a-zA-Z_]\w*);/ ;
21 | console.log(parse(re));
22 | ```
23 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2014 Jex
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/tests/parseTest.js:
--------------------------------------------------------------------------------
1 | if (typeof define !== 'function') var define = require('amdefine')(module);
2 | define(['../src/parse','../src/Kit','./testData','assert'],function (parse,K,testData,assert) {
3 | var expectedPass=testData.expectedPass;
4 | var expectedFail=testData.expectedFail;
5 | var re2ast=testData.re2ast;
6 |
7 | parse.getNFAParser().assertDFA();
8 |
9 | testSyntax();
10 | testAST();
11 | console.log('Parse Test OK');
12 |
// Parse the `raw` source of every expected AST in `re2ast` and compare the
// result with that AST; on an assertion failure both trees are logged.
function testAST() {
  var idx,expected;
  for (idx=0;idx<re2ast.length;idx++) {
    expected=re2ast[idx];
    try {
      assert.deepEqual(parse(expected.raw),expected);
    } catch (err) {
      if (!(err instanceof assert.AssertionError)) throw err;
      K.log(parse(expected.raw));
      K.log(expected);
      throw err;
    }
  }
}
27 |
/**
Syntax checks driven by the test data:
- every pattern in `expectedPass` must parse without throwing;
- every pattern in `expectedFail` must throw parse.RegexSyntaxError.
A pattern from `expectedFail` that parses successfully now fails the run
instead of only being logged.
*/
function testSyntax() {
  expectedPass.forEach(function (v) {
    try {
      parse(v);
    } catch(e) {
      if (e instanceof parse.RegexSyntaxError) {
        console.log(e.message);
        console.log(v);
        console.log(K.repeats(" ",e.lastIndex)+"^"); // caret under the error position
        K.log(e);
      } else {
        K.log(v);
      }
      parse(v,true); // parse again with the second argument set (debug run)
      throw e;
    }
  });

  expectedFail.forEach(function (v) {
    var ast;
    try {
      ast=parse(v);
    } catch (e) {
      if (e instanceof parse.RegexSyntaxError) return; // failed as expected
      throw e;
    }
    // Reaching this point means the parser accepted an invalid pattern.
    console.error("Expected to fail but passed!");
    K.log(v);
    ast=parse(v,true);
    K.log(ast);
    assert.fail("Expected to fail but passed: "+v);
  });
}
64 |
65 |
66 |
67 |
68 |
69 | });
70 |
71 |
--------------------------------------------------------------------------------
/tests/KitTest.js:
--------------------------------------------------------------------------------
1 | if (typeof define !== 'function') var define = require('amdefine')(module);
2 | define(['../src/Kit','assert'],function (K,assert) {
3 |
4 | testCoalesce();
5 | testIdUnique();
6 | testKSet();
7 | testClassify();
8 | testParseCharset();
9 | testHashUnique();
10 |
11 | console.log('Kit Test OK');
12 |
// K.coalesce applied to classified ranges: disjoint ranges stay untouched,
// overlapping or adjacent ranges collapse into one.
function testCoalesce() {
  var disjoint=K.classify(['az','AZ','09']).ranges;
  assert.deepEqual(K.coalesce(disjoint),disjoint);

  var nested=K.classify(['az','ez','z','a']).ranges;
  assert.deepEqual(K.coalesce(nested),['az']);

  var overlapping=K.classify(['Aa','AZ','az']).ranges;
  assert.deepEqual(K.coalesce(overlapping),['Az']);

  var negated=K.classify(K.negate(['Aa','az'])).ranges;
  assert.deepEqual(K.coalesce(negated),K.negate(['Az']));
}
32 |
// K.idUnique must drop the second copy of every value when the list is doubled.
function testIdUnique() {
  var subjects=[console,testIdUnique,testKSet,testClassify,testParseCharset,this];
  var doubled=subjects.concat(subjects);
  var unique=K.idUnique(doubled);
  assert.ok(unique.length===subjects.length);
}
38 |
// K.hashUnique on 100 random capital letters must equal the key list of an
// object that was filled with the same letters in the same order.
function testHashUnique() {
  var letters=[],seen={},
      lo=K.ord('A'),hi=K.ord('Z'),
      remaining,ch;
  for (remaining=100;remaining>0;remaining--) {
    ch=K.chr(Math.random()*(hi-lo)+lo);
    letters.push(ch);
    seen[ch]=1;
  }
  var expected=Object.keys(seen);//what? really?
  assert.deepEqual(K.hashUnique(letters),expected);
}
49 |
50 | function testKSet() {
51 | var n=200;
52 | for (var i=0;i','@Z',']','_z','}\uffff' ];
184 | assert.deepEqual(ranges,expected);
185 |
186 | c='^acdf';
187 | ranges=parseCharset(c);
188 | expected=[ '\u0000`', 'b','e', 'g\uffff' ];
189 | assert.deepEqual(ranges,expected);
190 | }
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 | });
199 |
--------------------------------------------------------------------------------
/src/NFA.js:
--------------------------------------------------------------------------------
1 | if (typeof define !== 'function') var define = require('amdefine')(module);
2 | define(['./Kit'],function (K) {
3 |
4 | /**
5 | A Naive NFA Implementation
6 |
7 | Start state is always named 'start'
8 | @param {NFAConfig|CompactNFAConfig} a
9 | type NFAConfig = {compact:false,accepts:StateSet,trans:[Transition]}
10 | type State = String
11 | type StateSet = [State]
12 | type Transition = {from:StateSet,to:StateSet,charset:Charset,action:Action,assert:Assert}
13 | type Charset = String|[Range]
14 | Charset is similar to regex charset,supports negation and range but metacharacters
15 | Examples:
16 | includes: 'abc0-9','[^]'
17 | excludes: '^c-z0-9','^a^' //excluded 'a' and '^' two chars
18 | any char: '\0-\uffff'
19 | Or set charset to processed disjoint ranges:['ac','d','eh']
20 | Set `charset` to empty string to enable empty move(ε-moves).
21 |
22 | Action:
23 | Function(stack:Array,c:String,i:Int,state:String,inputs:String):Array
24 | stack: storage stack
25 | c: current char
26 | i: current index
27 | state: current state
28 | inputs: whole input string
29 | Optional return new stack
30 |
31 | Only eMove transition allow `assert`
32 | Actions and Asserts of eMove transition always execute before non-eMove transitions on current path.
33 | Assert:
34 | Function(stack:Array,c:String,i:Int,state:String,inputs:String):Boolean
35 | Return true if the assertion succeeds; return false if it fails.
36 | If it succeeds and some chars need to be skipped,
37 | return the Int count by which to increase `i`; this feature is designed for backref.
38 |
39 | Stack modifications in action only allow shift,unshift and return new stack.
40 |
41 | NFAConfig example used to recognize numbers:{
42 | compact:false,accepts:'start',
43 | trans:[{from:'start',to:'start',charset:'0-9'}]
44 | }
45 |
46 | CompactNFAConfig example,see `structure` function.
47 | An automaton used to recognize triples:{
48 | compact:true,accepts:'start',
49 | trans:[
50 | ['start>start','0369'],['start>q1','147'],['start>q2','258'],
51 | ['q1>q1','0369'],['q1>q2','147'],['q1>start','258'],
52 | ['q2>q2','0369'],['q2>q1','258'],['q2>start','147'],
53 | ]
54 | };
55 |
56 | @return {
57 | input:Function
58 | }
59 | */
60 | function NFA(a,_debug) {
61 | this._debug=_debug;
62 | a=a.compact?structure(a):a;
63 | var accepts={},i,trans=a.trans,
64 | // FMap={toState:Function}
65 | router={/*
66 | fromState : {
67 | eMove:[{to:State,action:Function,assert:Function,eMove:Bool}],
68 | eMoveStates:[State],// ε-move dest states
69 | charMove:{
70 | // expanded to include eMove
71 | Range:[{to:State,action:Function,assert:Function,eMove:Bool}],
72 | Char:[{to:State,action:Function,assert:Function,eMove:Bool}]
73 | },
74 | ranges:Set([Range]),
75 | // all trans keep original order in transitions list
76 | trans:[Transition]
77 | }
78 | */};
79 |
80 | for (i=0,n=a.accepts.length;i1) {
149 | throw new Error("DFA Assertion Fail!\nFrom state `"+fromStates[i]+"` can goto to multi ε-move states!");
150 | }
151 | var charMove=path.charMove;
152 | var ranges=Object.keys(charMove);
153 | for (var k=0,n=ranges.length;k"+t.to);
214 | if (j===n-1) {
215 | startIndex+=advanceIndex;
216 | fromState=t.to;
217 | continue recur; // Human flesh tail call optimize?
218 | } else {
219 | ret=_input(s,startIndex+advanceIndex,t.to,stack,lastIndex);
220 | }
221 | if (ret.acceptable) return ret;
222 | lastResult=ret;
223 | }
224 | if (lastResult) return lastResult;
225 | break;
226 | } while (true);
227 |
228 | return {
229 | stack:stack,lastIndex:lastIndex,lastState:fromState,
230 | acceptable:_this.accept(fromState)
231 | };
232 | }
233 | }
234 |
235 |
236 |
/** ε-closure
For every state in `eMoves`, compute the ordered list of states reachable
through ε-moves alone. The state itself is always the first entry.
@param {[State]} eMoves states to compute the closure for
@param {Object} eMoveMap {fromState:{to:[State]}}
@return {Object} closureMap {fromState:[toState]}
@throws {Error} if the ε-moves form a cycle
*/
function eClosure(eMoves,eMoveMap) {
  var closureMap={};
  eMoves.forEach(function (state) { // wrapper needed: forEach passes extra args
    closure(state);
  });
  return closureMap;

  // Returns the closure of `state`, or false when `state` has no ε-move.
  // `_chain` is the path of states walked from the root down to `state`.
  function closure(state,_chain) {
    if (closureMap.hasOwnProperty(state)) return closureMap[state];
    if (!eMoveMap.hasOwnProperty(state)) return false;
    _chain=_chain||[state];
    var dest=eMoveMap[state],
        queue=dest.to.slice(),
        toStates=[state],s,clos;
    while (queue.length) {
      s=queue.shift();
      if (~_chain.indexOf(s)) {
        throw new Error("Recursive ε-move:"+_chain.join(">")+">"+s+"!");
      }
      // Extend the path for the recursive call. Passing the unchanged chain
      // only caught cycles through the root; any other cycle recursed
      // until the call stack overflowed.
      clos=closure(s,_chain.concat(s));
      // Splice the nested closure (minus its own head) in front of the queue
      // so the resulting order stays depth-first.
      if (clos) queue=clos.slice(1).concat(queue);
      toStates.push(s);
    }
    return closureMap[state]=toStates;
  }
}
267 |
268 |
// Look up the range of `ranges` that contains char `c`.
// `ranges.indexOf` is called with `cmpRange` as its comparator.
// Returns the matching range, or false when no range matches.
function findRange(ranges,c/*:Char*/) {
  var pos=ranges.indexOf(c,cmpRange);
  return ~pos?ranges[pos]:false;
}
274 |
275 | function cmpRange(c,rg) {
276 | var head=rg[0],tail=rg[1];
277 | if (c>tail) return 1;
278 | if (c"+ToStateSet.join(",")
289 | */
// Expand a compact NFA config in place into the structured form:
// `accepts` becomes an array of state names and every
// ['from>to',charset,action,assert] tuple becomes
// {from:[State],to:[State],charset,action,assert}.
// The same (mutated) config object is returned with `compact` set to false.
function structure(a) {
  a.accepts=a.accepts.split(',');
  var transitions=a.trans,idx,tuple,ends;
  for (idx=transitions.length-1;idx>=0;idx--) {
    tuple=transitions[idx];
    ends=tuple[0].split('>');
    transitions[idx]={
      from:ends[0].split(','),
      to:ends[1].split(','),
      charset:tuple[1],
      action:tuple[2],
      assert:tuple[3]
    };
  }
  a.compact=false;
  return a;
}
304 |
305 |
306 | return NFA;
307 |
308 |
309 | });
310 |
--------------------------------------------------------------------------------
/src/Kit.js:
--------------------------------------------------------------------------------
1 | if (typeof define !== 'function') var define = require('amdefine')(module);
2 | define(function () {
3 | /*Kit*/
4 |
5 | var AP=Array.prototype,
6 | slice=AP.slice,
7 | isBrowser=(function () {
8 | return this.toString()==="[object Window]";
9 | })();
10 |
11 |
12 | /**
13 | Build sorted Set from array.
14 | This function will corrupt the original array
15 | Proper usage:a=Set(a);
16 | @param {ArrayLike} a
17 | @return {Set} return new ArrayLike Set
18 | */
19 | function Set(a,_sorted) {
20 | if (a._Set) return a;
21 | if (!_sorted) a=sortUnique(a);
22 |
23 | //@returns Boolean. Detect if x is in set.
24 | //`cmp` is custom compare functions return -1,0,1.
25 | // function cmp(x,item):Ordering(LT=-1|EQ=0|GT=1);
26 | a.contains=function (x,cmp) {return !!~bsearch(a,x,cmp)};
27 | a.indexOf=function (x,cmp) {return bsearch(a,x,cmp)};
28 | a.toArray=function () {return copyArray(a);};
29 |
30 | /** Union with another Set
31 | @param {Set|Array} b If b is an array,it will be corrupted by sortUnqiue
32 | @return {Set} return new Set */
33 | a.union=function (b) {
34 | b=Set(b);
35 | var n=a.length+b.length,c=new a.constructor(n);
36 | for (var i=0,j=0,k=0;k>1);
61 | c=cmp(x,a[pivot]);
62 | if (c===EQ) return pivot;
63 | if (c===LT) hi=pivot-1;
64 | else lo=pivot+1;
65 | } while (lo<=hi);
66 | return -1;
67 | }
68 |
69 | /**
70 | Return sorted Set.
71 | This function will corrupt the original array
72 | Proper usage: a=sortUnique(a);
73 | @param {ArrayLike} a
74 | @return {ArrayLike} new unique sorted array
75 | */
76 | function sortUnique(a) {
77 | var n=a.length;
78 | if (n<=1) return a;
79 | //do a shell sort
80 | var k=1,hi=n/3|0,i,j,tmp;
81 | while (k < hi) k=k*3+1;
82 | while (k > 0) {
83 | for (i=k;i=k && a[j] {
156 | ranges:['a','b','cz','09'],
157 | map:{'az':['a','b','cz'],'09':['09'],'a':['a'],'b':['b']}
158 | }
159 | */
160 | function classify(ranges) {
161 | ranges=ranges.map(function (c) {return (!c[1])?c+c:c;});
162 | var i,j,k,l,r,n;
163 | ranges=sortUnique(ranges); n=ranges.length;
164 | var singleMap={},headMap={},tailMap={},head,tail;
165 | for (i=0;i=tail) {
170 | if (head===tail) singleMap[tail]=true;
171 | break;
172 | }
173 | }
174 | }
175 | var chars=sortUnique(ranges.join('').split('')),
176 | results=Object.keys(singleMap),
177 | c=chars[0],tmpMap={},map={};
178 | for (i=0;ic) break;
184 | }
185 | }
186 | for (i=0,k=0,l=chars.length-1;itail) break;
195 | if (r[0]<=head && tail<=r[1]) tmpMap[r].push(c),results.push(c);
196 | }
197 | }
198 | head=chars[i]; tail=chars[i+1]; //keep insert order,push single char later
199 | if (singleMap.hasOwnProperty(tail)) {
200 | for (j=k;jtail) break;
204 | if (r[0]<=tail && tail<=r[1]) tmpMap[r].push(tail);
205 | }
206 | }
207 | }
208 | results=sortUnique(results);
209 | for (k in tmpMap) map[k[0]===k[1]?k[0]:k]=tmpMap[k];
210 | return {ranges:results,map:map};
211 | }
212 |
213 |
//@deprecated
// Older variant of `classify`: walks the sorted boundary chars of `ranges`
// and builds `stack` (the list of pieces found so far) plus `map`
// (original range -> pieces that were attributed to it).
// Returns {ranges:stack,map:map}.
function ____classify(ranges) {
  var stack=[],map={},
      // every distinct char that appears in any range, sorted
      chars=sortUnique(ranges.join('').split(''));
  chars.reduce(function (prev,c) {
    var head,tail,choosed=[];
    // The filter has side effects: it records whether some range starts
    // (head) or ends (tail) at `c`, collects the ranges containing `c`,
    // and keeps only ranges whose end is >= c for the following chars.
    ranges=ranges.filter(function (rg) {//side effects filter
      var start=rg[0],end=rg[1] || start;
      head = head || start==c;
      tail = tail || end==c;
      if (start<=c && c<=end) choosed.push(rg);
      if (end >= c ) return true;
    });
    if (!choosed.length) return c;
    // Candidate piece between the previous boundary and `c`:
    // start after `prev` when the last pushed piece already ends at `prev`,
    // and stop before `c` when a range starts at `c`.
    var last=stack[stack.length-1],valid,newRange,
        start=(last && (last[1] || last[0])==prev)?succ(prev):prev,
        end=head?pred(c):c;
    if (start<=end) {
      newRange=start==end?start:start+end;
      // attribute the piece to every chosen range that fully covers it
      choosed.forEach(function (rg) {
        if (rg[0]<=start && rg.slice(-1)>=end) {
          (map[rg]=map[rg] || []).push(newRange);
          valid=true;
        }
      });
      if (valid) stack.push(newRange);
    }
    // `c` is both a start and an end of some range: emit it as its own piece
    if (head && tail) {
      stack.push(c);
      choosed.forEach(function (rg) {(map[rg]=map[rg] || []).push(c)});
    }
    return c;
  },chars[0]);

  return {ranges:stack,map:map};
}
250 |
251 |
252 | /**
253 | Convert exclude ranges to include ranges
254 | Example: ^b-y, ['by'] to ["\0a","z\uffff"]
255 | @param {[Range]}
256 | @return Sorted disjoint ranges
257 | */
258 | function negate(ranges /*:[Range rg]*/) {
259 | var MIN_CHAR="\u0000",MAX_CHAR="\uffff";
260 | ranges=classify(ranges).ranges;
261 | var negated=[];
262 | if (!ranges.length) return negated;
263 | if (ranges[0][0]!==MIN_CHAR) ranges.unshift(MAX_CHAR);
264 | var hi=ranges.length-1;
265 | if ((ranges[hi][1] || ranges[hi][0])!==MAX_CHAR) ranges.push(MIN_CHAR);
266 | ranges.reduce(function (acc,r) {
267 | var start=succ(acc[1] || acc[0]),end=pred(r[0]);
268 | if (start 1 && charset.shift();
285 | charset.forEach(function (c) {
286 | if (chars[0]=='-' && chars.length>1) {//chars=['-','a'],c=='z'
287 | if (chars[1] > c ) // z-a is invalid
288 | throw new Error('Charset range out of order:'+chars[1]+'-'+c+'!');
289 | ranges.push(chars[1]+c);
290 | chars.splice(0,2);
291 | } else chars.unshift(c);
292 | });
293 | ranges=ranges.concat(chars);
294 | //convert exclude to include
295 | return exclude?negate(ranges):classify(ranges).ranges;
296 | }
297 |
/**
Merge ranges that touch each other.
['ac','d','ez'] becomes ['az'].
@param {[Range]} ranges Sorted disjoint ranges as returned by `classify`.
@return {[Range]} New array with adjacent ranges merged
*/
function coalesce(ranges) {
  if (!ranges.length) return [];
  var merged=[ranges[0]],
      tail=ranges[0], // range the next one is compared against
      idx,next,top;
  for (idx=1;idx<ranges.length;idx++) {
    next=ranges[idx];
    if (tail[tail.length-1]===pred(next[0])) {
      // `next` starts right after `tail` ends: extend the last result entry
      top=merged.length-1;
      tail=merged[top]=merged[top][0]+next[next.length-1];
    } else {
      merged.push(next);
      tail=next;
    }
  }
  return merged;
}
317 |
// Char for a UTF-16 code unit.
function chr(n) {
  return String.fromCharCode(n);
}
// Code unit of the first char of `c`.
function ord(c) {
  return c.charCodeAt(0);
}
// Char whose code unit is one below that of `c`.
function pred(c) {
  var code=c.charCodeAt(0);
  return String.fromCharCode(code-1);
}
// Char whose code unit is one above that of `c`.
function succ(c) {
  var code=c.charCodeAt(0);
  return String.fromCharCode(code+1);
}
322 |
323 | var printEscapeMap={
324 | "\n":"\\n","\t":"\\t","\f":"\\f",
325 | "\r":"\\r"," ":" ","\\":"\\\\"
326 | };
// Convert a string to a printable form.
// When `s` contains a control char or a char >= \u009F, every char is mapped:
//   - chars listed in `printEscapeMap` use their mapped escape,
//   - control chars become \xHH (two upper-case hex digits),
//   - other chars >= \u009F become \uhhhh (four hex digits),
//   - everything else is kept.
// A string without such chars is returned unchanged.
function toPrint(s) {
  var ctrl=/[\x00-\x1F\x7F-\x9F]/,unicode=/[\u009F-\uFFFF]/;
  if (ctrl.test(s) || unicode.test(s)) {
    s=s.split('').map(function (c) {
      if (printEscapeMap.hasOwnProperty(c)) return printEscapeMap[c];
      // Pad to two digits: codes below 0x10 used to print as e.g. `\x1`,
      // which is not a valid \xHH escape.
      else if (ctrl.test(c)) return '\\x'+('0'+ord(c).toString(16).toUpperCase()).slice(-2);
      else if (unicode.test(c)) return '\\u'+('00'+ord(c).toString(16)).slice(-4);
      return c;
    }).join('');
  }
  return s;
}
// Flatten a two-dimensional array by one level.
function flatten2(a) {
  return Array.prototype.concat.apply([],a);
}
// Concatenate `n` copies of `s`, using `s` as the separator of n+1 empty slots.
function repeats(s,n) {
  var slots=new Array(n+1);
  return slots.join(s);
}
343 |
// Log every argument. In a browser the arguments go to console.log as-is;
// otherwise each argument is printed through util.inspect on its own line.
function log() {
  var args=slice.call(arguments);
  if (isBrowser) {
    Function.prototype.apply.apply(console.log,[console,args]);
    return;
  }
  //Assume it is Node.js
  var util=require('util');
  var idx;
  for (idx=0;idx<args.length;idx++) {
    console.log(util.inspect(args[idx],{
      showHidden:false,customInspect:true,
      depth:64,colors:true
    }));
  }
}
359 |
// Scan the source text of `f` for indented `function name(` declarations and
// return an object-literal source string '{\nname:name,\n...}' that lists
// them, last declaration first.
function locals(f) {
  var source=f.toString();
  var pattern=/^\s+function\s+([a-zA-Z]\w+)\s*\(/mg;
  var names=[],hit;
  for (hit=pattern.exec(source);hit;hit=pattern.exec(source)) {
    names.push(hit[1]);
  }
  var entries=[],idx;
  for (idx=names.length-1;idx>=0;idx--) {
    entries.push(names[idx]+':'+names[idx]);
  }
  return '{\n'+entries.join(',\n')+'\n}';
}
369 |
370 | return {
371 | sortUnique:sortUnique,
372 | idUnique:idUnique,hashUnique:hashUnique,
373 | Set:Set, repeats:repeats,
374 | negate:negate,coalesce:coalesce,
375 | classify:classify,
376 | parseCharset:parseCharset,
377 | chr:chr,ord:ord,pred:pred,succ:succ,toPrint:toPrint,
378 | flatten2:flatten2,
379 | log:log,isBrowser:isBrowser,
380 | locals:locals
381 | };
382 |
383 | });
384 |
--------------------------------------------------------------------------------
/src/RegExp.js:
--------------------------------------------------------------------------------
1 | if (typeof define !== 'function') var define = require('amdefine')(module);
2 | define(['./parse','./Kit','./NFA'],function (parse,K,NFA) {
3 | /**
4 | Mock RegExp class
5 | */
6 | parse.exportConstants();
//options
RegExp.DEBUG=RegExp.D=1;
RegExp.MULTILINE=RegExp.M=2;
RegExp.GLOBAL=RegExp.G=4;
RegExp.IGNORECASE=RegExp.I=8;
/**
Mock RegExp constructor; works with or without `new`.
@param {String} re Pattern source (coerced to string)
@param {String|Object} [options] Either a flag string containing any of
  'i','m','g','d' (case-insensitive) or an object with the boolean properties
  ignoreCase, multiline, global and debug. A missing or null value means
  "no flags".
The instance exposes read-only source, multiline, global, ignoreCase, debug
and flags, plus `ast` (the tree returned by `parse`).
*/
function RegExp(re,options) {
  if (!(this instanceof RegExp)) return new RegExp(re,options);
  re=re+'';
  var opts={};
  if (typeof options==='string') {
    options=options.toLowerCase();
    if (~options.indexOf('i')) opts.ignoreCase=true;
    if (~options.indexOf('m')) opts.multiline=true;
    if (~options.indexOf('g')) opts.global=true;
    if (~options.indexOf('d')) opts.debug=true;
  } else if (options) {
    // Keep the empty default for undefined/null: reading `opts.multiline`
    // from them used to throw a TypeError for `new RegExp(src)`.
    opts=options;
  }

  var ast=this.ast=parse(re);
  this.source=re;
  this.multiline=!!opts.multiline;
  this.global=!!opts.global;
  this.ignoreCase=!!opts.ignoreCase;
  this.debug=!!opts.debug;
  this.flags='';
  if (this.multiline) this.flags+='m';
  if (this.ignoreCase) this.flags+='i';
  if (this.global) this.flags+='g';
  _readonly(this,['source','options','multiline','global','ignoreCase','flags','debug']);

  // Pre-process the AST nodes according to the flags.
  var ignoreCase=this.ignoreCase;
  ast.traverse(function (node) {explainCharset(node,ignoreCase)},CHARSET_NODE);
  ast.traverse(function (node) {explainExact(node,ignoreCase)},EXACT_NODE);
  if (this.multiline) ast.traverse(multilineAssert,ASSERT_NODE);

}
44 |
45 | RegExp.prototype={
46 | toString:function () {return '/'+this.source+'/'+this.flags;},
47 | test:function(s) {
48 | return this.exec(s)!==null;
49 | },
50 | exec:function (s) {
51 | var nfa=this.getNFA(),ret;
52 | var startIndex=this.global?(this.lastIndex || 0):0,max=s.length;
53 | for (;startIndexmin;max--) {
347 | a=builder(node,from);
348 | moreTrans=moreTrans.concat(a.trans);
349 | from=a.accepts;
350 | accepts=accepts.concat(a.accepts);
351 | }
352 | } else {
353 | var beforeStates=from.slice();
354 | a=builder(node,from);
355 | moreTrans=moreTrans.concat(a.trans);
356 | accepts=accepts.concat(a.accepts);
357 | moreTrans.push({
358 | from:a.accepts,to:beforeStates,charset:false
359 | });
360 | }
361 | var endState=[newState()];
362 | if (repeat.nonGreedy) {
363 | trans.push({
364 | from:accepts,to:endState,charset:false
365 | });
366 | trans=trans.concat(moreTrans);
367 | } else {
368 | trans=trans.concat(moreTrans);
369 | trans.push({
370 | from:accepts,to:endState,charset:false
371 | });
372 | }
373 | return {accepts:endState,trans:trans};
374 | }
375 |
// Mark each property named in `attrs` as non-writable and enumerable on `obj`.
function _readonly(obj,attrs) {
  var idx;
  for (idx=0;idx<attrs.length;idx++) {
    Object.defineProperty(obj,attrs[idx],{writable:false,enumerable:true});
  }
}
381 |
382 | return RegExp;
383 |
384 | });
385 |
--------------------------------------------------------------------------------
/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Regulex : JavaScript Regular Expression Visualizer.
6 |
166 |
167 |
168 |
195 | RegulexJavaScript Regular Expression Visualizer.
196 |
201 | Error Message
202 |
203 |
204 |
205 |
206 |
207 |
208 |
209 |
449 |
450 |
451 |
452 |
453 |
454 |
--------------------------------------------------------------------------------
/src/visualize.js:
--------------------------------------------------------------------------------
1 | if (typeof define !== 'function') var define = require('amdefine')(module);
2 | define(['./Kit','./parse'],function (K,parse) {
3 | parse.exportConstants();
4 |
5 | var FONT_SIZE=16,LABEL_FONT_SIZE=14,PATH_LEN=16,
6 | FONT_FAMILY='DejaVu Sans Mono,monospace';
7 |
8 | var PAPER_MARGIN=10;
9 |
10 | var _charSizeCache={},_tmpText;
// Width and height of one char for the given font size and weight.
// The value is measured once on the shared `_tmpText` element and then
// served from `_charSizeCache[fontSize][fontWeight]`.
// `fontBold` defaults to 'normal'.
function getCharSize(fontSize,fontBold) {
  var weight=fontBold || 'normal';
  var cached=_charSizeCache[fontSize] && _charSizeCache[fontSize][weight];
  if (cached) return cached;

  _tmpText.attr({'font-size':fontSize,'font-weight':weight});
  var bbox=_tmpText.getBBox();
  if (!_charSizeCache[fontSize]) _charSizeCache[fontSize]={};
  var bySize=_charSizeCache[fontSize];
  // The box width is divided by half of (text length - 1), the box height by 2.
  var charsPerLine=(_tmpText.attr('text').length-1)/2;
  var size={
    width:bbox.width/charsPerLine,
    height:bbox.height/2
  };
  bySize[weight]=size;
  return size;
}
23 |
// Create the shared off-canvas measuring text element on first use;
// later calls keep the element that already exists.
function initTmpText(paper) {
  if (_tmpText) return;
  var probe=paper.text(-1000,-1000,"XgfTlM|.q\nXgfTlM|.q");
  _tmpText=probe.attr('font-family',FONT_FAMILY);
}
27 |
/**
Draw the regular expression onto `paper`: a highlighted "RegExp:" text line
at the top and the plotted graph below it. The paper is cleared first and
resized to fit both parts.
@param {AST} re AST returned by `parse`
@param paper Drawing surface; clear, text, add and setSize are used on it
*/
function visualize(re,paper) {
  paper.clear();
  initTmpText(paper);

  // Header line: lay the highlighted text pieces out from left to right.
  var headerTexts=highlight(re.tree);
  headerTexts.unshift(text("RegExp:"));
  var charSize=getCharSize(FONT_SIZE,'bold');
  var headerY=charSize.height/2+PAPER_MARGIN;
  var cursorX=PAPER_MARGIN;
  headerTexts.forEach(function (t) {
    t.x=cursorX;
    t.y=headerY;
    cursorX=cursorX+t.text.length*charSize.width;
  });
  var width=cursorX+PAPER_MARGIN;
  var height=charSize.height+PAPER_MARGIN*2;
  paper.add(headerTexts);
  paper.setSize(width,height);

  // Graph: plot at the origin, grow the paper if needed, then shift the
  // graph below the header line.
  var graph=plot(re.tree,0,0);
  height=Math.max(graph.height+3*PAPER_MARGIN+charSize.height,height);
  width=Math.max(graph.width+2*PAPER_MARGIN,width);
  paper.setSize(width,height);

  translate(graph.items,PAPER_MARGIN,PAPER_MARGIN*2+charSize.height-graph.y);
  paper.add(graph.items);
}
60 |
61 |
62 |
// Plot `tree` framed by a start point and an end point marker.
// The markers are added to a copy, so the caller's AST is left untouched
// and plotting the same tree again does not pile up extra markers.
// Returns the NodePlot config produced by `plotTree`.
function plot(tree,x,y) {
  var framed=[{type:'startPoint'}].concat(tree,[{type:'endPoint'}]);
  return plotTree(framed,x,y);
}
68 |
// Shift every item by (dx,dy). An item with a `_translate` hook gets that
// hook called first, then its own x/y are moved.
function translate(items,dx,dy) {
  var idx,item;
  for (idx=0;idx<items.length;idx++) {
    item=items[idx];
    if (item._translate) item._translate(dx,dy);
    item.x+=dx;
    item.y+=dy;
  }
}
75 |
// Plot the nodes of `tree` from left to right starting at (x,y), connect
// neighbours with horizontal lines and return a NodePlot config.
// An empty tree is delegated to plotNode.empty.
function plotTree(tree,x,y) {
  if (!tree.length) return plotNode.empty(null,x,y);

  var plots=[],items=[],
      width=0,cursorX=x,
      top=y,bottom=y,
      count=tree.length,idx,node,ret;

  for (idx=0;idx<count;idx++) {
    node=tree[idx];
    // nodes that carry `repeat` are drawn by the repeat plotter
    ret=node.repeat?plotNode.repeat(node,cursorX,y):plotNode[node.type](node,cursorX,y);
    plots.push(ret);
    cursorX+=ret.width+PATH_LEN;
    width+=ret.width;
    top=Math.min(top,ret.y);
    bottom=Math.max(bottom,ret.y+ret.height);
    items=items.concat(ret.items);
  }

  // one connector of length PATH_LEN between each pair of neighbours
  for (idx=1;idx<plots.length;idx++) {
    width+=PATH_LEN;
    items.push(hline(plots[idx-1].lineOutX,y,plots[idx].lineInX));
  }

  return {
    items:items,
    width:width,height:bottom-top,x:x,y:top,
    lineInX:plots[0].lineInX,lineOutX:plots[plots.length-1].lineOutX
  };
}
// Build a text centred in a padded rectangle whose left edge is at `x` and
// whose vertical centre is at `y`. `s` is passed through K.toPrint first.
// Returns a NodePlot config; bgColor defaults to 'transparent' and
// textColor to 'black'.
function textRect(s,x,y,bgColor,textColor) {
  var label=K.toPrint(s);
  var padding=6;
  var charSize=getCharSize(FONT_SIZE);
  var textWidth=label.length*charSize.width;
  var boxHeight=charSize.height+padding*2;
  var boxWidth=textWidth+padding*2;
  var box={
    type:'rect',
    x:x,y:y-(boxHeight/2),
    width:boxWidth,height:boxHeight,
    stroke:'none',
    fill:bgColor || 'transparent'
  };
  var caption={
    type:'text',
    x:x+boxWidth/2,y:y,
    text:label,
    'font-size':FONT_SIZE,
    fill:textColor || 'black'
  };
  return {
    text:caption,rect:box,
    items:[box,caption],
    width:boxWidth,height:boxHeight,
    x:x,y:box.y,
    lineInX:x,lineOutX:x+boxWidth
  };
}
140 |
// return LabelObject {label:Element,x,y,width,height}
// `x` is the horizontal centre and `y` the bottom of the label. The width is
// taken from the longest line of `s`; color defaults to '#444'.
function textLabel(x,y,s,color) {
  var charSize=getCharSize(LABEL_FONT_SIZE);
  var lines=s.split("\n");
  var margin=4;
  var textHeight=lines.length*charSize.height;
  // longest line in chars (equals s.length for a single-line label)
  var longest=Math.max.apply(Math,lines.map(function (line) {return line.length}));
  var textWidth=longest*charSize.width;
  var element={
    type:'text',
    x:x,y:y-textHeight/2-margin,
    text:s,
    'font-size':LABEL_FONT_SIZE,
    fill:color || '#444'
  };
  return {
    label:element,
    x:x-textWidth/2,y:y-textHeight-margin,
    width:textWidth,height:textHeight+margin
  };
}
//return element config
// Horizontal path from (x,y) to (destX,y). The `_translate` hook shifts the
// path coordinates; the element's own x/y are left to the caller.
function hline(x,y,destX) {
  var shiftPath=function (dx,dy) {
    var path=this.path;
    path[1]+=dx; // start x
    path[2]+=dy; // start y
    path[4]+=dx; // end x
  };
  return {
    type:'path',
    x:x,y:y,
    path:["M",x,y,"H",destX],
    'stroke-linecap':'butt',
    'stroke-linejoin':'round',
    'stroke':'#333',
    'stroke-width':2,
    _translate:shiftPath
  };
}
183 |
184 | //return element config
185 | function smoothLine(fromX,fromY,toX,toY) {
186 | var radius=10,p,_translate;
187 | var signX=fromX>toX?-1:1,signY=fromY>toY?-1:1;
188 | if (Math.abs(fromY-toY) 1 ? " to " : " or ") +_plural(repeat.max);
295 | } else {
296 | txt+=" or more times.";
297 | }
298 | }
299 |
300 | var r=padding;
301 | var rectW=ret.width+padding*2,rectH=ret.y+ret.height+padding-y;
302 |
303 | var py=y;
304 | var p={
305 | type:'path',
306 | path:['M',ret.lineInX+padding,py,
307 | 'Q',x,py,x,py+r,
308 | 'V',py+rectH-r,
309 | 'Q',x,py+rectH,x+r,py+rectH,
310 | 'H',x+rectW-r,
311 | 'Q',x+rectW,py+rectH,x+rectW,py+rectH-r,
312 | 'V',py+r,
313 | 'Q',x+rectW,py,ret.lineOutX+padding,py
314 | ],
315 | _translate:function (x,y) {
316 | var p=this.path;
317 | p[1]+=x;p[2]+=y;
318 | p[4]+=x;p[5]+=y;p[6]+=x;p[7]+=y;
319 | p[9]+=y;
320 | p[11]+=x;p[12]+=y;p[13]+=x;p[14]+=y;
321 | p[16]+=x;
322 | p[18]+=x;p[19]+=y;p[20]+=x;p[21]+=y;
323 | p[23]+=y;
324 | p[25]+=x;p[26]+=y;p[27]+=x;p[28]+=y;
325 | },
326 | stroke:'maroon',
327 | 'stroke-width':2
328 | };
329 |
330 | if (repeat.nonGreedy) {
331 | txt+="(NonGreedy!)";
332 | p.stroke="Brown";
333 | p['stroke-dasharray']="-";
334 | }
335 |
336 | var tl=textLabel(x+rectW/2,y,txt);
337 | translate([tl.label],0,rectH+tl.height+LABEL_MARGIN); //bottom label
338 |
339 | var width=Math.max(tl.width,rectW);
340 | var offsetX=(width-rectW)/2;
341 | if (offsetX) translate([p,tl.label],offsetX,0);
342 | translate(ret.items,padding+offsetX,0);
343 | ret.items.unshift(p);
344 | ret.items.push(tl.label);
345 | return {
346 | items:ret.items,
347 | width:width,height:ret.height+padding+tl.height+LABEL_MARGIN,
348 | x:offsetX+padding+x,y:ret.y,
349 | lineInX:ret.lineInX+padding+offsetX,
350 | lineOutX:ret.lineOutX+padding+offsetX
351 | };
352 |
353 | function _plural(n) {
354 | return n+ ((n<2)? " time.":" times.");
355 | }
356 | },
357 | choice:function (node,x,y) {
358 | var marginX=20,spacing=6,paddingY=4,height=0,width=0;
359 | var branches=node.branches.map(function (branch) {
360 | var ret=plotTree(branch,x,y);
361 | height+=ret.height;
362 | width=Math.max(width,ret.width);
363 | return ret;
364 | });
365 | height+=(branches.length-1)*spacing+paddingY*2;
366 | width+=marginX*2;
367 |
368 | var centerX=x+width/2,dy=y-height/2+paddingY,lineOutX=x+width,
369 | items=[];
370 | branches.forEach(function (a) {
371 | var dx=centerX-a.width/2;
372 | translate(a.items,dx-a.x,dy-a.y);
373 | var p1=smoothLine(x,y,dx-a.x+a.lineInX,y+dy-a.y);
374 | var p2=smoothLine(lineOutX,y,a.lineOutX+dx-a.x,y+dy-a.y);
375 | items=items.concat(a.items);
376 | items.push(p1,p2);
377 | dy+=a.height+spacing;
378 | });
379 |
380 | return {
381 | items:items,
382 | width:width,height:height,
383 | x:x,y:y-height/2,
384 | lineInX:x,lineOutX:lineOutX
385 | };
386 |
387 | },
388 | charset:function (node,x,y) {
389 | var padding=6,spacing=4;
390 | var clsDesc={d:'Digit',D:'NonDigit',w:'Word',W:'NonWord',s:'WhiteSpace',S:'NonWhiteSpace'};
391 | var charBgColor='LightSkyBlue',charTextColor='black',
392 | clsBgColor='Green',clsTextColor='white',
393 | rangeBgColor='teal',rangeTextColor='white',
394 | boxColor=node.exclude?'Pink':'Khaki',
395 | labelColor=node.exclude?'#C00':'';
396 | var simple=onlyCharClass(node);
397 | if (simple) {
398 | var a=textRect(clsDesc[node.classes[0]],x,y,clsBgColor,clsTextColor);
399 | a.rect.r=5;
400 | if (!node.exclude) {
401 | return a;
402 | } else {
403 | var tl=textLabel(a.x+a.width/2,a.y,'None of:',labelColor);
404 | var items=a.items;
405 | items.push(tl.label);
406 | var oldWidth=a.width;
407 | var width=Math.max(tl.width,a.width);
408 | var offsetX=(width-oldWidth)/2;//ajust label text
409 | translate(items,offsetX,0);
410 | return {
411 | items:items,
412 | width:width,height:a.height+tl.height,
413 | x:Math.min(tl.x,a.x),y:tl.y,
414 | lineInX:offsetX+a.x,lineOutX:offsetX+a.x+a.width
415 | };
416 | }
417 | }
418 | if (!node.chars && !node.ranges.length && !node.classes.length) {
419 | // It must be exclude charset here
420 | var a= textRect('AnyChar',x,y,'green','white');
421 | a.rect.r=5;
422 | return a;
423 | }
424 | var packs=[],ret,width=0,height=0,singleBoxHeight;
425 | if (node.chars) {
426 | ret=textRect(node.chars,x,y,charBgColor,charTextColor);
427 | ret.rect.r=5;
428 | packs.push(ret);
429 | width=ret.width;
430 | }
431 | node.ranges.forEach(function (rg) {
432 | rg=rg.split('').join('-');
433 | var ret=textRect(rg,x,y,rangeBgColor,rangeTextColor);
434 | ret.rect.r=5;
435 | packs.push(ret);
436 | width=Math.max(ret.width,width);
437 | });
438 | node.classes.forEach(function (cls) {
439 | var ret=textRect(clsDesc[cls],x,y,clsBgColor,clsTextColor);
440 | ret.rect.r=5;
441 | packs.push(ret);
442 | width=Math.max(ret.width,width);
443 | });
444 |
445 | singleBoxHeight=packs[0].height;
446 |
447 | var pack1=[],pack2=[];
448 | packs.sort(function (a,b) {return b.width-a.width});
449 | packs.forEach(function (a) {
450 | if (a.width*2+spacing>width) pack1.push(a);
451 | else pack2.push(a); // can be inline
452 | });
453 | packs=pack1;
454 | var a1,a2;
455 | while (pack2.length) {
456 | a1=pack2.pop(); a2=pack2.pop();
457 | if (!a2) {packs.push(a1);break;}
458 | if (a1.width-a2.width > 2) {
459 | packs.push(a1);
460 | pack2.push(a2);
461 | continue;
462 | }
463 | translate(a2.items,a1.width+spacing,0);
464 | packs.push({
465 | items:a1.items.concat(a2.items),
466 | width:a1.width+a2.width+spacing,
467 | height:a1.height,
468 | x:a1.x,y:a1.y
469 | });
470 | height-=a1.height;
471 | }
472 |
473 | width+=padding*2;
474 | height=(packs.length-1)*spacing+packs.length*singleBoxHeight+padding*2;
475 |
476 | var rect={
477 | type:'rect',
478 | x:x,y:y-height/2,r:4,
479 | width:width,height:height,
480 | stroke:'none',fill:boxColor
481 | };
482 |
483 | var startY=rect.y+padding;
484 | var items=[rect];
485 |
486 | packs.forEach(function (a) {
487 | translate(a.items,x-a.x+(width-a.width)/2,startY-a.y);
488 | items=items.concat(a.items);
489 | startY+=a.height+spacing;
490 | });
491 | var tl=textLabel(rect.x+rect.width/2,rect.y,(node.exclude?'None':'One')+' of:',labelColor);
492 | items.push(tl.label);
493 | var oldWidth=width;
494 | width=Math.max(tl.width,width);
495 | var offsetX=(width-oldWidth)/2;//ajust label text
496 | translate(items,offsetX,0);
497 | return {
498 | items:items,
499 | width:width,height:height+tl.height,
500 | x:Math.min(tl.x,x),y:tl.y,
501 | lineInX:offsetX+x,lineOutX:offsetX+x+rect.width
502 | };
503 | },
504 | group:function (node,x,y) {
505 | var padding=10,lineColor='silver',strokeWidth=2;
506 | var sub=plotTree(node.sub,x,y);
507 | if (node.num) {
508 | translate(sub.items,padding,0);
509 | var rectW=sub.width+padding*2,rectH=sub.height+padding*2;
510 | var rect={
511 | type:'rect',
512 | x:x,y:sub.y-padding,r:6,
513 | width:rectW,height:rectH,
514 | 'stroke-dasharray':".",
515 | stroke:lineColor,
516 | 'stroke-width':strokeWidth
517 | };
518 | var tl=textLabel(rect.x+rect.width/2,rect.y-strokeWidth,'Group #'+node.num);
519 | var items=sub.items.concat([rect,tl.label]);
520 | var width=Math.max(tl.width,rectW);
521 | var offsetX=(width-rectW)/2;//ajust label text space
522 | if (offsetX) translate(items,offsetX,0);
523 | return {
524 | items:items,
525 | width:width,
526 | height:rectH+tl.height,
527 | x:x,y:tl.y,
528 | lineInX:offsetX+sub.lineInX+padding,lineOutX:offsetX+sub.lineOutX+padding
529 | };
530 | }
531 | return sub;
532 | },
533 | assert:function (node,x,y) {
534 | var simpleAssert={
535 | AssertNonWordBoundary:{bg:"maroon",fg:"white"},
536 | AssertWordBoundary:{bg:"purple",fg:"white"},
537 | AssertEnd:{bg:"Indigo",fg:"white"},
538 | AssertBegin:{bg:"Indigo",fg:"white"}
539 | };
540 | var conf,nat=node.assertionType,txt=nat.replace('Assert','')+'!';
541 | if (conf=simpleAssert[nat]) {
542 | return textRect(txt,x,y,conf.bg,conf.fg);
543 | }
544 |
545 | var lineColor,fg,padding=8;
546 | if (nat===AssertLookahead) {
547 | lineColor="CornflowerBlue";
548 | fg="darkgreen";
549 | txt="If followed by:";
550 | } else if (nat===AssertNegativeLookahead) {
551 | lineColor="#F63";
552 | fg="Purple";
553 | //txt="Negative\nLookahead!"; // break line
554 | txt="If not followed by:";
555 | }
556 |
557 | var sub=plotNode.group(node,x,y);
558 | var rectH=sub.height+padding*2,rectW=sub.width+padding*2;
559 | var rect={
560 | type:'rect',
561 | x:x,y:sub.y-padding,r:6,
562 | width:rectW,height:rectH,
563 | 'stroke-dasharray':"-",
564 | stroke:lineColor,
565 | 'stroke-width':2
566 | };
567 |
568 | var tl=textLabel(rect.x+rectW/2,rect.y,txt,fg);
569 | var width=Math.max(rectW,tl.width);
570 | var offsetX=(width-rectW)/2;//ajust label text
571 | translate(sub.items,offsetX+padding,0);
572 |
573 | if (offsetX) translate([rect,tl.label],offsetX,0);
574 | var items=sub.items.concat([rect,tl.label]);
575 | return {
576 | items:items,
577 | width:width,
578 | height:rect.height+tl.height,
579 | x:x,y:tl.y,
580 | lineInX:offsetX+sub.lineInX+padding,lineOutX:offsetX+sub.lineOutX+padding
581 | };
582 | }
583 | };
584 |
585 |
586 |
// Colours used by highlight()/text(). Keys are either node types, literal
// regex tokens, or named roles looked up explicitly by the highlighter.
var hlColorMap={
  // node types
  'exact':'#334',
  'dot':'darkblue',
  'backref':'teal',
  // assertions
  '$':'purple',
  '^':'purple',
  '\\b':'#F30',
  '\\B':'#F30',
  // groups
  '(':'blue',
  ')':'blue',
  '?=':'darkgreen',
  '?!':'red',
  '?:':'grey',
  // charset brackets and alternation
  '[':'navy',
  ']':'navy',
  '|':'blue',
  // quantifiers
  '{':'maroon',
  ',':'maroon',
  '}':'maroon',
  '*':'maroon',
  '+':'maroon',
  '?':'maroon',
  'repeatNonGreedy':'#F61',
  // fallback and charset roles
  'defaults':'black',
  'charsetRange':'olive',
  'charsetClass':'navy',
  'charsetExclude':'red',
  'charsetChars':'#334'
};
616 |
617 |
/**
Turn an AST tree into a flat list of coloured text element configs that
spell the regex source again.
@param {Array} tree AST tree (array of nodes) as returned by `parse`
@return {Array} text element configs produced by `text`
*/
function highlight(tree) {
  var out=[];

  // Tokens for a node that has neither `sub` nor `branches`.
  function pushLeaf(node) {
    var color=hlColorMap[node.type] || hlColorMap.defaults;
    if (node.type===EXACT_NODE) {
      out.push(text(K.toPrint(node.chars),color));
    } else if (node.type===DOT_NODE) {
      out.push(text('.',color));
    } else if (node.type===BACKREF_NODE) {
      out.push(text("\\"+node.num,color));
    } else if (node.type===ASSERT_NODE) {
      out.push(text(node.raw));
    } else if (node.type===CHARSET_NODE) {
      // A lone, non-negated class such as \d is printed without brackets.
      var bracketed=!onlyCharClass(node) || node.exclude;
      if (bracketed) out.push(text('['));
      if (node.exclude) out.push(text('^',hlColorMap.charsetExclude));
      node.ranges.forEach(function (rg) {
        out.push(text(K.toPrint(rg[0]+'-'+rg[1]),hlColorMap.charsetRange));
      });
      node.classes.forEach(function (cls) {
        out.push(text("\\"+cls,hlColorMap.charsetClass));
      });
      out.push(text(K.toPrint(node.chars),hlColorMap.charsetChars));
      if (bracketed) out.push(text(']'));
    }
  }

  // Tokens for the quantifier that follows a node.
  function pushRepeat(repeat) {
    var min=repeat.min,max=repeat.max;
    var tokens;
    if (min===0 && max===Infinity) tokens=['*'];
    else if (min===1 && max===Infinity) tokens=['+'];
    else if (min===0 && max===1) tokens=['?'];
    else if (min===max) tokens=['{',min,'}'];
    else if (isFinite(max)) tokens=['{',min,',',max,'}'];
    else tokens=['{',min,',','}'];
    tokens.forEach(function (tok) {
      out.push(text(tok));
    });
    if (repeat.nonGreedy) {
      out.push(text('?',hlColorMap.repeatNonGreedy));
    }
  }

  tree.forEach(function (node) {
    if (node.sub) {
      out.push(text('('));
      if (node.type===ASSERT_NODE) {
        out.push(text(node.assertionType===AssertLookahead ? '?=' : '?!'));
      } else if (node.nonCapture) {
        out.push(text('?:'));
      }
      out=out.concat(highlight(node.sub));
      out.push(text(')'));
    } else if (node.branches) {
      node.branches.forEach(function (branch) {
        out=out.concat(highlight(branch));
        out.push(text('|'));
      });
      out.pop(); // drop the separator after the last branch
    } else {
      pushLeaf(node);
    }
    if (node.repeat) pushRepeat(node.repeat);
  });

  return out;
}
695 |
/**
 * Build a bold, start-anchored text element config for one highlighted token.
 * @param {*} s Token; stringified for the `text` property
 * @param {String} [color] Fill colour; falls back to hlColorMap[s], then hlColorMap.defaults
 */
function text(s,color) {
  var fill=color;
  if (!fill) fill=hlColorMap[s];
  if (!fill) fill=hlColorMap.defaults;
  return {
    type:'text',
    'font-size':FONT_SIZE,
    'font-family':FONT_FAMILY,
    text:s+"",
    fill:fill,
    'text-anchor':'start',
    'font-weight':'bold'
  };
}
704 |
// True when the charset consists of exactly one class (e.g. \d) and has no
// literal chars and no ranges.
function onlyCharClass(node) {
  if (node.chars) return false;
  if (node.ranges.length) return false;
  return node.classes.length===1;
}
708 |
709 | return visualize;
710 |
711 | });
712 |
--------------------------------------------------------------------------------
/src/parse.js:
--------------------------------------------------------------------------------
1 | if (typeof define !== 'function') var define = require('amdefine')(module);
2 | define(['./NFA','./Kit'],function (NFA,K) {
3 | /**
4 | Parse Regex to AST
5 | parse:Function(re:String)
6 | parse.Constants
7 | parse.exportConstants:Function
8 | */
9 |
// String constants shared by the parser and its clients.
var Constants={
  // Node types (value of `node.type`)
  EXACT_NODE:'exact',
  CHARSET_NODE:'charset',
  CHOICE_NODE:'choice',
  GROUP_NODE:'group',
  ASSERT_NODE:'assert',
  DOT_NODE:'dot',
  BACKREF_NODE:'backref',
  EMPTY_NODE:'empty',
  // Assertion types (value of `node.assertionType`)
  AssertLookahead:'AssertLookahead',
  AssertNegativeLookahead:'AssertNegativeLookahead',
  AssertNonWordBoundary:'AssertNonWordBoundary',
  AssertWordBoundary:'AssertWordBoundary',
  AssertEnd:'AssertEnd',
  AssertBegin:'AssertBegin'
};
28 |
29 | /**
30 | AST:
31 | Node = { // Base Node interface
32 | type:NodeType, // Node type string
33 | raw:String, // Raw regex string
34 | repeat:{
35 | min:Int,max:Int, // Repeat times. [min,max] means "{min,max}".
36 | // Set max=Infinity forms a "{min,}" range
37 | // max equal to min forms a "{min}" range (the parser stores max=min for /a{n}/)
38 | nonGreedy:Boolean // If this repeat is non-greedy,viz. had a "?" quantifier
39 | },
40 | indices:[Int,Int] // Raw string in original regex index range [start,end)
41 | // You can use regexStr.slice(start,end) to retrieve node.raw string
42 | }
43 |
44 | NodeType = exact|dot|charset|choice|empty|group|assert|backref
45 |
46 | ExactNode = { // Literal match chars string
47 | type:"exact",
48 | chars:"c",
49 | raw:"c{1,2}" // When repeat or escape,raw will diff from chars
50 | }
51 | DotNode = {type:"dot"} //viz. "." , dot match any char but newline "\n\r"
52 |
53 | // Because of IgnoreCase flag,
54 | // The client code need to compute disjoint ranges itself.
55 | CharsetNode = {
56 | type:"charset",
57 | exclude:Boolean, // True only if it is "[^abc]" form
58 | classes:[Char], // Named character classes. e.g. [\d].
59 | // All names: d(Digit),D(Non-digit),w,W,s,S
60 | chars:String, // Literal chars. e.g. [abc] repr as 'abc'
61 | ranges:[Range] // Range: a-z repr as 'az'
62 | }
63 |
64 | ChoiceNode = {
65 | type:"choice",
66 | branches:[[Node]] // Choice more branches,e.g. /a|b|c/
67 | }
68 |
69 | EmptyNode = { // This node will match any input,include empty string
70 | type:"empty" //new RegExp("") will give an empty node. /a|/ will give branches with an empty node
71 | }
72 |
73 | GroupNode = {
74 | type:"group",
75 | nonCapture:false, // true means:"(?:abc)",default is false
76 | num:Int, // If capture is true.It is group's int index(>=1).
77 | endParenIndex:Int, // /(a)+/ will generate only one node,so indices is [0,4],endParenIndex is 3
78 | sub:[Node] // Sub pattern nodes
79 | }
80 |
81 | AssertNode = {
82 | type:"assert",
83 | assertionType:String, //See Assertion Type Constants
84 | sub:[Node] //Optional,\b \B ^ $ Assertion this property is empty
85 | }
86 | Only AssertLookahead,AssertNegativeLookahead has `sub` property
87 | "(?=(abc))" repr as {
88 | type:"assert", assertionType:AssertLookahead,
89 | sub:[{
90 | type:"group",
91 | sub:[{type:"exact",raw:"abc"}]
92 | }]
93 | }
94 |
95 | BackrefNode = {
96 | type:"backref",
97 | num:Int // Back references index.Correspond to group.num
98 | }
99 |
100 | */
101 |
/**
 * Publish every entry of `Constants` as a property of the global object, so
 * the rest of the code can refer to e.g. EXACT_NODE as a bare identifier.
 *
 * The properties are assigned directly instead of evaluating a generated
 * string with `eval`, and `globalThis` is preferred when available because
 * `this` inside a plain function call is undefined in strict mode / ES modules.
 */
function exportConstants() {
  var Global=typeof globalThis!=='undefined' ? globalThis : (function () {
    return this;
  })();
  Object.keys(Constants).forEach(function (k) {
    Global[k]=Constants[k];
  });
}
111 | exportConstants();
112 |
/**
 * Parse result wrapper.
 * @param {Object} a {raw,tree,groupCount} copied onto the instance
 */
function AST(a) {
  this.raw=a.raw;
  this.tree=a.tree;
  this.groupCount=a.groupCount;
}
/**
Visit nodes depth-first; a node is visited before the nodes under its `sub`
or `branches`.
@param {Function} f Visitor function, called with the node as its only argument.
@param {String} nodeType Only nodes of this type are passed to `f`; omit to visit all
*/
AST.prototype.traverse=function (f,nodeType) {
  function wanted(node) {
    return !nodeType || node.type===nodeType;
  }
  function walk(nodes) {
    nodes.forEach(function (node) {
      if (wanted(node)) f(node);
      if (node.sub) {
        walk(node.sub);
      } else if (node.branches) {
        node.branches.forEach(function (branch) {
          walk(branch);
        });
      }
    });
  }
  walk(this.tree);
};
132 |
133 |
134 | var G_DEBUG;
/**
Parse a regex source string into an AST.
@param {String} re input regex as string
@param {Boolean} [_debug] Debug switch; stored in G_DEBUG while parsing and
       reset to false after a successful parse
@return {AST}
  {
    raw:String, // original re
    groupCount:Int, //Total group count
    tree:Array // AST Tree Stack
  }
@throws {RegexSyntaxError} When the parser rejects the input or a group is left open
*/
function parse(re,_debug) {
  G_DEBUG=_debug;

  // Errors identified purely by the state the parser stopped in.
  var stateErrors={
    charsetRangeEndWithNullChar:{
      type:'CharsetRangeEndWithNullChar',
      message:"Charset range end with NUL char does not make sense!\n"+
        "Because [a-\\0] is not a valid range.\n"+
        "And [\\0-\\0] should be rewritten into [\\0]."
    },
    repeatErrorFinal:{
      type:'NothingRepeat',
      message:"Nothing to repeat!"
    },
    digitFollowNullError:{
      type:'DigitFollowNullError',
      message:"The '\\0' represents the char and cannot be followed by a decimal digit!"
    },
    charsetRangeEndClass:{
      type:'CharsetRangeEndClass',
      message:'Charset range ends with class such as "\\w\\W\\d\\D\\s\\S" is invalid!'
    },
    charsetOctEscape:{
      type:'DecimalEscape',
      message:'Decimal escape appears in charset is invalid.Because it can\'t be explained as backreference.And octal escape is deprecated!'
    }
  };

  var ret=getNFAParser().input(re);
  var stack=actions.endChoice(ret.stack); // e.g. /a|b/
  var lastState=ret.lastState;
  var valid=ret.acceptable && ret.lastIndex===re.length-1; //just syntax valid regex

  if (!valid) {
    var error;
    if (typeof lastState==='string' && Object.prototype.hasOwnProperty.call(stateErrors,lastState)) {
      error={
        type:stateErrors[lastState].type,
        message:stateErrors[lastState].message
      };
    } else if (lastState.indexOf('charset')===0) {
      error={type:'UnclosedCharset',message:'Unterminated character class!'};
    } else if (re[ret.lastIndex]===')') {
      error={type:'UnmatchedParen',message:'Unmatched end parenthesis!'};
    } else {
      error={type:'UnexpectedChar',message:'Unexpected char!'};
    }
    error.lastIndex=ret.lastIndex;
    error.astStack=ret.stack;
    error.lastState=lastState;
    throw new RegexSyntaxError(error);
  }

  if (stack._parentGroup) {
    throw new RegexSyntaxError({
      type:"UnterminatedGroup",
      message:"Unterminated group!",
      lastIndex:stack._parentGroup.indices[0],
      lastState:lastState,
      astStack:stack
    });
  }

  var groupCount=stack.groupCounter ? stack.groupCounter.i : 0;
  delete stack.groupCounter;
  var ast=new AST({
    raw:re,
    groupCount:groupCount,
    tree:stack
  });
  _fixNodes(stack,re,re.length);
  // Check charset ranges out of order error.(Because of charsetRangeEndEscape)
  ast.traverse(_checkCharsetRange,CHARSET_NODE);
  // Check any repeats after assertion. e.g. /a(?=b)+/ doesn't make sense.
  ast.traverse(_checkRepeat,ASSERT_NODE);
  _coalesceExactNode(stack);
  G_DEBUG=false;
  return ast;
}
249 |
250 | parse.Constants=Constants;
251 | parse.exportConstants=exportConstants;
252 | parse.RegexSyntaxError=RegexSyntaxError;
253 | parse.getNFAParser=getNFAParser;
254 |
// Lazily built parsers, one per debug mode. A single cached instance would
// keep the debug flag of whichever call happened first, so a later request for
// a debug parser would silently get the non-debug one (and vice versa).
var _NFAParser;
var _NFADebugParser;
/**
 * Return the cached NFA parser matching the current G_DEBUG flag, building it
 * from `config` on first use.
 */
function getNFAParser() {
  if (G_DEBUG) {
    if (!_NFADebugParser) {
      _NFADebugParser=NFA(config,G_DEBUG);
    }
    return _NFADebugParser;
  }
  if (!_NFAParser) {
    _NFAParser=NFA(config,G_DEBUG);
  }
  return _NFAParser;
}
262 |
// Define a writable, configurable bookkeeping property on `obj`; it is
// enumerable only while G_DEBUG is truthy.
function _set(obj,prop,value) {
  var descriptor={
    value:value,
    enumerable:G_DEBUG,
    writable:true,
    configurable:true
  };
  Object.defineProperty(obj,prop,descriptor);
}
268 |
269 | function _coalesceExactNode(stack) {
270 | var prev=stack[0];
271 | for (var i=1,j=1,l=stack.length,node;irange[1]) {
328 | throw new RegexSyntaxError({
329 | type:"OutOfOrder",
330 | lastIndex:range.lastIndex,
331 | message:"Range ["+range.join('-')+"] out of order in character class!"
332 | });
333 | }
334 | return range.join('');
335 | }));
336 | }
337 |
/**
 * Error thrown for invalid regex source.
 * @param {Object} e {type,message,lastIndex,lastState,astStack}; each field is
 *        copied onto the error instance
 */
function RegexSyntaxError(e) {
  this.name="RegexSyntaxError";
  this.type=e.type;
  this.lastIndex=e.lastIndex;
  this.lastState=e.lastState;
  this.astStack=e.astStack;
  this.message=e.message;
  Object.defineProperty(this,'stack',{
    value:new Error(e.message).stack,enumerable:false
  });
}
// Inherit from Error so `instanceof Error` checks and generic error handling
// recognise parser errors.
RegexSyntaxError.prototype=Object.create(Error.prototype,{
  constructor:{value:RegexSyntaxError,writable:true,configurable:true}
});
RegexSyntaxError.prototype.toString=function () {
  return this.name+' '+this.type+':'+this.message;
};
352 |
353 |
354 |
355 | var escapeCharMap={n:"\n",r:"\r",t:"\t",v:"\v",f:"\f"};
356 |
357 | // All indices' end will be fixed later by stack[i].indices.push(stack[i+1].indices[0])
358 | // All raw string filled later by node.raw=s.slice(node.indices[0],node.indices[1])
359 | // All nodes are unshift to stack, so they're reverse order.
360 | var actions=(function _() {
361 |
// Any literal char. Starts a new exact node unless the top of the stack is an
// exact node that is still open, i.e. has no repeat and no chars yet.
// ExactNode.chars is filled in later (escape and repeat actions set it,
// otherwise it comes from the raw string).
function exact(stack,c,i) {
  var top=stack[0];
  var continuesLiteral=top && top.type==EXACT_NODE && !top.repeat && !top.chars;
  if (continuesLiteral) return;
  stack.unshift({type:EXACT_NODE, indices:[i]});
}
// "." : put a dot node starting at index i on top of the stack.
function dot(stack,c,i) {
  var node={type:DOT_NODE,indices:[i]};
  stack.unshift(node);
}
// "\0": handled like a literal char, forwarding "\0" as the char argument.
// The argument list is rebuilt explicitly: assigning to `c` only changes
// `arguments[1]` in sloppy mode, so the original forwarding broke under
// strict mode.
function nullChar(stack,c,i) {
  var args=Array.prototype.slice.call(arguments);
  args[1]="\0";
  actions.exact.apply(this,args);
}
// "^" : put an AssertBegin assertion node on top of the stack.
function assertBegin(stack,c,i) {
  var node={type:ASSERT_NODE,indices:[i],assertionType:AssertBegin};
  stack.unshift(node);
}
// "$" : put an AssertEnd assertion node on top of the stack.
function assertEnd(stack,c,i,state,s) {
  var node={type:ASSERT_NODE,indices:[i],assertionType:AssertEnd};
  stack.unshift(node);
}
// \b and \B assertions. The node starts at the backslash, one char before i.
function assertWordBoundary(stack,c,i) {
  var kind=AssertNonWordBoundary;
  if (c=='b') kind=AssertWordBoundary;
  stack.unshift({type:ASSERT_NODE,indices:[i-1],assertionType:kind});
}
// "{" after something, e.g. /a{/ .
// The brace is treated as part of a normal exact node and only turned into a
// quantifier in repeatnEnd, because /a{+/ is valid. When the previous node is
// not an exact node (e.g. '[a-z]{'), a new exact node is started for it.
function repeatnStart(stack,c,i) {
  if (stack[0].type!==EXACT_NODE) {
    stack.unshift({type:EXACT_NODE,indices:[i]});
  }
}
// "," inside /a{n,}/ : remember the comma position on the top node so that
// repeatnEnd can split the two numbers.
function repeatnComma(stack,c,i) {
  _set(stack[0],'_commaIndex',i);
}
// "}" closing /a{n}/, /a{n,}/ or /a{n,m}/ : read the numbers back from the
// source string and apply the quantifier through _repeat.
function repeatnEnd(stack,c,i,state,s) {
  var top=stack[0];
  var braceIndex=s.lastIndexOf('{',i);
  var commaIndex=top._commaIndex;
  var min=parseInt(s.slice(braceIndex+1,commaIndex || i),10);
  var max=min; // /a{n}/
  if (commaIndex) {
    if (commaIndex+1==i) { // /a{n,}/
      max=Infinity;
    } else {
      max=parseInt(s.slice(commaIndex+1,i),10);
    }
    if (max < min) {
      throw new RegexSyntaxError({
        type:"OutOfOrder",lastState:state,
        lastIndex:i,astStack:stack,
        message:"Numbers out of order in {} quantifier!"
      });
    }
    delete top._commaIndex;
  }
  // '[a-z]{1,3}' : the top node only holds the brace text, drop it.
  if (top.indices[0]===braceIndex) {
    stack.shift();
  }
  _repeat(stack,min,max,braceIndex,s);
}
// "*" , e.g. /a*/ : zero or more.
function repeat0(stack,c,i,state,s) {
  _repeat(stack,0,Infinity,i,s);
}
// "?" , e.g. /a?/ : zero or one.
function repeat01(stack,c,i,state,s) {
  _repeat(stack,0,1,i,s);
}
// "+" , e.g. /a+/ : one or more.
function repeat1(stack,c,i,state,s) {
  _repeat(stack,1,Infinity,i,s);
}
// Attach a {min,max} quantifier to the top of the stack.
// For an exact node only its last char is repeated, so a separate exact node
// is created for that char; any other node gets the repeat directly.
function _repeat(stack,min,max,charEndIndex,s) {
  var target=stack[0];
  var quantifier={min:min,max:max,nonGreedy:false};
  // A node that already carries a single char is repeated from its own start;
  // otherwise the repeated char is the one right before the quantifier.
  var charIndex=(target.chars && target.chars.length===1) ? target.indices[0] : charEndIndex-1;

  if (target.type!==EXACT_NODE) {
    target.repeat=quantifier;
    return;
  }

  var repeated={
    type:EXACT_NODE,
    repeat:quantifier,
    chars:target.chars ? target.chars : s[charIndex],
    indices:[charIndex]
  };
  // e.g. /a{n}/ should be only single node
  if (target.indices[0]===charIndex) stack.shift();
  stack.unshift(repeated);
}
// "?" after a quantifier: mark the repeat of the top node as non-greedy.
function repeatNonGreedy(stack) {
  var quantifier=stack[0].repeat;
  quantifier.nonGreedy=true;
}
// "\" followed by an ordinary char: put a one-char exact node on the stack.
// Chars listed in escapeCharMap are replaced by the char they stand for; any
// other char stands for itself. The node starts at the backslash (i-1).
function normalEscape(stack,c,i) {
  var ch=escapeCharMap.hasOwnProperty(c) ? escapeCharMap[c] : c;
  stack.unshift({type:EXACT_NODE,chars:ch,indices:[i-1]});
}
// A class escape outside brackets (class letter in c): represented as a
// non-excluding charset node holding just that class. Starts at the backslash.
function charClassEscape(stack,c,i) {
  var node={
    type:CHARSET_NODE,
    indices:[i-1],
    chars:'',
    ranges:[],
    classes:[c],
    exclude:false
  };
  stack.unshift(node);
}
// Last digit of "\xHH": combine the previous source char with c as a hex
// code and put the resulting char on the stack as an exact node.
function hexEscape(stack,c,i,state,s) {
  var code=parseInt(s[i-1]+c,16);
  stack.unshift({
    type:EXACT_NODE,
    chars:String.fromCharCode(code),
    indices:[i-3] // \xAA length-1
  });
}
// Last digit of "\uHHHH": decode the four hex digits ending at i and put the
// resulting char on the stack as an exact node.
function unicodeEscape(stack,c,i,state,s) {
  var digits=s.slice(i-3,i+1);
  stack.unshift({
    type:EXACT_NODE,
    chars:String.fromCharCode(parseInt(digits,16)),
    indices:[i-5] // \u5409 length-1
  });
}
// "(" : open a group. Returns the group's `sub` array, which becomes the
// current stack until the group ends.
function groupStart(stack,c,i) {
  if (!stack.groupCounter) stack.groupCounter={i:0};
  var counter=stack.groupCounter;
  counter.i++;

  var inner=[];
  var group={
    type:GROUP_NODE,
    num: counter.i,
    sub:inner, indices:[i],
    _parentStack:stack // Used to restore current stack when group end,viz. encounters ")"
  };
  _set(inner,'_parentGroup',group);
  inner.groupCounter=counter; // the same counter object is shared, so increments stay visible
  return inner;
}
// "(?:" : the group just opened does not capture, so clear its number and
// take back the counter increment made in groupStart.
function groupNonCapture(stack) {
  var group=stack._parentGroup;
  group.nonCapture=true;
  group.num=undefined;
  stack.groupCounter.i--;
}
// "(?=" / "(?!" : turn the group just opened into an assertion node.
// Assertion groups do not capture, so the number is cleared and the counter
// increment made in groupStart is taken back.
function groupToAssertion(stack,c,i) {
  var group=stack._parentGroup;
  group.type=ASSERT_NODE;
  if (c=='=') {
    group.assertionType=AssertLookahead;
  } else {
    group.assertionType=AssertNegativeLookahead;
  }
  group.num=undefined;
  stack.groupCounter.i--;
}
// ")" : close the current group and return the stack it was opened in, with
// the finished group node put on top of it.
function groupEnd(stack,c,i,state,s) {
  var inner=endChoice(stack); // restore group's stack from choice
  var group=inner._parentGroup;
  if (!group) {
    throw new RegexSyntaxError({
      type:'UnexpectedChar',
      lastIndex:i,
      lastState:state,
      astStack:inner,
      message:"Unexpected end parenthesis!"
    });
  }
  // Remove the bookkeeping properties from the finished sub stack.
  delete inner._parentGroup;
  delete inner.groupCounter;

  var outer=group._parentStack; // restore stack
  delete group._parentStack;
  outer.unshift(group);
  group.endParenIndex=i;
  return outer;
}
// "|" : start a new branch and return its (empty) stack, which becomes the
// current stack. Branches are unshifted, so `branches` is in reverse order.
function choice(stack,c,i) {
  var branch=[];
  var node;

  // Point the new branch back at the choice node it belongs to.
  function adopt() {
    _set(branch,'_parentChoice',node);
    _set(branch,'_parentGroup',node);
  }

  if (stack._parentChoice) {
    // Already inside a choice: add one more branch to it.
    node=stack._parentChoice;
    node.branches.unshift(branch);
    adopt();
    branch.groupCounter=stack.groupCounter; // keep track
    delete stack._parentChoice;
    delete stack.groupCounter; // This stack is in choice.branches,so clean it
    return branch;
  }

  // First "|" on this stack, e.g. "/(a|)/" : create a new ChoiceNode.
  var first=stack[stack.length-1]; // Because of stack is reverse order
  node={
    type:CHOICE_NODE,
    indices:[(first ? first.indices[0] : i-1)],
    branches:[]
  };
  _set(node,'_parentStack',stack);
  node.branches.unshift(stack.slice()); // contents before "|"
  /* The choice node replaces the stack contents, e.g. "/(a|b)/" is {
     type:'group',sub:[
       {type:'choice',branches:[
         [{type:'exact',chars:'a'}],
         [{type:'exact',chars:'b'}]
       ]}]}*/
  stack.length=0;
  stack.unshift(node); // must not clean groupCounter

  branch.groupCounter=stack.groupCounter;
  adopt();
  node.branches.unshift(branch);
  return branch;
}
// If the current stack is a branch of a choice, detach it and return the
// stack the choice node lives in; any other stack is returned unchanged.
function endChoice(stack) {
  var node=stack._parentChoice;
  if (!node) return stack;

  delete stack._parentChoice;
  delete stack._parentGroup;
  delete stack.groupCounter;

  var owner=node._parentStack;
  delete node._parentStack;
  return owner;
}
// "[" : open an empty charset node on top of the stack.
function charsetStart(stack,c,i) {
  var node={
    type:CHARSET_NODE,
    indices:[i],
    classes:[],
    ranges:[],
    chars:''
  };
  stack.unshift(node);
}
// "^" right after "[" : mark the charset on top of the stack as excluding.
function charsetExclude(stack) {
  var charset=stack[0];
  charset.exclude=true;
}
// A literal char inside brackets: append it to the charset's chars.
function charsetContent(stack,c,i) {
  var charset=stack[0];
  charset.chars+=c;
}
// "\" + ordinary char inside brackets: append the char it stands for
// (looked up in escapeCharMap, otherwise the char itself) to the charset.
function charsetNormalEscape(stack,c,i) {
  var ch=escapeCharMap.hasOwnProperty(c) ? escapeCharMap[c] : c;
  stack[0].chars+=ch;
}
// "\0" inside brackets: append a NUL char to the charset's chars.
function charsetNullChar(stack,c,i) {
  var charset=stack[0];
  charset.chars=charset.chars+"\0";
}
// A class escape inside brackets: record the class letter on the charset.
function charsetClassEscape(stack,c) {
  var charset=stack[0];
  charset.classes.push(c);
}
// Last digit of "\xHH" inside brackets. The "x" and the first digit were
// already appended to chars as plain content; replace them with the decoded char.
function charsetHexEscape(stack,c,i,state,s) {
  var charset=stack[0];
  var pending=charset.chars;
  var decoded=String.fromCharCode(parseInt(pending.slice(-1)+c,16));
  charset.chars=pending.slice(0,-2)+decoded; // also remove "xA"
}
// Last digit of "\uHHHH" inside brackets. The "u" and the first three digits
// were already appended to chars as plain content; replace them with the decoded char.
function charsetUnicodeEscape(stack,c,i,state,s) {
  var charset=stack[0];
  var pending=charset.chars;
  var decoded=String.fromCharCode(parseInt(pending.slice(-3)+c,16));
  charset.chars=pending.slice(0,-4)+decoded; //remove "uABC"
}
610 |
// End char of a range such as "a-z". The start char and the "-" are the last
// two chars collected so far; move them out of chars into a [start,end] range.
// The range array also records the index of its end char as `lastIndex`.
function charsetRangeEnd(stack,c,i,state,s) {
  var charset=stack[0];
  var tail=charset.chars.slice(-2);
  var range=[tail[0],c];
  range.lastIndex=i;
  charset.ranges.push(range);
  charset.chars=charset.chars.slice(0,-2);
}
// Range end written as an escape, e.g. [a-\n]. The escaped char is translated
// through escapeCharMap and then handled like a plain range end.
// The argument list is rebuilt explicitly: assigning to `c` only changes
// `arguments[1]` in sloppy mode, so under strict mode the untranslated char
// was forwarded and [a-\n] ended at 'n'.
function charsetRangeEndNormalEscape(stack,c) {
  var args=Array.prototype.slice.call(arguments);
  if (escapeCharMap.hasOwnProperty(c)) args[1]=escapeCharMap[c];
  charsetRangeEnd.apply(this,args);
}
623 | // [\x30-\x78] first repr as {ranges:['\x30','x']}
624 | // [\u0000-\u4567] first repr as {ranges:['\0','u']}
625 | // If escape sequences are valid then replace range end with corrent char
626 | // stack[0].chars did not contain 'u' or 'x'
// Last digit of a "\uHHHH" range end. The three earlier digits sit at the end
// of chars and the provisional range on top of `ranges` still has 'u' as its
// end; swap in the decoded char and record the index.
function charsetRangeEndUnicodeEscape(stack,c,i) {
  var charset=stack[0];
  var digits=charset.chars.slice(-3)+c;
  charset.chars=charset.chars.slice(0,-3); // So just remove previous three,no 'u'
  var provisional=charset.ranges.pop();
  var fixed=[provisional[0],String.fromCharCode(parseInt(digits,16))];
  fixed.lastIndex=i;
  charset.ranges.push(fixed);
}
// Last digit of a "\xHH" range end. The first digit sits at the end of chars
// and the provisional range on top of `ranges` still has 'x' as its end; swap
// in the decoded char and record the index.
function charsetRangeEndHexEscape(stack,c,i) {
  var charset=stack[0];
  var digits=charset.chars.slice(-1)+c;
  charset.chars=charset.chars.slice(0,-1); // chars doesn't contain 'x'
  var provisional=charset.ranges.pop();
  var fixed=[provisional[0],String.fromCharCode(parseInt(digits,16))];
  fixed.lastIndex=i;
  charset.ranges.push(fixed);
}
647 |
648 |
649 | /* Caveat!!!
650 | See:https://developer.mozilla.org/en/docs/Web/JavaScript/Reference/Global_Objects/RegExp
651 | \0 Matches a NUL character. Do not follow this with another digit.
652 | ECMA-262 Standard: 15.10.2.11 DecimalEscape
653 | NOTE
654 | If \ is followed by a decimal number n whose first digit is not 0, then the escape sequence is considered to be
655 | a backreference. It is an error if n is greater than the total number of left capturing parentheses in the entire regular
656 | expression. \0 represents the character and cannot be followed by a decimal digit.
657 |
658 | But in both Chrome and Firefox, /\077/ matches "\077",e.g. String.fromCharCode(parseInt("77",8))
659 | /(g)\1/ matches "gg",it's OK.
660 | But /(g)\14/ matches "g\14","\14" is String.fromCharCode(parseInt("14",8))
661 | And /(g)\1456/ matches "g\145"+"6",/(g)\19/ matches "g\1"+"9". Who knows WTF?
662 | Considering that ECMAScript StrictMode did not support OctEscape,
663 | I'm not going to implement OctEscape.
664 |
665 | I will make it conform to the Standard (this also keeps the code simple).
666 | */
667 | function backref(stack,c,i,state) {
668 | var last=stack[0],n=parseInt(c,10),
669 | isFirstNum=state==='escape',
670 | counter=stack.groupCounter,
671 | cn=(counter && counter.i) || 0;
672 |
673 | if (!isFirstNum) { //previous node must be backref node
674 | n=parseInt(last.num+""+n,10);
675 | } else {
676 | last={type:BACKREF_NODE,indices:[i-1]};
677 | stack.unshift(last);
678 | }
679 | var rn;
680 | if (n>cn) {
681 | throw new RegexSyntaxError({
682 | type:'InvalidBackReference',lastIndex:i,astStack:stack,lastState:state,
683 | message:'Back reference number('+n+') greater than current groups count('+cn+').'
684 | });
685 | } else if (rn=_isRecursive(n,stack)) {
686 | throw new RegexSyntaxError({
687 | type:'InvalidBackReference',lastIndex:i,astStack:stack,lastState:state,
688 | message:'Recursive back reference in group ('+rn+') itself.'
689 | });
690 | }
691 | last.num=n;
692 |
693 | function _isRecursive(n,stack) {
694 | if (!stack._parentGroup) return false;
695 | if (stack._parentGroup.num==n) return n;
696 | return _isRecursive(n,stack._parentGroup._parentStack);
697 | }
698 | }
699 |
700 | //console.log(K.locals(_));
701 |
702 | return {
703 | exact:exact,dot:dot,nullChar:nullChar,assertBegin:assertBegin,
704 | assertEnd:assertEnd,assertWordBoundary:assertWordBoundary,
705 | repeatnStart:repeatnStart,repeatnComma:repeatnComma,repeatNonGreedy:repeatNonGreedy,
706 | repeatnEnd:repeatnEnd,repeat1:repeat1,repeat01:repeat01,repeat0:repeat0,
707 | charClassEscape:charClassEscape,normalEscape:normalEscape,
708 | unicodeEscape:unicodeEscape,hexEscape:hexEscape,charClassEscape:charClassEscape,
709 | groupStart:groupStart,groupNonCapture:groupNonCapture,backref:backref,
710 | groupToAssertion:groupToAssertion,groupEnd:groupEnd,
711 | choice:choice,endChoice:endChoice,
712 | charsetStart:charsetStart,charsetExclude:charsetExclude,
713 | charsetContent:charsetContent,charsetNullChar:charsetNullChar,
714 | charsetClassEscape:charsetClassEscape,
715 | charsetHexEscape:charsetHexEscape,
716 | charsetUnicodeEscape:charsetUnicodeEscape,
717 | charsetRangeEnd:charsetRangeEnd,charsetNormalEscape:charsetNormalEscape,
718 | charsetRangeEndNormalEscape:charsetRangeEndNormalEscape,
719 | charsetRangeEndUnicodeEscape:charsetRangeEndUnicodeEscape,
720 | charsetRangeEndHexEscape:charsetRangeEndHexEscape
721 | };
722 |
723 | })();
724 |
725 | var digit='0-9';
726 | var hexDigit='0-9a-fA-F';
727 |
728 | //EX,It is an exclusive charset
729 | var exactEXCharset='^+*?^$.|(){[\\';
730 |
731 | var charClassEscape='dDwWsS';
732 | var unicodeEscape='u';
733 | var hexEscape='x';
734 | //var octDigit='0-7';
735 | //var octEscape='0-7'; Never TODO. JavaScript doesn't support string OctEscape in strict mode.
736 |
737 | // In charset,\b\B means "\b","\B",not word boundary
738 | // NULL Escape followed digit should throw error
739 | var normalEscapeInCharsetEX='^'+charClassEscape+unicodeEscape+hexEscape+'0-9';
740 |
741 | // 'rntvf\\' escape ,others return raw
742 | // Also need exclude \b\B assertion and backref
743 | var normalEscapeEX=normalEscapeInCharsetEX+'bB1-9';
744 |
745 | //var controlEscape;//Never TODO.Same reason as OctEscape.
746 |
747 |
748 | var repeatnStates='repeatnStart,repeatn_1,repeatn_2,repeatnErrorStart,repeatnError_1,repeatnError_2';
749 | var hexEscapeStates='hexEscape1,hexEscape2';
750 | var unicodeEscapeStates='unicodeEscape1,unicodeEscape2,unicodeEscape3,unicodeEscape4';
751 |
752 | var allHexEscapeStates=hexEscapeStates+','+unicodeEscapeStates;
753 |
754 | var charsetIncompleteEscapeStates='charsetUnicodeEscape1,charsetUnicodeEscape2,charsetUnicodeEscape3,charsetUnicodeEscape4,charsetHexEscape1,charsetHexEscape2';
755 |
756 | // [a-\u1z] means [a-u1z], [a-\u-z] means [-za-u]
757 | // [a-\u0-9] means [a-u0-9]. WTF!
758 | var charsetRangeEndIncompleteEscapeFirstStates='charsetRangeEndUnicodeEscape1,charsetRangeEndHexEscape1';
759 |
760 | var charsetRangeEndIncompleteEscapeRemainStates='charsetRangeEndUnicodeEscape2,charsetRangeEndUnicodeEscape3,charsetRangeEndUnicodeEscape4,charsetRangeEndHexEscape2';
761 |
762 | var charsetRangeEndIncompleteEscapeStates=charsetRangeEndIncompleteEscapeFirstStates+','+charsetRangeEndIncompleteEscapeRemainStates;
763 |
764 | var config={
765 | compact:true,
766 | accepts:'start,begin,end,repeat0,repeat1,exact,repeatn,repeat01,repeatNonGreedy,choice,'+(repeatnStates+',nullChar,digitBackref,'+unicodeEscapeStates+','+hexEscapeStates),
767 | trans:[
768 | ['start,begin,end,exact,repeatNonGreedy,repeat0,repeat1,repeat01,groupStart,groupQualifiedStart,choice,repeatn>exact',exactEXCharset,actions.exact],
769 | // e.g. /\u54/ means /u54/
770 | [allHexEscapeStates+'>exact',exactEXCharset+hexDigit,actions.exact],
771 | // e.g. /\0abc/ is exact "\0abc",but /\012/ is an error
772 | ['nullChar>exact',exactEXCharset+digit,actions.exact],
773 | //[(repeatnStates+',nullChar,digitBackref,'+unicodeEscapeStates+','+hexEscapeStates)+'>exact',exactEXCharset+'']
774 | [(repeatnStates+',nullChar,digitBackref,'+unicodeEscapeStates+','+hexEscapeStates)+',start,begin,end,exact,repeatNonGreedy,repeat0,repeat1,repeat01,groupStart,groupQualifiedStart,choice,repeatn>exact','.',actions.dot],
775 | ['start,groupStart,groupQualifiedStart,end,begin,exact,repeat0,repeat1,repeat01,repeatn,repeatNonGreedy,choice,'+repeatnStates+',nullChar,digitBackref,'+unicodeEscapeStates+','+hexEscapeStates+'>begin','^',actions.assertBegin],
776 | [(repeatnStates+',nullChar,digitBackref,'+unicodeEscapeStates+','+hexEscapeStates)+',exact>repeatnStart','{',actions.repeatnStart],
777 | ['start,begin,end,groupQualifiedStart,groupStart,repeat0,repeat1,repeatn,repeat01,repeatNonGreedy,choice>repeatnErrorStart','{',actions.exact],//No repeat,treat as exact char e.g. /{/,/^{/,/a|{/
778 | ['repeatnStart>repeatn_1',digit,actions.exact], // Now maybe /a{1/
779 | ['repeatn_1>repeatn_1',digit,actions.exact], // Could be /a{11/
780 | ['repeatn_1>repeatn_2',',',actions.repeatnComma], // Now maybe /a{1,/
781 | ['repeatn_2>repeatn_2',digit,actions.exact], // Now maybe /a{1,3/
782 | ['repeatn_1,repeatn_2>repeatn','}',actions.repeatnEnd], //Totally end /a{1,3}/
783 | //Repeat treat as exact chars
784 | ['repeatnStart,repeatnErrorStart>exact','}',actions.exact], // e.g. /{}/,/a{}/
785 | //Add exclusion 0-9 and "}", e.g. /a{a/,/a{,/ are valid exact match
786 | ['repeatnStart,repeatnErrorStart>exact',exactEXCharset+'0-9}',actions.exact],
787 |
788 | // "/{}/" is valid exact match but /{1,2}/ is error repeat.
789 | // So must track it with states repeatnError_1,repeatnError_2
790 | ['repeatnErrorStart>repeatnError_1',digit,actions.exact],
791 | ['repeatnError_1>repeatnError_1',digit,actions.exact],
792 | ['repeatnError_1>repeatnError_2',',',actions.exact],
793 | ['repeatnError_2>repeatnError_2',digit,actions.exact],
794 | // repeatErrorFinal is an unacceptable state. Nothing to repeat error should be throwed
795 | ['repeatnError_2,repeatnError_1>repeatErrorFinal','}'],
796 |
797 | // "/a{2a/" and "/{2a/" are valid exact match
798 | ['repeatn_1,repeatnError_1>exact',exactEXCharset+digit+',}',actions.exact],
799 | // "/a{2,a/" and "/{3,a" are valid
800 | ['repeatn_2,repeatnError_2>exact',exactEXCharset+digit+'}',actions.exact],
801 |
802 | ['exact,'+(repeatnStates+',nullChar,digitBackref,'+unicodeEscapeStates+','+hexEscapeStates)+'>repeat0','*',actions.repeat0],
803 | ['exact,'+(repeatnStates+',nullChar,digitBackref,'+unicodeEscapeStates+','+hexEscapeStates)+'>repeat1','+',actions.repeat1],
804 | ['exact,'+(repeatnStates+',nullChar,digitBackref,'+unicodeEscapeStates+','+hexEscapeStates)+'>repeat01','?',actions.repeat01],
805 | ['choice>repeatErrorFinal','*+?'],
806 | ['repeat0,repeat1,repeat01,repeatn>repeatNonGreedy','?',actions.repeatNonGreedy],
807 | ['repeat0,repeat1,repeat01,repeatn>repeatErrorFinal','+*'],
808 |
809 | // Escape
810 | ['start,begin,end,groupStart,groupQualifiedStart,exact,repeatNonGreedy,repeat0,repeat1,repeat01,repeatn,choice,'+(repeatnStates+',nullChar,digitBackref,'+unicodeEscapeStates+','+hexEscapeStates)+'>escape','\\'],
811 | ['escape>nullChar','0',actions.nullChar],
812 | ['nullChar>digitFollowNullError','0-9'], // "/\0123/" is invalid in standard
813 | ['escape>exact',normalEscapeEX,actions.normalEscape],
814 | ['escape>exact','bB',actions.assertWordBoundary],
815 | ['escape>exact',charClassEscape,actions.charClassEscape],
816 | ['escape>unicodeEscape1',unicodeEscape,actions.exact],
817 | ['unicodeEscape1>unicodeEscape2',hexDigit,actions.exact],
818 | ['unicodeEscape2>unicodeEscape3',hexDigit,actions.exact],
819 | ['unicodeEscape3>unicodeEscape4',hexDigit,actions.exact],
820 | ['unicodeEscape4>exact',hexDigit,actions.unicodeEscape],
821 | ['escape>hexEscape1',hexEscape,actions.exact],
822 | ['hexEscape1>hexEscape2',hexDigit,actions.exact],
823 | ['hexEscape2>exact',hexDigit,actions.hexEscape],
824 |
825 | ['escape>digitBackref','1-9',actions.backref],
826 | ['digitBackref>digitBackref',digit,actions.backref],
827 | ['digitBackref>exact',exactEXCharset+digit,actions.exact],
828 |
829 | // Group start
830 | ['exact,begin,end,repeat0,repeat1,repeat01,repeatn,repeatNonGreedy,start,groupStart,groupQualifiedStart,choice,'+(repeatnStates+',nullChar,digitBackref,'+unicodeEscapeStates+','+hexEscapeStates)+'>groupStart','(',actions.groupStart],
831 | ['groupStart>groupQualify','?'],
832 | ['groupQualify>groupQualifiedStart',':',actions.groupNonCapture],//group non-capturing
833 | ['groupQualify>groupQualifiedStart','=',actions.groupToAssertion],//group positive lookahead
834 | ['groupQualify>groupQualifiedStart','!',actions.groupToAssertion],//group negative lookahead
835 | [(repeatnStates+',nullChar,digitBackref,'+unicodeEscapeStates+','+hexEscapeStates)+'groupStart,groupQualifiedStart,end,exact,repeat1,repeat0,repeat01,repeatn,repeatNonGreedy,choice>exact',')',actions.groupEnd],//group end
836 |
837 | //choice
838 | ['start,begin,end,groupStart,groupQualifiedStart,exact,repeat0,repeat1,repeat01,repeatn,repeatNonGreedy,choice,'+(repeatnStates+',nullChar,digitBackref,'+unicodeEscapeStates+','+hexEscapeStates)+'>choice','|', actions.choice],
839 |
840 | ['start,groupStart,groupQualifiedStart,begin,exact,repeat0,repeat1,repeat01,repeatn,repeatNonGreedy,choice,'+(repeatnStates+',nullChar,digitBackref,'+unicodeEscapeStates+','+hexEscapeStates)+'>end','$',actions.assertEnd],
841 |
842 | // Charset [HA-HO]
843 | ['exact,begin,end,repeat0,repeat1,repeat01,repeatn,repeatNonGreedy,groupQualifiedStart,groupStart,start,choice,'+(repeatnStates+',nullChar,digitBackref,'+unicodeEscapeStates+','+hexEscapeStates)+'>charsetStart','[',actions.charsetStart],
844 | ['charsetStart>charsetExclude','^',actions.charsetExclude],
845 | ['charsetStart>charsetContent','^\\]^',actions.charsetContent],
846 | ['charsetExclude>charsetContent','^\\]',actions.charsetContent], // "[^^]" is valid
847 | ['charsetContent,charsetClass>charsetContent','^\\]-',actions.charsetContent],
848 | ['charsetClass>charsetContent','-',actions.charsetContent],
849 |
850 |
851 | // Charset Escape
852 | [charsetIncompleteEscapeStates+
853 | ',charsetStart,charsetContent,charsetClass,charsetExclude,charsetRangeEnd>charsetEscape','\\'],
854 | ['charsetEscape>charsetContent',normalEscapeInCharsetEX,actions.charsetNormalEscape],
855 | ['charsetEscape>charsetNullChar','0',actions.charsetNullChar],
856 |
857 | //Didn't allow oct escape
858 | ['charsetEscape>charsetOctEscape','1-9'],
859 | ['charsetRangeEndEscape>charsetOctEscape','1-9'],
860 | //Treat /[\012]/ as an error
861 | ['charsetNullChar>digitFollowNullError',digit],
862 | // Only null char not followed by digit is valid
863 | ['charsetNullChar>charsetContent','^0-9\\]-',actions.charsetContent],
864 |
865 | // charsetClass state should diff from charsetContent
866 | // Because /[\s-a]/ means /[-a\s]/
867 | ['charsetEscape>charsetClass',charClassEscape,actions.charsetClassEscape],
868 |
869 | ['charsetEscape>charsetUnicodeEscape1',unicodeEscape,actions.charsetContent],
870 | ['charsetUnicodeEscape1>charsetUnicodeEscape2',hexDigit,actions.charsetContent],
871 | ['charsetUnicodeEscape2>charsetUnicodeEscape3',hexDigit,actions.charsetContent],
872 | ['charsetUnicodeEscape3>charsetUnicodeEscape4',hexDigit,actions.charsetContent],
873 | ['charsetUnicodeEscape4>charsetContent',hexDigit,actions.charsetUnicodeEscape],
874 | ['charsetEscape>charsetHexEscape1',hexEscape,actions.charsetContent],
875 | ['charsetHexEscape1>charsetHexEscape2',hexDigit,actions.charsetContent],
876 | ['charsetHexEscape2>charsetContent',hexDigit,actions.charsetHexEscape],
877 |
878 | // [a\u54-9] should be treat as [4-9au5]
879 | [charsetIncompleteEscapeStates+'>charsetContent','^\\]'+hexDigit+'-',actions.charsetContent],
880 |
881 | [charsetIncompleteEscapeStates+',charsetNullChar,charsetContent>charsetRangeStart','-',actions.charsetContent],
882 | ['charsetRangeStart>charsetRangeEnd','^\\]',actions.charsetRangeEnd],
883 | ['charsetRangeEnd>charsetContent','^\\]',actions.charsetContent],
884 |
885 |
886 | // Some troubles here, [0-\x39] means [0-9]
887 | ['charsetRangeStart>charsetRangeEndEscape','\\'],
888 | ['charsetRangeEndEscape>charsetRangeEnd',normalEscapeEX,actions.charsetRangeEndNormalEscape],
889 | // No need to care [a-\0],it is not a valid range so will throw OutOfOrder error.
890 | // But what about [\0-\0]? Insane!
891 | ['charsetRangeEndEscape>charsetRangeEndWithNullChar','0'],
892 |
893 | ['charsetRangeEndEscape>charsetRangeEndUnicodeEscape1',unicodeEscape,actions.charsetRangeEnd],
894 | ['charsetRangeEndUnicodeEscape1>charsetRangeEndUnicodeEscape2',hexDigit,actions.charsetContent],
895 | ['charsetRangeEndUnicodeEscape2>charsetRangeEndUnicodeEscape3',hexDigit,actions.charsetContent],
896 | ['charsetRangeEndUnicodeEscape3>charsetRangeEndUnicodeEscape4',hexDigit,actions.charsetContent],
897 | ['charsetRangeEndUnicodeEscape4>charsetRangeEnd',hexDigit,actions.charsetRangeEndUnicodeEscape],
898 | ['charsetRangeEndEscape>charsetRangeEndHexEscape1',hexEscape,actions.charsetRangeEnd],
899 | ['charsetRangeEndHexEscape1>charsetRangeEndHexEscape2',hexDigit,actions.charsetContent],
900 | ['charsetRangeEndHexEscape2>charsetRangeEnd',hexDigit,actions.charsetRangeEndHexEscape],
901 | // [0-\w] means [-0\w]? Should throw error!
902 | ['charsetRangeEndEscape>charsetRangeEndClass',charClassEscape],
903 |
904 | // [a-\uz] means [za-u],[a-\u-z] means [-za-u]
905 | [charsetRangeEndIncompleteEscapeFirstStates+'>charsetContent','^\\]'+hexDigit,actions.charsetContent],
906 |
907 | // [a-\u0-9] means [0-9a-u]
908 | [charsetRangeEndIncompleteEscapeRemainStates+'>charsetRangeStart','-',actions.charsetContent],
909 | [charsetIncompleteEscapeStates+','
910 | +charsetRangeEndIncompleteEscapeStates
911 | +',charsetNullChar,charsetRangeStart,charsetContent'
912 | +',charsetClass,charsetExclude,charsetRangeEnd>exact',
913 | ']']
914 | ]
915 | };
916 |
917 |
918 | return parse;
919 | });
920 |
--------------------------------------------------------------------------------
/tests/testData.js:
--------------------------------------------------------------------------------
1 | if (typeof define !== 'function') var define = require('amdefine')(module);
2 | define(function() {
3 | function str(v) {
4 | return (typeof v === 'string') ? v : v.source
5 | }
6 |
7 | var reMatchCases=[ // consumed by tests/RegExpTest.js, which compares MyRegExp#exec with the native RegExp#exec result
8 | // Each case is [RegExp, input] where input is one String or an Array of Strings. NOTE(review): the inputs on listing lines 35 and 67 look truncated (markup apparently lost) - confirm against the repository.
9 | [/abc/,'abc'],
10 | [/abc/i,'ABC'],
11 | [/Abc/i,'aBC'],
12 | [/^abc$/,'abcdef'],
13 | [/^Abc$/im,'def\nabc\ndef'],
14 | [/[a-z]{3}/,'--abc--'],
15 | [/[^A-H]/i,'abchijk'],
16 | [/[A-H]+/,'AAAA'],
17 | [/[A-H]+?/,'AAAA'],
18 | [/\w\d\s/,'A1 '],
19 | [/(\w|\d|\s)+/,'A1 B2\n'],
20 | [/[\w\d\s]+/,'A1 B2\r'],
21 | [/[\W\D\S]+/,'+-&*'],
22 | [/[^\W\D\S]+/,'+-&*'],
23 | [/(\d+|^a)$/,'def123'],
24 | [/(\d+|^a)$/,'a'],
25 | [/([a-z]{3}|\d+$)+/,'abc'],
26 | [/([a-z]{3}|\d+$)+/,'123'],
27 | [/^([a-zA-Z0-9])(([-.]|[_]+)?([a-zA-Z0-9]+))*(@){1}[a-z0-9]+[.]{1}(([a-z]{2,3})|([a-z]{2,3}[.]{1}[a-z]{2,3}))$/,'alan.dot@jackson.com'],
28 | [/\d+(?=ab)/,'123-456ab'],
29 | [/\d*(?=ab)/,'ab-456ab'],
30 | [/\d*?(?=ab)/,'ab-456ab'],
31 | [/https?:\/\/([a-z]+)\.(\w+)\.([a-z]+)/,'http://www.google.com'],
32 | [/https?:\/\/([a-z]+)\.(\w+)\.([a-z]+)/,'https://www.google.com'],
33 | [/^https?:\/\/([a-z]+)\.(\w+)\.([a-z]+)$/,'http://www.google.com'],
34 | [/^https?:\/\/([a-z]+)\.(\w+)\.([a-z]+)$/,'https://www.google.com/'],
35 | [/<(\w+)\s\w+="(.+?)">(.*?)<\/\1>/,'abc
'],
36 | [/abc(\d+)1{2,}?\1def/,'abc12311123def'],
37 | [/(\w+)+\1+/,'abc123abc123!'],
38 | [/((2[0-4]\d|25[0-5]|[01]?\d\d?)\.){3}(2[0-4]\d|25[0-5]|[01]?\d\d?)/,
39 | ['127.0.0.1','255.255.255.0','192.168.11.12']
40 | ],
41 | [/\w+([-+.]\w+)*@\w+([-.]\w+)*\.\w+([-.]\w+)*/,'barzar-hall@ruby-lang.com'],
42 | [/\d{4}-\d{1,2}-\d{1,2}/,'1990-10-1'],
43 | [/\b((?!abc)\w)+\b/,'babcue'],
44 | [/\b((?!abc)\w)+\b/,'babbcue'],
45 | [/^\w{1,15}(?:@(?!-))(?:(?:[a-z0-9-]*)(?:[a-z0-9](?!-))(?:\.(?!-)))+[a-z]{2,4}$/,
46 | [
47 | 'abc@def.com',
48 | 'jelly_bean@google.com.hk',
49 | 'snow_bear@snow-bear.com.cn',
50 | 'i@jex.im',
51 | 'i@jex-cn.com.im',
52 | 'i@jex-cn.bear',
53 | 'i@123cn.bear',
54 | 'dollar@cn.com',
55 | 'dollar@-cn.com',
56 | 'dollar@cn-.com',
57 | 'snow.bear@bear.com'
58 | ]
59 | ],
60 | [/^(a?b)?[a-z]+X?$/,['bb','abb','bbX']],
61 | [
62 | new RegExp('http://([\\w-]+\\.)+[\\w-]+(/[\\w- ./?%&=]*)?'),
63 | ['http://jex.im/','http://163.com','https://github.com/JexCheng/regulex']
64 | ],
65 | [
66 | /^<([a-z]+)([^<]+)*(?:>(.*)<\/\1>|\s+\/>)$/ ,
67 | ['','![]()
']
68 | ]
69 |
70 | ];
71 |
72 |
73 | var expectedPass = [ // patterns as strings or RegExp literals; presumably ones the parser must accept - confirm in tests/parseTest.js
74 | /[^<]+|<(!(--([^-]-([^-][^-]-)->?)?|\[CDATA\[([^]]]([^]]+])]+([^]>][^]]]([^]]+])]+)>)?|DOCTYPE([ \n\t\r]+([A-Za-z_:]|[^\x00-\x7F])([A-Za-z0-9_:.-]|[^\x00-\x7F])([ \n\t\r]+(([A-Za-z_:]|[^\x00-\x7F])([A-Za-z0-9_:.-]|[^\x00-\x7F])|"[^"]"|'[^']'))([ \n\t\r]+)?(\[(<(!(--[^-]-([^-][^-]-)->|[^-]([^]"'><]+|"[^"]"|'[^']')>)|\?([A-Za-z_:]|[^\x00-\x7F])([A-Za-z0-9_:.-]|[^\x00-\x7F])(\?>|[\n\r\t ][^?]\?+([^>?][^?]\?+)>))|%([A-Za-z_:]|[^\x00-\x7F])([A-Za-z0-9_:.-]|[^\x00-\x7F]);|[ \n\t\r]+)]([ \n\t\r]+)?)?>?)?)?|\?(([A-Za-z_:]|[^\x00-\x7F])([A-Za-z0-9_:.-]|[^\x00-\x7F])(\?>|[\n\r\t ][^?]\?+([^>?][^?]\?+)>)?)?|\/(([A-Za-z_:]|[^\x00-\x7F])([A-Za-z0-9_:.-]|[^\x00-\x7F])([ \n\t\r]+)?>?)?|(([A-Za-z_:]|[^\x00-\x7F])([A-Za-z0-9_:.-]|[^\x00-\x7F])([ \n\t\r]+([A-Za-z_:]|[^\x00-\x7F])([A-Za-z0-9_:.-]|[^\x00-\x7F])([ \n\t\r]+)?=([ \n\t\r]+)?("[^<"]"|'[^<']'))*([ \n\t\r]+)?\/?>?)?)/,
75 |
76 | 'ab+(1|0)?[a-z][^0-9]',
77 | /[\0-\n]/,
78 | '/abc/',
79 | '[abcdefa-z\\w0-\\u540-\\u5-\\x68z-\\u5409]',
80 | '[abc-\\u540-\\x69]',
81 | "^abc+d*e+?\\?[\\n-\\rbcd]{3,110}?(?:(a|b)+|(d|[e-z]?(?!abc)))$",
82 | "aa+b*?c{0PP{,{10}ab+?",
83 | "abc(d|e)f(c(a|(?:a|b|[a-z]|a(?=def)))|b|)",
84 | "abc+abc",
85 | "abc*abc",
86 | "ab+\\+c*abc",
87 | "ab[abc]+",
88 | "ab[abc-d]+",
89 | "ab[^abc-d]*",
90 | "ab[^c-d]*",
91 | "ab[[]*",
92 | "ab[\\]]*",
93 | "ab[\\]-a]*",
94 | "ab[^]*",
95 | "ab[-]*",
96 | "ab[a-]*",
97 | "ab[-b]*",
98 | "ab[[]",
99 | "]",
100 | "[a-z0-1]",
101 | "[a-z-b]",
102 | "(abc(def)+(a)((a),(b),(c,(d))))",
103 | "([a-z]+,[abc]444,[^a-b])+,(a(t)o(a[0-1]+b,(a[0-1]+)) )",
104 | '[a-zA-z]+://[^\\s]*',
105 | '((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)',
106 | '\\w+([-+.]\\w+)*@\\w+([-.]\\w+)*\\.\\w+([-.]\\w+)*',
107 | '[a-zA-z]{}://[^\\s]*?',
108 | 'a{1,2}{}',
109 | 'a{1,2}{1,2,4}',
110 | 'a{1,2}{{4}',
111 | 'a+{1,{4}',
112 | 'a+{1a}',
113 | 'a+{1|3}',
114 | 'a+{1\\}',
115 | 'a+{\\}',
116 | 'a+{34,45{}',
117 | '{}{4}{5',
118 | '}{4}{5',
119 | '{{4}{5(a|b)}',
120 | '{{4}{5[a-z]}',
121 | '{{4}{[0-9]}',
122 | /((2[0-4]\d|25[0-5]|[01]?\d\d?)\.){3}(2[0-4]\d|25[0-5]|[01]?\d\d?)/,
123 | /\w+([-+.]\w+)*@\w+([-.]\w+)*\.\w+([-.]\w+)*/,
124 | /[1-9]\d{4,}/,
125 | /<(.*)(.*)>.*<\/\1>|<(.*) \/>/,
126 | /(?=^.{8,}$)(?=.*\d)(?=.*\W+)(?=.*[A-Z])(?=.*[a-z])(?!.*\n).*$/,
127 | /(\d{4}|\d{2})-((1[0-2])|(0?[1-9]))-(([12][0-9])|(3[01])|(0?[1-9]))/,
128 | /((1[0-2])|(0?[1-9]))\/(([12][0-9])|(3[01])|(0?[1-9]))\/(\d{4}|\d{2})/,
129 | /((1|0?)[0-9]|2[0-3]):([0-5][0-9])/,
130 | /[\u4e00-\u9fa5]/,
131 | /[\u3000-\u301e\ufe10-\ufe19\ufe30-\ufe44\ufe50-\ufe6b\uff01-\uffee]/,
132 | /(\d{4}-|\d{3}-)?(\d{8}|\d{7})/,
133 | /1\d{10}/,
134 | /[1-9]\d{5}/,
135 | /\d{15}(\d\d[0-9xX])?/,
136 | /\d+/,
137 | /[0-9]*[1-9][0-9]*/,
138 | /-[0-9]*[1-9][0-9]*/,
139 | /-?\d+/,
140 | '[a-b](a|b)+{4,5def',
141 | /(-?\d+)(\.\d+)?$\nabc/,
142 | /\b((?!abc)\w)+\b/,
143 | 'a(?=b){4,'
144 | ].map(str); // every entry is normalised to its source string
145 |
146 | var expectedFail = [ // pattern sources; presumably ones the parser must reject - confirm in tests/parseTest.js
147 | 'a(?=b)+','a(?=b)?','a(?=b){4}',
148 | '{}{4}{5}', '[a-b][z-a]{2,6}',
149 | '[z-\\n]',
150 | '[a-zA-z]+{3}',
151 | 'abc{3,7}+',
152 | 'a?{1,2}',
153 | 'a+{1,2}',
154 | 'a*{1,2}',
155 | 'a{1}{1,2}',
156 | 'a{1,4}{1,2}',
157 | "abc(def,([a-z],[0-6],([0-5]def),aaa)",
158 | "ab[abc",
159 | "abc*+abc",
160 | "ab++c*abc",
161 | "\\",
162 | 'abc{42,13}'
163 | ].map(str); // all entries are already strings, so str leaves them unchanged
164 |
165 | var re2ast =[{
166 | raw: 'ab+(1|0)?[a-z][^0-9]a\\nb\\rc\\td',
167 | groupCount: 1,
168 | tree: [{
169 | type: 'exact',
170 | indices: [0, 1],
171 | raw: 'a',
172 | chars: 'a'
173 | }, {
174 | type: 'exact',
175 | repeat: {
176 | min: 1,
177 | max: Infinity,
178 | nonGreedy: false
179 | },
180 | chars: 'b',
181 | indices: [1, 3],
182 | raw: 'b+'
183 | }, {
184 | type: 'group',
185 | num: 1,
186 | sub: [{
187 | type: 'choice',
188 | indices: [4, 7],
189 | branches: [
190 | [{
191 | type: 'exact',
192 | indices: [4, 5],
193 | raw: '1',
194 | chars: '1'
195 | }],
196 | [{
197 | type: 'exact',
198 | indices: [6, 7],
199 | raw: '0',
200 | chars: '0'
201 | }]
202 | ],
203 | raw: '1|0'
204 | }],
205 | indices: [3, 9],
206 | endParenIndex: 7,
207 | repeat: {
208 | min: 0,
209 | max: 1,
210 | nonGreedy: false
211 | },
212 | raw: '(1|0)?'
213 | }, {
214 | type: 'charset',
215 | indices: [9, 14],
216 | classes: [],
217 | ranges: ['az'],
218 | chars: '',
219 | raw: '[a-z]'
220 | }, {
221 | type: 'charset',
222 | indices: [14, 20],
223 | classes: [],
224 | ranges: ['09'],
225 | chars: '',
226 | exclude: true,
227 | raw: '[^0-9]'
228 | },{
229 | type: 'exact',
230 | raw:'a\\nb\\rc\\td',
231 | chars:'a\nb\rc\td',
232 | indices:[20,30]
233 | }]
234 | }, {
235 | raw: '[\\0-\\n]',
236 | groupCount: 0,
237 | tree: [{
238 | type: 'charset',
239 | indices: [0, 7],
240 | classes: [],
241 | ranges: ['\u0000\n'],
242 | chars: '',
243 | raw: '[\\0-\\n]'
244 | }]
245 | }, {
246 | raw: '[abcdefa-z\\w0-\\u540-\\u5-\\x68z-\\u5409]',
247 | groupCount: 0,
248 | tree: [{
249 | type: 'charset',
250 | indices: [0, 37],
251 | classes: ['w'],
252 | ranges: ['0u', '5h', 'az', 'z吉'],
253 | chars: 'abcdef54',
254 | raw: '[abcdefa-z\\w0-\\u540-\\u5-\\x68z-\\u5409]'
255 | }]
256 | }, {
257 | raw: '[abc-\\u540-\\x69]',
258 | groupCount: 0,
259 | tree: [{
260 | type: 'charset',
261 | indices: [0, 16],
262 | classes: [],
263 | ranges: ['0i', 'cu'],
264 | chars: 'ab54',
265 | raw: '[abc-\\u540-\\x69]'
266 | }]
267 | }, {
268 | raw: '^abc+d*e+?\\?[\\n-\\rbcd]{3,110}?(?:(a|b)+|(d|[e-z]?(?!abc)))$',
269 | groupCount: 2,
270 | tree: [{
271 | type: 'assert',
272 | indices: [0, 1],
273 | assertionType: 'AssertBegin',
274 | raw: '^'
275 | }, {
276 | type: 'exact',
277 | indices: [1, 3],
278 | raw: 'ab',
279 | chars: 'ab'
280 | }, {
281 | type: 'exact',
282 | repeat: {
283 | min: 1,
284 | max: Infinity,
285 | nonGreedy: false
286 | },
287 | chars: 'c',
288 | indices: [3, 5],
289 | raw: 'c+'
290 | }, {
291 | type: 'exact',
292 | repeat: {
293 | min: 0,
294 | max: Infinity,
295 | nonGreedy: false
296 | },
297 | chars: 'd',
298 | indices: [5, 7],
299 | raw: 'd*'
300 | }, {
301 | type: 'exact',
302 | repeat: {
303 | min: 1,
304 | max: Infinity,
305 | nonGreedy: true
306 | },
307 | chars: 'e',
308 | indices: [7, 10],
309 | raw: 'e+?'
310 | }, {
311 | type: 'exact',
312 | chars: '?',
313 | indices: [10, 12],
314 | raw: '\\?'
315 | }, {
316 | type: 'charset',
317 | indices: [12, 30],
318 | classes: [],
319 | ranges: ['\n\r'],
320 | chars: 'bcd',
321 | repeat: {
322 | min: 3,
323 | max: 110,
324 | nonGreedy: true
325 | },
326 | raw: '[\\n-\\rbcd]{3,110}?'
327 | }, {
328 | type: 'group',
329 | num: undefined,
330 | sub: [{
331 | type: 'choice',
332 | indices: [33, 57],
333 | branches: [
334 | [{
335 | type: 'group',
336 | num: 1,
337 | sub: [{
338 | type: 'choice',
339 | indices: [34, 37],
340 | branches: [
341 | [{
342 | type: 'exact',
343 | indices: [34, 35],
344 | raw: 'a',
345 | chars: 'a'
346 | }],
347 | [{
348 | type: 'exact',
349 | indices: [36, 37],
350 | raw: 'b',
351 | chars: 'b'
352 | }]
353 | ],
354 | raw: 'a|b'
355 | }],
356 | indices: [33, 39],
357 | endParenIndex: 37,
358 | repeat: {
359 | min: 1,
360 | max: Infinity,
361 | nonGreedy: false
362 | },
363 | raw: '(a|b)+'
364 | }],
365 | [{
366 | type: 'group',
367 | num: 2,
368 | sub: [{
369 | type: 'choice',
370 | indices: [41, 56],
371 | branches: [
372 | [{
373 | type: 'exact',
374 | indices: [41, 42],
375 | raw: 'd',
376 | chars: 'd'
377 | }],
378 | [{
379 | type: 'charset',
380 | indices: [43, 49],
381 | classes: [],
382 | ranges: ['ez'],
383 | chars: '',
384 | repeat: {
385 | min: 0,
386 | max: 1,
387 | nonGreedy: false
388 | },
389 | raw: '[e-z]?'
390 | }, {
391 | type: 'assert',
392 | num: undefined,
393 | sub: [{
394 | type: 'exact',
395 | indices: [52, 55],
396 | raw: 'abc',
397 | chars: 'abc'
398 | }],
399 | indices: [49, 56],
400 | assertionType: 'AssertNegativeLookahead',
401 | endParenIndex: 55,
402 | raw: '(?!abc)'
403 | }]
404 | ],
405 | raw: 'd|[e-z]?(?!abc)'
406 | }],
407 | indices: [40, 57],
408 | endParenIndex: 56,
409 | raw: '(d|[e-z]?(?!abc))'
410 | }]
411 | ],
412 | raw: '(a|b)+|(d|[e-z]?(?!abc))'
413 | }],
414 | indices: [30, 58],
415 | nonCapture: true,
416 | endParenIndex: 57,
417 | raw: '(?:(a|b)+|(d|[e-z]?(?!abc)))'
418 | }, {
419 | type: 'assert',
420 | indices: [58, 59],
421 | assertionType: 'AssertEnd',
422 | raw: '$'
423 | }]
424 | }, {
425 | raw: 'aa+b*?c{0PP{,{10}ab+?',
426 | groupCount: 0,
427 | tree: [{
428 | type: 'exact',
429 | indices: [0, 1],
430 | raw: 'a',
431 | chars: 'a'
432 | }, {
433 | type: 'exact',
434 | repeat: {
435 | min: 1,
436 | max: Infinity,
437 | nonGreedy: false
438 | },
439 | chars: 'a',
440 | indices: [1, 3],
441 | raw: 'a+'
442 | }, {
443 | type: 'exact',
444 | repeat: {
445 | min: 0,
446 | max: Infinity,
447 | nonGreedy: true
448 | },
449 | chars: 'b',
450 | indices: [3, 6],
451 | raw: 'b*?'
452 | }, {
453 | type: 'exact',
454 | indices: [6, 12],
455 | raw: 'c{0PP{',
456 | chars: 'c{0PP{'
457 | }, {
458 | type: 'exact',
459 | repeat: {
460 | min: 10,
461 | max: 10,
462 | nonGreedy: false
463 | },
464 | chars: ',',
465 | indices: [12, 17],
466 | raw: ',{10}'
467 | }, {
468 | type: 'exact',
469 | indices: [17, 18],
470 | raw: 'a',
471 | chars: 'a'
472 | }, {
473 | type: 'exact',
474 | repeat: {
475 | min: 1,
476 | max: Infinity,
477 | nonGreedy: true
478 | },
479 | chars: 'b',
480 | indices: [18, 21],
481 | raw: 'b+?'
482 | }]
483 | }, {
484 | raw: 'ab[\\]-a]*',
485 | groupCount: 0,
486 | tree: [{
487 | type: 'exact',
488 | indices: [0, 2],
489 | raw: 'ab',
490 | chars: 'ab'
491 | }, {
492 | type: 'charset',
493 | indices: [2, 9],
494 | classes: [],
495 | ranges: [']a'],
496 | chars: '',
497 | repeat: {
498 | min: 0,
499 | max: Infinity,
500 | nonGreedy: false
501 | },
502 | raw: '[\\]-a]*'
503 | }]
504 | }, {
505 | raw: 'ab[^]*',
506 | groupCount: 0,
507 | tree: [{
508 | type: 'exact',
509 | indices: [0, 2],
510 | raw: 'ab',
511 | chars: 'ab'
512 | }, {
513 | type: 'charset',
514 | indices: [2, 6],
515 | classes: [],
516 | ranges: [],
517 | chars: '',
518 | exclude: true,
519 | repeat: {
520 | min: 0,
521 | max: Infinity,
522 | nonGreedy: false
523 | },
524 | raw: '[^]*'
525 | }]
526 | }, {
527 | raw: 'ab[-]*',
528 | groupCount: 0,
529 | tree: [{
530 | type: 'exact',
531 | indices: [0, 2],
532 | raw: 'ab',
533 | chars: 'ab'
534 | }, {
535 | type: 'charset',
536 | indices: [2, 6],
537 | classes: [],
538 | ranges: [],
539 | chars: '-',
540 | repeat: {
541 | min: 0,
542 | max: Infinity,
543 | nonGreedy: false
544 | },
545 | raw: '[-]*'
546 | }]
547 | }, {
548 | raw: 'ab[a-]*',
549 | groupCount: 0,
550 | tree: [{
551 | type: 'exact',
552 | indices: [0, 2],
553 | raw: 'ab',
554 | chars: 'ab'
555 | }, {
556 | type: 'charset',
557 | indices: [2, 7],
558 | classes: [],
559 | ranges: [],
560 | chars: 'a-',
561 | repeat: {
562 | min: 0,
563 | max: Infinity,
564 | nonGreedy: false
565 | },
566 | raw: '[a-]*'
567 | }]
568 | }, {
569 | raw: '[a-z-b]',
570 | groupCount: 0,
571 | tree: [{
572 | type: 'charset',
573 | indices: [0, 7],
574 | classes: [],
575 | ranges: ['az'],
576 | chars: '-b',
577 | raw: '[a-z-b]'
578 | }]
579 | }, {
580 | raw: '(abc(def)+(a)((a),(b),(c,(d))))',
581 | groupCount: 8,
582 | tree: [{
583 | type: 'group',
584 | num: 1,
585 | sub: [{
586 | type: 'exact',
587 | indices: [1, 4],
588 | raw: 'abc',
589 | chars: 'abc'
590 | }, {
591 | type: 'group',
592 | num: 2,
593 | sub: [{
594 | type: 'exact',
595 | indices: [5, 8],
596 | raw: 'def',
597 | chars: 'def'
598 | }],
599 | indices: [4, 10],
600 | endParenIndex: 8,
601 | repeat: {
602 | min: 1,
603 | max: Infinity,
604 | nonGreedy: false
605 | },
606 | raw: '(def)+'
607 | }, {
608 | type: 'group',
609 | num: 3,
610 | sub: [{
611 | type: 'exact',
612 | indices: [11, 12],
613 | raw: 'a',
614 | chars: 'a'
615 | }],
616 | indices: [10, 13],
617 | endParenIndex: 12,
618 | raw: '(a)'
619 | }, {
620 | type: 'group',
621 | num: 4,
622 | sub: [{
623 | type: 'group',
624 | num: 5,
625 | sub: [{
626 | type: 'exact',
627 | indices: [15, 16],
628 | raw: 'a',
629 | chars: 'a'
630 | }],
631 | indices: [14, 17],
632 | endParenIndex: 16,
633 | raw: '(a)'
634 | }, {
635 | type: 'exact',
636 | indices: [17, 18],
637 | raw: ',',
638 | chars: ','
639 | }, {
640 | type: 'group',
641 | num: 6,
642 | sub: [{
643 | type: 'exact',
644 | indices: [19, 20],
645 | raw: 'b',
646 | chars: 'b'
647 | }],
648 | indices: [18, 21],
649 | endParenIndex: 20,
650 | raw: '(b)'
651 | }, {
652 | type: 'exact',
653 | indices: [21, 22],
654 | raw: ',',
655 | chars: ','
656 | }, {
657 | type: 'group',
658 | num: 7,
659 | sub: [{
660 | type: 'exact',
661 | indices: [23, 25],
662 | raw: 'c,',
663 | chars: 'c,'
664 | }, {
665 | type: 'group',
666 | num: 8,
667 | sub: [{
668 | type: 'exact',
669 | indices: [26, 27],
670 | raw: 'd',
671 | chars: 'd'
672 | }],
673 | indices: [25, 28],
674 | endParenIndex: 27,
675 | raw: '(d)'
676 | }],
677 | indices: [22, 29],
678 | endParenIndex: 28,
679 | raw: '(c,(d))'
680 | }],
681 | indices: [13, 30],
682 | endParenIndex: 29,
683 | raw: '((a),(b),(c,(d)))'
684 | }],
685 | indices: [0, 31],
686 | endParenIndex: 30,
687 | raw: '(abc(def)+(a)((a),(b),(c,(d))))'
688 | }]
689 | }, {
690 | raw: '([a-z]+,[abc]444,[^a-b])+,(a(t)o(a[0-1]+b,(a[0-1]+)) )',
691 | groupCount: 5,
692 | tree: [{
693 | type: 'group',
694 | num: 1,
695 | sub: [{
696 | type: 'charset',
697 | indices: [1, 7],
698 | classes: [],
699 | ranges: ['az'],
700 | chars: '',
701 | repeat: {
702 | min: 1,
703 | max: Infinity,
704 | nonGreedy: false
705 | },
706 | raw: '[a-z]+'
707 | }, {
708 | type: 'exact',
709 | indices: [7, 8],
710 | raw: ',',
711 | chars: ','
712 | }, {
713 | type: 'charset',
714 | indices: [8, 13],
715 | classes: [],
716 | ranges: [],
717 | chars: 'abc',
718 | raw: '[abc]'
719 | }, {
720 | type: 'exact',
721 | indices: [13, 17],
722 | raw: '444,',
723 | chars: '444,'
724 | }, {
725 | type: 'charset',
726 | indices: [17, 23],
727 | classes: [],
728 | ranges: ['ab'],
729 | chars: '',
730 | exclude: true,
731 | raw: '[^a-b]'
732 | }],
733 | indices: [0, 25],
734 | endParenIndex: 23,
735 | repeat: {
736 | min: 1,
737 | max: Infinity,
738 | nonGreedy: false
739 | },
740 | raw: '([a-z]+,[abc]444,[^a-b])+'
741 | }, {
742 | type: 'exact',
743 | indices: [25, 26],
744 | raw: ',',
745 | chars: ','
746 | }, {
747 | type: 'group',
748 | num: 2,
749 | sub: [{
750 | type: 'exact',
751 | indices: [27, 28],
752 | raw: 'a',
753 | chars: 'a'
754 | }, {
755 | type: 'group',
756 | num: 3,
757 | sub: [{
758 | type: 'exact',
759 | indices: [29, 30],
760 | raw: 't',
761 | chars: 't'
762 | }],
763 | indices: [28, 31],
764 | endParenIndex: 30,
765 | raw: '(t)'
766 | }, {
767 | type: 'exact',
768 | indices: [31, 32],
769 | raw: 'o',
770 | chars: 'o'
771 | }, {
772 | type: 'group',
773 | num: 4,
774 | sub: [{
775 | type: 'exact',
776 | indices: [33, 34],
777 | raw: 'a',
778 | chars: 'a'
779 | }, {
780 | type: 'charset',
781 | indices: [34, 40],
782 | classes: [],
783 | ranges: ['01'],
784 | chars: '',
785 | repeat: {
786 | min: 1,
787 | max: Infinity,
788 | nonGreedy: false
789 | },
790 | raw: '[0-1]+'
791 | }, {
792 | type: 'exact',
793 | indices: [40, 42],
794 | raw: 'b,',
795 | chars: 'b,'
796 | }, {
797 | type: 'group',
798 | num: 5,
799 | sub: [{
800 | type: 'exact',
801 | indices: [43, 44],
802 | raw: 'a',
803 | chars: 'a'
804 | }, {
805 | type: 'charset',
806 | indices: [44, 50],
807 | classes: [],
808 | ranges: ['01'],
809 | chars: '',
810 | repeat: {
811 | min: 1,
812 | max: Infinity,
813 | nonGreedy: false
814 | },
815 | raw: '[0-1]+'
816 | }],
817 | indices: [42, 51],
818 | endParenIndex: 50,
819 | raw: '(a[0-1]+)'
820 | }],
821 | indices: [32, 52],
822 | endParenIndex: 51,
823 | raw: '(a[0-1]+b,(a[0-1]+))'
824 | }, {
825 | type: 'exact',
826 | indices: [52, 53],
827 | raw: ' ',
828 | chars: ' '
829 | }],
830 | indices: [26, 54],
831 | endParenIndex: 53,
832 | raw: '(a(t)o(a[0-1]+b,(a[0-1]+)) )'
833 | }]
834 | }, {
835 | raw: '[a-zA-z]+://[^\\s]*',
836 | groupCount: 0,
837 | tree: [{
838 | type: 'charset',
839 | indices: [0, 9],
840 | classes: [],
841 | ranges: ['Az', 'az'],
842 | chars: '',
843 | repeat: {
844 | min: 1,
845 | max: Infinity,
846 | nonGreedy: false
847 | },
848 | raw: '[a-zA-z]+'
849 | }, {
850 | type: 'exact',
851 | indices: [9, 12],
852 | raw: '://',
853 | chars: '://'
854 | }, {
855 | type: 'charset',
856 | indices: [12, 18],
857 | classes: ['s'],
858 | ranges: [],
859 | chars: '',
860 | exclude: true,
861 | repeat: {
862 | min: 0,
863 | max: Infinity,
864 | nonGreedy: false
865 | },
866 | raw: '[^\\s]*'
867 | }]
868 | }, {
869 | raw: '((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)',
870 | groupCount: 3,
871 | tree: [{
872 | type: 'group',
873 | num: 1,
874 | sub: [{
875 | type: 'group',
876 | num: 2,
877 | sub: [{
878 | type: 'choice',
879 | indices: [2, 29],
880 | branches: [
881 | [{
882 | type: 'exact',
883 | indices: [2, 3],
884 | raw: '2',
885 | chars: '2'
886 | }, {
887 | type: 'charset',
888 | indices: [3, 8],
889 | classes: [],
890 | ranges: ['04'],
891 | chars: '',
892 | raw: '[0-4]'
893 | }, {
894 | type: 'charset',
895 | indices: [8, 10],
896 | chars: '',
897 | ranges: [],
898 | classes: ['d'],
899 | exclude: false,
900 | raw: '\\d'
901 | }],
902 | [{
903 | type: 'exact',
904 | indices: [11, 13],
905 | raw: '25',
906 | chars: '25'
907 | }, {
908 | type: 'charset',
909 | indices: [13, 18],
910 | classes: [],
911 | ranges: ['05'],
912 | chars: '',
913 | raw: '[0-5]'
914 | }],
915 | [{
916 | type: 'charset',
917 | indices: [19, 24],
918 | classes: [],
919 | ranges: [],
920 | chars: '01',
921 | repeat: {
922 | min: 0,
923 | max: 1,
924 | nonGreedy: false
925 | },
926 | raw: '[01]?'
927 | }, {
928 | type: 'charset',
929 | indices: [24, 26],
930 | chars: '',
931 | ranges: [],
932 | classes: ['d'],
933 | exclude: false,
934 | raw: '\\d'
935 | }, {
936 | type: 'charset',
937 | indices: [26, 29],
938 | chars: '',
939 | ranges: [],
940 | classes: ['d'],
941 | exclude: false,
942 | repeat: {
943 | min: 0,
944 | max: 1,
945 | nonGreedy: false
946 | },
947 | raw: '\\d?'
948 | }]
949 | ],
950 | raw: '2[0-4]\\d|25[0-5]|[01]?\\d\\d?'
951 | }],
952 | indices: [1, 30],
953 | endParenIndex: 29,
954 | raw: '(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)'
955 | }, {
956 | type: 'exact',
957 | chars: '.',
958 | indices: [30, 32],
959 | raw: '\\.'
960 | }],
961 | indices: [0, 36],
962 | endParenIndex: 32,
963 | repeat: {
964 | min: 3,
965 | max: 3,
966 | nonGreedy: false
967 | },
968 | raw: '((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}'
969 | }, {
970 | type: 'group',
971 | num: 3,
972 | sub: [{
973 | type: 'choice',
974 | indices: [37, 64],
975 | branches: [
976 | [{
977 | type: 'exact',
978 | indices: [37, 38],
979 | raw: '2',
980 | chars: '2'
981 | }, {
982 | type: 'charset',
983 | indices: [38, 43],
984 | classes: [],
985 | ranges: ['04'],
986 | chars: '',
987 | raw: '[0-4]'
988 | }, {
989 | type: 'charset',
990 | indices: [43, 45],
991 | chars: '',
992 | ranges: [],
993 | classes: ['d'],
994 | exclude: false,
995 | raw: '\\d'
996 | }],
997 | [{
998 | type: 'exact',
999 | indices: [46, 48],
1000 | raw: '25',
1001 | chars: '25'
1002 | }, {
1003 | type: 'charset',
1004 | indices: [48, 53],
1005 | classes: [],
1006 | ranges: ['05'],
1007 | chars: '',
1008 | raw: '[0-5]'
1009 | }],
1010 | [{
1011 | type: 'charset',
1012 | indices: [54, 59],
1013 | classes: [],
1014 | ranges: [],
1015 | chars: '01',
1016 | repeat: {
1017 | min: 0,
1018 | max: 1,
1019 | nonGreedy: false
1020 | },
1021 | raw: '[01]?'
1022 | }, {
1023 | type: 'charset',
1024 | indices: [59, 61],
1025 | chars: '',
1026 | ranges: [],
1027 | classes: ['d'],
1028 | exclude: false,
1029 | raw: '\\d'
1030 | }, {
1031 | type: 'charset',
1032 | indices: [61, 64],
1033 | chars: '',
1034 | ranges: [],
1035 | classes: ['d'],
1036 | exclude: false,
1037 | repeat: {
1038 | min: 0,
1039 | max: 1,
1040 | nonGreedy: false
1041 | },
1042 | raw: '\\d?'
1043 | }]
1044 | ],
1045 | raw: '2[0-4]\\d|25[0-5]|[01]?\\d\\d?'
1046 | }],
1047 | indices: [36, 65],
1048 | endParenIndex: 64,
1049 | raw: '(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)'
1050 | }]
1051 | }, {
1052 | raw: '\\w+([-+.]\\w+)*@\\w+([-.]\\w+)*\\.\\w+([-.]\\w+)*',
1053 | groupCount: 3,
1054 | tree: [{
1055 | type: 'charset',
1056 | indices: [0, 3],
1057 | chars: '',
1058 | ranges: [],
1059 | classes: ['w'],
1060 | exclude: false,
1061 | repeat: {
1062 | min: 1,
1063 | max: Infinity,
1064 | nonGreedy: false
1065 | },
1066 | raw: '\\w+'
1067 | }, {
1068 | type: 'group',
1069 | num: 1,
1070 | sub: [{
1071 | type: 'charset',
1072 | indices: [4, 9],
1073 | classes: [],
1074 | ranges: [],
1075 | chars: '-+.',
1076 | raw: '[-+.]'
1077 | }, {
1078 | type: 'charset',
1079 | indices: [9, 12],
1080 | chars: '',
1081 | ranges: [],
1082 | classes: ['w'],
1083 | exclude: false,
1084 | repeat: {
1085 | min: 1,
1086 | max: Infinity,
1087 | nonGreedy: false
1088 | },
1089 | raw: '\\w+'
1090 | }],
1091 | indices: [3, 14],
1092 | endParenIndex: 12,
1093 | repeat: {
1094 | min: 0,
1095 | max: Infinity,
1096 | nonGreedy: false
1097 | },
1098 | raw: '([-+.]\\w+)*'
1099 | }, {
1100 | type: 'exact',
1101 | indices: [14, 15],
1102 | raw: '@',
1103 | chars: '@'
1104 | }, {
1105 | type: 'charset',
1106 | indices: [15, 18],
1107 | chars: '',
1108 | ranges: [],
1109 | classes: ['w'],
1110 | exclude: false,
1111 | repeat: {
1112 | min: 1,
1113 | max: Infinity,
1114 | nonGreedy: false
1115 | },
1116 | raw: '\\w+'
1117 | }, {
1118 | type: 'group',
1119 | num: 2,
1120 | sub: [{
1121 | type: 'charset',
1122 | indices: [19, 23],
1123 | classes: [],
1124 | ranges: [],
1125 | chars: '-.',
1126 | raw: '[-.]'
1127 | }, {
1128 | type: 'charset',
1129 | indices: [23, 26],
1130 | chars: '',
1131 | ranges: [],
1132 | classes: ['w'],
1133 | exclude: false,
1134 | repeat: {
1135 | min: 1,
1136 | max: Infinity,
1137 | nonGreedy: false
1138 | },
1139 | raw: '\\w+'
1140 | }],
1141 | indices: [18, 28],
1142 | endParenIndex: 26,
1143 | repeat: {
1144 | min: 0,
1145 | max: Infinity,
1146 | nonGreedy: false
1147 | },
1148 | raw: '([-.]\\w+)*'
1149 | }, {
1150 | type: 'exact',
1151 | chars: '.',
1152 | indices: [28, 30],
1153 | raw: '\\.'
1154 | }, {
1155 | type: 'charset',
1156 | indices: [30, 33],
1157 | chars: '',
1158 | ranges: [],
1159 | classes: ['w'],
1160 | exclude: false,
1161 | repeat: {
1162 | min: 1,
1163 | max: Infinity,
1164 | nonGreedy: false
1165 | },
1166 | raw: '\\w+'
1167 | }, {
1168 | type: 'group',
1169 | num: 3,
1170 | sub: [{
1171 | type: 'charset',
1172 | indices: [34, 38],
1173 | classes: [],
1174 | ranges: [],
1175 | chars: '-.',
1176 | raw: '[-.]'
1177 | }, {
1178 | type: 'charset',
1179 | indices: [38, 41],
1180 | chars: '',
1181 | ranges: [],
1182 | classes: ['w'],
1183 | exclude: false,
1184 | repeat: {
1185 | min: 1,
1186 | max: Infinity,
1187 | nonGreedy: false
1188 | },
1189 | raw: '\\w+'
1190 | }],
1191 | indices: [33, 43],
1192 | endParenIndex: 41,
1193 | repeat: {
1194 | min: 0,
1195 | max: Infinity,
1196 | nonGreedy: false
1197 | },
1198 | raw: '([-.]\\w+)*'
1199 | }]
1200 | }, {
1201 | raw: 'a{1,2}{}',
1202 | groupCount: 0,
1203 | tree: [{
1204 | type: 'exact',
1205 | repeat: {
1206 | min: 1,
1207 | max: 2,
1208 | nonGreedy: false
1209 | },
1210 | chars: 'a',
1211 | indices: [0, 6],
1212 | raw: 'a{1,2}'
1213 | }, {
1214 | type: 'exact',
1215 | indices: [6, 8],
1216 | raw: '{}',
1217 | chars: '{}'
1218 | }]
1219 | }, {
1220 | raw: 'a{1,2}{1,2,4}',
1221 | groupCount: 0,
1222 | tree: [{
1223 | type: 'exact',
1224 | repeat: {
1225 | min: 1,
1226 | max: 2,
1227 | nonGreedy: false
1228 | },
1229 | chars: 'a',
1230 | indices: [0, 6],
1231 | raw: 'a{1,2}'
1232 | }, {
1233 | type: 'exact',
1234 | indices: [6, 13],
1235 | raw: '{1,2,4}',
1236 | chars: '{1,2,4}'
1237 | }]
1238 | }, {
1239 | raw: 'a{1,2}{{4}',
1240 | groupCount: 0,
1241 | tree: [{
1242 | type: 'exact',
1243 | repeat: {
1244 | min: 1,
1245 | max: 2,
1246 | nonGreedy: false
1247 | },
1248 | chars: 'a',
1249 | indices: [0, 6],
1250 | raw: 'a{1,2}'
1251 | }, {
1252 | type: 'exact',
1253 | repeat: {
1254 | min: 4,
1255 | max: 4,
1256 | nonGreedy: false
1257 | },
1258 | chars: '{',
1259 | indices: [6, 10],
1260 | raw: '{{4}'
1261 | }]
1262 | }, {
1263 | raw: 'a+{1,{4}',
1264 | groupCount: 0,
1265 | tree: [{
1266 | type: 'exact',
1267 | repeat: {
1268 | min: 1,
1269 | max: Infinity,
1270 | nonGreedy: false
1271 | },
1272 | chars: 'a',
1273 | indices: [0, 2],
1274 | raw: 'a+'
1275 | }, {
1276 | type: 'exact',
1277 | indices: [2, 4],
1278 | raw: '{1',
1279 | chars: '{1'
1280 | }, {
1281 | type: 'exact',
1282 | repeat: {
1283 | min: 4,
1284 | max: 4,
1285 | nonGreedy: false
1286 | },
1287 | chars: ',',
1288 | indices: [4, 8],
1289 | raw: ',{4}'
1290 | }]
1291 | }, {
1292 | raw: '<(.*)(.*)>.*<\\/\\1>|<(.*) \\/>',
1293 | groupCount: 3,
1294 | tree: [{
1295 | type: 'choice',
1296 | indices: [0, 28],
1297 | branches: [
1298 | [{
1299 | type: 'exact',
1300 | indices: [0, 1],
1301 | raw: '<',
1302 | chars: '<'
1303 | }, {
1304 | type: 'group',
1305 | num: 1,
1306 | sub: [{
1307 | type: 'dot',
1308 | indices: [2, 4],
1309 | repeat: {
1310 | min: 0,
1311 | max: Infinity,
1312 | nonGreedy: false
1313 | },
1314 | raw: '.*'
1315 | }],
1316 | indices: [1, 5],
1317 | endParenIndex: 4,
1318 | raw: '(.*)'
1319 | }, {
1320 | type: 'group',
1321 | num: 2,
1322 | sub: [{
1323 | type: 'dot',
1324 | indices: [6, 8],
1325 | repeat: {
1326 | min: 0,
1327 | max: Infinity,
1328 | nonGreedy: false
1329 | },
1330 | raw: '.*'
1331 | }],
1332 | indices: [5, 9],
1333 | endParenIndex: 8,
1334 | raw: '(.*)'
1335 | }, {
1336 | type: 'exact',
1337 | indices: [9, 10],
1338 | raw: '>',
1339 | chars: '>'
1340 | }, {
1341 | type: 'dot',
1342 | indices: [10, 12],
1343 | repeat: {
1344 | min: 0,
1345 | max: Infinity,
1346 | nonGreedy: false
1347 | },
1348 | raw: '.*'
1349 | }, {
1350 | type: 'exact',
1351 | indices: [12, 13],
1352 | raw: '<',
1353 | chars: '<'
1354 | }, {
1355 | type: 'exact',
1356 | chars: '/',
1357 | indices: [13, 15],
1358 | raw: '\\/'
1359 | }, {
1360 | type: 'backref',
1361 | indices: [15, 17],
1362 | num: 1,
1363 | raw: '\\1'
1364 | }, {
1365 | type: 'exact',
1366 | indices: [17, 18],
1367 | raw: '>',
1368 | chars: '>'
1369 | }],
1370 | [{
1371 | type: 'exact',
1372 | indices: [19, 20],
1373 | raw: '<',
1374 | chars: '<'
1375 | }, {
1376 | type: 'group',
1377 | num: 3,
1378 | sub: [{
1379 | type: 'dot',
1380 | indices: [21, 23],
1381 | repeat: {
1382 | min: 0,
1383 | max: Infinity,
1384 | nonGreedy: false
1385 | },
1386 | raw: '.*'
1387 | }],
1388 | indices: [20, 24],
1389 | endParenIndex: 23,
1390 | raw: '(.*)'
1391 | }, {
1392 | type: 'exact',
1393 | indices: [24, 25],
1394 | raw: ' ',
1395 | chars: ' '
1396 | }, {
1397 | type: 'exact',
1398 | chars: '/',
1399 | indices: [25, 27],
1400 | raw: '\\/'
1401 | }, {
1402 | type: 'exact',
1403 | indices: [27, 28],
1404 | raw: '>',
1405 | chars: '>'
1406 | }]
1407 | ],
1408 | raw: '<(.*)(.*)>.*<\\/\\1>|<(.*) \\/>'
1409 | }]
1410 | }, {
1411 | raw: '(?=^.{8,}$)(?=.*\\d)(?=.*\\W+)(?=.*[A-Z])(?=.*[a-z])(?!.*\\n).*$',
1412 | groupCount: 0,
1413 | tree: [{
1414 | type: 'assert',
1415 | num: undefined,
1416 | sub: [{
1417 | type: 'assert',
1418 | indices: [3, 4],
1419 | assertionType: 'AssertBegin',
1420 | raw: '^'
1421 | }, {
1422 | type: 'dot',
1423 | indices: [4, 9],
1424 | repeat: {
1425 | min: 8,
1426 | max: Infinity,
1427 | nonGreedy: false
1428 | },
1429 | raw: '.{8,}'
1430 | }, {
1431 | type: 'assert',
1432 | indices: [9, 10],
1433 | assertionType: 'AssertEnd',
1434 | raw: '$'
1435 | }],
1436 | indices: [0, 11],
1437 | assertionType: 'AssertLookahead',
1438 | endParenIndex: 10,
1439 | raw: '(?=^.{8,}$)'
1440 | }, {
1441 | type: 'assert',
1442 | num: undefined,
1443 | sub: [{
1444 | type: 'dot',
1445 | indices: [14, 16],
1446 | repeat: {
1447 | min: 0,
1448 | max: Infinity,
1449 | nonGreedy: false
1450 | },
1451 | raw: '.*'
1452 | }, {
1453 | type: 'charset',
1454 | indices: [16, 18],
1455 | chars: '',
1456 | ranges: [],
1457 | classes: ['d'],
1458 | exclude: false,
1459 | raw: '\\d'
1460 | }],
1461 | indices: [11, 19],
1462 | assertionType: 'AssertLookahead',
1463 | endParenIndex: 18,
1464 | raw: '(?=.*\\d)'
1465 | }, {
1466 | type: 'assert',
1467 | num: undefined,
1468 | sub: [{
1469 | type: 'dot',
1470 | indices: [22, 24],
1471 | repeat: {
1472 | min: 0,
1473 | max: Infinity,
1474 | nonGreedy: false
1475 | },
1476 | raw: '.*'
1477 | }, {
1478 | type: 'charset',
1479 | indices: [24, 27],
1480 | chars: '',
1481 | ranges: [],
1482 | classes: ['W'],
1483 | exclude: false,
1484 | repeat: {
1485 | min: 1,
1486 | max: Infinity,
1487 | nonGreedy: false
1488 | },
1489 | raw: '\\W+'
1490 | }],
1491 | indices: [19, 28],
1492 | assertionType: 'AssertLookahead',
1493 | endParenIndex: 27,
1494 | raw: '(?=.*\\W+)'
1495 | }, {
1496 | type: 'assert',
1497 | num: undefined,
1498 | sub: [{
1499 | type: 'dot',
1500 | indices: [31, 33],
1501 | repeat: {
1502 | min: 0,
1503 | max: Infinity,
1504 | nonGreedy: false
1505 | },
1506 | raw: '.*'
1507 | }, {
1508 | type: 'charset',
1509 | indices: [33, 38],
1510 | classes: [],
1511 | ranges: ['AZ'],
1512 | chars: '',
1513 | raw: '[A-Z]'
1514 | }],
1515 | indices: [28, 39],
1516 | assertionType: 'AssertLookahead',
1517 | endParenIndex: 38,
1518 | raw: '(?=.*[A-Z])'
1519 | }, {
1520 | type: 'assert',
1521 | num: undefined,
1522 | sub: [{
1523 | type: 'dot',
1524 | indices: [42, 44],
1525 | repeat: {
1526 | min: 0,
1527 | max: Infinity,
1528 | nonGreedy: false
1529 | },
1530 | raw: '.*'
1531 | }, {
1532 | type: 'charset',
1533 | indices: [44, 49],
1534 | classes: [],
1535 | ranges: ['az'],
1536 | chars: '',
1537 | raw: '[a-z]'
1538 | }],
1539 | indices: [39, 50],
1540 | assertionType: 'AssertLookahead',
1541 | endParenIndex: 49,
1542 | raw: '(?=.*[a-z])'
1543 | }, {
1544 | type: 'assert',
1545 | num: undefined,
1546 | sub: [{
1547 | type: 'dot',
1548 | indices: [53, 55],
1549 | repeat: {
1550 | min: 0,
1551 | max: Infinity,
1552 | nonGreedy: false
1553 | },
1554 | raw: '.*'
1555 | }, {
1556 | type: 'exact',
1557 | chars: '\n',
1558 | indices: [55, 57],
1559 | raw: '\\n'
1560 | }],
1561 | indices: [50, 58],
1562 | assertionType: 'AssertNegativeLookahead',
1563 | endParenIndex: 57,
1564 | raw: '(?!.*\\n)'
1565 | }, {
1566 | type: 'dot',
1567 | indices: [58, 60],
1568 | repeat: {
1569 | min: 0,
1570 | max: Infinity,
1571 | nonGreedy: false
1572 | },
1573 | raw: '.*'
1574 | }, {
1575 | type: 'assert',
1576 | indices: [60, 61],
1577 | assertionType: 'AssertEnd',
1578 | raw: '$'
1579 | }]
1580 | }, {
1581 | raw: '(\\d{4}|\\d{2})-((1[0-2])|(0?[1-9]))-(([12][0-9])|(3[01])|(0?[1-9]))',
1582 | groupCount: 8,
1583 | tree: [{
1584 | type: 'group',
1585 | num: 1,
1586 | sub: [{
1587 | type: 'choice',
1588 | indices: [1, 12],
1589 | branches: [
1590 | [{
1591 | type: 'charset',
1592 | indices: [1, 6],
1593 | chars: '',
1594 | ranges: [],
1595 | classes: ['d'],
1596 | exclude: false,
1597 | repeat: {
1598 | min: 4,
1599 | max: 4,
1600 | nonGreedy: false
1601 | },
1602 | raw: '\\d{4}'
1603 | }],
1604 | [{
1605 | type: 'charset',
1606 | indices: [7, 12],
1607 | chars: '',
1608 | ranges: [],
1609 | classes: ['d'],
1610 | exclude: false,
1611 | repeat: {
1612 | min: 2,
1613 | max: 2,
1614 | nonGreedy: false
1615 | },
1616 | raw: '\\d{2}'
1617 | }]
1618 | ],
1619 | raw: '\\d{4}|\\d{2}'
1620 | }],
1621 | indices: [0, 13],
1622 | endParenIndex: 12,
1623 | raw: '(\\d{4}|\\d{2})'
1624 | }, {
1625 | type: 'exact',
1626 | indices: [13, 14],
1627 | raw: '-',
1628 | chars: '-'
1629 | }, {
1630 | type: 'group',
1631 | num: 2,
1632 | sub: [{
1633 | type: 'choice',
1634 | indices: [15, 33],
1635 | branches: [
1636 | [{
1637 | type: 'group',
1638 | num: 3,
1639 | sub: [{
1640 | type: 'exact',
1641 | indices: [16, 17],
1642 | raw: '1',
1643 | chars: '1'
1644 | }, {
1645 | type: 'charset',
1646 | indices: [17, 22],
1647 | classes: [],
1648 | ranges: ['02'],
1649 | chars: '',
1650 | raw: '[0-2]'
1651 | }],
1652 | indices: [15, 23],
1653 | endParenIndex: 22,
1654 | raw: '(1[0-2])'
1655 | }],
1656 | [{
1657 | type: 'group',
1658 | num: 4,
1659 | sub: [{
1660 | type: 'exact',
1661 | repeat: {
1662 | min: 0,
1663 | max: 1,
1664 | nonGreedy: false
1665 | },
1666 | chars: '0',
1667 | indices: [25, 27],
1668 | raw: '0?'
1669 | }, {
1670 | type: 'charset',
1671 | indices: [27, 32],
1672 | classes: [],
1673 | ranges: ['19'],
1674 | chars: '',
1675 | raw: '[1-9]'
1676 | }],
1677 | indices: [24, 33],
1678 | endParenIndex: 32,
1679 | raw: '(0?[1-9])'
1680 | }]
1681 | ],
1682 | raw: '(1[0-2])|(0?[1-9])'
1683 | }],
1684 | indices: [14, 34],
1685 | endParenIndex: 33,
1686 | raw: '((1[0-2])|(0?[1-9]))'
1687 | }, {
1688 | type: 'exact',
1689 | indices: [34, 35],
1690 | raw: '-',
1691 | chars: '-'
1692 | }, {
1693 | type: 'group',
1694 | num: 5,
1695 | sub: [{
1696 | type: 'choice',
1697 | indices: [36, 65],
1698 | branches: [
1699 | [{
1700 | type: 'group',
1701 | num: 6,
1702 | sub: [{
1703 | type: 'charset',
1704 | indices: [37, 41],
1705 | classes: [],
1706 | ranges: [],
1707 | chars: '12',
1708 | raw: '[12]'
1709 | }, {
1710 | type: 'charset',
1711 | indices: [41, 46],
1712 | classes: [],
1713 | ranges: ['09'],
1714 | chars: '',
1715 | raw: '[0-9]'
1716 | }],
1717 | indices: [36, 47],
1718 | endParenIndex: 46,
1719 | raw: '([12][0-9])'
1720 | }],
1721 | [{
1722 | type: 'group',
1723 | num: 7,
1724 | sub: [{
1725 | type: 'exact',
1726 | indices: [49, 50],
1727 | raw: '3',
1728 | chars: '3'
1729 | }, {
1730 | type: 'charset',
1731 | indices: [50, 54],
1732 | classes: [],
1733 | ranges: [],
1734 | chars: '01',
1735 | raw: '[01]'
1736 | }],
1737 | indices: [48, 55],
1738 | endParenIndex: 54,
1739 | raw: '(3[01])'
1740 | }],
1741 | [{
1742 | type: 'group',
1743 | num: 8,
1744 | sub: [{
1745 | type: 'exact',
1746 | repeat: {
1747 | min: 0,
1748 | max: 1,
1749 | nonGreedy: false
1750 | },
1751 | chars: '0',
1752 | indices: [57, 59],
1753 | raw: '0?'
1754 | }, {
1755 | type: 'charset',
1756 | indices: [59, 64],
1757 | classes: [],
1758 | ranges: ['19'],
1759 | chars: '',
1760 | raw: '[1-9]'
1761 | }],
1762 | indices: [56, 65],
1763 | endParenIndex: 64,
1764 | raw: '(0?[1-9])'
1765 | }]
1766 | ],
1767 | raw: '([12][0-9])|(3[01])|(0?[1-9])'
1768 | }],
1769 | indices: [35, 66],
1770 | endParenIndex: 65,
1771 | raw: '(([12][0-9])|(3[01])|(0?[1-9]))'
1772 | }]
1773 | }, {
1774 | raw: '[\\u4e00-\\u9fa5]',
1775 | groupCount: 0,
1776 | tree: [{
1777 | type: 'charset',
1778 | indices: [0, 15],
1779 | classes: [],
1780 | ranges: ['\u4e00\u9fa5'],
1781 | chars: '',
1782 | raw: '[\\u4e00-\\u9fa5]'
1783 | }]
1784 | }];
1785 |
1786 |
1787 |
1788 |
1789 | return { // Fixture collections this module hands to the test files (tests/RegExpTest.js reads `reMatchCases` from it).
1790 | expectedFail: expectedFail, // NOTE(review): presumably inputs the parser is expected to reject; definition is not visible here, confirm.
1791 | expectedPass: expectedPass, // NOTE(review): presumably inputs the parser is expected to accept; definition is not visible here, confirm.
1792 | re2ast: re2ast, // NOTE(review): likely the regex-source -> expected-AST fixtures ({raw, groupCount, tree}); confirm against the declaration.
1793 | reMatchCases:reMatchCases // Each case is [regex, string | string[]]: the regex is run against every listed string by RegExpTest.js.
1794 | };
1795 |
1796 |
1797 | });
1798 |
--------------------------------------------------------------------------------