├── .gitignore ├── tests ├── runtests ├── RegExpTest.js ├── NFATest.js ├── parseTest.js ├── KitTest.js └── testData.js ├── README.md ├── LICENSE ├── src ├── NFA.js ├── Kit.js ├── RegExp.js ├── visualize.js └── parse.js └── index.html /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | -------------------------------------------------------------------------------- /tests/runtests: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for i in *.js;do 4 | node "$i"; 5 | done; 6 | -------------------------------------------------------------------------------- /tests/RegExpTest.js: -------------------------------------------------------------------------------- 1 | if (typeof define !== 'function') var define = require('amdefine')(module); 2 | define(['../src/Kit','../src/RegExp','./testData','assert'],function (K,MyRegExp,testData,assert) { 3 | var reMatchCases=testData.reMatchCases; 4 | 5 | 6 | reMatchCases.forEach(function (c) { 7 | var re=c[0],strings=typeof c[1]==='string'?[c[1]]:c[1]; 8 | var myRe=new MyRegExp(re.source,re); 9 | strings.forEach(function (s) { 10 | var result=re.exec(s),myResult=myRe.exec(s); 11 | try { 12 | assert.deepEqual(myResult,result,re); 13 | } catch(e) { 14 | re.debug=true; 15 | myRe=new MyRegExp(re.source,re); 16 | myResult=myRe.exec(s); 17 | K.log(re,myResult,result); 18 | throw e; 19 | } 20 | }); 21 | 22 | }); 23 | 24 | console.log('RegExp Test OK'); 25 | 26 | }); 27 | 28 | -------------------------------------------------------------------------------- /tests/NFATest.js: -------------------------------------------------------------------------------- 1 | if (typeof define !== 'function') var define = require('amdefine')(module); 2 | define(['../src/Kit','../src/NFA','assert'],function (K,NFA,assert) { 3 | 4 | testNFA(); 5 | 6 | console.log('NFA Test OK'); 7 | function testNFA() { 8 | var a=NFA({ 9 | compact:true,accepts:'start', 10 | trans:[ 11 | ['start>start','0369'],['start>q1','147'],['start>q2','258'], 12 | ['q1>q1','0369'],['q1>q2','147'],['q1>start','258'], 13 | ['q2>q2','0369'],['q2>q1','258'],['q2>start','147'], 14 | ] 15 | }); 16 | var result,i,n; 17 | ['','0','00','000','012','03','3','6','9','12'].forEach(function (n) { 18 | assert.ok(a.input(n).acceptable); 19 | }); 20 | 21 | i=500;nums=[]; 22 | while (i--) { 23 | n=Math.ceil(Math.random()*1E15)*3 ; 24 | n=K.repeats(n+"",10); 25 | assert.ok(a.input(n).acceptable,n); 26 | assert.ifError(a.input(n+1).acceptable); 27 | assert.ifError(a.input(n+2).acceptable); 28 | } 29 | 30 | } 31 | 32 | 33 | 34 | 35 | }); 36 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Regulex 2 | ======= 3 | 4 | JavaScript Regular Expression Parser & Visualizer. 5 | 6 | Visualizer : http://jex.im/regulex/ 7 | 8 | ###Features: 9 | - Written in pure JavaScript. No backend needed. 10 | - You can embed the graph in you own site through html iframe element. 11 | - Detailed error message. In most cases it can point out the precise syntax error position. 12 | - No support for octal escape. Yes it is a feature. ECMAScript strict mode doesn't support octal escape in string,but many browsers still support octal escape in regex. I make things easier. In regulex, DecimalEscape will always be treated as back reference. If the back reference is invalid, e.g. `/\1/`、`/(\1)/`、`/(a)\2/`,or DecimalEscape appears in charset(because in this case it can't be explained as back reference, e.g. `/(ab)[\1]/`). Regulex will always throw an error. 13 | 14 | 15 | 16 | 17 | API: 18 | ```javascript 19 | var parse = require('regulex/parse'); 20 | var re = /var\s+([a-zA-Z_]\w*);/ ; 21 | console.log(parse(re)); 22 | ``` 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Jex 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /tests/parseTest.js: -------------------------------------------------------------------------------- 1 | if (typeof define !== 'function') var define = require('amdefine')(module); 2 | define(['../src/parse','../src/Kit','./testData','assert'],function (parse,K,testData,assert) { 3 | var expectedPass=testData.expectedPass; 4 | var expectedFail=testData.expectedFail; 5 | var re2ast=testData.re2ast; 6 | 7 | parse.getNFAParser().assertDFA(); 8 | 9 | testSyntax(); 10 | testAST(); 11 | console.log('Parse Test OK'); 12 | 13 | function testAST() { 14 | re2ast.forEach(function (ast) { 15 | try { 16 | assert.deepEqual(parse(ast.raw),ast); 17 | } catch(e) { 18 | if (e instanceof assert.AssertionError) { 19 | K.log(parse(ast.raw)); 20 | K.log(ast); 21 | } 22 | throw e; 23 | } 24 | }) 25 | return; 26 | } 27 | 28 | function testSyntax() { 29 | expectedPass.forEach(function (v) { 30 | var ast; 31 | try { 32 | ast=parse(v); 33 | } catch(e) { 34 | if (e instanceof parse.RegexSyntaxError) { 35 | console.log(e.message); 36 | console.log(v); 37 | console.log(K.repeats(" ",e.lastIndex)+"^"); 38 | K.log(e); 39 | parse(v,true); 40 | } else { 41 | K.log(v); 42 | parse(v,true); 43 | } 44 | throw e; 45 | } 46 | }); 47 | 48 | expectedFail.forEach(function (v) { 49 | var ast; 50 | try { 51 | ast=parse(v); 52 | console.error("Expected to fail but passed!"); 53 | K.log(v); 54 | ast=parse(v,true); 55 | K.log(ast); 56 | } catch (e) { 57 | if (e instanceof parse.RegexSyntaxError) { 58 | return true; 59 | } 60 | throw e; 61 | } 62 | }); 63 | } 64 | 65 | 66 | 67 | 68 | 69 | }); 70 | 71 | -------------------------------------------------------------------------------- /tests/KitTest.js: -------------------------------------------------------------------------------- 1 | if (typeof define !== 'function') var define = require('amdefine')(module); 2 | define(['../src/Kit','assert'],function (K,assert) { 3 | 4 | testCoalesce(); 5 | testIdUnique(); 6 | testKSet(); 7 | testClassify(); 8 | testParseCharset(); 9 | testHashUnique(); 10 | 11 | console.log('Kit Test OK'); 12 | 13 | function testCoalesce() { 14 | var ranges,results; 15 | 16 | ranges=K.classify(['az','AZ','09']).ranges; 17 | results=K.coalesce(ranges); 18 | assert.deepEqual(results,ranges); 19 | 20 | ranges=K.classify(['az','ez','z','a']).ranges; 21 | results=K.coalesce(ranges); 22 | assert.deepEqual(results,['az']); 23 | 24 | ranges=K.classify(['Aa','AZ','az']).ranges; 25 | results=K.coalesce(ranges); 26 | assert.deepEqual(results,['Az']); 27 | 28 | ranges=K.classify(K.negate(['Aa','az'])).ranges; 29 | results=K.coalesce(ranges); 30 | assert.deepEqual(results,K.negate(['Az'])); 31 | } 32 | 33 | function testIdUnique() { 34 | var a=[console,testIdUnique,testKSet,testClassify,testParseCharset,this]; 35 | var b=K.idUnique(a.concat(a)); 36 | assert.ok(b.length===a.length); 37 | } 38 | 39 | function testHashUnique() { 40 | var a=[],i=100,min=K.ord('A'),max=K.ord('Z'),c,hash={}; 41 | while (i--) { 42 | c=K.chr(Math.random()*(max-min)+min); 43 | a.push(c); 44 | hash[c]=1; 45 | } 46 | var expected=Object.keys(hash);//what? really? 47 | assert.deepEqual(K.hashUnique(a),expected); 48 | } 49 | 50 | function testKSet() { 51 | var n=200; 52 | for (var i=0;i','@Z',']','_z','}\uffff' ]; 184 | assert.deepEqual(ranges,expected); 185 | 186 | c='^acdf'; 187 | ranges=parseCharset(c); 188 | expected=[ '\u0000`', 'b','e', 'g\uffff' ]; 189 | assert.deepEqual(ranges,expected); 190 | } 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | }); 199 | -------------------------------------------------------------------------------- /src/NFA.js: -------------------------------------------------------------------------------- 1 | if (typeof define !== 'function') var define = require('amdefine')(module); 2 | define(['./Kit'],function (K) { 3 | 4 | /** 5 | A Naive NFA Implementation 6 | 7 | Start state is always named 'start' 8 | @param {NFAConfig|CompactNFAConfig} a 9 | type NFAConfig = {compact:false,accepts:StateSet,trans:[Transition]} 10 | type State = String 11 | type StateSet = [State] 12 | type Tranisition = {from:StateSet,to:StateSet,charset:Charset,action:Action,assert:Assert} 13 | type Charset = String|[Range] 14 | Charset is similar to regex charset,supports negation and range but metacharacters 15 | Examples: 16 | includes: 'abc0-9','[^]' 17 | excludes: '^c-z0-9','^a^' //excluded 'a' and '^' two chars 18 | any char: '\0-\uffff' 19 | Or set charset to processed disjoint ranges:['ac','d','eh'] 20 | Set `charset` to empty string to enable empty move(ε-moves). 21 | 22 | Action: 23 | Function(stack:Array,c:String,i:Int,state:String,inputs:String):Array 24 | stack: storage stack 25 | c: current char 26 | i: current index 27 | state: current state 28 | inputs: whole input string 29 | Optional return new stack 30 | 31 | Only eMove transition allow `assert` 32 | Actions and Asserts of eMove transition always execute before non-eMove transitions on current path. 33 | Assert: 34 | Function(stack:Array,c:String,i:Int,state:String,inputs:String):Boolean 35 | Return True if assertion just success,if fail return false 36 | If success and need skip num chars, 37 | return the Int count to increase `i`,this feature is designed for backref. 38 | 39 | Stack modifications in action only allow shift,unshift and return new stack. 40 | 41 | NFAConfig example used to recognize numbers:{ 42 | compact:false,accepts:'start'. 43 | trans:[{from:'start',to:'start',charset:'0-9'}] 44 | } 45 | 46 | CompactNFAConfig example,see `structure` function. 47 | An automaton used to recognize triples:{ 48 | compact:true,accepts:'start', 49 | trans:[ 50 | ['start>start','0369'],['start>q1','147'],['start>q2','258'], 51 | ['q1>q1','0369'],['q1>q2','147'],['q1>start','258'], 52 | ['q2>q2','0369'],['q2>q1','258'],['q2>start','147'], 53 | ] 54 | }; 55 | 56 | @return { 57 | input:Function 58 | } 59 | */ 60 | function NFA(a,_debug) { 61 | this._debug=_debug; 62 | a=a.compact?structure(a):a; 63 | var accepts={},i,trans=a.trans, 64 | // FMap={toState:Function} 65 | router={/* 66 | fromState : { 67 | eMove:[{to:State,action:Function,assert:Function,eMove:Bool}], 68 | eMoveStates:[State],// ε-move dest states 69 | charMove:{ 70 | // expanded to include eMove 71 | Range:[{to:State,action:Function,assert:Function,eMove:Bool}], 72 | Char:[{to:State,action:Function,assert:Function,eMove:Bool}] 73 | }, 74 | ranges:Set([Range]), 75 | // all trans keep original order in transitions list 76 | trans:[Transition] 77 | } 78 | */}; 79 | 80 | for (i=0,n=a.accepts.length;i1) { 149 | throw new Error("DFA Assertion Fail!\nFrom state `"+fromStates[i]+"` can goto to multi ε-move states!"); 150 | } 151 | var charMove=path.charMove; 152 | var ranges=Object.keys(charMove); 153 | for (var k=0,n=ranges.length;k"+t.to); 214 | if (j===n-1) { 215 | startIndex+=advanceIndex; 216 | fromState=t.to; 217 | continue recur; // Human flesh tail call optimize? 218 | } else { 219 | ret=_input(s,startIndex+advanceIndex,t.to,stack,lastIndex); 220 | } 221 | if (ret.acceptable) return ret; 222 | lastResult=ret; 223 | } 224 | if (lastResult) return lastResult; 225 | break; 226 | } while (true); 227 | 228 | return { 229 | stack:stack,lastIndex:lastIndex,lastState:fromState, 230 | acceptable:_this.accept(fromState) 231 | }; 232 | } 233 | } 234 | 235 | 236 | 237 | /** ε-closure 238 | return closureMap {fromState:[toState]} 239 | eMoveMap = {fromState:{to:[State]}} 240 | */ 241 | function eClosure(eMoves,eMoveMap) { 242 | var closureMap={}; 243 | eMoves.forEach(function (state) { // FK forEach pass extra args 244 | closure(state); 245 | }); 246 | return closureMap; 247 | 248 | function closure(state,_chain) { 249 | if (closureMap.hasOwnProperty(state)) return closureMap[state]; 250 | if (!eMoveMap.hasOwnProperty(state)) return false; 251 | _chain=_chain||[state]; 252 | var dest=eMoveMap[state], 253 | queue=dest.to.slice(), 254 | toStates=[state],s,clos; 255 | while (queue.length) { 256 | s=queue.shift(); 257 | if (~_chain.indexOf(s)) { 258 | throw new Error("Recursive ε-move:"+_chain.join(">")+">"+s+"!"); 259 | } 260 | clos=closure(s,_chain); 261 | if (clos) queue=clos.slice(1).concat(queue); 262 | toStates.push(s); 263 | } 264 | return closureMap[state]=toStates; 265 | } 266 | } 267 | 268 | 269 | function findRange(ranges,c/*:Char*/) { 270 | var i=ranges.indexOf(c,cmpRange); 271 | if (!~i) return false; 272 | return ranges[i]; 273 | } 274 | 275 | function cmpRange(c,rg) { 276 | var head=rg[0],tail=rg[1]; 277 | if (c>tail) return 1; 278 | if (c"+ToStateSet.join(",") 289 | */ 290 | function structure(a) { 291 | a.accepts=a.accepts.split(','); 292 | var ts=a.trans, 293 | i=ts.length,t,s,from,to; 294 | while (i--) { 295 | t=ts[i]; 296 | s=t[0].split('>'); 297 | from=s[0].split(','); 298 | to=s[1].split(','); 299 | ts[i]={from:from,to:to,charset:t[1],action:t[2],assert:t[3]}; 300 | } 301 | a.compact=false; 302 | return a; 303 | } 304 | 305 | 306 | return NFA; 307 | 308 | 309 | }); 310 | -------------------------------------------------------------------------------- /src/Kit.js: -------------------------------------------------------------------------------- 1 | if (typeof define !== 'function') var define = require('amdefine')(module); 2 | define(function () { 3 | /*Kit*/ 4 | 5 | var AP=Array.prototype, 6 | slice=AP.slice, 7 | isBrowser=(function () { 8 | return this.toString()==="[object Window]"; 9 | })(); 10 | 11 | 12 | /** 13 | Build sorted Set from array. 14 | This function will corrupt the original array 15 | Proper usage:a=Set(a); 16 | @param {ArrayLike} a 17 | @return {Set} return new ArrayLike Set 18 | */ 19 | function Set(a,_sorted) { 20 | if (a._Set) return a; 21 | if (!_sorted) a=sortUnique(a); 22 | 23 | //@returns Boolean. Detect if x is in set. 24 | //`cmp` is custom compare functions return -1,0,1. 25 | // function cmp(x,item):Ordering(LT=-1|EQ=0|GT=1); 26 | a.contains=function (x,cmp) {return !!~bsearch(a,x,cmp)}; 27 | a.indexOf=function (x,cmp) {return bsearch(a,x,cmp)}; 28 | a.toArray=function () {return copyArray(a);}; 29 | 30 | /** Union with another Set 31 | @param {Set|Array} b If b is an array,it will be corrupted by sortUnqiue 32 | @return {Set} return new Set */ 33 | a.union=function (b) { 34 | b=Set(b); 35 | var n=a.length+b.length,c=new a.constructor(n); 36 | for (var i=0,j=0,k=0;k>1); 61 | c=cmp(x,a[pivot]); 62 | if (c===EQ) return pivot; 63 | if (c===LT) hi=pivot-1; 64 | else lo=pivot+1; 65 | } while (lo<=hi); 66 | return -1; 67 | } 68 | 69 | /** 70 | Return sorted Set. 71 | This function will corrupt the original array 72 | Proper usage: a=sortUnique(a); 73 | @param {ArrayLike} a 74 | @return {ArrayLike} new unique sorted array 75 | */ 76 | function sortUnique(a) { 77 | var n=a.length; 78 | if (n<=1) return a; 79 | //do a shell sort 80 | var k=1,hi=n/3|0,i,j,tmp; 81 | while (k < hi) k=k*3+1; 82 | while (k > 0) { 83 | for (i=k;i=k && a[j] { 156 | ranges:['a','b','cz','09'], 157 | map:{'az':['a','b','cz'],'09':['09'],'a':['a'],'b':['b']} 158 | } 159 | */ 160 | function classify(ranges) { 161 | ranges=ranges.map(function (c) {return (!c[1])?c+c:c;}); 162 | var i,j,k,l,r,n; 163 | ranges=sortUnique(ranges); n=ranges.length; 164 | var singleMap={},headMap={},tailMap={},head,tail; 165 | for (i=0;i=tail) { 170 | if (head===tail) singleMap[tail]=true; 171 | break; 172 | } 173 | } 174 | } 175 | var chars=sortUnique(ranges.join('').split('')), 176 | results=Object.keys(singleMap), 177 | c=chars[0],tmpMap={},map={}; 178 | for (i=0;ic) break; 184 | } 185 | } 186 | for (i=0,k=0,l=chars.length-1;itail) break; 195 | if (r[0]<=head && tail<=r[1]) tmpMap[r].push(c),results.push(c); 196 | } 197 | } 198 | head=chars[i]; tail=chars[i+1]; //keep insert order,push single char later 199 | if (singleMap.hasOwnProperty(tail)) { 200 | for (j=k;jtail) break; 204 | if (r[0]<=tail && tail<=r[1]) tmpMap[r].push(tail); 205 | } 206 | } 207 | } 208 | results=sortUnique(results); 209 | for (k in tmpMap) map[k[0]===k[1]?k[0]:k]=tmpMap[k]; 210 | return {ranges:results,map:map}; 211 | } 212 | 213 | 214 | //@deprecated 215 | function ____classify(ranges) { 216 | var stack=[],map={}, 217 | chars=sortUnique(ranges.join('').split('')); 218 | chars.reduce(function (prev,c) { 219 | var head,tail,choosed=[]; 220 | ranges=ranges.filter(function (rg) {//side affects filter 221 | var start=rg[0],end=rg[1] || start; 222 | head = head || start==c; 223 | tail = tail || end==c; 224 | if (start<=c && c<=end) choosed.push(rg); 225 | if (end >= c ) return true; 226 | }); 227 | if (!choosed.length) return c; 228 | var last=stack[stack.length-1],valid,newRange, 229 | start=(last && (last[1] || last[0])==prev)?succ(prev):prev, 230 | end=head?pred(c):c; 231 | if (start<=end) { 232 | newRange=start==end?start:start+end; 233 | choosed.forEach(function (rg) { 234 | if (rg[0]<=start && rg.slice(-1)>=end) { 235 | (map[rg]=map[rg] || []).push(newRange); 236 | valid=true; 237 | } 238 | }); 239 | if (valid) stack.push(newRange); 240 | } 241 | if (head && tail) { 242 | stack.push(c); 243 | choosed.forEach(function (rg) {(map[rg]=map[rg] || []).push(c)}); 244 | } 245 | return c; 246 | },chars[0]); 247 | 248 | return {ranges:stack,map:map}; 249 | } 250 | 251 | 252 | /** 253 | Convert exclude ranges to include ranges 254 | Example: ^b-y, ['by'] to ["\0a","z\uffff"] 255 | @param {[Range]} 256 | @return Sorted disjoint ranges 257 | */ 258 | function negate(ranges /*:[Range rg]*/) { 259 | var MIN_CHAR="\u0000",MAX_CHAR="\uffff"; 260 | ranges=classify(ranges).ranges; 261 | var negated=[]; 262 | if (!ranges.length) return negated; 263 | if (ranges[0][0]!==MIN_CHAR) ranges.unshift(MAX_CHAR); 264 | var hi=ranges.length-1; 265 | if ((ranges[hi][1] || ranges[hi][0])!==MAX_CHAR) ranges.push(MIN_CHAR); 266 | ranges.reduce(function (acc,r) { 267 | var start=succ(acc[1] || acc[0]),end=pred(r[0]); 268 | if (start 1 && charset.shift(); 285 | charset.forEach(function (c) { 286 | if (chars[0]=='-' && chars.length>1) {//chars=['-','a'],c=='z' 287 | if (chars[1] > c ) // z-a is invalid 288 | throw new Error('Charset range out of order:'+chars[1]+'-'+c+'!'); 289 | ranges.push(chars[1]+c); 290 | chars.splice(0,2); 291 | } else chars.unshift(c); 292 | }); 293 | ranges=ranges.concat(chars); 294 | //convert exclude to include 295 | return exclude?negate(ranges):classify(ranges).ranges; 296 | } 297 | 298 | /** 299 | Coalesce closed ranges. 300 | ['ac','d','ez'] will be coalesced to ['az'] 301 | @param {[Range]} ranges Sorted disjoint ranges return by `classify`. 302 | @return {[Range]} Compressed ranges 303 | */ 304 | function coalesce(ranges) { 305 | if (!ranges.length) return []; 306 | var results=[ranges[0]]; 307 | ranges.reduce(function (a,b) { 308 | var prev=results.length-1; 309 | if (a[a.length-1]===pred(b[0])) { 310 | return results[prev]=results[prev][0]+b[b.length-1]; 311 | } 312 | results.push(b); 313 | return b; 314 | }); 315 | return results; 316 | } 317 | 318 | function chr(n) {return String.fromCharCode(n)} 319 | function ord(c) {return c.charCodeAt(0)} 320 | function pred(c) {return String.fromCharCode(c.charCodeAt(0)-1)} 321 | function succ(c) {return String.fromCharCode(c.charCodeAt(0)+1)} 322 | 323 | var printEscapeMap={ 324 | "\n":"\\n","\t":"\\t","\f":"\\f", 325 | "\r":"\\r"," ":" ","\\":"\\\\" 326 | }; 327 | // Convert string to printable,replace all control chars and unicode to hex escape 328 | function toPrint(s) { 329 | var ctrl=/[\x00-\x1F\x7F-\x9F]/,unicode=/[\u009F-\uFFFF]/; 330 | if (ctrl.test(s) || unicode.test(s)) { 331 | s=s.split('').map(function (c) { 332 | if (printEscapeMap.hasOwnProperty(c)) return printEscapeMap[c]; 333 | else if (ctrl.test(c)) return '\\x'+ord(c).toString(16).toUpperCase(); 334 | else if (unicode.test(c)) return '\\u'+('00'+ord(c).toString(16)).slice(-4); 335 | return c; 336 | }).join(''); 337 | } 338 | return s; 339 | } 340 | //flatten two-dimensional array to one-dimension 341 | function flatten2(a) {return [].concat.apply([],a)} 342 | function repeats(s,n) {return new Array(n+1).join(s)} 343 | 344 | function log() { 345 | var a=slice.call(arguments); 346 | if (isBrowser) { 347 | Function.prototype.apply.apply(console.log,[console,a]); 348 | } else {//Assume it is Node.js 349 | var util=require('util'); 350 | a.forEach(function (x) { 351 | console.log(util.inspect(x,{ 352 | showHidden:false,customInspect:true, 353 | depth:64,colors:true 354 | })); 355 | }); 356 | 357 | } 358 | } 359 | 360 | function locals(f) { 361 | var src=f.toString(); 362 | var re=/^\s+function\s+([a-zA-Z]\w+)\s*\(/mg; 363 | var fns=[],match; 364 | while (match=re.exec(src)) fns.push(match[1]); 365 | var methods=[],f; 366 | while (f=fns.pop()) methods.push(f+':'+f); 367 | return '{\n'+methods.join(',\n')+'\n}'; 368 | } 369 | 370 | return { 371 | sortUnique:sortUnique, 372 | idUnique:idUnique,hashUnique:hashUnique, 373 | Set:Set, repeats:repeats, 374 | negate:negate,coalesce:coalesce, 375 | classify:classify, 376 | parseCharset:parseCharset, 377 | chr:chr,ord:ord,pred:pred,succ:succ,toPrint:toPrint, 378 | flatten2:flatten2, 379 | log:log,isBrowser:isBrowser, 380 | locals:locals 381 | }; 382 | 383 | }); 384 | -------------------------------------------------------------------------------- /src/RegExp.js: -------------------------------------------------------------------------------- 1 | if (typeof define !== 'function') var define = require('amdefine')(module); 2 | define(['./parse','./Kit','./NFA'],function (parse,K,NFA) { 3 | /** 4 | Mock RegExp class 5 | */ 6 | parse.exportConstants(); 7 | //options 8 | RegExp.DEBUG=RegExp.D=1; 9 | RegExp.MULTILINE=RegExp.M=2; 10 | RegExp.GLOBAL=RegExp.G=4; 11 | RegExp.IGNORECASE=RegExp.I=8; 12 | function RegExp(re,options) { 13 | if (!(this instanceof RegExp)) return new RegExp(re,options); 14 | re=re+''; 15 | var opts={}; 16 | if (typeof options==='string') { 17 | options=options.toLowerCase(); 18 | if (~options.indexOf('i')) opts.ignoreCase=true; 19 | if (~options.indexOf('m')) opts.multiline=true; 20 | if (~options.indexOf('g')) opts.global=true; 21 | if (~options.indexOf('d')) opts.debug=true; 22 | } else { 23 | opts=options; 24 | } 25 | 26 | var ast=this.ast=parse(re); 27 | this.source=re; 28 | this.multiline=!!opts.multiline; 29 | this.global=!!opts.global; 30 | this.ignoreCase=!!opts.ignoreCase; 31 | this.debug=!!opts.debug; 32 | this.flags=''; 33 | if (this.multiline) this.flags+='m'; 34 | if (this.ignoreCase) this.flags+='i'; 35 | if (this.global) this.flags+='g'; 36 | _readonly(this,['source','options','multiline','global','ignoreCase','flags','debug']); 37 | 38 | var ignoreCase=this.ignoreCase; 39 | ast.traverse(function (node) {explainCharset(node,ignoreCase)},CHARSET_NODE); 40 | ast.traverse(function (node) {explainExact(node,ignoreCase)},EXACT_NODE); 41 | if (this.multiline) ast.traverse(multilineAssert,ASSERT_NODE); 42 | 43 | } 44 | 45 | RegExp.prototype={ 46 | toString:function () {return '/'+this.source+'/'+this.flags;}, 47 | test:function(s) { 48 | return this.exec(s)!==null; 49 | }, 50 | exec:function (s) { 51 | var nfa=this.getNFA(),ret; 52 | var startIndex=this.global?(this.lastIndex || 0):0,max=s.length; 53 | for (;startIndexmin;max--) { 347 | a=builder(node,from); 348 | moreTrans=moreTrans.concat(a.trans); 349 | from=a.accepts; 350 | accepts=accepts.concat(a.accepts); 351 | } 352 | } else { 353 | var beforeStates=from.slice(); 354 | a=builder(node,from); 355 | moreTrans=moreTrans.concat(a.trans); 356 | accepts=accepts.concat(a.accepts); 357 | moreTrans.push({ 358 | from:a.accepts,to:beforeStates,charset:false 359 | }); 360 | } 361 | var endState=[newState()]; 362 | if (repeat.nonGreedy) { 363 | trans.push({ 364 | from:accepts,to:endState,charset:false 365 | }); 366 | trans=trans.concat(moreTrans); 367 | } else { 368 | trans=trans.concat(moreTrans); 369 | trans.push({ 370 | from:accepts,to:endState,charset:false 371 | }); 372 | } 373 | return {accepts:endState,trans:trans}; 374 | } 375 | 376 | function _readonly(obj,attrs) { 377 | attrs.forEach(function (a) { 378 | Object.defineProperty(obj,a,{writable:false,enumerable:true}); 379 | }); 380 | } 381 | 382 | return RegExp; 383 | 384 | }); 385 | -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Regulex : JavaScript Regular Expression Visualizer. 6 | 166 | 167 | 168 | 195 |

RegulexJavaScript Regular Expression Visualizer.

196 |
197 |

/(ab|c)*/

198 | 199 | 200 |
201 |

Error Message

202 |
203 | 204 | 205 | 206 | 207 | 208 | 209 | 449 | 450 |
Created by Jex.
451 | Fork me on GitHub 452 | 453 | 454 | -------------------------------------------------------------------------------- /src/visualize.js: -------------------------------------------------------------------------------- 1 | if (typeof define !== 'function') var define = require('amdefine')(module); 2 | define(['./Kit','./parse'],function (K,parse) { 3 | parse.exportConstants(); 4 | 5 | var FONT_SIZE=16,LABEL_FONT_SIZE=14,PATH_LEN=16, 6 | FONT_FAMILY='DejaVu Sans Mono,monospace'; 7 | 8 | var PAPER_MARGIN=10; 9 | 10 | var _charSizeCache={},_tmpText; 11 | function getCharSize(fontSize,fontBold) { 12 | fontBold=fontBold || 'normal'; 13 | if (_charSizeCache[fontSize] && _charSizeCache[fontSize][fontBold]) 14 | return _charSizeCache[fontSize][fontBold]; 15 | _tmpText.attr({'font-size':fontSize,'font-weight':fontBold}); 16 | var box=_tmpText.getBBox(); 17 | _charSizeCache[fontSize]=_charSizeCache[fontSize] || {}; 18 | return _charSizeCache[fontSize][fontBold]={ 19 | width:box.width/((_tmpText.attr('text').length-1)/2), 20 | height:box.height/2 21 | }; 22 | } 23 | 24 | function initTmpText(paper) { 25 | _tmpText=_tmpText || paper.text(-1000,-1000,"XgfTlM|.q\nXgfTlM|.q").attr('font-family',FONT_FAMILY); 26 | } 27 | 28 | /** 29 | @param {AST} re AST returned by `parse` 30 | */ 31 | function visualize(re,paper) { 32 | paper.clear(); 33 | initTmpText(paper); 34 | 35 | var texts=highlight(re.tree); 36 | texts.unshift(text("RegExp:")); 37 | var charSize=getCharSize(FONT_SIZE,'bold'), 38 | startX=PAPER_MARGIN,startY=charSize.height/2+PAPER_MARGIN, 39 | width,height; 40 | width=texts.reduce(function(x,t) { 41 | t.x=x; 42 | t.y=startY; 43 | var w=t.text.length*charSize.width; 44 | return x+w; 45 | },startX); 46 | width+=PAPER_MARGIN; 47 | height=charSize.height+PAPER_MARGIN*2; 48 | texts=paper.add(texts); 49 | paper.setSize(width,charSize.height+PAPER_MARGIN*2); 50 | 51 | var ret=plot(re.tree,0,0); 52 | 53 | height=Math.max(ret.height+3*PAPER_MARGIN+charSize.height,height); 54 | width=Math.max(ret.width+2*PAPER_MARGIN,width); 55 | 56 | paper.setSize(width,height); 57 | translate(ret.items,PAPER_MARGIN,PAPER_MARGIN*2+charSize.height-ret.y); 58 | paper.add(ret.items); 59 | } 60 | 61 | 62 | 63 | function plot(tree,x,y) { 64 | tree.unshift({type:'startPoint'}); 65 | tree.push({type:'endPoint'}); 66 | return plotTree(tree,x,y); 67 | } 68 | 69 | function translate(items,dx,dy) { 70 | items.forEach(function (t) { 71 | if (t._translate) t._translate(dx,dy); 72 | t.x+=dx;t.y+=dy; 73 | }); 74 | } 75 | 76 | // return NodePlot config 77 | function plotTree(tree,x,y) { 78 | var results=[],items=[], 79 | width=0,height=0, 80 | fromX=x,top=y,bottom=y; 81 | if (!tree.length) return plotNode.empty(null,x,y); 82 | tree.forEach(function (node) { 83 | var ret; 84 | if (node.repeat) { 85 | ret=plotNode.repeat(node,fromX,y); 86 | } else { 87 | ret=plotNode[node.type](node,fromX,y); 88 | } 89 | results.push(ret); 90 | fromX+=ret.width+PATH_LEN; 91 | width+=ret.width; 92 | top=Math.min(top,ret.y); 93 | bottom=Math.max(bottom,ret.y+ret.height); 94 | items=items.concat(ret.items); 95 | }); 96 | 97 | height=bottom-top; 98 | 99 | results.reduce(function (a,b) { 100 | width+=PATH_LEN; 101 | var p=hline(a.lineOutX,y,b.lineInX); 102 | items.push(p); 103 | return b; 104 | }); 105 | var lineInX=results[0].lineInX,lineOutX=results[results.length-1].lineOutX; 106 | return { 107 | items:items, 108 | width:width,height:height,x:x,y:top, 109 | lineInX:lineInX,lineOutX:lineOutX 110 | }; 111 | } 112 | // return NodePlot config 113 | function textRect(s,x,y,bgColor,textColor) { 114 | s=K.toPrint(s); 115 | var padding=6; 116 | var charSize=getCharSize(FONT_SIZE); 117 | var tw=s.length*charSize.width,h=charSize.height+padding*2,w=tw+padding*2; 118 | var rect={ 119 | type:'rect', 120 | x:x,y:y-(h/2), 121 | width:w,height:h, 122 | stroke:'none', 123 | fill:bgColor || 'transparent' 124 | }; 125 | var t={ 126 | type:'text', 127 | x:x+w/2,y:y, 128 | text:s, 129 | 'font-size':FONT_SIZE, 130 | fill:textColor || 'black' 131 | }; 132 | return { 133 | text:t,rect:rect, 134 | items:[rect,t], 135 | width:w,height:h, 136 | x:x,y:rect.y, 137 | lineInX:x,lineOutX:x+w 138 | }; 139 | } 140 | 141 | // return LabelObject {lable:Element,x,y,width,height} 142 | function textLabel(x,y,s,color) {// x is center x ,y is bottom y 143 | var charSize=getCharSize(LABEL_FONT_SIZE); 144 | var lines=s.split("\n"); 145 | var textHeight=lines.length*charSize.height; 146 | var textWidth; 147 | if (lines.length>1) { 148 | textWidth=Math.max.apply(Math,lines.map(function (a) {return a.length})); 149 | } else { 150 | textWidth=s.length; 151 | } 152 | textWidth=textWidth*charSize.width; 153 | var margin=4; 154 | var txt={ 155 | type:'text', 156 | x:x,y:y-textHeight/2-margin, 157 | text:s, 158 | 'font-size':LABEL_FONT_SIZE, 159 | fill:color || '#444' 160 | }; 161 | return { 162 | label:txt, 163 | x:x-textWidth/2,y:y-textHeight-margin, 164 | width:textWidth,height:textHeight+margin 165 | }; 166 | } 167 | //return element config 168 | function hline(x,y,destX) { 169 | return { 170 | type:'path', 171 | x:x,y:y, 172 | path:["M",x,y,"H",destX], 173 | 'stroke-linecap':'butt', 174 | 'stroke-linejoin':'round', 175 | 'stroke':'#333', 176 | 'stroke-width':2, 177 | _translate:function (x,y) { 178 | var p=this.path; 179 | p[1]+=x;p[2]+=y;p[4]+=x; 180 | }, 181 | }; 182 | } 183 | 184 | //return element config 185 | function smoothLine(fromX,fromY,toX,toY) { 186 | var radius=10,p,_translate; 187 | var signX=fromX>toX?-1:1,signY=fromY>toY?-1:1; 188 | if (Math.abs(fromY-toY) 1 ? " to " : " or ") +_plural(repeat.max); 295 | } else { 296 | txt+=" or more times."; 297 | } 298 | } 299 | 300 | var r=padding; 301 | var rectW=ret.width+padding*2,rectH=ret.y+ret.height+padding-y; 302 | 303 | var py=y; 304 | var p={ 305 | type:'path', 306 | path:['M',ret.lineInX+padding,py, 307 | 'Q',x,py,x,py+r, 308 | 'V',py+rectH-r, 309 | 'Q',x,py+rectH,x+r,py+rectH, 310 | 'H',x+rectW-r, 311 | 'Q',x+rectW,py+rectH,x+rectW,py+rectH-r, 312 | 'V',py+r, 313 | 'Q',x+rectW,py,ret.lineOutX+padding,py 314 | ], 315 | _translate:function (x,y) { 316 | var p=this.path; 317 | p[1]+=x;p[2]+=y; 318 | p[4]+=x;p[5]+=y;p[6]+=x;p[7]+=y; 319 | p[9]+=y; 320 | p[11]+=x;p[12]+=y;p[13]+=x;p[14]+=y; 321 | p[16]+=x; 322 | p[18]+=x;p[19]+=y;p[20]+=x;p[21]+=y; 323 | p[23]+=y; 324 | p[25]+=x;p[26]+=y;p[27]+=x;p[28]+=y; 325 | }, 326 | stroke:'maroon', 327 | 'stroke-width':2 328 | }; 329 | 330 | if (repeat.nonGreedy) { 331 | txt+="(NonGreedy!)"; 332 | p.stroke="Brown"; 333 | p['stroke-dasharray']="-"; 334 | } 335 | 336 | var tl=textLabel(x+rectW/2,y,txt); 337 | translate([tl.label],0,rectH+tl.height+LABEL_MARGIN); //bottom label 338 | 339 | var width=Math.max(tl.width,rectW); 340 | var offsetX=(width-rectW)/2; 341 | if (offsetX) translate([p,tl.label],offsetX,0); 342 | translate(ret.items,padding+offsetX,0); 343 | ret.items.unshift(p); 344 | ret.items.push(tl.label); 345 | return { 346 | items:ret.items, 347 | width:width,height:ret.height+padding+tl.height+LABEL_MARGIN, 348 | x:offsetX+padding+x,y:ret.y, 349 | lineInX:ret.lineInX+padding+offsetX, 350 | lineOutX:ret.lineOutX+padding+offsetX 351 | }; 352 | 353 | function _plural(n) { 354 | return n+ ((n<2)? " time.":" times."); 355 | } 356 | }, 357 | choice:function (node,x,y) { 358 | var marginX=20,spacing=6,paddingY=4,height=0,width=0; 359 | var branches=node.branches.map(function (branch) { 360 | var ret=plotTree(branch,x,y); 361 | height+=ret.height; 362 | width=Math.max(width,ret.width); 363 | return ret; 364 | }); 365 | height+=(branches.length-1)*spacing+paddingY*2; 366 | width+=marginX*2; 367 | 368 | var centerX=x+width/2,dy=y-height/2+paddingY,lineOutX=x+width, 369 | items=[]; 370 | branches.forEach(function (a) { 371 | var dx=centerX-a.width/2; 372 | translate(a.items,dx-a.x,dy-a.y); 373 | var p1=smoothLine(x,y,dx-a.x+a.lineInX,y+dy-a.y); 374 | var p2=smoothLine(lineOutX,y,a.lineOutX+dx-a.x,y+dy-a.y); 375 | items=items.concat(a.items); 376 | items.push(p1,p2); 377 | dy+=a.height+spacing; 378 | }); 379 | 380 | return { 381 | items:items, 382 | width:width,height:height, 383 | x:x,y:y-height/2, 384 | lineInX:x,lineOutX:lineOutX 385 | }; 386 | 387 | }, 388 | charset:function (node,x,y) { 389 | var padding=6,spacing=4; 390 | var clsDesc={d:'Digit',D:'NonDigit',w:'Word',W:'NonWord',s:'WhiteSpace',S:'NonWhiteSpace'}; 391 | var charBgColor='LightSkyBlue',charTextColor='black', 392 | clsBgColor='Green',clsTextColor='white', 393 | rangeBgColor='teal',rangeTextColor='white', 394 | boxColor=node.exclude?'Pink':'Khaki', 395 | labelColor=node.exclude?'#C00':''; 396 | var simple=onlyCharClass(node); 397 | if (simple) { 398 | var a=textRect(clsDesc[node.classes[0]],x,y,clsBgColor,clsTextColor); 399 | a.rect.r=5; 400 | if (!node.exclude) { 401 | return a; 402 | } else { 403 | var tl=textLabel(a.x+a.width/2,a.y,'None of:',labelColor); 404 | var items=a.items; 405 | items.push(tl.label); 406 | var oldWidth=a.width; 407 | var width=Math.max(tl.width,a.width); 408 | var offsetX=(width-oldWidth)/2;//ajust label text 409 | translate(items,offsetX,0); 410 | return { 411 | items:items, 412 | width:width,height:a.height+tl.height, 413 | x:Math.min(tl.x,a.x),y:tl.y, 414 | lineInX:offsetX+a.x,lineOutX:offsetX+a.x+a.width 415 | }; 416 | } 417 | } 418 | if (!node.chars && !node.ranges.length && !node.classes.length) { 419 | // It must be exclude charset here 420 | var a= textRect('AnyChar',x,y,'green','white'); 421 | a.rect.r=5; 422 | return a; 423 | } 424 | var packs=[],ret,width=0,height=0,singleBoxHeight; 425 | if (node.chars) { 426 | ret=textRect(node.chars,x,y,charBgColor,charTextColor); 427 | ret.rect.r=5; 428 | packs.push(ret); 429 | width=ret.width; 430 | } 431 | node.ranges.forEach(function (rg) { 432 | rg=rg.split('').join('-'); 433 | var ret=textRect(rg,x,y,rangeBgColor,rangeTextColor); 434 | ret.rect.r=5; 435 | packs.push(ret); 436 | width=Math.max(ret.width,width); 437 | }); 438 | node.classes.forEach(function (cls) { 439 | var ret=textRect(clsDesc[cls],x,y,clsBgColor,clsTextColor); 440 | ret.rect.r=5; 441 | packs.push(ret); 442 | width=Math.max(ret.width,width); 443 | }); 444 | 445 | singleBoxHeight=packs[0].height; 446 | 447 | var pack1=[],pack2=[]; 448 | packs.sort(function (a,b) {return b.width-a.width}); 449 | packs.forEach(function (a) { 450 | if (a.width*2+spacing>width) pack1.push(a); 451 | else pack2.push(a); // can be inline 452 | }); 453 | packs=pack1; 454 | var a1,a2; 455 | while (pack2.length) { 456 | a1=pack2.pop(); a2=pack2.pop(); 457 | if (!a2) {packs.push(a1);break;} 458 | if (a1.width-a2.width > 2) { 459 | packs.push(a1); 460 | pack2.push(a2); 461 | continue; 462 | } 463 | translate(a2.items,a1.width+spacing,0); 464 | packs.push({ 465 | items:a1.items.concat(a2.items), 466 | width:a1.width+a2.width+spacing, 467 | height:a1.height, 468 | x:a1.x,y:a1.y 469 | }); 470 | height-=a1.height; 471 | } 472 | 473 | width+=padding*2; 474 | height=(packs.length-1)*spacing+packs.length*singleBoxHeight+padding*2; 475 | 476 | var rect={ 477 | type:'rect', 478 | x:x,y:y-height/2,r:4, 479 | width:width,height:height, 480 | stroke:'none',fill:boxColor 481 | }; 482 | 483 | var startY=rect.y+padding; 484 | var items=[rect]; 485 | 486 | packs.forEach(function (a) { 487 | translate(a.items,x-a.x+(width-a.width)/2,startY-a.y); 488 | items=items.concat(a.items); 489 | startY+=a.height+spacing; 490 | }); 491 | var tl=textLabel(rect.x+rect.width/2,rect.y,(node.exclude?'None':'One')+' of:',labelColor); 492 | items.push(tl.label); 493 | var oldWidth=width; 494 | width=Math.max(tl.width,width); 495 | var offsetX=(width-oldWidth)/2;//ajust label text 496 | translate(items,offsetX,0); 497 | return { 498 | items:items, 499 | width:width,height:height+tl.height, 500 | x:Math.min(tl.x,x),y:tl.y, 501 | lineInX:offsetX+x,lineOutX:offsetX+x+rect.width 502 | }; 503 | }, 504 | group:function (node,x,y) { 505 | var padding=10,lineColor='silver',strokeWidth=2; 506 | var sub=plotTree(node.sub,x,y); 507 | if (node.num) { 508 | translate(sub.items,padding,0); 509 | var rectW=sub.width+padding*2,rectH=sub.height+padding*2; 510 | var rect={ 511 | type:'rect', 512 | x:x,y:sub.y-padding,r:6, 513 | width:rectW,height:rectH, 514 | 'stroke-dasharray':".", 515 | stroke:lineColor, 516 | 'stroke-width':strokeWidth 517 | }; 518 | var tl=textLabel(rect.x+rect.width/2,rect.y-strokeWidth,'Group #'+node.num); 519 | var items=sub.items.concat([rect,tl.label]); 520 | var width=Math.max(tl.width,rectW); 521 | var offsetX=(width-rectW)/2;//ajust label text space 522 | if (offsetX) translate(items,offsetX,0); 523 | return { 524 | items:items, 525 | width:width, 526 | height:rectH+tl.height, 527 | x:x,y:tl.y, 528 | lineInX:offsetX+sub.lineInX+padding,lineOutX:offsetX+sub.lineOutX+padding 529 | }; 530 | } 531 | return sub; 532 | }, 533 | assert:function (node,x,y) { 534 | var simpleAssert={ 535 | AssertNonWordBoundary:{bg:"maroon",fg:"white"}, 536 | AssertWordBoundary:{bg:"purple",fg:"white"}, 537 | AssertEnd:{bg:"Indigo",fg:"white"}, 538 | AssertBegin:{bg:"Indigo",fg:"white"} 539 | }; 540 | var conf,nat=node.assertionType,txt=nat.replace('Assert','')+'!'; 541 | if (conf=simpleAssert[nat]) { 542 | return textRect(txt,x,y,conf.bg,conf.fg); 543 | } 544 | 545 | var lineColor,fg,padding=8; 546 | if (nat===AssertLookahead) { 547 | lineColor="CornflowerBlue"; 548 | fg="darkgreen"; 549 | txt="If followed by:"; 550 | } else if (nat===AssertNegativeLookahead) { 551 | lineColor="#F63"; 552 | fg="Purple"; 553 | //txt="Negative\nLookahead!"; // break line 554 | txt="If not followed by:"; 555 | } 556 | 557 | var sub=plotNode.group(node,x,y); 558 | var rectH=sub.height+padding*2,rectW=sub.width+padding*2; 559 | var rect={ 560 | type:'rect', 561 | x:x,y:sub.y-padding,r:6, 562 | width:rectW,height:rectH, 563 | 'stroke-dasharray':"-", 564 | stroke:lineColor, 565 | 'stroke-width':2 566 | }; 567 | 568 | var tl=textLabel(rect.x+rectW/2,rect.y,txt,fg); 569 | var width=Math.max(rectW,tl.width); 570 | var offsetX=(width-rectW)/2;//ajust label text 571 | translate(sub.items,offsetX+padding,0); 572 | 573 | if (offsetX) translate([rect,tl.label],offsetX,0); 574 | var items=sub.items.concat([rect,tl.label]); 575 | return { 576 | items:items, 577 | width:width, 578 | height:rect.height+tl.height, 579 | x:x,y:tl.y, 580 | lineInX:offsetX+sub.lineInX+padding,lineOutX:offsetX+sub.lineOutX+padding 581 | }; 582 | } 583 | }; 584 | 585 | 586 | 587 | var hlColorMap={ 588 | exact:'#334', 589 | dot:'darkblue', 590 | backref:'teal', 591 | '$':'purple', 592 | '^':'purple', 593 | '\\b':'#F30', 594 | '\\B':'#F30', 595 | '(':'blue', 596 | ')':'blue', 597 | '?=':'darkgreen', 598 | '?!':'red', 599 | '?:':'grey', 600 | '[':'navy', 601 | ']':'navy', 602 | '|':'blue', 603 | '{':'maroon', 604 | ',':'maroon', 605 | '}':'maroon', 606 | '*':'maroon', 607 | '+':'maroon', 608 | '?':'maroon', 609 | repeatNonGreedy:'#F61', 610 | defaults:'black', 611 | charsetRange:'olive', 612 | charsetClass:'navy', 613 | charsetExclude:'red', 614 | charsetChars:'#334' 615 | }; 616 | 617 | 618 | /** 619 | @param {AST.tree} re AST.tree return by `parse` 620 | */ 621 | function highlight(tree) { 622 | var texts=[]; 623 | tree.forEach(function (node) { 624 | if (node.sub) { 625 | texts.push(text('(')); 626 | if (node.type===ASSERT_NODE) { 627 | if (node.assertionType===AssertLookahead) { 628 | texts.push(text('?=')); 629 | } else { 630 | texts.push(text('?!')); 631 | } 632 | } else if (node.nonCapture) { 633 | texts.push(text('?:')); 634 | } 635 | texts=texts.concat(highlight(node.sub)); 636 | texts.push(text(')')); 637 | } else if (node.branches) { 638 | node.branches.map(highlight).forEach(function (ts) { 639 | texts=texts.concat(ts); 640 | texts.push(text('|')); 641 | }); 642 | texts.pop(); 643 | } else { 644 | var color=hlColorMap[node.type] || hlColorMap.defaults; 645 | switch (node.type) { 646 | case EXACT_NODE: 647 | texts.push(text(K.toPrint(node.chars),color)); 648 | break; 649 | case DOT_NODE: 650 | texts.push(text('.',color)); 651 | break; 652 | case BACKREF_NODE: 653 | texts.push(text("\\"+node.num,color)); 654 | break; 655 | case ASSERT_NODE: 656 | texts.push(text(node.raw)); 657 | break; 658 | case CHARSET_NODE: 659 | var simple=onlyCharClass(node); 660 | (!simple || node.exclude) && texts.push(text('[')); 661 | if (node.exclude) texts.push(text('^',hlColorMap.charsetExclude)); 662 | node.ranges.forEach(function (rg) { 663 | texts.push(text(K.toPrint(rg[0]+'-'+rg[1]),hlColorMap.charsetRange)); 664 | }); 665 | node.classes.forEach(function (cls) { 666 | texts.push(text("\\"+cls,hlColorMap.charsetClass)); 667 | }); 668 | texts.push(text(K.toPrint(node.chars),hlColorMap.charsetChars)); 669 | (!simple || node.exclude) && texts.push(text(']')); 670 | break; 671 | } 672 | } 673 | if (node.repeat) { 674 | var min=node.repeat.min,max=node.repeat.max; 675 | if (min===0 && max===Infinity) texts.push(text('*')); 676 | else if (min===1 && max===Infinity) texts.push(text('+')); 677 | else if (min===0 && max===1) texts.push(text('?')); 678 | else { 679 | texts.push(text('{')); 680 | texts.push(text(min)); 681 | if (min===max) texts.push(text('}')); 682 | else { 683 | texts.push(text(',')); 684 | if (isFinite(max)) texts.push(text(max)); 685 | texts.push(text('}')); 686 | } 687 | } 688 | if (node.repeat.nonGreedy) { 689 | texts.push(text('?',hlColorMap.repeatNonGreedy)); 690 | } 691 | } 692 | }); 693 | return texts; 694 | } 695 | 696 | function text(s,color) { 697 | color = color || hlColorMap[s] || hlColorMap.defaults; 698 | return { 699 | type:'text', 700 | 'font-size':FONT_SIZE,'font-family':FONT_FAMILY, 701 | text:s+"",fill:color,'text-anchor':'start','font-weight':'bold' 702 | }; 703 | } 704 | 705 | function onlyCharClass(node) { 706 | return !node.chars && !node.ranges.length && node.classes.length===1; 707 | } 708 | 709 | return visualize; 710 | 711 | }); 712 | -------------------------------------------------------------------------------- /src/parse.js: -------------------------------------------------------------------------------- 1 | if (typeof define !== 'function') var define = require('amdefine')(module); 2 | define(['./NFA','./Kit'],function (NFA,K) { 3 | /** 4 | Parse Regex to AST 5 | parse:Function(re:String) 6 | parse.Constants 7 | parse.exportConstants:Function 8 | */ 9 | 10 | var Constants={ 11 | //Node Type Constants 12 | EXACT_NODE:"exact", 13 | CHARSET_NODE:"charset", 14 | CHOICE_NODE:"choice", 15 | GROUP_NODE:"group", 16 | ASSERT_NODE:"assert", 17 | DOT_NODE:"dot", 18 | BACKREF_NODE:"backref", 19 | EMPTY_NODE:"empty", 20 | //Assertion Type Constants 21 | AssertLookahead:"AssertLookahead", 22 | AssertNegativeLookahead:"AssertNegativeLookahead", 23 | AssertNonWordBoundary:"AssertNonWordBoundary", 24 | AssertWordBoundary:"AssertWordBoundary", 25 | AssertEnd:"AssertEnd", 26 | AssertBegin:"AssertBegin" 27 | }; 28 | 29 | /** 30 | AST: 31 | Node = { // Base Node interface 32 | type:NodeType, // Node type string 33 | raw:String, // Raw regex string 34 | repeat:{ 35 | min:Int,max:Int, // Repeat times. [min,max] means "{min,max}". 36 | // Set max=Infinity forms a "{min,}" range 37 | // Set max=undefined forms a "{min}" range 38 | nonGreedy:Boolean // If this repeat is non-greedy,viz. had a "?" quantifier 39 | }, 40 | indices:[Int,Int] // Raw string in original regex index range [start,end) 41 | // You can use regexStr.slice(start,end) to retrieve node.raw string 42 | } 43 | 44 | NodeType = exact|dot|charset|choice|empty|group|assert|backref 45 | 46 | ExactNode = { // Literal match chars string 47 | type:"exact", 48 | chars:"c", 49 | raw:"c{1,2}" // When repeat or escape,raw will diff from chars 50 | } 51 | DotNode = {type:"dot"} //viz. "." , dot match any char but newline "\n\r" 52 | 53 | // Because of IgnoreCase flag, 54 | // The client code need to compute disjoint ranges itself. 55 | CharsetNode = { 56 | type:"charset", 57 | exclude:Boolean, // True only if it is "[^abc]" form 58 | classes:[Char], // Named character classes. e.g. [\d]. 59 | // All names: d(Digit),D(Non-digit),w,W,s,S 60 | chars:String, // Literal chars. e.g. [abc] repr as 'abc' 61 | ranges:[Range] // Range: a-z repr as 'az' 62 | } 63 | 64 | ChoiceNode = { 65 | type:"choice", 66 | branches:[[Node]] // Choice more branches,e.g. /a|b|c/ 67 | } 68 | 69 | EmptyNode = { // This node will match any input,include empty string 70 | type:"empty" //new RegExp("") will give an empty node. /a|/ will give branches with an empty node 71 | } 72 | 73 | GroupNode = { 74 | type:"group", 75 | nonCapture:false, // true means:"(?:abc)",default is false 76 | num:Int, // If capture is true.It is group's int index(>=1). 77 | endParenIndex:Int, // /(a)+/ will generate only one node,so indices is [0,4],endParenIndex is 3 78 | sub:[Node] // Sub pattern nodes 79 | } 80 | 81 | AssertNode = { 82 | type:"assert", 83 | assertionType:String, //See Assertion Type Constants 84 | sub:[Node] //Optional,\b \B ^ $ Assertion this property is empty 85 | } 86 | Only AssertLookahead,AssertNegativeLookahead has `sub` property 87 | "(?=(abc))" repr as { 88 | type:"assert", assertionType:AssertLookahead, 89 | sub:[{ 90 | type:"group", 91 | sub:[{type:"exact",raw:"abc"}] 92 | }] 93 | } 94 | 95 | BackrefNode = { 96 | type:"backref", 97 | num:Int // Back references index.Correspond to group.num 98 | } 99 | 100 | */ 101 | 102 | function exportConstants() { 103 | var code=Object.keys(Constants).map(function (k) { 104 | return k+"="+JSON.stringify(Constants[k]); 105 | }).join(";"); 106 | var Global=(function () { 107 | return this; 108 | })(); 109 | Global.eval(code); 110 | } 111 | exportConstants(); 112 | 113 | function AST(a) { 114 | this.raw=a.raw; 115 | this.tree=a.tree; 116 | this.groupCount=a.groupCount; 117 | } 118 | /** 119 | @param {Function} f Visitor function accept node as one argument. 120 | @param {String} nodeType Give the node type you want to visit,or omitted to visit all 121 | */ 122 | AST.prototype.traverse=function (f,nodeType) { 123 | travel(this.tree,f); 124 | function travel(stack,f) { 125 | stack.forEach(function (node) { 126 | if (!nodeType || node.type===nodeType) f(node); 127 | if (node.sub) travel(node.sub,f); 128 | else if (node.branches) node.branches.forEach(function (b) {travel(b,f)}); 129 | }); 130 | } 131 | }; 132 | 133 | 134 | var G_DEBUG; 135 | /** 136 | @param {String} re input regex as string 137 | @param {Object} [options] 138 | @option {Boolean} options.debug If enable debug log 139 | @option {Boolean} options.strict If enable strict mode 140 | @return {Object} 141 | { 142 | raw:String, // original re 143 | groupCount:Int, //Total group count 144 | tree:Array // AST Tree Stack 145 | } 146 | */ 147 | function parse(re,_debug) { 148 | G_DEBUG=_debug; 149 | var parser=getNFAParser(); 150 | 151 | var ret,stack,lastState; 152 | ret=parser.input(re); 153 | stack=ret.stack; 154 | stack=actions.endChoice(stack); // e.g. /a|b/ 155 | lastState=ret.lastState; 156 | var valid=ret.acceptable && ret.lastIndex===re.length-1;//just syntax valid regex 157 | if (!valid) { 158 | var error; 159 | switch (lastState) { 160 | case 'charsetRangeEndWithNullChar': 161 | error={ 162 | type:'CharsetRangeEndWithNullChar', 163 | message:"Charset range end with NUL char does not make sense!\n"+ 164 | "Because [a-\\0] is not a valid range.\n"+ 165 | "And [\\0-\\0] should be rewritten into [\\0].", 166 | }; 167 | break; 168 | case 'repeatErrorFinal': 169 | error={ 170 | type:'NothingRepeat', 171 | message:"Nothing to repeat!" 172 | }; 173 | break; 174 | case 'digitFollowNullError': 175 | error={ 176 | type:'DigitFollowNullError', 177 | message:"The '\\0' represents the char and cannot be followed by a decimal digit!" 178 | }; 179 | break; 180 | case 'charsetRangeEndClass': 181 | error={ 182 | type:'CharsetRangeEndClass', 183 | message:'Charset range ends with class such as "\\w\\W\\d\\D\\s\\S" is invalid!' 184 | }; 185 | break; 186 | case 'charsetOctEscape': 187 | error={ 188 | type:'DecimalEscape', 189 | message:'Decimal escape appears in charset is invalid.Because it can\'t be explained as backreference.And octal escape is deprecated!' 190 | }; 191 | break; 192 | default: 193 | if (lastState.indexOf('charset')===0) { 194 | error={ 195 | type:'UnclosedCharset', 196 | message:'Unterminated character class!' 197 | }; 198 | } else if (re[ret.lastIndex]===')') { 199 | error={ 200 | type:'UnmatchedParen', 201 | message:'Unmatched end parenthesis!' 202 | }; 203 | } else { 204 | error={ 205 | type:'UnexpectedChar', 206 | message:'Unexpected char!' 207 | } 208 | } 209 | } 210 | if (error) { 211 | error.lastIndex=ret.lastIndex; 212 | error.astStack=ret.stack; 213 | error.lastState=lastState; 214 | throw new RegexSyntaxError(error); 215 | } 216 | } 217 | 218 | if (stack._parentGroup) { 219 | throw new RegexSyntaxError({ 220 | type:"UnterminatedGroup", 221 | message:"Unterminated group!", 222 | lastIndex:stack._parentGroup.indices[0], 223 | lastState:lastState, 224 | astStack:stack 225 | }); 226 | } 227 | 228 | if (valid) { 229 | var groupCount=stack.groupCounter?stack.groupCounter.i:0; 230 | delete stack.groupCounter; 231 | var ast=new AST({ 232 | raw:re, 233 | groupCount:groupCount, 234 | tree:stack 235 | }); 236 | _fixNodes(stack,re,re.length); 237 | // Check charset ranges out of order error.(Because of charsetRangeEndEscape) 238 | ast.traverse(_checkCharsetRange,CHARSET_NODE); 239 | // Check any repeats after assertion. e.g. /a(?=b)+/ doesn't make sense. 240 | ast.traverse(_checkRepeat,ASSERT_NODE); 241 | _coalesceExactNode(stack); 242 | G_DEBUG=false; 243 | return ast; 244 | } 245 | 246 | 247 | 248 | } 249 | 250 | parse.Constants=Constants; 251 | parse.exportConstants=exportConstants; 252 | parse.RegexSyntaxError=RegexSyntaxError; 253 | parse.getNFAParser=getNFAParser; 254 | 255 | var _NFAParser; 256 | function getNFAParser() { 257 | if (!_NFAParser) { 258 | _NFAParser=NFA(config,G_DEBUG); 259 | } 260 | return _NFAParser; 261 | } 262 | 263 | function _set(obj,prop,value) { 264 | Object.defineProperty(obj,prop,{ 265 | value:value,enumerable:G_DEBUG,writable:true,configurable:true 266 | }); 267 | } 268 | 269 | function _coalesceExactNode(stack) { 270 | var prev=stack[0]; 271 | for (var i=1,j=1,l=stack.length,node;irange[1]) { 328 | throw new RegexSyntaxError({ 329 | type:"OutOfOrder", 330 | lastIndex:range.lastIndex, 331 | message:"Range ["+range.join('-')+"] out of order in character class!" 332 | }); 333 | } 334 | return range.join(''); 335 | })); 336 | } 337 | 338 | function RegexSyntaxError(e) { 339 | this.name="RegexSyntaxError"; 340 | this.type=e.type; 341 | this.lastIndex=e.lastIndex; 342 | this.lastState=e.lastState; 343 | this.astStack=e.astStack; 344 | this.message=e.message; 345 | Object.defineProperty(this,'stack',{ 346 | value:new Error(e.message).stack,enumerable:false 347 | }); 348 | } 349 | RegexSyntaxError.prototype.toString=function () { 350 | return this.name+' '+this.type+':'+this.message; 351 | }; 352 | 353 | 354 | 355 | var escapeCharMap={n:"\n",r:"\r",t:"\t",v:"\v",f:"\f"}; 356 | 357 | // All indices' end will be fixed later by stack[i].indices.push(stack[i+1].indices[0]) 358 | // All raw string filled later by node.raw=s.slice(node.indices[0],node.indices[1]) 359 | // All nodes are unshift to stack, so they're reverse order. 360 | var actions=(function _() { 361 | 362 | function exact(stack,c,i) { //any literal string. 363 | // ExactNode.chars will be filled later (than raw) 364 | // Escape actions and repeat actions will fill node.chars 365 | // node.chars = node.chars || node.raw 366 | var last=stack[0]; 367 | if (!last || last.type!=EXACT_NODE || last.repeat || last.chars) 368 | stack.unshift({type:EXACT_NODE, indices:[i]}); 369 | } 370 | function dot(stack,c,i) { // /./ 371 | stack.unshift({type:DOT_NODE,indices:[i]}); 372 | } 373 | function nullChar(stack,c,i) { 374 | c="\0"; 375 | actions.exact.apply(this,arguments); 376 | } 377 | function assertBegin(stack,c,i) { // /^/ 378 | stack.unshift({ 379 | type:ASSERT_NODE, 380 | indices:[i], 381 | assertionType:AssertBegin 382 | }); 383 | } 384 | function assertEnd(stack,c,i,state,s) { 385 | stack.unshift({ 386 | type:ASSERT_NODE, 387 | indices:[i], 388 | assertionType:AssertEnd 389 | }); 390 | } 391 | function assertWordBoundary(stack,c,i) {//\b \B assertion 392 | stack.unshift({ 393 | type:ASSERT_NODE, 394 | indices:[i-1], 395 | assertionType: c=='b'?AssertWordBoundary:AssertNonWordBoundary 396 | }); 397 | } 398 | function repeatnStart(stack,c,i) { // /a{/ 399 | //Treat repeatn as normal exact node,do transfer in repeatnEnd action. 400 | //Because /a{+/ is valid. 401 | var last=stack[0]; 402 | if (last.type===EXACT_NODE) { 403 | return; 404 | } else { // '[a-z]{' is valid 405 | stack.unshift({type:EXACT_NODE,indices:[i]}); 406 | } 407 | } 408 | function repeatnComma(stack,c,i) { // /a{n,}/ 409 | var last=stack[0]; 410 | _set(last,'_commaIndex',i); 411 | } 412 | function repeatnEnd(stack,c,i,state,s) { // /a{n,m}/ 413 | var last=stack[0],charEndIndex=s.lastIndexOf('{',i); 414 | var min=parseInt(s.slice(charEndIndex+1,last._commaIndex || i),10); 415 | var max; 416 | if (!last._commaIndex) { // /a{n}/ 417 | max=min; 418 | } else { 419 | if (last._commaIndex+1==i) { // /a{n,}/ 420 | max=Infinity; 421 | } else { 422 | max=parseInt(s.slice(last._commaIndex+1,i),10); 423 | } 424 | if (max < min) { 425 | throw new RegexSyntaxError({ 426 | type:"OutOfOrder",lastState:state, 427 | lastIndex:i,astStack:stack, 428 | message:"Numbers out of order in {} quantifier!" 429 | }); 430 | } 431 | delete last._commaIndex; 432 | } 433 | if (last.indices[0]===charEndIndex) { // '[a-z]{1,3}' 434 | stack.shift(); 435 | } 436 | _repeat(stack,min,max,charEndIndex,s); 437 | } 438 | function repeat0(stack,c,i,state,s) { _repeat(stack,0,Infinity,i,s) } // e.g. /a*/ 439 | function repeat01(stack,c,i,state,s) { _repeat(stack,0,1,i,s) } // e.g. /a?/ 440 | function repeat1(stack,c,i,state,s) { _repeat(stack,1,Infinity,i,s) } // e.g. /a+/ 441 | function _repeat(stack,min,max,charEndIndex,s) { 442 | var last=stack[0],repeat={min:min,max:max,nonGreedy:false}, 443 | charIndex=charEndIndex-1; 444 | if (last.chars && last.chars.length===1) charIndex=last.indices[0]; 445 | if (last.type===EXACT_NODE) { // exact node only repeat last char 446 | var a={ 447 | type:EXACT_NODE, 448 | repeat:repeat,chars:last.chars?last.chars:s[charIndex], 449 | indices:[charIndex] 450 | }; 451 | if (last.indices[0]===charIndex) stack.shift(); // e.g. /a{n}/ should be only single node 452 | stack.unshift(a); 453 | } else { 454 | last.repeat=repeat; 455 | } 456 | } 457 | function repeatNonGreedy(stack) { stack[0].repeat.nonGreedy=true} 458 | function normalEscape(stack,c,i) { 459 | if (escapeCharMap.hasOwnProperty(c)) c=escapeCharMap[c]; 460 | stack.unshift({ 461 | type:EXACT_NODE,chars:c,indices:[i-1] 462 | }); 463 | } 464 | function charClassEscape(stack,c,i) { 465 | stack.unshift({ 466 | type:CHARSET_NODE,indices:[i-1],chars:'',ranges:[], 467 | classes:[c],exclude:false 468 | }); 469 | } 470 | function hexEscape(stack,c,i,state,s) { 471 | c=String.fromCharCode(parseInt(s[i-1]+c,16)); 472 | stack.unshift({ 473 | type:EXACT_NODE, chars:c, 474 | indices:[i-3] // \xAA length-1 475 | }); 476 | } 477 | function unicodeEscape(stack,c,i,state,s) { 478 | c=String.fromCharCode(parseInt(s.slice(i-3,i+1),16)); 479 | stack.unshift({ 480 | type:EXACT_NODE, chars:c, 481 | indices:[i-5] // \u5409 length-1 482 | }); 483 | } 484 | function groupStart(stack,c,i) { 485 | var counter=(stack.groupCounter=(stack.groupCounter || {i:0})); 486 | counter.i++; 487 | var group={ 488 | type:GROUP_NODE, 489 | num: counter.i, 490 | sub:[], indices:[i], 491 | _parentStack:stack // Used to restore current stack when group end,viz. encounters ")" 492 | }; 493 | stack=group.sub; 494 | _set(stack,'_parentGroup',group); 495 | stack.groupCounter=counter; //keep groupCounter persist and ref modifiable 496 | return stack; 497 | } 498 | function groupNonCapture(stack) { // /(?:)/\ 499 | var group=stack._parentGroup 500 | group.nonCapture=true; 501 | group.num=undefined; 502 | stack.groupCounter.i--; 503 | } 504 | function groupToAssertion(stack,c,i) { // Convert /(?!)/,/(?=)/ to AssertNode 505 | var group=stack._parentGroup; 506 | group.type=ASSERT_NODE; 507 | group.assertionType= c=='=' ? AssertLookahead : AssertNegativeLookahead ; 508 | // Caveat!!! Assertion group no need to capture 509 | group.num=undefined; 510 | stack.groupCounter.i--; 511 | } 512 | function groupEnd(stack,c,i,state,s) { 513 | stack=endChoice(stack); // restore group's stack from choice 514 | var group=stack._parentGroup; 515 | if (!group) { 516 | throw new RegexSyntaxError({ 517 | type:'UnexpectedChar', 518 | lastIndex:i, 519 | lastState:state, 520 | astStack:stack, 521 | message:"Unexpected end parenthesis!" 522 | }); 523 | } 524 | delete stack._parentGroup; // Be generous,I don't care sparse object performance. 525 | delete stack.groupCounter; // clean 526 | stack=group._parentStack; // restore stack 527 | delete group._parentStack; 528 | stack.unshift(group); 529 | group.endParenIndex=i; 530 | return stack; 531 | } 532 | function choice(stack,c,i) { // encounters "|" 533 | //replace current stack with choices new branch stack 534 | var newStack=[],choice; 535 | if (stack._parentChoice) { 536 | choice=stack._parentChoice; 537 | choice.branches.unshift(newStack); 538 | _set(newStack,'_parentChoice',choice); 539 | _set(newStack,'_parentGroup',choice); 540 | newStack.groupCounter=stack.groupCounter; // keep track 541 | delete stack._parentChoice; 542 | delete stack.groupCounter; // This stack is in choice.branches,so clean it 543 | } else { // "/(a|)/" ,create new ChoiceNode 544 | var first=stack[stack.length-1]; // Because of stack is reverse order 545 | choice={ 546 | type:CHOICE_NODE,indices:[(first?first.indices[0]:i-1)], 547 | branches:[] 548 | }; 549 | _set(choice,'_parentStack',stack); 550 | choice.branches.unshift(stack.slice()); // contents before "|" 551 | stack.length=0; 552 | /* e.g. "/(a|b)/" is { 553 | type:'group',sub:[ 554 | {type:'choice',branches:[ 555 | [{type:'exact',chars:'a'}], 556 | [{type:'exact',chars:'b'}] 557 | ]}]}*/ 558 | stack.unshift(choice); // must not clean groupCounter 559 | 560 | newStack.groupCounter=stack.groupCounter; 561 | _set(newStack,'_parentChoice',choice); 562 | _set(newStack,'_parentGroup',choice); 563 | choice.branches.unshift(newStack); 564 | } 565 | return newStack; 566 | } 567 | //if current stack is a choice's branch,return the original parent stack 568 | function endChoice(stack) { 569 | if (stack._parentChoice) { 570 | var choice=stack._parentChoice; 571 | delete stack._parentChoice; 572 | delete stack._parentGroup; 573 | delete stack.groupCounter; 574 | var parentStack=choice._parentStack; 575 | delete choice._parentStack; 576 | return parentStack; 577 | } 578 | return stack; 579 | } 580 | function charsetStart(stack,c,i) { 581 | stack.unshift({ 582 | type:CHARSET_NODE,indices:[i], 583 | classes:[],ranges:[],chars:'' 584 | }); 585 | } 586 | function charsetExclude(stack) {stack[0].exclude=true} 587 | function charsetContent(stack,c,i) {stack[0].chars+=c} 588 | function charsetNormalEscape(stack,c,i) { 589 | if (escapeCharMap.hasOwnProperty(c)) c=escapeCharMap[c]; 590 | stack[0].chars+=c; 591 | } 592 | function charsetNullChar(stack,c,i) { 593 | stack[0].chars+="\0"; 594 | } 595 | function charsetClassEscape(stack,c) { 596 | stack[0].classes.push(c); 597 | } 598 | function charsetHexEscape(stack,c,i,state,s) { 599 | var last=stack[0]; 600 | c=String.fromCharCode(parseInt(last.chars.slice(-1)+c,16)); 601 | last.chars=last.chars.slice(0,-2); // also remove "xA" 602 | last.chars+=c; 603 | } 604 | function charsetUnicodeEscape(stack,c,i,state,s) { 605 | var last=stack[0]; 606 | c=String.fromCharCode(parseInt(last.chars.slice(-3)+c,16)); 607 | last.chars=last.chars.slice(0,-4); //remove "uABC" 608 | last.chars+=c; 609 | } 610 | 611 | function charsetRangeEnd(stack,c,i,state,s) { 612 | var charset=stack[0]; 613 | var range=charset.chars.slice(-2); 614 | range=[range[0],c]; 615 | range.lastIndex=i; 616 | charset.ranges.push(range); 617 | charset.chars=charset.chars.slice(0,-2); 618 | } 619 | function charsetRangeEndNormalEscape(stack,c) { 620 | if (escapeCharMap.hasOwnProperty(c)) c=escapeCharMap[c]; 621 | charsetRangeEnd.apply(this,arguments); 622 | } 623 | // [\x30-\x78] first repr as {ranges:['\x30','x']} 624 | // [\u0000-\u4567] first repr as {ranges:['\0','u']} 625 | // If escape sequences are valid then replace range end with corrent char 626 | // stack[0].chars did not contain 'u' or 'x' 627 | function charsetRangeEndUnicodeEscape(stack,c,i) { 628 | var charset=stack[0]; 629 | var code=charset.chars.slice(-3)+c; 630 | charset.chars=charset.chars.slice(0,-3); // So just remove previous three,no 'u' 631 | var range=charset.ranges.pop(); 632 | c=String.fromCharCode(parseInt(code,16)); 633 | range=[range[0],c]; 634 | range.lastIndex=i; 635 | charset.ranges.push(range); 636 | } 637 | function charsetRangeEndHexEscape(stack,c,i) { 638 | var charset=stack[0]; 639 | var code=charset.chars.slice(-1)+c; 640 | charset.chars=charset.chars.slice(0,-1); // last.chars does'nt contain 'x' 641 | var range=charset.ranges.pop(); 642 | c=String.fromCharCode(parseInt(code,16)); 643 | range=[range[0],c]; 644 | range.lastIndex=i; 645 | charset.ranges.push(range); 646 | } 647 | 648 | 649 | /* Caveat!!! 650 | See:https://developer.mozilla.org/en/docs/Web/JavaScript/Reference/Global_Objects/RegExp 651 | \0 Matches a NUL character. Do not follow this with another digit. 652 | ECMA-262 Standard: 15.10.2.11 DecimalEscape 653 | NOTE 654 | If \ is followed by a decimal number n whose first digit is not 0, then the escape sequence is considered to be 655 | a backreference. It is an error if n is greater than the total number of left capturing parentheses in the entire regular 656 | expression. \0 represents the character and cannot be followed by a decimal digit. 657 | 658 | But in both Chrome and Firefox, /\077/ matches "\077",e.g. String.fromCharCode(parseInt("77",8)) 659 | /(g)\1/ matches "gg",it's OK. 660 | But /(g)\14/ matches "g\14","\14" is String.fromCharCode(parseInt("14",8)) 661 | And /(g)\1456/ matches "g\145"+"6",/(g)\19/ matches "g\1"+"9". Who knows WTF? 662 | Considering that ECMAScript StrictMode did not support OctEscape, 663 | I'm not going to implement OctEscape. 664 | 665 | I will make it conform the Standard.(Also keep code simple) 666 | */ 667 | function backref(stack,c,i,state) { 668 | var last=stack[0],n=parseInt(c,10), 669 | isFirstNum=state==='escape', 670 | counter=stack.groupCounter, 671 | cn=(counter && counter.i) || 0; 672 | 673 | if (!isFirstNum) { //previous node must be backref node 674 | n=parseInt(last.num+""+n,10); 675 | } else { 676 | last={type:BACKREF_NODE,indices:[i-1]}; 677 | stack.unshift(last); 678 | } 679 | var rn; 680 | if (n>cn) { 681 | throw new RegexSyntaxError({ 682 | type:'InvalidBackReference',lastIndex:i,astStack:stack,lastState:state, 683 | message:'Back reference number('+n+') greater than current groups count('+cn+').' 684 | }); 685 | } else if (rn=_isRecursive(n,stack)) { 686 | throw new RegexSyntaxError({ 687 | type:'InvalidBackReference',lastIndex:i,astStack:stack,lastState:state, 688 | message:'Recursive back reference in group ('+rn+') itself.' 689 | }); 690 | } 691 | last.num=n; 692 | 693 | function _isRecursive(n,stack) { 694 | if (!stack._parentGroup) return false; 695 | if (stack._parentGroup.num==n) return n; 696 | return _isRecursive(n,stack._parentGroup._parentStack); 697 | } 698 | } 699 | 700 | //console.log(K.locals(_)); 701 | 702 | return { 703 | exact:exact,dot:dot,nullChar:nullChar,assertBegin:assertBegin, 704 | assertEnd:assertEnd,assertWordBoundary:assertWordBoundary, 705 | repeatnStart:repeatnStart,repeatnComma:repeatnComma,repeatNonGreedy:repeatNonGreedy, 706 | repeatnEnd:repeatnEnd,repeat1:repeat1,repeat01:repeat01,repeat0:repeat0, 707 | charClassEscape:charClassEscape,normalEscape:normalEscape, 708 | unicodeEscape:unicodeEscape,hexEscape:hexEscape,charClassEscape:charClassEscape, 709 | groupStart:groupStart,groupNonCapture:groupNonCapture,backref:backref, 710 | groupToAssertion:groupToAssertion,groupEnd:groupEnd, 711 | choice:choice,endChoice:endChoice, 712 | charsetStart:charsetStart,charsetExclude:charsetExclude, 713 | charsetContent:charsetContent,charsetNullChar:charsetNullChar, 714 | charsetClassEscape:charsetClassEscape, 715 | charsetHexEscape:charsetHexEscape, 716 | charsetUnicodeEscape:charsetUnicodeEscape, 717 | charsetRangeEnd:charsetRangeEnd,charsetNormalEscape:charsetNormalEscape, 718 | charsetRangeEndNormalEscape:charsetRangeEndNormalEscape, 719 | charsetRangeEndUnicodeEscape:charsetRangeEndUnicodeEscape, 720 | charsetRangeEndHexEscape:charsetRangeEndHexEscape 721 | }; 722 | 723 | })(); 724 | 725 | var digit='0-9'; 726 | var hexDigit='0-9a-fA-F'; 727 | 728 | //EX,It is an exclusive charset 729 | var exactEXCharset='^+*?^$.|(){[\\'; 730 | 731 | var charClassEscape='dDwWsS'; 732 | var unicodeEscape='u'; 733 | var hexEscape='x'; 734 | //var octDigit='0-7'; 735 | //var octEscape='0-7'; Never TODO. JavaScript doesn't support string OctEscape in strict mode. 736 | 737 | // In charset,\b\B means "\b","\B",not word boundary 738 | // NULL Escape followed digit should throw error 739 | var normalEscapeInCharsetEX='^'+charClassEscape+unicodeEscape+hexEscape+'0-9'; 740 | 741 | // 'rntvf\\' escape ,others return raw 742 | // Also need exclude \b\B assertion and backref 743 | var normalEscapeEX=normalEscapeInCharsetEX+'bB1-9'; 744 | 745 | //var controlEscape;//Never TODO.Same reason as OctEscape. 746 | 747 | 748 | var repeatnStates='repeatnStart,repeatn_1,repeatn_2,repeatnErrorStart,repeatnError_1,repeatnError_2'; 749 | var hexEscapeStates='hexEscape1,hexEscape2'; 750 | var unicodeEscapeStates='unicodeEscape1,unicodeEscape2,unicodeEscape3,unicodeEscape4'; 751 | 752 | var allHexEscapeStates=hexEscapeStates+','+unicodeEscapeStates; 753 | 754 | var charsetIncompleteEscapeStates='charsetUnicodeEscape1,charsetUnicodeEscape2,charsetUnicodeEscape3,charsetUnicodeEscape4,charsetHexEscape1,charsetHexEscape2'; 755 | 756 | // [a-\u1z] means [a-u1z], [a-\u-z] means [-za-u] 757 | // [a-\u0-9] means [a-u0-9]. WTF! 758 | var charsetRangeEndIncompleteEscapeFirstStates='charsetRangeEndUnicodeEscape1,charsetRangeEndHexEscape1'; 759 | 760 | var charsetRangeEndIncompleteEscapeRemainStates='charsetRangeEndUnicodeEscape2,charsetRangeEndUnicodeEscape3,charsetRangeEndUnicodeEscape4,charsetRangeEndHexEscape2'; 761 | 762 | var charsetRangeEndIncompleteEscapeStates=charsetRangeEndIncompleteEscapeFirstStates+','+charsetRangeEndIncompleteEscapeRemainStates; 763 | 764 | var config={ 765 | compact:true, 766 | accepts:'start,begin,end,repeat0,repeat1,exact,repeatn,repeat01,repeatNonGreedy,choice,'+(repeatnStates+',nullChar,digitBackref,'+unicodeEscapeStates+','+hexEscapeStates), 767 | trans:[ 768 | ['start,begin,end,exact,repeatNonGreedy,repeat0,repeat1,repeat01,groupStart,groupQualifiedStart,choice,repeatn>exact',exactEXCharset,actions.exact], 769 | // e.g. /\u54/ means /u54/ 770 | [allHexEscapeStates+'>exact',exactEXCharset+hexDigit,actions.exact], 771 | // e.g. /\0abc/ is exact "\0abc",but /\012/ is an error 772 | ['nullChar>exact',exactEXCharset+digit,actions.exact], 773 | //[(repeatnStates+',nullChar,digitBackref,'+unicodeEscapeStates+','+hexEscapeStates)+'>exact',exactEXCharset+''] 774 | [(repeatnStates+',nullChar,digitBackref,'+unicodeEscapeStates+','+hexEscapeStates)+',start,begin,end,exact,repeatNonGreedy,repeat0,repeat1,repeat01,groupStart,groupQualifiedStart,choice,repeatn>exact','.',actions.dot], 775 | ['start,groupStart,groupQualifiedStart,end,begin,exact,repeat0,repeat1,repeat01,repeatn,repeatNonGreedy,choice,'+repeatnStates+',nullChar,digitBackref,'+unicodeEscapeStates+','+hexEscapeStates+'>begin','^',actions.assertBegin], 776 | [(repeatnStates+',nullChar,digitBackref,'+unicodeEscapeStates+','+hexEscapeStates)+',exact>repeatnStart','{',actions.repeatnStart], 777 | ['start,begin,end,groupQualifiedStart,groupStart,repeat0,repeat1,repeatn,repeat01,repeatNonGreedy,choice>repeatnErrorStart','{',actions.exact],//No repeat,treat as exact char e.g. /{/,/^{/,/a|{/ 778 | ['repeatnStart>repeatn_1',digit,actions.exact], // Now maybe /a{1/ 779 | ['repeatn_1>repeatn_1',digit,actions.exact], // Could be /a{11/ 780 | ['repeatn_1>repeatn_2',',',actions.repeatnComma], // Now maybe /a{1,/ 781 | ['repeatn_2>repeatn_2',digit,actions.exact], // Now maybe /a{1,3/ 782 | ['repeatn_1,repeatn_2>repeatn','}',actions.repeatnEnd], //Totally end /a{1,3}/ 783 | //Repeat treat as exact chars 784 | ['repeatnStart,repeatnErrorStart>exact','}',actions.exact], // e.g. /{}/,/a{}/ 785 | //Add exclusion 0-9 and "}", e.g. /a{a/,/a{,/ are valid exact match 786 | ['repeatnStart,repeatnErrorStart>exact',exactEXCharset+'0-9}',actions.exact], 787 | 788 | // "/{}/" is valid exact match but /{1,2}/ is error repeat. 789 | // So must track it with states repeatnError_1,repeatnError_2 790 | ['repeatnErrorStart>repeatnError_1',digit,actions.exact], 791 | ['repeatnError_1>repeatnError_1',digit,actions.exact], 792 | ['repeatnError_1>repeatnError_2',',',actions.exact], 793 | ['repeatnError_2>repeatnError_2',digit,actions.exact], 794 | // repeatErrorFinal is an unacceptable state. Nothing to repeat error should be throwed 795 | ['repeatnError_2,repeatnError_1>repeatErrorFinal','}'], 796 | 797 | // "/a{2a/" and "/{2a/" are valid exact match 798 | ['repeatn_1,repeatnError_1>exact',exactEXCharset+digit+',}',actions.exact], 799 | // "/a{2,a/" and "/{3,a" are valid 800 | ['repeatn_2,repeatnError_2>exact',exactEXCharset+digit+'}',actions.exact], 801 | 802 | ['exact,'+(repeatnStates+',nullChar,digitBackref,'+unicodeEscapeStates+','+hexEscapeStates)+'>repeat0','*',actions.repeat0], 803 | ['exact,'+(repeatnStates+',nullChar,digitBackref,'+unicodeEscapeStates+','+hexEscapeStates)+'>repeat1','+',actions.repeat1], 804 | ['exact,'+(repeatnStates+',nullChar,digitBackref,'+unicodeEscapeStates+','+hexEscapeStates)+'>repeat01','?',actions.repeat01], 805 | ['choice>repeatErrorFinal','*+?'], 806 | ['repeat0,repeat1,repeat01,repeatn>repeatNonGreedy','?',actions.repeatNonGreedy], 807 | ['repeat0,repeat1,repeat01,repeatn>repeatErrorFinal','+*'], 808 | 809 | // Escape 810 | ['start,begin,end,groupStart,groupQualifiedStart,exact,repeatNonGreedy,repeat0,repeat1,repeat01,repeatn,choice,'+(repeatnStates+',nullChar,digitBackref,'+unicodeEscapeStates+','+hexEscapeStates)+'>escape','\\'], 811 | ['escape>nullChar','0',actions.nullChar], 812 | ['nullChar>digitFollowNullError','0-9'], // "/\0123/" is invalid in standard 813 | ['escape>exact',normalEscapeEX,actions.normalEscape], 814 | ['escape>exact','bB',actions.assertWordBoundary], 815 | ['escape>exact',charClassEscape,actions.charClassEscape], 816 | ['escape>unicodeEscape1',unicodeEscape,actions.exact], 817 | ['unicodeEscape1>unicodeEscape2',hexDigit,actions.exact], 818 | ['unicodeEscape2>unicodeEscape3',hexDigit,actions.exact], 819 | ['unicodeEscape3>unicodeEscape4',hexDigit,actions.exact], 820 | ['unicodeEscape4>exact',hexDigit,actions.unicodeEscape], 821 | ['escape>hexEscape1',hexEscape,actions.exact], 822 | ['hexEscape1>hexEscape2',hexDigit,actions.exact], 823 | ['hexEscape2>exact',hexDigit,actions.hexEscape], 824 | 825 | ['escape>digitBackref','1-9',actions.backref], 826 | ['digitBackref>digitBackref',digit,actions.backref], 827 | ['digitBackref>exact',exactEXCharset+digit,actions.exact], 828 | 829 | // Group start 830 | ['exact,begin,end,repeat0,repeat1,repeat01,repeatn,repeatNonGreedy,start,groupStart,groupQualifiedStart,choice,'+(repeatnStates+',nullChar,digitBackref,'+unicodeEscapeStates+','+hexEscapeStates)+'>groupStart','(',actions.groupStart], 831 | ['groupStart>groupQualify','?'], 832 | ['groupQualify>groupQualifiedStart',':',actions.groupNonCapture],//group non-capturing 833 | ['groupQualify>groupQualifiedStart','=',actions.groupToAssertion],//group positive lookahead 834 | ['groupQualify>groupQualifiedStart','!',actions.groupToAssertion],//group negative lookahead 835 | [(repeatnStates+',nullChar,digitBackref,'+unicodeEscapeStates+','+hexEscapeStates)+'groupStart,groupQualifiedStart,end,exact,repeat1,repeat0,repeat01,repeatn,repeatNonGreedy,choice>exact',')',actions.groupEnd],//group end 836 | 837 | //choice 838 | ['start,begin,end,groupStart,groupQualifiedStart,exact,repeat0,repeat1,repeat01,repeatn,repeatNonGreedy,choice,'+(repeatnStates+',nullChar,digitBackref,'+unicodeEscapeStates+','+hexEscapeStates)+'>choice','|', actions.choice], 839 | 840 | ['start,groupStart,groupQualifiedStart,begin,exact,repeat0,repeat1,repeat01,repeatn,repeatNonGreedy,choice,'+(repeatnStates+',nullChar,digitBackref,'+unicodeEscapeStates+','+hexEscapeStates)+'>end','$',actions.assertEnd], 841 | 842 | // Charset [HA-HO] 843 | ['exact,begin,end,repeat0,repeat1,repeat01,repeatn,repeatNonGreedy,groupQualifiedStart,groupStart,start,choice,'+(repeatnStates+',nullChar,digitBackref,'+unicodeEscapeStates+','+hexEscapeStates)+'>charsetStart','[',actions.charsetStart], 844 | ['charsetStart>charsetExclude','^',actions.charsetExclude], 845 | ['charsetStart>charsetContent','^\\]^',actions.charsetContent], 846 | ['charsetExclude>charsetContent','^\\]',actions.charsetContent], // "[^^]" is valid 847 | ['charsetContent,charsetClass>charsetContent','^\\]-',actions.charsetContent], 848 | ['charsetClass>charsetContent','-',actions.charsetContent], 849 | 850 | 851 | // Charset Escape 852 | [charsetIncompleteEscapeStates+ 853 | ',charsetStart,charsetContent,charsetClass,charsetExclude,charsetRangeEnd>charsetEscape','\\'], 854 | ['charsetEscape>charsetContent',normalEscapeInCharsetEX,actions.charsetNormalEscape], 855 | ['charsetEscape>charsetNullChar','0',actions.charsetNullChar], 856 | 857 | //Didn't allow oct escape 858 | ['charsetEscape>charsetOctEscape','1-9'], 859 | ['charsetRangeEndEscape>charsetOctEscape','1-9'], 860 | //Treat /[\012]/ as an error 861 | ['charsetNullChar>digitFollowNullError',digit], 862 | // Only null char not followed by digit is valid 863 | ['charsetNullChar>charsetContent','^0-9\\]-',actions.charsetContent], 864 | 865 | // charsetClass state should diff from charsetContent 866 | // Because /[\s-a]/ means /[-a\s]/ 867 | ['charsetEscape>charsetClass',charClassEscape,actions.charsetClassEscape], 868 | 869 | ['charsetEscape>charsetUnicodeEscape1',unicodeEscape,actions.charsetContent], 870 | ['charsetUnicodeEscape1>charsetUnicodeEscape2',hexDigit,actions.charsetContent], 871 | ['charsetUnicodeEscape2>charsetUnicodeEscape3',hexDigit,actions.charsetContent], 872 | ['charsetUnicodeEscape3>charsetUnicodeEscape4',hexDigit,actions.charsetContent], 873 | ['charsetUnicodeEscape4>charsetContent',hexDigit,actions.charsetUnicodeEscape], 874 | ['charsetEscape>charsetHexEscape1',hexEscape,actions.charsetContent], 875 | ['charsetHexEscape1>charsetHexEscape2',hexDigit,actions.charsetContent], 876 | ['charsetHexEscape2>charsetContent',hexDigit,actions.charsetHexEscape], 877 | 878 | // [a\u54-9] should be treat as [4-9au5] 879 | [charsetIncompleteEscapeStates+'>charsetContent','^\\]'+hexDigit+'-',actions.charsetContent], 880 | 881 | [charsetIncompleteEscapeStates+',charsetNullChar,charsetContent>charsetRangeStart','-',actions.charsetContent], 882 | ['charsetRangeStart>charsetRangeEnd','^\\]',actions.charsetRangeEnd], 883 | ['charsetRangeEnd>charsetContent','^\\]',actions.charsetContent], 884 | 885 | 886 | // Some troubles here, [0-\x39] means [0-9] 887 | ['charsetRangeStart>charsetRangeEndEscape','\\'], 888 | ['charsetRangeEndEscape>charsetRangeEnd',normalEscapeEX,actions.charsetRangeEndNormalEscape], 889 | // No need to care [a-\0],it is not a valid range so will throw OutOfOrder error. 890 | // But what about [\0-\0]? Insane! 891 | ['charsetRangeEndEscape>charsetRangeEndWithNullChar','0'], 892 | 893 | ['charsetRangeEndEscape>charsetRangeEndUnicodeEscape1',unicodeEscape,actions.charsetRangeEnd], 894 | ['charsetRangeEndUnicodeEscape1>charsetRangeEndUnicodeEscape2',hexDigit,actions.charsetContent], 895 | ['charsetRangeEndUnicodeEscape2>charsetRangeEndUnicodeEscape3',hexDigit,actions.charsetContent], 896 | ['charsetRangeEndUnicodeEscape3>charsetRangeEndUnicodeEscape4',hexDigit,actions.charsetContent], 897 | ['charsetRangeEndUnicodeEscape4>charsetRangeEnd',hexDigit,actions.charsetRangeEndUnicodeEscape], 898 | ['charsetRangeEndEscape>charsetRangeEndHexEscape1',hexEscape,actions.charsetRangeEnd], 899 | ['charsetRangeEndHexEscape1>charsetRangeEndHexEscape2',hexDigit,actions.charsetContent], 900 | ['charsetRangeEndHexEscape2>charsetRangeEnd',hexDigit,actions.charsetRangeEndHexEscape], 901 | // [0-\w] means [-0\w]? Should throw error! 902 | ['charsetRangeEndEscape>charsetRangeEndClass',charClassEscape], 903 | 904 | // [a-\uz] means [za-u],[a-\u-z] means [-za-u] 905 | [charsetRangeEndIncompleteEscapeFirstStates+'>charsetContent','^\\]'+hexDigit,actions.charsetContent], 906 | 907 | // [a-\u0-9] means [0-9a-u] 908 | [charsetRangeEndIncompleteEscapeRemainStates+'>charsetRangeStart','-',actions.charsetContent], 909 | [charsetIncompleteEscapeStates+',' 910 | +charsetRangeEndIncompleteEscapeStates 911 | +',charsetNullChar,charsetRangeStart,charsetContent' 912 | +',charsetClass,charsetExclude,charsetRangeEnd>exact', 913 | ']'] 914 | ] 915 | }; 916 | 917 | 918 | return parse; 919 | }); 920 | -------------------------------------------------------------------------------- /tests/testData.js: -------------------------------------------------------------------------------- 1 | if (typeof define !== 'function') var define = require('amdefine')(module); 2 | define(function() { 3 | function str(v) { 4 | return (typeof v === 'string') ? v : v.source 5 | } 6 | 7 | var reMatchCases=[ 8 | //[RegExp,input:String] 9 | [/abc/,'abc'], 10 | [/abc/i,'ABC'], 11 | [/Abc/i,'aBC'], 12 | [/^abc$/,'abcdef'], 13 | [/^Abc$/im,'def\nabc\ndef'], 14 | [/[a-z]{3}/,'--abc--'], 15 | [/[^A-H]/i,'abchijk'], 16 | [/[A-H]+/,'AAAA'], 17 | [/[A-H]+?/,'AAAA'], 18 | [/\w\d\s/,'A1 '], 19 | [/(\w|\d|\s)+/,'A1 B2\n'], 20 | [/[\w\d\s]+/,'A1 B2\r'], 21 | [/[\W\D\S]+/,'+-&*'], 22 | [/[^\W\D\S]+/,'+-&*'], 23 | [/(\d+|^a)$/,'def123'], 24 | [/(\d+|^a)$/,'a'], 25 | [/([a-z]{3}|\d+$)+/,'abc'], 26 | [/([a-z]{3}|\d+$)+/,'123'], 27 | [/^([a-zA-Z0-9])(([-.]|[_]+)?([a-zA-Z0-9]+))*(@){1}[a-z0-9]+[.]{1}(([a-z]{2,3})|([a-z]{2,3}[.]{1}[a-z]{2,3}))$/,'alan.dot@jackson.com'], 28 | [/\d+(?=ab)/,'123-456ab'], 29 | [/\d*(?=ab)/,'ab-456ab'], 30 | [/\d*?(?=ab)/,'ab-456ab'], 31 | [/https?:\/\/([a-z]+)\.(\w+)\.([a-z]+)/,'http://www.google.com'], 32 | [/https?:\/\/([a-z]+)\.(\w+)\.([a-z]+)/,'https://www.google.com'], 33 | [/^https?:\/\/([a-z]+)\.(\w+)\.([a-z]+)$/,'http://www.google.com'], 34 | [/^https?:\/\/([a-z]+)\.(\w+)\.([a-z]+)$/,'https://www.google.com/'], 35 | [/<(\w+)\s\w+="(.+?)">(.*?)<\/\1>/,'
abc
'], 36 | [/abc(\d+)1{2,}?\1def/,'abc12311123def'], 37 | [/(\w+)+\1+/,'abc123abc123!'], 38 | [/((2[0-4]\d|25[0-5]|[01]?\d\d?)\.){3}(2[0-4]\d|25[0-5]|[01]?\d\d?)/, 39 | ['127.0.0.1','255.255.255.0','192.168.11.12'] 40 | ], 41 | [/\w+([-+.]\w+)*@\w+([-.]\w+)*\.\w+([-.]\w+)*/,'barzar-hall@ruby-lang.com'], 42 | [/\d{4}-\d{1,2}-\d{1,2}/,'1990-10-1'], 43 | [/\b((?!abc)\w)+\b/,'babcue'], 44 | [/\b((?!abc)\w)+\b/,'babbcue'], 45 | [/^\w{1,15}(?:@(?!-))(?:(?:[a-z0-9-]*)(?:[a-z0-9](?!-))(?:\.(?!-)))+[a-z]{2,4}$/, 46 | [ 47 | 'abc@def.com', 48 | 'jelly_bean@google.com.hk', 49 | 'snow_bear@snow-bear.com.cn', 50 | 'i@jex.im', 51 | 'i@jex-cn.com.im', 52 | 'i@jex-cn.bear', 53 | 'i@123cn.bear', 54 | 'dollar@cn.com', 55 | 'dollar@-cn.com', 56 | 'dollar@cn-.com', 57 | 'snow.bear@bear.com' 58 | ] 59 | ], 60 | [/^(a?b)?[a-z]+X?$/,['bb','abb','bbX']], 61 | [ 62 | new RegExp('http://([\\w-]+\\.)+[\\w-]+(/[\\w- ./?%&=]*)?'), 63 | ['http://jex.im/','http://163.com','https://github.com/JexCheng/regulex'] 64 | ], 65 | [ 66 | /^<([a-z]+)([^<]+)*(?:>(.*)<\/\1>|\s+\/>)$/ , 67 | ['','

'] 68 | ] 69 | 70 | ]; 71 | 72 | 73 | var expectedPass = [ 74 | /[^<]+|<(!(--([^-]-([^-][^-]-)->?)?|\[CDATA\[([^]]]([^]]+])]+([^]>][^]]]([^]]+])]+)>)?|DOCTYPE([ \n\t\r]+([A-Za-z_:]|[^\x00-\x7F])([A-Za-z0-9_:.-]|[^\x00-\x7F])([ \n\t\r]+(([A-Za-z_:]|[^\x00-\x7F])([A-Za-z0-9_:.-]|[^\x00-\x7F])|"[^"]"|'[^']'))([ \n\t\r]+)?(\[(<(!(--[^-]-([^-][^-]-)->|[^-]([^]"'><]+|"[^"]"|'[^']')>)|\?([A-Za-z_:]|[^\x00-\x7F])([A-Za-z0-9_:.-]|[^\x00-\x7F])(\?>|[\n\r\t ][^?]\?+([^>?][^?]\?+)>))|%([A-Za-z_:]|[^\x00-\x7F])([A-Za-z0-9_:.-]|[^\x00-\x7F]);|[ \n\t\r]+)]([ \n\t\r]+)?)?>?)?)?|\?(([A-Za-z_:]|[^\x00-\x7F])([A-Za-z0-9_:.-]|[^\x00-\x7F])(\?>|[\n\r\t ][^?]\?+([^>?][^?]\?+)>)?)?|\/(([A-Za-z_:]|[^\x00-\x7F])([A-Za-z0-9_:.-]|[^\x00-\x7F])([ \n\t\r]+)?>?)?|(([A-Za-z_:]|[^\x00-\x7F])([A-Za-z0-9_:.-]|[^\x00-\x7F])([ \n\t\r]+([A-Za-z_:]|[^\x00-\x7F])([A-Za-z0-9_:.-]|[^\x00-\x7F])([ \n\t\r]+)?=([ \n\t\r]+)?("[^<"]"|'[^<']'))*([ \n\t\r]+)?\/?>?)?)/, 75 | 76 | 'ab+(1|0)?[a-z][^0-9]', 77 | /[\0-\n]/, 78 | '/abc/', 79 | '[abcdefa-z\\w0-\\u540-\\u5-\\x68z-\\u5409]', 80 | '[abc-\\u540-\\x69]', 81 | "^abc+d*e+?\\?[\\n-\\rbcd]{3,110}?(?:(a|b)+|(d|[e-z]?(?!abc)))$", 82 | "aa+b*?c{0PP{,{10}ab+?", 83 | "abc(d|e)f(c(a|(?:a|b|[a-z]|a(?=def)))|b|)", 84 | "abc+abc", 85 | "abc*abc", 86 | "ab+\\+c*abc", 87 | "ab[abc]+", 88 | "ab[abc-d]+", 89 | "ab[^abc-d]*", 90 | "ab[^c-d]*", 91 | "ab[[]*", 92 | "ab[\\]]*", 93 | "ab[\\]-a]*", 94 | "ab[^]*", 95 | "ab[-]*", 96 | "ab[a-]*", 97 | "ab[-b]*", 98 | "ab[[]", 99 | "]", 100 | "[a-z0-1]", 101 | "[a-z-b]", 102 | "(abc(def)+(a)((a),(b),(c,(d))))", 103 | "([a-z]+,[abc]444,[^a-b])+,(a(t)o(a[0-1]+b,(a[0-1]+)) )", 104 | '[a-zA-z]+://[^\\s]*', 105 | '((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)', 106 | '\\w+([-+.]\\w+)*@\\w+([-.]\\w+)*\\.\\w+([-.]\\w+)*', 107 | '[a-zA-z]{}://[^\\s]*?', 108 | 'a{1,2}{}', 109 | 'a{1,2}{1,2,4}', 110 | 'a{1,2}{{4}', 111 | 'a+{1,{4}', 112 | 'a+{1a}', 113 | 'a+{1|3}', 114 | 'a+{1\\}', 115 | 'a+{\\}', 116 | 'a+{34,45{}', 117 | '{}{4}{5', 118 | '}{4}{5', 119 | '{{4}{5(a|b)}', 120 | '{{4}{5[a-z]}', 121 | '{{4}{[0-9]}', 122 | /((2[0-4]\d|25[0-5]|[01]?\d\d?)\.){3}(2[0-4]\d|25[0-5]|[01]?\d\d?)/, 123 | /\w+([-+.]\w+)*@\w+([-.]\w+)*\.\w+([-.]\w+)*/, 124 | /[1-9]\d{4,}/, 125 | /<(.*)(.*)>.*<\/\1>|<(.*) \/>/, 126 | /(?=^.{8,}$)(?=.*\d)(?=.*\W+)(?=.*[A-Z])(?=.*[a-z])(?!.*\n).*$/, 127 | /(\d{4}|\d{2})-((1[0-2])|(0?[1-9]))-(([12][0-9])|(3[01])|(0?[1-9]))/, 128 | /((1[0-2])|(0?[1-9]))\/(([12][0-9])|(3[01])|(0?[1-9]))\/(\d{4}|\d{2})/, 129 | /((1|0?)[0-9]|2[0-3]):([0-5][0-9])/, 130 | /[\u4e00-\u9fa5]/, 131 | /[\u3000-\u301e\ufe10-\ufe19\ufe30-\ufe44\ufe50-\ufe6b\uff01-\uffee]/, 132 | /(\d{4}-|\d{3}-)?(\d{8}|\d{7})/, 133 | /1\d{10}/, 134 | /[1-9]\d{5}/, 135 | /\d{15}(\d\d[0-9xX])?/, 136 | /\d+/, 137 | /[0-9]*[1-9][0-9]*/, 138 | /-[0-9]*[1-9][0-9]*/, 139 | /-?\d+/, 140 | '[a-b](a|b)+{4,5def', 141 | /(-?\d+)(\.\d+)?$\nabc/, 142 | /\b((?!abc)\w)+\b/, 143 | 'a(?=b){4,' 144 | ].map(str); 145 | 146 | var expectedFail = [ 147 | 'a(?=b)+','a(?=b)?','a(?=b){4}', 148 | '{}{4}{5}', '[a-b][z-a]{2,6}', 149 | '[z-\\n]', 150 | '[a-zA-z]+{3}', 151 | 'abc{3,7}+', 152 | 'a?{1,2}', 153 | 'a+{1,2}', 154 | 'a*{1,2}', 155 | 'a{1}{1,2}', 156 | 'a{1,4}{1,2}', 157 | "abc(def,([a-z],[0-6],([0-5]def),aaa)", 158 | "ab[abc", 159 | "abc*+abc", 160 | "ab++c*abc", 161 | "\\", 162 | 'abc{42,13}' 163 | ].map(str); 164 | 165 | var re2ast =[{ 166 | raw: 'ab+(1|0)?[a-z][^0-9]a\\nb\\rc\\td', 167 | groupCount: 1, 168 | tree: [{ 169 | type: 'exact', 170 | indices: [0, 1], 171 | raw: 'a', 172 | chars: 'a' 173 | }, { 174 | type: 'exact', 175 | repeat: { 176 | min: 1, 177 | max: Infinity, 178 | nonGreedy: false 179 | }, 180 | chars: 'b', 181 | indices: [1, 3], 182 | raw: 'b+' 183 | }, { 184 | type: 'group', 185 | num: 1, 186 | sub: [{ 187 | type: 'choice', 188 | indices: [4, 7], 189 | branches: [ 190 | [{ 191 | type: 'exact', 192 | indices: [4, 5], 193 | raw: '1', 194 | chars: '1' 195 | }], 196 | [{ 197 | type: 'exact', 198 | indices: [6, 7], 199 | raw: '0', 200 | chars: '0' 201 | }] 202 | ], 203 | raw: '1|0' 204 | }], 205 | indices: [3, 9], 206 | endParenIndex: 7, 207 | repeat: { 208 | min: 0, 209 | max: 1, 210 | nonGreedy: false 211 | }, 212 | raw: '(1|0)?' 213 | }, { 214 | type: 'charset', 215 | indices: [9, 14], 216 | classes: [], 217 | ranges: ['az'], 218 | chars: '', 219 | raw: '[a-z]' 220 | }, { 221 | type: 'charset', 222 | indices: [14, 20], 223 | classes: [], 224 | ranges: ['09'], 225 | chars: '', 226 | exclude: true, 227 | raw: '[^0-9]' 228 | },{ 229 | type: 'exact', 230 | raw:'a\\nb\\rc\\td', 231 | chars:'a\nb\rc\td', 232 | indices:[20,30] 233 | }] 234 | }, { 235 | raw: '[\\0-\\n]', 236 | groupCount: 0, 237 | tree: [{ 238 | type: 'charset', 239 | indices: [0, 7], 240 | classes: [], 241 | ranges: ['\u0000\n'], 242 | chars: '', 243 | raw: '[\\0-\\n]' 244 | }] 245 | }, { 246 | raw: '[abcdefa-z\\w0-\\u540-\\u5-\\x68z-\\u5409]', 247 | groupCount: 0, 248 | tree: [{ 249 | type: 'charset', 250 | indices: [0, 37], 251 | classes: ['w'], 252 | ranges: ['0u', '5h', 'az', 'z吉'], 253 | chars: 'abcdef54', 254 | raw: '[abcdefa-z\\w0-\\u540-\\u5-\\x68z-\\u5409]' 255 | }] 256 | }, { 257 | raw: '[abc-\\u540-\\x69]', 258 | groupCount: 0, 259 | tree: [{ 260 | type: 'charset', 261 | indices: [0, 16], 262 | classes: [], 263 | ranges: ['0i', 'cu'], 264 | chars: 'ab54', 265 | raw: '[abc-\\u540-\\x69]' 266 | }] 267 | }, { 268 | raw: '^abc+d*e+?\\?[\\n-\\rbcd]{3,110}?(?:(a|b)+|(d|[e-z]?(?!abc)))$', 269 | groupCount: 2, 270 | tree: [{ 271 | type: 'assert', 272 | indices: [0, 1], 273 | assertionType: 'AssertBegin', 274 | raw: '^' 275 | }, { 276 | type: 'exact', 277 | indices: [1, 3], 278 | raw: 'ab', 279 | chars: 'ab' 280 | }, { 281 | type: 'exact', 282 | repeat: { 283 | min: 1, 284 | max: Infinity, 285 | nonGreedy: false 286 | }, 287 | chars: 'c', 288 | indices: [3, 5], 289 | raw: 'c+' 290 | }, { 291 | type: 'exact', 292 | repeat: { 293 | min: 0, 294 | max: Infinity, 295 | nonGreedy: false 296 | }, 297 | chars: 'd', 298 | indices: [5, 7], 299 | raw: 'd*' 300 | }, { 301 | type: 'exact', 302 | repeat: { 303 | min: 1, 304 | max: Infinity, 305 | nonGreedy: true 306 | }, 307 | chars: 'e', 308 | indices: [7, 10], 309 | raw: 'e+?' 310 | }, { 311 | type: 'exact', 312 | chars: '?', 313 | indices: [10, 12], 314 | raw: '\\?' 315 | }, { 316 | type: 'charset', 317 | indices: [12, 30], 318 | classes: [], 319 | ranges: ['\n\r'], 320 | chars: 'bcd', 321 | repeat: { 322 | min: 3, 323 | max: 110, 324 | nonGreedy: true 325 | }, 326 | raw: '[\\n-\\rbcd]{3,110}?' 327 | }, { 328 | type: 'group', 329 | num: undefined, 330 | sub: [{ 331 | type: 'choice', 332 | indices: [33, 57], 333 | branches: [ 334 | [{ 335 | type: 'group', 336 | num: 1, 337 | sub: [{ 338 | type: 'choice', 339 | indices: [34, 37], 340 | branches: [ 341 | [{ 342 | type: 'exact', 343 | indices: [34, 35], 344 | raw: 'a', 345 | chars: 'a' 346 | }], 347 | [{ 348 | type: 'exact', 349 | indices: [36, 37], 350 | raw: 'b', 351 | chars: 'b' 352 | }] 353 | ], 354 | raw: 'a|b' 355 | }], 356 | indices: [33, 39], 357 | endParenIndex: 37, 358 | repeat: { 359 | min: 1, 360 | max: Infinity, 361 | nonGreedy: false 362 | }, 363 | raw: '(a|b)+' 364 | }], 365 | [{ 366 | type: 'group', 367 | num: 2, 368 | sub: [{ 369 | type: 'choice', 370 | indices: [41, 56], 371 | branches: [ 372 | [{ 373 | type: 'exact', 374 | indices: [41, 42], 375 | raw: 'd', 376 | chars: 'd' 377 | }], 378 | [{ 379 | type: 'charset', 380 | indices: [43, 49], 381 | classes: [], 382 | ranges: ['ez'], 383 | chars: '', 384 | repeat: { 385 | min: 0, 386 | max: 1, 387 | nonGreedy: false 388 | }, 389 | raw: '[e-z]?' 390 | }, { 391 | type: 'assert', 392 | num: undefined, 393 | sub: [{ 394 | type: 'exact', 395 | indices: [52, 55], 396 | raw: 'abc', 397 | chars: 'abc' 398 | }], 399 | indices: [49, 56], 400 | assertionType: 'AssertNegativeLookahead', 401 | endParenIndex: 55, 402 | raw: '(?!abc)' 403 | }] 404 | ], 405 | raw: 'd|[e-z]?(?!abc)' 406 | }], 407 | indices: [40, 57], 408 | endParenIndex: 56, 409 | raw: '(d|[e-z]?(?!abc))' 410 | }] 411 | ], 412 | raw: '(a|b)+|(d|[e-z]?(?!abc))' 413 | }], 414 | indices: [30, 58], 415 | nonCapture: true, 416 | endParenIndex: 57, 417 | raw: '(?:(a|b)+|(d|[e-z]?(?!abc)))' 418 | }, { 419 | type: 'assert', 420 | indices: [58, 59], 421 | assertionType: 'AssertEnd', 422 | raw: '$' 423 | }] 424 | }, { 425 | raw: 'aa+b*?c{0PP{,{10}ab+?', 426 | groupCount: 0, 427 | tree: [{ 428 | type: 'exact', 429 | indices: [0, 1], 430 | raw: 'a', 431 | chars: 'a' 432 | }, { 433 | type: 'exact', 434 | repeat: { 435 | min: 1, 436 | max: Infinity, 437 | nonGreedy: false 438 | }, 439 | chars: 'a', 440 | indices: [1, 3], 441 | raw: 'a+' 442 | }, { 443 | type: 'exact', 444 | repeat: { 445 | min: 0, 446 | max: Infinity, 447 | nonGreedy: true 448 | }, 449 | chars: 'b', 450 | indices: [3, 6], 451 | raw: 'b*?' 452 | }, { 453 | type: 'exact', 454 | indices: [6, 12], 455 | raw: 'c{0PP{', 456 | chars: 'c{0PP{' 457 | }, { 458 | type: 'exact', 459 | repeat: { 460 | min: 10, 461 | max: 10, 462 | nonGreedy: false 463 | }, 464 | chars: ',', 465 | indices: [12, 17], 466 | raw: ',{10}' 467 | }, { 468 | type: 'exact', 469 | indices: [17, 18], 470 | raw: 'a', 471 | chars: 'a' 472 | }, { 473 | type: 'exact', 474 | repeat: { 475 | min: 1, 476 | max: Infinity, 477 | nonGreedy: true 478 | }, 479 | chars: 'b', 480 | indices: [18, 21], 481 | raw: 'b+?' 482 | }] 483 | }, { 484 | raw: 'ab[\\]-a]*', 485 | groupCount: 0, 486 | tree: [{ 487 | type: 'exact', 488 | indices: [0, 2], 489 | raw: 'ab', 490 | chars: 'ab' 491 | }, { 492 | type: 'charset', 493 | indices: [2, 9], 494 | classes: [], 495 | ranges: [']a'], 496 | chars: '', 497 | repeat: { 498 | min: 0, 499 | max: Infinity, 500 | nonGreedy: false 501 | }, 502 | raw: '[\\]-a]*' 503 | }] 504 | }, { 505 | raw: 'ab[^]*', 506 | groupCount: 0, 507 | tree: [{ 508 | type: 'exact', 509 | indices: [0, 2], 510 | raw: 'ab', 511 | chars: 'ab' 512 | }, { 513 | type: 'charset', 514 | indices: [2, 6], 515 | classes: [], 516 | ranges: [], 517 | chars: '', 518 | exclude: true, 519 | repeat: { 520 | min: 0, 521 | max: Infinity, 522 | nonGreedy: false 523 | }, 524 | raw: '[^]*' 525 | }] 526 | }, { 527 | raw: 'ab[-]*', 528 | groupCount: 0, 529 | tree: [{ 530 | type: 'exact', 531 | indices: [0, 2], 532 | raw: 'ab', 533 | chars: 'ab' 534 | }, { 535 | type: 'charset', 536 | indices: [2, 6], 537 | classes: [], 538 | ranges: [], 539 | chars: '-', 540 | repeat: { 541 | min: 0, 542 | max: Infinity, 543 | nonGreedy: false 544 | }, 545 | raw: '[-]*' 546 | }] 547 | }, { 548 | raw: 'ab[a-]*', 549 | groupCount: 0, 550 | tree: [{ 551 | type: 'exact', 552 | indices: [0, 2], 553 | raw: 'ab', 554 | chars: 'ab' 555 | }, { 556 | type: 'charset', 557 | indices: [2, 7], 558 | classes: [], 559 | ranges: [], 560 | chars: 'a-', 561 | repeat: { 562 | min: 0, 563 | max: Infinity, 564 | nonGreedy: false 565 | }, 566 | raw: '[a-]*' 567 | }] 568 | }, { 569 | raw: '[a-z-b]', 570 | groupCount: 0, 571 | tree: [{ 572 | type: 'charset', 573 | indices: [0, 7], 574 | classes: [], 575 | ranges: ['az'], 576 | chars: '-b', 577 | raw: '[a-z-b]' 578 | }] 579 | }, { 580 | raw: '(abc(def)+(a)((a),(b),(c,(d))))', 581 | groupCount: 8, 582 | tree: [{ 583 | type: 'group', 584 | num: 1, 585 | sub: [{ 586 | type: 'exact', 587 | indices: [1, 4], 588 | raw: 'abc', 589 | chars: 'abc' 590 | }, { 591 | type: 'group', 592 | num: 2, 593 | sub: [{ 594 | type: 'exact', 595 | indices: [5, 8], 596 | raw: 'def', 597 | chars: 'def' 598 | }], 599 | indices: [4, 10], 600 | endParenIndex: 8, 601 | repeat: { 602 | min: 1, 603 | max: Infinity, 604 | nonGreedy: false 605 | }, 606 | raw: '(def)+' 607 | }, { 608 | type: 'group', 609 | num: 3, 610 | sub: [{ 611 | type: 'exact', 612 | indices: [11, 12], 613 | raw: 'a', 614 | chars: 'a' 615 | }], 616 | indices: [10, 13], 617 | endParenIndex: 12, 618 | raw: '(a)' 619 | }, { 620 | type: 'group', 621 | num: 4, 622 | sub: [{ 623 | type: 'group', 624 | num: 5, 625 | sub: [{ 626 | type: 'exact', 627 | indices: [15, 16], 628 | raw: 'a', 629 | chars: 'a' 630 | }], 631 | indices: [14, 17], 632 | endParenIndex: 16, 633 | raw: '(a)' 634 | }, { 635 | type: 'exact', 636 | indices: [17, 18], 637 | raw: ',', 638 | chars: ',' 639 | }, { 640 | type: 'group', 641 | num: 6, 642 | sub: [{ 643 | type: 'exact', 644 | indices: [19, 20], 645 | raw: 'b', 646 | chars: 'b' 647 | }], 648 | indices: [18, 21], 649 | endParenIndex: 20, 650 | raw: '(b)' 651 | }, { 652 | type: 'exact', 653 | indices: [21, 22], 654 | raw: ',', 655 | chars: ',' 656 | }, { 657 | type: 'group', 658 | num: 7, 659 | sub: [{ 660 | type: 'exact', 661 | indices: [23, 25], 662 | raw: 'c,', 663 | chars: 'c,' 664 | }, { 665 | type: 'group', 666 | num: 8, 667 | sub: [{ 668 | type: 'exact', 669 | indices: [26, 27], 670 | raw: 'd', 671 | chars: 'd' 672 | }], 673 | indices: [25, 28], 674 | endParenIndex: 27, 675 | raw: '(d)' 676 | }], 677 | indices: [22, 29], 678 | endParenIndex: 28, 679 | raw: '(c,(d))' 680 | }], 681 | indices: [13, 30], 682 | endParenIndex: 29, 683 | raw: '((a),(b),(c,(d)))' 684 | }], 685 | indices: [0, 31], 686 | endParenIndex: 30, 687 | raw: '(abc(def)+(a)((a),(b),(c,(d))))' 688 | }] 689 | }, { 690 | raw: '([a-z]+,[abc]444,[^a-b])+,(a(t)o(a[0-1]+b,(a[0-1]+)) )', 691 | groupCount: 5, 692 | tree: [{ 693 | type: 'group', 694 | num: 1, 695 | sub: [{ 696 | type: 'charset', 697 | indices: [1, 7], 698 | classes: [], 699 | ranges: ['az'], 700 | chars: '', 701 | repeat: { 702 | min: 1, 703 | max: Infinity, 704 | nonGreedy: false 705 | }, 706 | raw: '[a-z]+' 707 | }, { 708 | type: 'exact', 709 | indices: [7, 8], 710 | raw: ',', 711 | chars: ',' 712 | }, { 713 | type: 'charset', 714 | indices: [8, 13], 715 | classes: [], 716 | ranges: [], 717 | chars: 'abc', 718 | raw: '[abc]' 719 | }, { 720 | type: 'exact', 721 | indices: [13, 17], 722 | raw: '444,', 723 | chars: '444,' 724 | }, { 725 | type: 'charset', 726 | indices: [17, 23], 727 | classes: [], 728 | ranges: ['ab'], 729 | chars: '', 730 | exclude: true, 731 | raw: '[^a-b]' 732 | }], 733 | indices: [0, 25], 734 | endParenIndex: 23, 735 | repeat: { 736 | min: 1, 737 | max: Infinity, 738 | nonGreedy: false 739 | }, 740 | raw: '([a-z]+,[abc]444,[^a-b])+' 741 | }, { 742 | type: 'exact', 743 | indices: [25, 26], 744 | raw: ',', 745 | chars: ',' 746 | }, { 747 | type: 'group', 748 | num: 2, 749 | sub: [{ 750 | type: 'exact', 751 | indices: [27, 28], 752 | raw: 'a', 753 | chars: 'a' 754 | }, { 755 | type: 'group', 756 | num: 3, 757 | sub: [{ 758 | type: 'exact', 759 | indices: [29, 30], 760 | raw: 't', 761 | chars: 't' 762 | }], 763 | indices: [28, 31], 764 | endParenIndex: 30, 765 | raw: '(t)' 766 | }, { 767 | type: 'exact', 768 | indices: [31, 32], 769 | raw: 'o', 770 | chars: 'o' 771 | }, { 772 | type: 'group', 773 | num: 4, 774 | sub: [{ 775 | type: 'exact', 776 | indices: [33, 34], 777 | raw: 'a', 778 | chars: 'a' 779 | }, { 780 | type: 'charset', 781 | indices: [34, 40], 782 | classes: [], 783 | ranges: ['01'], 784 | chars: '', 785 | repeat: { 786 | min: 1, 787 | max: Infinity, 788 | nonGreedy: false 789 | }, 790 | raw: '[0-1]+' 791 | }, { 792 | type: 'exact', 793 | indices: [40, 42], 794 | raw: 'b,', 795 | chars: 'b,' 796 | }, { 797 | type: 'group', 798 | num: 5, 799 | sub: [{ 800 | type: 'exact', 801 | indices: [43, 44], 802 | raw: 'a', 803 | chars: 'a' 804 | }, { 805 | type: 'charset', 806 | indices: [44, 50], 807 | classes: [], 808 | ranges: ['01'], 809 | chars: '', 810 | repeat: { 811 | min: 1, 812 | max: Infinity, 813 | nonGreedy: false 814 | }, 815 | raw: '[0-1]+' 816 | }], 817 | indices: [42, 51], 818 | endParenIndex: 50, 819 | raw: '(a[0-1]+)' 820 | }], 821 | indices: [32, 52], 822 | endParenIndex: 51, 823 | raw: '(a[0-1]+b,(a[0-1]+))' 824 | }, { 825 | type: 'exact', 826 | indices: [52, 53], 827 | raw: ' ', 828 | chars: ' ' 829 | }], 830 | indices: [26, 54], 831 | endParenIndex: 53, 832 | raw: '(a(t)o(a[0-1]+b,(a[0-1]+)) )' 833 | }] 834 | }, { 835 | raw: '[a-zA-z]+://[^\\s]*', 836 | groupCount: 0, 837 | tree: [{ 838 | type: 'charset', 839 | indices: [0, 9], 840 | classes: [], 841 | ranges: ['Az', 'az'], 842 | chars: '', 843 | repeat: { 844 | min: 1, 845 | max: Infinity, 846 | nonGreedy: false 847 | }, 848 | raw: '[a-zA-z]+' 849 | }, { 850 | type: 'exact', 851 | indices: [9, 12], 852 | raw: '://', 853 | chars: '://' 854 | }, { 855 | type: 'charset', 856 | indices: [12, 18], 857 | classes: ['s'], 858 | ranges: [], 859 | chars: '', 860 | exclude: true, 861 | repeat: { 862 | min: 0, 863 | max: Infinity, 864 | nonGreedy: false 865 | }, 866 | raw: '[^\\s]*' 867 | }] 868 | }, { 869 | raw: '((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)', 870 | groupCount: 3, 871 | tree: [{ 872 | type: 'group', 873 | num: 1, 874 | sub: [{ 875 | type: 'group', 876 | num: 2, 877 | sub: [{ 878 | type: 'choice', 879 | indices: [2, 29], 880 | branches: [ 881 | [{ 882 | type: 'exact', 883 | indices: [2, 3], 884 | raw: '2', 885 | chars: '2' 886 | }, { 887 | type: 'charset', 888 | indices: [3, 8], 889 | classes: [], 890 | ranges: ['04'], 891 | chars: '', 892 | raw: '[0-4]' 893 | }, { 894 | type: 'charset', 895 | indices: [8, 10], 896 | chars: '', 897 | ranges: [], 898 | classes: ['d'], 899 | exclude: false, 900 | raw: '\\d' 901 | }], 902 | [{ 903 | type: 'exact', 904 | indices: [11, 13], 905 | raw: '25', 906 | chars: '25' 907 | }, { 908 | type: 'charset', 909 | indices: [13, 18], 910 | classes: [], 911 | ranges: ['05'], 912 | chars: '', 913 | raw: '[0-5]' 914 | }], 915 | [{ 916 | type: 'charset', 917 | indices: [19, 24], 918 | classes: [], 919 | ranges: [], 920 | chars: '01', 921 | repeat: { 922 | min: 0, 923 | max: 1, 924 | nonGreedy: false 925 | }, 926 | raw: '[01]?' 927 | }, { 928 | type: 'charset', 929 | indices: [24, 26], 930 | chars: '', 931 | ranges: [], 932 | classes: ['d'], 933 | exclude: false, 934 | raw: '\\d' 935 | }, { 936 | type: 'charset', 937 | indices: [26, 29], 938 | chars: '', 939 | ranges: [], 940 | classes: ['d'], 941 | exclude: false, 942 | repeat: { 943 | min: 0, 944 | max: 1, 945 | nonGreedy: false 946 | }, 947 | raw: '\\d?' 948 | }] 949 | ], 950 | raw: '2[0-4]\\d|25[0-5]|[01]?\\d\\d?' 951 | }], 952 | indices: [1, 30], 953 | endParenIndex: 29, 954 | raw: '(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)' 955 | }, { 956 | type: 'exact', 957 | chars: '.', 958 | indices: [30, 32], 959 | raw: '\\.' 960 | }], 961 | indices: [0, 36], 962 | endParenIndex: 32, 963 | repeat: { 964 | min: 3, 965 | max: 3, 966 | nonGreedy: false 967 | }, 968 | raw: '((2[0-4]\\d|25[0-5]|[01]?\\d\\d?)\\.){3}' 969 | }, { 970 | type: 'group', 971 | num: 3, 972 | sub: [{ 973 | type: 'choice', 974 | indices: [37, 64], 975 | branches: [ 976 | [{ 977 | type: 'exact', 978 | indices: [37, 38], 979 | raw: '2', 980 | chars: '2' 981 | }, { 982 | type: 'charset', 983 | indices: [38, 43], 984 | classes: [], 985 | ranges: ['04'], 986 | chars: '', 987 | raw: '[0-4]' 988 | }, { 989 | type: 'charset', 990 | indices: [43, 45], 991 | chars: '', 992 | ranges: [], 993 | classes: ['d'], 994 | exclude: false, 995 | raw: '\\d' 996 | }], 997 | [{ 998 | type: 'exact', 999 | indices: [46, 48], 1000 | raw: '25', 1001 | chars: '25' 1002 | }, { 1003 | type: 'charset', 1004 | indices: [48, 53], 1005 | classes: [], 1006 | ranges: ['05'], 1007 | chars: '', 1008 | raw: '[0-5]' 1009 | }], 1010 | [{ 1011 | type: 'charset', 1012 | indices: [54, 59], 1013 | classes: [], 1014 | ranges: [], 1015 | chars: '01', 1016 | repeat: { 1017 | min: 0, 1018 | max: 1, 1019 | nonGreedy: false 1020 | }, 1021 | raw: '[01]?' 1022 | }, { 1023 | type: 'charset', 1024 | indices: [59, 61], 1025 | chars: '', 1026 | ranges: [], 1027 | classes: ['d'], 1028 | exclude: false, 1029 | raw: '\\d' 1030 | }, { 1031 | type: 'charset', 1032 | indices: [61, 64], 1033 | chars: '', 1034 | ranges: [], 1035 | classes: ['d'], 1036 | exclude: false, 1037 | repeat: { 1038 | min: 0, 1039 | max: 1, 1040 | nonGreedy: false 1041 | }, 1042 | raw: '\\d?' 1043 | }] 1044 | ], 1045 | raw: '2[0-4]\\d|25[0-5]|[01]?\\d\\d?' 1046 | }], 1047 | indices: [36, 65], 1048 | endParenIndex: 64, 1049 | raw: '(2[0-4]\\d|25[0-5]|[01]?\\d\\d?)' 1050 | }] 1051 | }, { 1052 | raw: '\\w+([-+.]\\w+)*@\\w+([-.]\\w+)*\\.\\w+([-.]\\w+)*', 1053 | groupCount: 3, 1054 | tree: [{ 1055 | type: 'charset', 1056 | indices: [0, 3], 1057 | chars: '', 1058 | ranges: [], 1059 | classes: ['w'], 1060 | exclude: false, 1061 | repeat: { 1062 | min: 1, 1063 | max: Infinity, 1064 | nonGreedy: false 1065 | }, 1066 | raw: '\\w+' 1067 | }, { 1068 | type: 'group', 1069 | num: 1, 1070 | sub: [{ 1071 | type: 'charset', 1072 | indices: [4, 9], 1073 | classes: [], 1074 | ranges: [], 1075 | chars: '-+.', 1076 | raw: '[-+.]' 1077 | }, { 1078 | type: 'charset', 1079 | indices: [9, 12], 1080 | chars: '', 1081 | ranges: [], 1082 | classes: ['w'], 1083 | exclude: false, 1084 | repeat: { 1085 | min: 1, 1086 | max: Infinity, 1087 | nonGreedy: false 1088 | }, 1089 | raw: '\\w+' 1090 | }], 1091 | indices: [3, 14], 1092 | endParenIndex: 12, 1093 | repeat: { 1094 | min: 0, 1095 | max: Infinity, 1096 | nonGreedy: false 1097 | }, 1098 | raw: '([-+.]\\w+)*' 1099 | }, { 1100 | type: 'exact', 1101 | indices: [14, 15], 1102 | raw: '@', 1103 | chars: '@' 1104 | }, { 1105 | type: 'charset', 1106 | indices: [15, 18], 1107 | chars: '', 1108 | ranges: [], 1109 | classes: ['w'], 1110 | exclude: false, 1111 | repeat: { 1112 | min: 1, 1113 | max: Infinity, 1114 | nonGreedy: false 1115 | }, 1116 | raw: '\\w+' 1117 | }, { 1118 | type: 'group', 1119 | num: 2, 1120 | sub: [{ 1121 | type: 'charset', 1122 | indices: [19, 23], 1123 | classes: [], 1124 | ranges: [], 1125 | chars: '-.', 1126 | raw: '[-.]' 1127 | }, { 1128 | type: 'charset', 1129 | indices: [23, 26], 1130 | chars: '', 1131 | ranges: [], 1132 | classes: ['w'], 1133 | exclude: false, 1134 | repeat: { 1135 | min: 1, 1136 | max: Infinity, 1137 | nonGreedy: false 1138 | }, 1139 | raw: '\\w+' 1140 | }], 1141 | indices: [18, 28], 1142 | endParenIndex: 26, 1143 | repeat: { 1144 | min: 0, 1145 | max: Infinity, 1146 | nonGreedy: false 1147 | }, 1148 | raw: '([-.]\\w+)*' 1149 | }, { 1150 | type: 'exact', 1151 | chars: '.', 1152 | indices: [28, 30], 1153 | raw: '\\.' 1154 | }, { 1155 | type: 'charset', 1156 | indices: [30, 33], 1157 | chars: '', 1158 | ranges: [], 1159 | classes: ['w'], 1160 | exclude: false, 1161 | repeat: { 1162 | min: 1, 1163 | max: Infinity, 1164 | nonGreedy: false 1165 | }, 1166 | raw: '\\w+' 1167 | }, { 1168 | type: 'group', 1169 | num: 3, 1170 | sub: [{ 1171 | type: 'charset', 1172 | indices: [34, 38], 1173 | classes: [], 1174 | ranges: [], 1175 | chars: '-.', 1176 | raw: '[-.]' 1177 | }, { 1178 | type: 'charset', 1179 | indices: [38, 41], 1180 | chars: '', 1181 | ranges: [], 1182 | classes: ['w'], 1183 | exclude: false, 1184 | repeat: { 1185 | min: 1, 1186 | max: Infinity, 1187 | nonGreedy: false 1188 | }, 1189 | raw: '\\w+' 1190 | }], 1191 | indices: [33, 43], 1192 | endParenIndex: 41, 1193 | repeat: { 1194 | min: 0, 1195 | max: Infinity, 1196 | nonGreedy: false 1197 | }, 1198 | raw: '([-.]\\w+)*' 1199 | }] 1200 | }, { 1201 | raw: 'a{1,2}{}', 1202 | groupCount: 0, 1203 | tree: [{ 1204 | type: 'exact', 1205 | repeat: { 1206 | min: 1, 1207 | max: 2, 1208 | nonGreedy: false 1209 | }, 1210 | chars: 'a', 1211 | indices: [0, 6], 1212 | raw: 'a{1,2}' 1213 | }, { 1214 | type: 'exact', 1215 | indices: [6, 8], 1216 | raw: '{}', 1217 | chars: '{}' 1218 | }] 1219 | }, { 1220 | raw: 'a{1,2}{1,2,4}', 1221 | groupCount: 0, 1222 | tree: [{ 1223 | type: 'exact', 1224 | repeat: { 1225 | min: 1, 1226 | max: 2, 1227 | nonGreedy: false 1228 | }, 1229 | chars: 'a', 1230 | indices: [0, 6], 1231 | raw: 'a{1,2}' 1232 | }, { 1233 | type: 'exact', 1234 | indices: [6, 13], 1235 | raw: '{1,2,4}', 1236 | chars: '{1,2,4}' 1237 | }] 1238 | }, { 1239 | raw: 'a{1,2}{{4}', 1240 | groupCount: 0, 1241 | tree: [{ 1242 | type: 'exact', 1243 | repeat: { 1244 | min: 1, 1245 | max: 2, 1246 | nonGreedy: false 1247 | }, 1248 | chars: 'a', 1249 | indices: [0, 6], 1250 | raw: 'a{1,2}' 1251 | }, { 1252 | type: 'exact', 1253 | repeat: { 1254 | min: 4, 1255 | max: 4, 1256 | nonGreedy: false 1257 | }, 1258 | chars: '{', 1259 | indices: [6, 10], 1260 | raw: '{{4}' 1261 | }] 1262 | }, { 1263 | raw: 'a+{1,{4}', 1264 | groupCount: 0, 1265 | tree: [{ 1266 | type: 'exact', 1267 | repeat: { 1268 | min: 1, 1269 | max: Infinity, 1270 | nonGreedy: false 1271 | }, 1272 | chars: 'a', 1273 | indices: [0, 2], 1274 | raw: 'a+' 1275 | }, { 1276 | type: 'exact', 1277 | indices: [2, 4], 1278 | raw: '{1', 1279 | chars: '{1' 1280 | }, { 1281 | type: 'exact', 1282 | repeat: { 1283 | min: 4, 1284 | max: 4, 1285 | nonGreedy: false 1286 | }, 1287 | chars: ',', 1288 | indices: [4, 8], 1289 | raw: ',{4}' 1290 | }] 1291 | }, { 1292 | raw: '<(.*)(.*)>.*<\\/\\1>|<(.*) \\/>', 1293 | groupCount: 3, 1294 | tree: [{ 1295 | type: 'choice', 1296 | indices: [0, 28], 1297 | branches: [ 1298 | [{ 1299 | type: 'exact', 1300 | indices: [0, 1], 1301 | raw: '<', 1302 | chars: '<' 1303 | }, { 1304 | type: 'group', 1305 | num: 1, 1306 | sub: [{ 1307 | type: 'dot', 1308 | indices: [2, 4], 1309 | repeat: { 1310 | min: 0, 1311 | max: Infinity, 1312 | nonGreedy: false 1313 | }, 1314 | raw: '.*' 1315 | }], 1316 | indices: [1, 5], 1317 | endParenIndex: 4, 1318 | raw: '(.*)' 1319 | }, { 1320 | type: 'group', 1321 | num: 2, 1322 | sub: [{ 1323 | type: 'dot', 1324 | indices: [6, 8], 1325 | repeat: { 1326 | min: 0, 1327 | max: Infinity, 1328 | nonGreedy: false 1329 | }, 1330 | raw: '.*' 1331 | }], 1332 | indices: [5, 9], 1333 | endParenIndex: 8, 1334 | raw: '(.*)' 1335 | }, { 1336 | type: 'exact', 1337 | indices: [9, 10], 1338 | raw: '>', 1339 | chars: '>' 1340 | }, { 1341 | type: 'dot', 1342 | indices: [10, 12], 1343 | repeat: { 1344 | min: 0, 1345 | max: Infinity, 1346 | nonGreedy: false 1347 | }, 1348 | raw: '.*' 1349 | }, { 1350 | type: 'exact', 1351 | indices: [12, 13], 1352 | raw: '<', 1353 | chars: '<' 1354 | }, { 1355 | type: 'exact', 1356 | chars: '/', 1357 | indices: [13, 15], 1358 | raw: '\\/' 1359 | }, { 1360 | type: 'backref', 1361 | indices: [15, 17], 1362 | num: 1, 1363 | raw: '\\1' 1364 | }, { 1365 | type: 'exact', 1366 | indices: [17, 18], 1367 | raw: '>', 1368 | chars: '>' 1369 | }], 1370 | [{ 1371 | type: 'exact', 1372 | indices: [19, 20], 1373 | raw: '<', 1374 | chars: '<' 1375 | }, { 1376 | type: 'group', 1377 | num: 3, 1378 | sub: [{ 1379 | type: 'dot', 1380 | indices: [21, 23], 1381 | repeat: { 1382 | min: 0, 1383 | max: Infinity, 1384 | nonGreedy: false 1385 | }, 1386 | raw: '.*' 1387 | }], 1388 | indices: [20, 24], 1389 | endParenIndex: 23, 1390 | raw: '(.*)' 1391 | }, { 1392 | type: 'exact', 1393 | indices: [24, 25], 1394 | raw: ' ', 1395 | chars: ' ' 1396 | }, { 1397 | type: 'exact', 1398 | chars: '/', 1399 | indices: [25, 27], 1400 | raw: '\\/' 1401 | }, { 1402 | type: 'exact', 1403 | indices: [27, 28], 1404 | raw: '>', 1405 | chars: '>' 1406 | }] 1407 | ], 1408 | raw: '<(.*)(.*)>.*<\\/\\1>|<(.*) \\/>' 1409 | }] 1410 | }, { 1411 | raw: '(?=^.{8,}$)(?=.*\\d)(?=.*\\W+)(?=.*[A-Z])(?=.*[a-z])(?!.*\\n).*$', 1412 | groupCount: 0, 1413 | tree: [{ 1414 | type: 'assert', 1415 | num: undefined, 1416 | sub: [{ 1417 | type: 'assert', 1418 | indices: [3, 4], 1419 | assertionType: 'AssertBegin', 1420 | raw: '^' 1421 | }, { 1422 | type: 'dot', 1423 | indices: [4, 9], 1424 | repeat: { 1425 | min: 8, 1426 | max: Infinity, 1427 | nonGreedy: false 1428 | }, 1429 | raw: '.{8,}' 1430 | }, { 1431 | type: 'assert', 1432 | indices: [9, 10], 1433 | assertionType: 'AssertEnd', 1434 | raw: '$' 1435 | }], 1436 | indices: [0, 11], 1437 | assertionType: 'AssertLookahead', 1438 | endParenIndex: 10, 1439 | raw: '(?=^.{8,}$)' 1440 | }, { 1441 | type: 'assert', 1442 | num: undefined, 1443 | sub: [{ 1444 | type: 'dot', 1445 | indices: [14, 16], 1446 | repeat: { 1447 | min: 0, 1448 | max: Infinity, 1449 | nonGreedy: false 1450 | }, 1451 | raw: '.*' 1452 | }, { 1453 | type: 'charset', 1454 | indices: [16, 18], 1455 | chars: '', 1456 | ranges: [], 1457 | classes: ['d'], 1458 | exclude: false, 1459 | raw: '\\d' 1460 | }], 1461 | indices: [11, 19], 1462 | assertionType: 'AssertLookahead', 1463 | endParenIndex: 18, 1464 | raw: '(?=.*\\d)' 1465 | }, { 1466 | type: 'assert', 1467 | num: undefined, 1468 | sub: [{ 1469 | type: 'dot', 1470 | indices: [22, 24], 1471 | repeat: { 1472 | min: 0, 1473 | max: Infinity, 1474 | nonGreedy: false 1475 | }, 1476 | raw: '.*' 1477 | }, { 1478 | type: 'charset', 1479 | indices: [24, 27], 1480 | chars: '', 1481 | ranges: [], 1482 | classes: ['W'], 1483 | exclude: false, 1484 | repeat: { 1485 | min: 1, 1486 | max: Infinity, 1487 | nonGreedy: false 1488 | }, 1489 | raw: '\\W+' 1490 | }], 1491 | indices: [19, 28], 1492 | assertionType: 'AssertLookahead', 1493 | endParenIndex: 27, 1494 | raw: '(?=.*\\W+)' 1495 | }, { 1496 | type: 'assert', 1497 | num: undefined, 1498 | sub: [{ 1499 | type: 'dot', 1500 | indices: [31, 33], 1501 | repeat: { 1502 | min: 0, 1503 | max: Infinity, 1504 | nonGreedy: false 1505 | }, 1506 | raw: '.*' 1507 | }, { 1508 | type: 'charset', 1509 | indices: [33, 38], 1510 | classes: [], 1511 | ranges: ['AZ'], 1512 | chars: '', 1513 | raw: '[A-Z]' 1514 | }], 1515 | indices: [28, 39], 1516 | assertionType: 'AssertLookahead', 1517 | endParenIndex: 38, 1518 | raw: '(?=.*[A-Z])' 1519 | }, { 1520 | type: 'assert', 1521 | num: undefined, 1522 | sub: [{ 1523 | type: 'dot', 1524 | indices: [42, 44], 1525 | repeat: { 1526 | min: 0, 1527 | max: Infinity, 1528 | nonGreedy: false 1529 | }, 1530 | raw: '.*' 1531 | }, { 1532 | type: 'charset', 1533 | indices: [44, 49], 1534 | classes: [], 1535 | ranges: ['az'], 1536 | chars: '', 1537 | raw: '[a-z]' 1538 | }], 1539 | indices: [39, 50], 1540 | assertionType: 'AssertLookahead', 1541 | endParenIndex: 49, 1542 | raw: '(?=.*[a-z])' 1543 | }, { 1544 | type: 'assert', 1545 | num: undefined, 1546 | sub: [{ 1547 | type: 'dot', 1548 | indices: [53, 55], 1549 | repeat: { 1550 | min: 0, 1551 | max: Infinity, 1552 | nonGreedy: false 1553 | }, 1554 | raw: '.*' 1555 | }, { 1556 | type: 'exact', 1557 | chars: '\n', 1558 | indices: [55, 57], 1559 | raw: '\\n' 1560 | }], 1561 | indices: [50, 58], 1562 | assertionType: 'AssertNegativeLookahead', 1563 | endParenIndex: 57, 1564 | raw: '(?!.*\\n)' 1565 | }, { 1566 | type: 'dot', 1567 | indices: [58, 60], 1568 | repeat: { 1569 | min: 0, 1570 | max: Infinity, 1571 | nonGreedy: false 1572 | }, 1573 | raw: '.*' 1574 | }, { 1575 | type: 'assert', 1576 | indices: [60, 61], 1577 | assertionType: 'AssertEnd', 1578 | raw: '$' 1579 | }] 1580 | }, { 1581 | raw: '(\\d{4}|\\d{2})-((1[0-2])|(0?[1-9]))-(([12][0-9])|(3[01])|(0?[1-9]))', 1582 | groupCount: 8, 1583 | tree: [{ 1584 | type: 'group', 1585 | num: 1, 1586 | sub: [{ 1587 | type: 'choice', 1588 | indices: [1, 12], 1589 | branches: [ 1590 | [{ 1591 | type: 'charset', 1592 | indices: [1, 6], 1593 | chars: '', 1594 | ranges: [], 1595 | classes: ['d'], 1596 | exclude: false, 1597 | repeat: { 1598 | min: 4, 1599 | max: 4, 1600 | nonGreedy: false 1601 | }, 1602 | raw: '\\d{4}' 1603 | }], 1604 | [{ 1605 | type: 'charset', 1606 | indices: [7, 12], 1607 | chars: '', 1608 | ranges: [], 1609 | classes: ['d'], 1610 | exclude: false, 1611 | repeat: { 1612 | min: 2, 1613 | max: 2, 1614 | nonGreedy: false 1615 | }, 1616 | raw: '\\d{2}' 1617 | }] 1618 | ], 1619 | raw: '\\d{4}|\\d{2}' 1620 | }], 1621 | indices: [0, 13], 1622 | endParenIndex: 12, 1623 | raw: '(\\d{4}|\\d{2})' 1624 | }, { 1625 | type: 'exact', 1626 | indices: [13, 14], 1627 | raw: '-', 1628 | chars: '-' 1629 | }, { 1630 | type: 'group', 1631 | num: 2, 1632 | sub: [{ 1633 | type: 'choice', 1634 | indices: [15, 33], 1635 | branches: [ 1636 | [{ 1637 | type: 'group', 1638 | num: 3, 1639 | sub: [{ 1640 | type: 'exact', 1641 | indices: [16, 17], 1642 | raw: '1', 1643 | chars: '1' 1644 | }, { 1645 | type: 'charset', 1646 | indices: [17, 22], 1647 | classes: [], 1648 | ranges: ['02'], 1649 | chars: '', 1650 | raw: '[0-2]' 1651 | }], 1652 | indices: [15, 23], 1653 | endParenIndex: 22, 1654 | raw: '(1[0-2])' 1655 | }], 1656 | [{ 1657 | type: 'group', 1658 | num: 4, 1659 | sub: [{ 1660 | type: 'exact', 1661 | repeat: { 1662 | min: 0, 1663 | max: 1, 1664 | nonGreedy: false 1665 | }, 1666 | chars: '0', 1667 | indices: [25, 27], 1668 | raw: '0?' 1669 | }, { 1670 | type: 'charset', 1671 | indices: [27, 32], 1672 | classes: [], 1673 | ranges: ['19'], 1674 | chars: '', 1675 | raw: '[1-9]' 1676 | }], 1677 | indices: [24, 33], 1678 | endParenIndex: 32, 1679 | raw: '(0?[1-9])' 1680 | }] 1681 | ], 1682 | raw: '(1[0-2])|(0?[1-9])' 1683 | }], 1684 | indices: [14, 34], 1685 | endParenIndex: 33, 1686 | raw: '((1[0-2])|(0?[1-9]))' 1687 | }, { 1688 | type: 'exact', 1689 | indices: [34, 35], 1690 | raw: '-', 1691 | chars: '-' 1692 | }, { 1693 | type: 'group', 1694 | num: 5, 1695 | sub: [{ 1696 | type: 'choice', 1697 | indices: [36, 65], 1698 | branches: [ 1699 | [{ 1700 | type: 'group', 1701 | num: 6, 1702 | sub: [{ 1703 | type: 'charset', 1704 | indices: [37, 41], 1705 | classes: [], 1706 | ranges: [], 1707 | chars: '12', 1708 | raw: '[12]' 1709 | }, { 1710 | type: 'charset', 1711 | indices: [41, 46], 1712 | classes: [], 1713 | ranges: ['09'], 1714 | chars: '', 1715 | raw: '[0-9]' 1716 | }], 1717 | indices: [36, 47], 1718 | endParenIndex: 46, 1719 | raw: '([12][0-9])' 1720 | }], 1721 | [{ 1722 | type: 'group', 1723 | num: 7, 1724 | sub: [{ 1725 | type: 'exact', 1726 | indices: [49, 50], 1727 | raw: '3', 1728 | chars: '3' 1729 | }, { 1730 | type: 'charset', 1731 | indices: [50, 54], 1732 | classes: [], 1733 | ranges: [], 1734 | chars: '01', 1735 | raw: '[01]' 1736 | }], 1737 | indices: [48, 55], 1738 | endParenIndex: 54, 1739 | raw: '(3[01])' 1740 | }], 1741 | [{ 1742 | type: 'group', 1743 | num: 8, 1744 | sub: [{ 1745 | type: 'exact', 1746 | repeat: { 1747 | min: 0, 1748 | max: 1, 1749 | nonGreedy: false 1750 | }, 1751 | chars: '0', 1752 | indices: [57, 59], 1753 | raw: '0?' 1754 | }, { 1755 | type: 'charset', 1756 | indices: [59, 64], 1757 | classes: [], 1758 | ranges: ['19'], 1759 | chars: '', 1760 | raw: '[1-9]' 1761 | }], 1762 | indices: [56, 65], 1763 | endParenIndex: 64, 1764 | raw: '(0?[1-9])' 1765 | }] 1766 | ], 1767 | raw: '([12][0-9])|(3[01])|(0?[1-9])' 1768 | }], 1769 | indices: [35, 66], 1770 | endParenIndex: 65, 1771 | raw: '(([12][0-9])|(3[01])|(0?[1-9]))' 1772 | }] 1773 | }, { 1774 | raw: '[\\u4e00-\\u9fa5]', 1775 | groupCount: 0, 1776 | tree: [{ 1777 | type: 'charset', 1778 | indices: [0, 15], 1779 | classes: [], 1780 | ranges: ['\u4e00\u9fa5'], 1781 | chars: '', 1782 | raw: '[\\u4e00-\\u9fa5]' 1783 | }] 1784 | }]; 1785 | 1786 | 1787 | 1788 | 1789 | return { 1790 | expectedFail: expectedFail, 1791 | expectedPass: expectedPass, 1792 | re2ast: re2ast, 1793 | reMatchCases:reMatchCases 1794 | }; 1795 | 1796 | 1797 | }); 1798 | --------------------------------------------------------------------------------