├── .github
    └── workflows
    │   └── nodejs.yml
├── .gitignore
├── README.md
├── __tests__
    ├── parser.test.js
    └── regex.test.js
├── package-lock.json
├── package.json
└── src
    ├── index.js
    ├── nfa.js
    ├── parser.js
    ├── parser2.js
    └── regex.js


/.github/workflows/nodejs.yml:
--------------------------------------------------------------------------------
 1 | name: Node CI
 2 | 
 3 | on: [push, pull_request]
 4 | 
 5 | jobs:
 6 |   build:
 7 | 
 8 |     runs-on: ubuntu-latest
 9 | 
10 |     strategy:
11 |       matrix:
12 |         node-version: [18.x, 20.x, 21.x]
13 | 
14 |     steps:
15 |     - uses: actions/checkout@v1
16 |     - name: Use Node.js ${{ matrix.node-version }}
17 |       uses: actions/setup-node@v1
18 |       with:
19 |         node-version: ${{ matrix.node-version }}
20 |     - name: npm install, build, and test
21 |       run: |
22 |         npm ci
23 |         npm test
24 |       env:
25 |         CI: true
26 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules/


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # regexjs
 2 | 
 3 | [![Build Status](https://github.com/deniskyashif/regexjs/workflows/Node%20CI/badge.svg)](https://github.com/deniskyashif/regexjs/actions?query=workflow%3A%22Node+CI%22)
 4 | 
 5 | A regular expression engine implementation in JavaScript. It supports concatenation, union (|), zero-or-more (\*), one-or-more (+), and zero-or-one (?) operations as well as grouping. It follows Ken Thompson's algorithm for constructing an NFA from a regular expression.
 6 | 
 7 | Check out my [blog post](https://deniskyashif.com/2019/02/17/implementing-a-regular-expression-engine/) for the complete implementation details.
 8 | 
 9 | ### Example
10 | ```javascript
11 | const { createMatcher } = require('./src/regex');
12 | const match = createMatcher('(a|b)*c');
13 | 
14 | match('ac'); // true
15 | match('abc'); // true
16 | match('aabababbbc'); // true
17 | match('aaaab'); // false
18 | ```
19 | 
20 | ### Try It
21 | ```
22 | git clone https://github.com/deniskyashif/regexjs.git
23 | cd regexjs
24 | npm i
25 | npm start
26 | ```
27 | 
28 | ### Run the tests
29 | `npm t`
30 | 


--------------------------------------------------------------------------------
/__tests__/parser.test.js:
--------------------------------------------------------------------------------
 1 | const {
 2 |     insertExplicitConcatOperator,
 3 |     toPostfix
 4 | } = require('../src/parser');
 5 | 
 6 | describe('insertExplicitConcatSymbol tests', () => {
 7 |     test('call with "" should return ""', () => {
 8 |         expect(insertExplicitConcatOperator('')).toEqual('');
 9 |     });
10 | 
11 |     test('call with "a*" should return "a*"', () => {
12 |         expect(insertExplicitConcatOperator('a*')).toEqual('a*');
13 |     });
14 | 
15 |     test('call with "a|b" should return "a|b"', () => {
16 |         expect(insertExplicitConcatOperator('a|b')).toEqual('a|b');
17 |     });
18 | 
19 |     test('call with "ab" should return "a.b"', () => {
20 |         expect(insertExplicitConcatOperator('ab')).toEqual('a.b');
21 |     });
22 | 
23 |     test('call with "abcabc" should return "a.b.c.a.b.c"', () => {
24 |         expect(insertExplicitConcatOperator('abcabc')).toEqual('a.b.c.a.b.c');
25 |     });
26 | 
27 |     test('call with "ab*" should return "a.b*"', () => {
28 |         expect(insertExplicitConcatOperator('ab*')).toEqual('a.b*');
29 |     });
30 | 
31 |     test('call with "ab*" should return "a*b*"', () => {
32 |         expect(insertExplicitConcatOperator('a*b*')).toEqual('a*.b*');
33 |     });
34 | 
35 |     test('call with "ab*c" should return "a.b*.c"', () => {
36 |         expect(insertExplicitConcatOperator('ab*c')).toEqual('a.b*.c');
37 |     });
38 | 
39 |     test('call with "ab*(cdd)" should return "a.b*.(c.d.d)"', () => {
40 |         expect(insertExplicitConcatOperator('ab*(cdd)')).toEqual('a.b*.(c.d.d)');
41 |     });
42 | 
43 |     test('call with "(a|b)*c" should return "(a|b)*.c"', () => {
44 |         expect(insertExplicitConcatOperator('(a|b)*c')).toEqual('(a|b)*.c');
45 |     });
46 | });
47 | 
// Verifies the infix -> postfix conversion. Inputs already carry the explicit
// concatenation operator '.'.
describe('toPostfix tests', () => {
    test('call with "" should return ""', () => {
        expect(toPostfix('')).toEqual('');
    });

    test('call with "a" should return "a"', () => {
        expect(toPostfix('a')).toEqual('a');
    });

    test('call with "a.b" should return "ab."', () => {
        expect(toPostfix('a.b')).toEqual('ab.');
    });

    test('call with "a*" should return "a*"', () => {
        expect(toPostfix('a*')).toEqual('a*');
    });

    test('call with "a*.b" should return "a*b."', () => {
        expect(toPostfix('a*.b')).toEqual('a*b.');
    });

    test('call with "a.b|c.d" should return "ab.cd.|"', () => {
        expect(toPostfix('a.b|c.d')).toEqual('ab.cd.|');
    });

    test('call with "a|b*" should return "ab*|"', () => {
        expect(toPostfix('a|b*')).toEqual('ab*|');
    });

    test('call with "a.(b|c)*.d" should return "abc|*.d."', () => {
        expect(toPostfix('a.(b|c)*.d')).toEqual('abc|*.d.');
    });

    test('call with "((a.b))" should return "ab."', () => {
        expect(toPostfix('((a.b))')).toEqual('ab.');
    });

    test('call with "((a.b)*)" should return "ab.*"', () => {
        expect(toPostfix('((a.b)*)')).toEqual('ab.*');
    });

    test('call with "(a|b)*.c.d" should return "ab|*c.d."', () => {
        expect(toPostfix('(a|b)*.c.d')).toEqual('ab|*c.d.');
    });
});
93 | 
94 | 


--------------------------------------------------------------------------------
/__tests__/regex.test.js:
--------------------------------------------------------------------------------
  1 | const { createMatcher } = require('../src/regex');
  2 | 
  3 | describe('createMatcher tests', () => {
  4 |     test('from empty string should recognize only empty string', () => {
  5 |         const match = createMatcher('');
  6 |         expect(match('')).toBeTruthy();
  7 |         expect(match('a')).toBeFalsy();
  8 |         expect(match(' ab')).toBeFalsy();
  9 |     });
 10 | 
 11 |     test('from a should recognize strings of arbitrary number of a', () => {
 12 |         const match = createMatcher('a');
 13 |         expect(match('')).toBeFalsy();
 14 |         expect(match('a')).toBeTruthy();
 15 |         expect(match('aaa')).toBeFalsy();
 16 |     });
 17 | 
 18 |     test('from a* should recognize strings of arbitrary number of a\'s', () => {
 19 |         const match = createMatcher('a*');
 20 |         expect(match('')).toBeTruthy();
 21 |         expect(match('aaaa')).toBeTruthy();
 22 |         expect(match('aa')).toBeTruthy();
 23 |         expect(match('aba')).toBeFalsy();
 24 |     });
 25 | 
 26 |     test('from a? should recognize strings of exactly one or zero number of a\'s', () => {
 27 |         const match = createMatcher('a?');
 28 |         expect(match('')).toBeTruthy();
 29 |         expect(match('a')).toBeTruthy();
 30 |         expect(match('aa')).toBeFalsy();
 31 |         expect(match('aaa')).toBeFalsy();
 32 |         expect(match('aba')).toBeFalsy();
 33 |         expect(match('b')).toBeFalsy();
 34 |     });
 35 | 
 36 |     test('from a+ should recognize strings of one or more number of a\'s', () => {
 37 |         const match = createMatcher('a+');
 38 |         expect(match('')).toBeFalsy();
 39 |         expect(match('a')).toBeTruthy();
 40 |         expect(match('aa')).toBeTruthy();
 41 |         expect(match('aaa')).toBeTruthy();
 42 |         expect(match('aba')).toBeFalsy();
 43 |         expect(match('b')).toBeFalsy();
 44 |     });
 45 | 
 46 |     test('from a*b should recognize strings of arbitrary number of a\'s ending with b', () => {
 47 |         const match = createMatcher('a*b');
 48 |         expect(match('')).toBeFalsy();
 49 |         expect(match('aaaab')).toBeTruthy();
 50 |         expect(match('aab')).toBeTruthy();
 51 |         expect(match('b')).toBeTruthy();
 52 |         expect(match('aba')).toBeFalsy();
 53 |     });
 54 | 
 55 |     // regex for all binary numbers divisible by 3
 56 |     test('from "(0|(1(01*(00)*0)*1)*)*" should recognize its language', () => {
 57 |         const match = createMatcher('(0|(1(01*(00)*0)*1)*)*');
 58 |         expect(match('')).toBeTruthy();
 59 |         expect(match('0')).toBeTruthy();
 60 |         expect(match('00')).toBeTruthy();
 61 |         expect(match('11')).toBeTruthy();
 62 |         expect(match('000')).toBeTruthy();
 63 |         expect(match('011')).toBeTruthy();
 64 |         expect(match('110')).toBeTruthy();
 65 |         expect(match('0000')).toBeTruthy();
 66 |         expect(match('0011')).toBeTruthy();
 67 |     });
 68 | 
 69 |     test('from "(a|b)*c" should recognize strings with arbitrary number of a\'s and b\'s ending with c', () => {
 70 |         const match = createMatcher('(a|b)*c');
 71 |         expect(match('c')).toBeTruthy();
 72 |         expect(match('ac')).toBeTruthy();
 73 |         expect(match('ababc')).toBeTruthy();
 74 |         expect(match('bbbc')).toBeTruthy();
 75 |         expect(match('aaaaaaac')).toBeTruthy();
 76 |         expect(match('ac')).toBeTruthy();
 77 |         expect(match('bac')).toBeTruthy();
 78 |         expect(match('abbbbc')).toBeTruthy();
 79 |         expect(match('cc')).toBeFalsy();
 80 |         expect(match('a')).toBeFalsy();
 81 |         expect(match('b')).toBeFalsy();
 82 |         expect(match('ababab')).toBeFalsy();
 83 |     });
 84 | 
 85 |     test('from "abc|def" should recognize strings of abc or def', () => {
 86 |         const match = createMatcher('abc|def');
 87 |         expect(match('abc')).toBeTruthy();
 88 |         expect(match('def')).toBeTruthy();
 89 |         expect(match('ab')).toBeFalsy();
 90 |         expect(match('ef')).toBeFalsy();
 91 |     });
 92 | 
 93 |     test('from "a(b*|c)" should recognize strings starting with a followed by b\'s or a single c', () => {
 94 |         const match = createMatcher('a(b*|c)');
 95 |         expect(match('ac')).toBeTruthy();
 96 |         expect(match('abbbb')).toBeTruthy();
 97 |         expect(match('ab')).toBeTruthy();
 98 |         expect(match('a')).toBeTruthy();
 99 |         expect(match('abc')).toBeFalsy();
100 |         expect(match('acc')).toBeFalsy();
101 |         expect(match('')).toBeFalsy();
102 |     });
103 | });
104 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "name": "regexjs",
 3 |     "version": "1.0.0",
 4 |     "description": "Finite State Machines",
 5 |     "main": "index.js",
 6 |     "engines": {
 7 |         "node": ">=10.11.0"
 8 |     },
 9 |     "scripts": {
10 |         "start": "node src/index",
11 |         "test": "jest"
12 |     },
13 |     "repository": {
14 |         "type": "git",
15 |         "url": "git+https://github.com/deniskyashif/regexjs.git"
16 |     },
17 |     "keywords": [
18 |         "automata",
19 |         "finite state machines",
20 |         "regex"
21 |     ],
22 |     "author": "deniskyashif",
23 |     "license": "MIT",
24 |     "bugs": {
25 |         "url": "https://github.com/deniskyashif/regexjs/issues"
26 |     },
27 |     "homepage": "https://github.com/deniskyashif/regexjs#readme",
28 |     "devDependencies": {
29 |         "jest": "^24.0.0"
30 |     },
31 |     "dependencies": {
32 |         "antlr4": "^4.7.2"
33 |     }
34 | }
35 | 


--------------------------------------------------------------------------------
/src/index.js:
--------------------------------------------------------------------------------
 1 | const { createMatcher } = require('./regex');
 2 | const readline = require('readline');
 3 | 
 4 | const match = createMatcher('(a|b)*c');
 5 | 
 6 | const rl = readline.createInterface({
 7 |     input: process.stdin,
 8 |     output: process.stdout
 9 | });
10 | 
11 | rl.question(`Pattern: `, (pattern) => {
12 |     const match = createMatcher(pattern);
13 | 
14 |     console.log('Check words: ');
15 | 
16 |     rl.on('line', (input) => {
17 |         console.log(`Match? ${match(input)}`);
18 |     });
19 | });
20 | 


--------------------------------------------------------------------------------
/src/nfa.js:
--------------------------------------------------------------------------------
  1 | /*
  2 |   Thompson NFA Construction and Search.
  3 | */
  4 | 
  5 | /*
  6 |   A state in Thompson's NFA can either have 
  7 |    - a single symbol transition to a state
  8 |     or
  9 |    - up to two epsilon transitions to another states
 10 |   but not both.   
 11 | */
 12 | function createState(isEnd) {
 13 |     return {
 14 |         isEnd,
 15 |         transition: {},
 16 |         epsilonTransitions: []
 17 |     };
 18 | }
 19 | 
 20 | function addEpsilonTransition(from, to) {
 21 |     from.epsilonTransitions.push(to);
 22 | }
 23 | 
 24 | /*
 25 |   Thompson's NFA state can have only one transition to another state for a given symbol.
 26 | */
 27 | function addTransition(from, to, symbol) {
 28 |     from.transition[symbol] = to;
 29 | }
 30 | 
 31 | /*
 32 |   Construct an NFA that recognizes only the empty string.
 33 | */
 34 | function fromEpsilon() {
 35 |     const start = createState(false);
 36 |     const end = createState(true);
 37 |     addEpsilonTransition(start, end);
 38 | 
 39 |     return { start, end };
 40 | }
 41 | 
 42 | /* 
 43 |    Construct an NFA that recognizes only a single character string.
 44 | */
 45 | function fromSymbol(symbol) {
 46 |     const start = createState(false);
 47 |     const end = createState(true);
 48 |     addTransition(start, end, symbol);
 49 | 
 50 |     return { start, end };
 51 | }
 52 | 
 53 | /* 
 54 |    Concatenates two NFAs.
 55 | */
 56 | function concat(first, second) {
 57 |     addEpsilonTransition(first.end, second.start);
 58 |     first.end.isEnd = false;
 59 | 
 60 |     return { start: first.start, end: second.end };
 61 | }
 62 | 
 63 | /* 
 64 |    Unions two NFAs.
 65 | */
 66 | function union(first, second) {
 67 |     const start = createState(false);
 68 |     addEpsilonTransition(start, first.start);
 69 |     addEpsilonTransition(start, second.start);
 70 | 
 71 |     const end = createState(true);
 72 | 
 73 |     addEpsilonTransition(first.end, end);
 74 |     first.end.isEnd = false;
 75 |     addEpsilonTransition(second.end, end);
 76 |     second.end.isEnd = false;
 77 | 
 78 |     return { start, end };
 79 | }
 80 | 
 81 | 
 82 | /* 
 83 |    Apply Closure (Kleene's Star) on an NFA.
 84 | */
 85 | function closure(nfa) {
 86 |     const start = createState(false);
 87 |     const end = createState(true);
 88 | 
 89 |     addEpsilonTransition(start, end);
 90 |     addEpsilonTransition(start, nfa.start);
 91 | 
 92 |     addEpsilonTransition(nfa.end, end);
 93 |     addEpsilonTransition(nfa.end, nfa.start);
 94 |     nfa.end.isEnd = false;
 95 | 
 96 |     return { start, end };
 97 | }
 98 | 
 99 | /*
100 |     Zero-or-one of an NFA.
101 | */
102 | 
103 | function zeroOrOne(nfa) {
104 |     const start = createState(false);
105 |     const end = createState(true);
106 | 
107 |     addEpsilonTransition(start, end);
108 |     addEpsilonTransition(start, nfa.start);
109 | 
110 |     addEpsilonTransition(nfa.end, end);
111 |     nfa.end.isEnd = false;
112 | 
113 |     return { start, end };
114 | }
115 | 
/*
    One-or-more (+) of an NFA: the operand is taken at least once and may
    then repeat. The operand is mutated.
*/
function oneOrMore(nfa) {
    const start = createState(false);
    const end = createState(true);

    const epsilonEdges = [
        [start, nfa.start],   // the operand has to be entered
        [nfa.end, end],       // leave after a repetition
        [nfa.end, nfa.start]  // go for another repetition
    ];

    for (const [from, to] of epsilonEdges) {
        addEpsilonTransition(from, to);
    }
    nfa.end.isEnd = false;

    return { start, end };
}
131 | 
/*
  Converts a postfix regular expression into a Thompson NFA.

  Operands are kept on a stack: every operator token pops the fragments it
  applies to and pushes the combined fragment, any other token becomes a
  single-symbol fragment. The fragment left on top of the stack is returned.
*/
function toNFA(postfixExp) {
    if (postfixExp === '') {
        return fromEpsilon();
    }

    const fragments = [];

    for (const token of postfixExp) {
        switch (token) {
        case '*':
            fragments.push(closure(fragments.pop()));
            break;
        case '?':
            fragments.push(zeroOrOne(fragments.pop()));
            break;
        case '+':
            fragments.push(oneOrMore(fragments.pop()));
            break;
        case '|': {
            const right = fragments.pop();
            const left = fragments.pop();
            fragments.push(union(left, right));
            break;
        }
        case '.': {
            const right = fragments.pop();
            const left = fragments.pop();
            fragments.push(concat(left, right));
            break;
        }
        default:
            fragments.push(fromSymbol(token));
        }
    }

    return fragments.pop();
}
164 | 
165 | /*
166 |   Regex to NFA construction using a parse tree.
167 | */
168 | const { toParseTree } = require('./parser2');
169 | 
/*
  Recursively converts a parse tree node into a Thompson NFA.
  The production a node stands for is told apart by its label and its
  number of children. Throws an Error for an unknown label.
*/
function toNFAfromParseTree(root) {
    const { label, children } = root;

    switch (label) {
    case 'Expr': {
        const left = toNFAfromParseTree(children[0]);

        // Expr -> Term '|' Expr, otherwise Expr -> Term
        return children.length === 3
            ? union(left, toNFAfromParseTree(children[2]))
            : left;
    }
    case 'Term': {
        const head = toNFAfromParseTree(children[0]);

        // Term -> Factor Term, otherwise Term -> Factor
        return children.length === 2
            ? concat(head, toNFAfromParseTree(children[1]))
            : head;
    }
    case 'Factor': {
        const operand = toNFAfromParseTree(children[0]);

        if (children.length !== 2) {
            return operand; // Factor -> Atom
        }

        // Factor -> Atom MetaChar
        switch (children[1].label) {
        case '*':
            return closure(operand);
        case '+':
            return oneOrMore(operand);
        case '?':
            return zeroOrOne(operand);
        default:
            return operand;
        }
    }
    case 'Atom':
        // Atom -> '(' Expr ')' keeps the Expr in the middle, Atom -> Char in front
        return toNFAfromParseTree(children[children.length === 3 ? 1 : 0]);
    case 'Char':
        // Char -> '\' AnyChar keeps the symbol second, Char -> AnyCharExceptMeta first
        return fromSymbol(children[children.length === 2 ? 1 : 0].label);
    default:
        throw new Error('Unrecognized node label ' + root.label);
    }
}
218 | 
/*
  Builds an NFA straight from an infix regular expression by parsing it
  into a parse tree first. The empty expression yields the epsilon NFA.
*/
function toNFAFromInfixExp(infixExp) {
    return infixExp === ''
        ? fromEpsilon()
        : toNFAfromParseTree(toParseTree(infixExp));
}
225 | 
/*
  Processes a string through an NFA by recursively (depth-first) traversing
  all the possible paths until a matching one is found.

  The NFA has N states and from each state it can go to at most N states, yet
  there might be up to 2^N possible paths. In the worst case all of them are
  tried before a match is found (or not), resulting in very slow runtimes.

  state    - the state to continue from
  visited  - states already entered at the current input position; guards
             against running in circles through epsilon transitions
  input    - the string to match; read with input[position], i.e. by UTF-16
             code unit
  position - index of the next symbol of `input` to consume

  Always returns a boolean: true if an end state is reachable after the whole
  input has been consumed, false otherwise.
*/
function recursiveBacktrackingSearch(state, visited, input, position) {
    if (visited.includes(state)) {
        return false;
    }

    visited.push(state);

    const followEpsilons = () => state.epsilonTransitions
        .some(s => recursiveBacktrackingSearch(s, visited, input, position));

    if (position === input.length) {
        if (state.isEnd) {
            return true;
        }

        // Previously this path fell through and yielded undefined on failure.
        return followEpsilons();
    }

    const nextState = state.transition[input[position]];

    if (nextState) {
        // A symbol was consumed, so the visited list starts over.
        return recursiveBacktrackingSearch(nextState, [], input, position + 1);
    }

    return followEpsilons();
}
263 | 
/*
   Follows the epsilon transitions of a state until reaching states without
   epsilon transitions, which get added to the set of next states.

   state      - the state to expand
   nextStates - array collecting the reached states
   visited    - array of states already expanded; a state found in it is
                skipped, which keeps epsilon cycles from looping forever
*/
function addNextState(state, nextStates, visited) {
    if (state.epsilonTransitions.length) {
        for (const st of state.epsilonTransitions) {
            if (!visited.includes(st)) {
                visited.push(st);
                addNextState(st, nextStates, visited);
            }
        }
    } else {
        nextStates.push(state);
    }
}

/*
  Processes a string through an NFA. For each input symbol it transitions into
  multiple states at the same time. The string is matched if, after reading the
  last symbol, it has transitioned into at least one end state.

  The visited list is shared by all states processed for one symbol, so every
  state is added to the next set at most once. With a separate list per source
  state the next set collects duplicates and can double with every symbol
  (e.g. for the pattern (a|a)*).

  Returns true if `word` is accepted by `nfa`, false otherwise.
*/
function search(nfa, word) {
    let currentStates = [];
    /* The initial set of current states is either the start state or
       the set of states reachable by epsilon transitions from the start state */
    addNextState(nfa.start, currentStates, [nfa.start]);

    for (const symbol of word) {
        if (currentStates.length === 0) {
            // No state left to continue from: the word cannot match anymore.
            return false;
        }

        const nextStates = [];
        const visited = [];

        for (const state of currentStates) {
            const nextState = state.transition[symbol];

            if (nextState && !visited.includes(nextState)) {
                visited.push(nextState);
                addNextState(nextState, nextStates, visited);
            }
        }

        currentStates = nextStates;
    }

    return currentStates.some(s => s.isEnd);
}
308 | 
/*
  Tells whether `word` is accepted by `nfa`.
  Delegates to search(); recursiveBacktrackingSearch(nfa.start, [], word, 0)
  is the slower alternative implementation.
*/
function recognize(nfa, word) {
    const isAccepted = search(nfa, word);

    return isAccepted;
}
313 | 
314 | module.exports = {
315 |     toNFA,
316 |     toNFAFromInfixExp,
317 |     recognize
318 | };
319 | 


--------------------------------------------------------------------------------
/src/parser.js:
--------------------------------------------------------------------------------
 1 | function insertExplicitConcatOperator(exp) {
 2 |     let output = '';
 3 | 
 4 |     for (let i = 0; i < exp.length; i++) {
 5 |         const token = exp[i];
 6 |         output += token;
 7 | 
 8 |         if (token === '(' || token === '|') {
 9 |             continue;
10 |         }
11 | 
12 |         if (i < exp.length - 1) {
13 |             const lookahead = exp[i + 1];
14 | 
15 |             if (lookahead === '*' || lookahead === '?' || lookahead === '+' || lookahead === '|' || lookahead === ')') {
16 |                 continue;
17 |             }
18 | 
19 |             output += '.';
20 |         }
21 |     }
22 | 
23 |     return output;
24 | };
25 | 
/**
 * Returns the top element of a stack without removing it.
 *
 * @param {Array} stack
 * @returns {*} the last element, or undefined for an empty stack
 */
function peek(stack) {
    return stack.length > 0 ? stack[stack.length - 1] : undefined;
}

// Binding strength of the operators; a higher number binds tighter.
const operatorPrecedence = {
    '|': 0,
    '.': 1,
    '?': 2,
    '*': 2,
    '+': 2
};

/**
 * Converts an infix regular expression with explicit concatenation operators
 * ('.') into postfix notation using an operator stack.
 *
 * @param {string} exp - infix expression, e.g. 'a.(b|c)*'
 * @returns {string} the postfix expression, e.g. 'abc|*.'
 * @throws {Error} when the parentheses in `exp` are not balanced
 */
function toPostfix(exp) {
    let output = '';
    const operatorStack = [];

    for (const token of exp) {
        if (token === '(') {
            operatorStack.push(token);
        } else if (token === ')') {
            // Flush the operators of the group that is being closed.
            while (operatorStack.length && peek(operatorStack) !== '(') {
                output += operatorStack.pop();
            }

            // Without this check an unmatched ')' would loop forever.
            if (!operatorStack.length) {
                throw new Error(`Unmatched ')' in expression "${exp}"`);
            }

            operatorStack.pop();
        } else if (Object.prototype.hasOwnProperty.call(operatorPrecedence, token)) {
            // Operators that bind at least as tight go to the output first.
            while (operatorStack.length && peek(operatorStack) !== '('
                && operatorPrecedence[peek(operatorStack)] >= operatorPrecedence[token]) {
                output += operatorStack.pop();
            }

            operatorStack.push(token);
        } else {
            output += token;
        }
    }

    while (operatorStack.length) {
        const operator = operatorStack.pop();

        if (operator === '(') {
            throw new Error(`Unmatched '(' in expression "${exp}"`);
        }

        output += operator;
    }

    return output;
}
70 | 
71 | module.exports = {
72 |     insertExplicitConcatOperator,
73 |     toPostfix
74 | };
75 | 


--------------------------------------------------------------------------------
/src/parser2.js:
--------------------------------------------------------------------------------
  1 | /* 
  2 |     Recursive descent parser for regular expressions. Implements the following grammar:
  3 | 
  4 |     Expr -> Term | Term '|' Expr
  5 |     Term -> Factor | Factor Term
  6 |     Factor -> Atom | Atom MetaChar
  7 |     Atom -> Char | '(' Expr ')'
  8 |     Char -> AnyCharExceptMeta | '\' AnyChar
  9 |     MetaChar -> '?' | '*' | '+'
 10 | */
 11 | 
 12 | /**
 13 | * @param{string} label
 14 | * @param{TreeNode[]} children
 15 | */
 16 | function TreeNode(label, children) {
 17 |     this.label = label;
 18 |     this.children = children || [];
 19 | }
 20 | 
 21 | let pattern = '';
 22 | let pos = 0;
 23 | 
 24 | const peek = () => pattern[pos];
 25 | const hasMoreChars = () => pos < pattern.length;
 26 | const isMetaChar = ch => ch === '*' || ch === '+' || ch === '?';
 27 | 
 28 | function match(ch) {
 29 |     if (peek() !== ch)
 30 |         throw new Error(`Unexpected symbol ${ch}`);
 31 |     pos++;
 32 | }
 33 | 
 34 | function next() {
 35 |     let ch = peek();
 36 |     match(ch);
 37 | 
 38 |     return ch;
 39 | }
 40 | 
 41 | function expr() {
 42 |     const trm = term();
 43 | 
 44 |     if (hasMoreChars() && peek() === '|') {
 45 |         match('|');
 46 |         const exp = expr();
 47 |         return new TreeNode('Expr', [trm, new TreeNode('|'), exp]);
 48 |     }
 49 | 
 50 |     return new TreeNode('Expr', [trm]);
 51 | }
 52 | 
 53 | function term() {
 54 |     const factr = factor();
 55 | 
 56 |     if (hasMoreChars() && peek() !== ')' && peek() !== '|') {
 57 |         const trm = term();
 58 |         return new TreeNode('Term', [factr, trm]);
 59 |     }
 60 | 
 61 |     return new TreeNode('Term', [factr]);
 62 | }
 63 | 
 64 | function factor() {
 65 |     const atm = atom();
 66 | 
 67 |     if (hasMoreChars() && isMetaChar(peek())) {
 68 |         const meta = next();
 69 |         return new TreeNode('Factor', [atm, new TreeNode(meta)]);
 70 |     }
 71 | 
 72 |     return new TreeNode('Factor', [atm]);
 73 | }
 74 | 
 75 | function atom() {
 76 |     if (peek() === '(') {
 77 |         match('(');
 78 |         const exp = expr();
 79 |         match(')');
 80 |         return new TreeNode('Atom', [new TreeNode('('), exp, new TreeNode(')')]);
 81 |     }
 82 | 
 83 |     const ch = char();
 84 |     return new TreeNode('Atom', [ch]);
 85 | }
 86 | 
 87 | function char() {
 88 |     if (isMetaChar(peek()))
 89 |         throw new Error(`Unexpected meta char ${peek()}`);
 90 | 
 91 |     if (peek() === '\\') {
 92 |         match('\\');
 93 |         return new TreeNode('Char', [new TreeNode('\\'), new TreeNode(next())]);
 94 |     }
 95 | 
 96 |     return new TreeNode('Char', [new TreeNode(next())]);
 97 | }
 98 | 
 99 | function toParseTree(regex) {
100 |     pattern = regex;
101 |     pos = 0;
102 | 
103 |     return expr();
104 | }
105 | 
106 | module.exports = { toParseTree };
107 | 


--------------------------------------------------------------------------------
/src/regex.js:
--------------------------------------------------------------------------------
 1 | const { insertExplicitConcatOperator, toPostfix } = require('./parser');
 2 | const { toNFA, toNFAFromInfixExp, recognize } = require('./nfa');
 3 | 
 4 | function createMatcher(exp) {
 5 |     // Generates an NFA using a stack
 6 |     // const expWithConcatenationOperator = insertExplicitConcatOperator(exp);
 7 |     // const postfixExp = toPostfix(expWithConcatenationOperator);
 8 |     // const nfa = toNFA(postfixExp);
 9 | 
10 |     // Generates an NFA by constructing a parse tree
11 |     // No explicit concatenation operator required
12 |     const nfa = toNFAFromInfixExp(exp);
13 | 
14 |     return word => recognize(nfa, word);
15 | }
16 | 
17 | module.exports = { createMatcher };
18 | 


--------------------------------------------------------------------------------