├── .gitignore
├── LICENSE
├── README.md
├── pom.xml
└── src
    └── main
        └── java
            └── xyz
                └── xindoo
                    └── re
                        ├── Regex.java
                        ├── RegexTest.java
                        ├── common
                        │   ├── Constant.java
                        │   ├── Reader.java
                        │   ├── State.java
                        │   └── StateType.java
                        ├── dfa
                        │   ├── DFAGraph.java
                        │   └── DFAState.java
                        └── nfa
                            ├── NFAGraph.java
                            ├── NFAState.java
                            └── strategy
                                ├── CharMatchStrategy.java
                                ├── CharSetMatchStrategy.java
                                ├── DigitalMatchStrategy.java
                                ├── DotMatchStrategy.java
                                ├── EpsilonMatchStrategy.java
                                ├── MatchStrategy.java
                                ├── MatchStrategyManager.java
                                ├── SpaceMatchStrategy.java
                                └── WMatchStrategy.java
/.gitignore:
--------------------------------------------------------------------------------
1 | # Compiled class file
2 | *.class
3 |
4 | # Log file
5 | *.log
6 |
7 | # BlueJ files
8 | *.ctxt
9 |
10 | # Mobile Tools for Java (J2ME)
11 | .mtj.tmp/
12 |
13 | # Package Files #
14 | *.jar
15 | *.war
16 | *.nar
17 | *.ear
18 | *.zip
19 | *.tar.gz
20 | *.rar
21 | *.iml
22 | /.idea
23 | /target
24 | *.log.*
25 | *.log
26 | .DS_Store
27 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
28 | hs_err_pid*
29 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 xindoo
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # regex
2 |
3 |
4 | 最近学习编译原理,基于NFA实现了正则表达式,代码刚写完,具体内容参考博客[https://blog.csdn.net/xindoo/article/details/105875239](https://blog.csdn.net/xindoo/article/details/105875239),欢迎查阅。
5 | 已实现NFA转DFA,详见博客[从0到1打造正则表达式执行引擎(二)](https://xindoo.blog.csdn.net/article/details/106458165).
6 |
7 | 目前还是demo,算是刚把引擎的骨架搭建起来,后续继续完善代码。
8 |
9 | ## 是什么不是什么?
10 | 这个引擎不会是一个可以用在生产环境的项目,但会是一个了解正则引擎背后工作原理的项目。
11 |
12 | ## 现状
13 | 目前支持的语义:
14 | - 基本语义: `.` `?` `*` `+` `()` `|`
15 | - 字符集合: `[]`
16 | - 非打印字符: `\d` `\D` `\s` `\S` `\w` `\W`
17 | - 支持DFA和NFA双引擎
18 |
19 | ## Todo
20 | - [ ] 支持`{}`限定符
21 | - [ ] 支持 `^ $ \b` 等定位符
22 | - [x] 实现DFA引擎
23 | - [ ] DFA最小化(Hopcroft算法)
24 | - [ ] 支持捕获和引用
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
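1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |     <modelVersion>4.0.0</modelVersion>
6 |
7 |     <groupId>xyz.xindoo</groupId>
8 |     <artifactId>regex</artifactId>
9 |     <version>0.0.2-SNAPSHOT</version>
10 |     <dependencies>
11 |         <dependency>
12 |             <groupId>org.openjdk.jmh</groupId>
13 |             <artifactId>jmh-generator-annprocess</artifactId>
14 |             <version>1.21</version>
15 |         </dependency>
16 |     </dependencies>
17 |
18 |     <build>
19 |         <plugins>
20 |             <plugin>
21 |                 <groupId>org.apache.maven.plugins</groupId>
22 |                 <artifactId>maven-compiler-plugin</artifactId>
23 |                 <configuration>
24 |                     <source>8</source>
25 |                     <target>8</target>
26 |                 </configuration>
27 |             </plugin>
28 |         </plugins>
29 |     </build>
30 | </project>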
--------------------------------------------------------------------------------
/src/main/java/xyz/xindoo/re/Regex.java:
--------------------------------------------------------------------------------
1 | package xyz.xindoo.re;
2 |
3 | import xyz.xindoo.re.common.Constant;
4 | import xyz.xindoo.re.common.Reader;
5 | import xyz.xindoo.re.common.State;
6 | import xyz.xindoo.re.common.StateType;
7 | import xyz.xindoo.re.dfa.DFAGraph;
8 | import xyz.xindoo.re.dfa.DFAState;
9 | import xyz.xindoo.re.nfa.NFAGraph;
10 | import xyz.xindoo.re.nfa.NFAState;
11 | import xyz.xindoo.re.nfa.strategy.MatchStrategy;
12 | import xyz.xindoo.re.nfa.strategy.MatchStrategyManager;
13 |
14 | import java.util.ArrayDeque;
15 | import java.util.Collections;
16 | import java.util.HashSet;
17 | import java.util.LinkedList;
18 | import java.util.List;
19 | import java.util.Map;
20 | import java.util.Queue;
21 | import java.util.Set;
22 |
23 | public class Regex {
24 | private NFAGraph nfaGraph;
25 | private DFAGraph dfaGraph;
26 |
27 | public static Regex compile(String regex) {
28 | if (regex == null || regex.length() == 0) {
29 | return null;
30 | }
31 | NFAGraph nfaGraph = regex2nfa(regex);
32 | nfaGraph.end.setStateType(StateType.END); // 将NFA的end节点标记为终止态
33 | DFAGraph dfaGraph = convertNfa2Dfa(nfaGraph);
34 | return new Regex(nfaGraph, dfaGraph);
35 | }
36 |
37 | private Regex(NFAGraph nfaGraph, DFAGraph dfaGraph) {
38 | this.nfaGraph = nfaGraph;
39 | // printNfa();
40 | this.dfaGraph = dfaGraph;
41 | // printDfa();
42 | }
43 |
44 | /**
45 | * 有向图的广度优先遍历
46 | */
47 | public void printNfa() {
48 | Queue queue = new ArrayDeque<>();
49 | Set addedStates = new HashSet<>();
50 | queue.add(nfaGraph.start);
51 | addedStates.add(nfaGraph.start.getId());
52 | while (!queue.isEmpty()) {
53 | State curState = queue.poll();
54 | for (Map.Entry> entry : curState.next.entrySet()) {
55 | String key = entry.getKey();
56 | Set nexts = entry.getValue();
57 | for (State next : nexts) {
58 | System.out.printf("%2d->%2d %s\n", curState.getId(), next.getId(), key);
59 | if (!addedStates.contains(next.getId())) {
60 | queue.add(next);
61 | addedStates.add(next.getId());
62 | }
63 | }
64 | }
65 | }
66 | }
67 |
68 | public void printDfa() {
69 | Queue queue = new ArrayDeque<>();
70 | Set addedStates = new HashSet<>();
71 | queue.add(dfaGraph.start);
72 | addedStates.add(dfaGraph.start.getAllStateIds());
73 | while (!queue.isEmpty()) {
74 | State curState = queue.poll();
75 | for (Map.Entry> entry : curState.next.entrySet()) {
76 | String key = entry.getKey();
77 | Set nexts = entry.getValue();
78 | for (State next : nexts) {
79 | System.out.printf("%s -> %s %s \n", ((DFAState)curState).getAllStateIds(),((DFAState)next).getAllStateIds(), key);
80 | if (!addedStates.contains(((DFAState)next).getAllStateIds())) {
81 | queue.add(next);
82 | addedStates.add(((DFAState)next).getAllStateIds());
83 | }
84 | }
85 | }
86 | }
87 | }
88 |
89 | private static NFAGraph regex2nfa(String regex) {
90 | Reader reader = new Reader(regex);
91 | NFAGraph nfaGraph = null;
92 | while (reader.hasNext()) {
93 | char ch = reader.next();
94 | String edge = null;
95 | switch (ch) {
96 | // 子表达式特殊处理
97 | case '(' : {
98 | String subRegex = reader.getSubRegex(reader);
99 | NFAGraph newNFAGraph = regex2nfa(subRegex);
100 | checkRepeat(reader, newNFAGraph);
101 | if (nfaGraph == null) {
102 | nfaGraph = newNFAGraph;
103 | } else {
104 | nfaGraph.addSeriesGraph(newNFAGraph);
105 | }
106 | break;
107 | }
108 | // 或表达式特殊处理
109 | case '|' : {
110 | String remainRegex = reader.getRemainRegex(reader);
111 | NFAGraph newNFAGraph = regex2nfa(remainRegex);
112 | if (nfaGraph == null) {
113 | nfaGraph = newNFAGraph;
114 | } else {
115 | nfaGraph.addParallelGraph(newNFAGraph);
116 | }
117 | break;
118 | }
119 | case '[' : {
120 | edge = getCharSetMatch(reader);
121 | break;
122 | }
123 | // 暂时未支持零宽断言
124 | case '^' : {
125 | break;
126 | }
127 | // 暂未支持
128 | case '$' : {
129 | break;
130 | }
131 | case '.' : {
132 | edge = ".";
133 | break;
134 | }
135 | // 处理特殊占位符
136 | case '\\' : {
137 | char nextCh = reader.next();
138 | switch (nextCh) {
139 | case 'd': {
140 | edge = "\\d";
141 | break;
142 | }
143 | case 'D': {
144 | edge = "\\D";
145 | break;
146 | }
147 | case 'w': {
148 | edge = "\\w";
149 | break;
150 | }
151 | case 'W': {
152 | edge = "\\W";
153 | break;
154 | }
155 | case 's': {
156 | edge = "\\s";
157 | break;
158 | }
159 | case 'S': {
160 | edge = "\\S";
161 | break;
162 | }
163 | // 转义后的字符匹配
164 | default:{
165 | edge = String.valueOf(nextCh);
166 | break;
167 | }
168 | }
169 | break;
170 | }
171 |
172 | default : { // 处理普通字符
173 | edge = String.valueOf(ch);
174 | break;
175 | }
176 | }
177 | if (edge != null) {
178 | NFAState start = new NFAState();
179 | NFAState end = new NFAState();
180 | start.addNext(edge, end);
181 | NFAGraph newNFAGraph = new NFAGraph(start, end);
182 | checkRepeat(reader, newNFAGraph);
183 | if (nfaGraph == null) {
184 | nfaGraph = newNFAGraph;
185 | } else {
186 | nfaGraph.addSeriesGraph(newNFAGraph);
187 | }
188 | }
189 | }
190 | return nfaGraph;
191 | }
192 |
193 | /**
194 | * 使用子集构造法把nfa转成dfa,具体可以参考博客 https://blog.csdn.net/xindoo/article/details/106458165
195 | */
196 | private static DFAGraph convertNfa2Dfa(NFAGraph nfaGraph) {
197 | DFAGraph dfaGraph = new DFAGraph();
198 | Set startStates = new HashSet<>();
199 | // 用NFA图的起始节点构造DFA的起始节点
200 | startStates.addAll(getNextEStates(nfaGraph.start, new HashSet<>()));
201 | if (startStates.size() == 0) {
202 | startStates.add(nfaGraph.start);
203 | }
204 | dfaGraph.start = dfaGraph.getOrBuild(startStates);
205 | Queue queue = new LinkedList<>();
206 | Set finishedStates = new HashSet<>();
207 | // 如果BFS的方式从已找到的起始节点遍历并构建DFA
208 | queue.add(dfaGraph.start);
209 |
210 | while (!queue.isEmpty()) {
211 | // 对当前节点已添加的边做去重,不放到queue和next里.
212 | Set addedNextStates = new HashSet<>();
213 | DFAState curState = queue.poll();
214 | for (State nfaState : curState.nfaStates) {
215 | Set nextStates = new HashSet<>();
216 | Set finishedEdges = new HashSet<>();
217 | finishedEdges.add(Constant.EPSILON);
218 | for (String edge : nfaState.next.keySet()) {
219 | if (finishedEdges.contains(edge)) {
220 | continue;
221 | }
222 | finishedEdges.add(edge);
223 | Set efinishedState = new HashSet<>();
224 | for (State state : curState.nfaStates) {
225 | Set edgeStates = state.next.getOrDefault(edge, Collections.emptySet());
226 | nextStates.addAll(edgeStates);
227 | for (State eState : edgeStates) {
228 | // 添加E可达节点
229 | if (efinishedState.contains(eState)) {
230 | continue;
231 | }
232 | nextStates.addAll(getNextEStates(eState, efinishedState));
233 | efinishedState.add(eState);
234 | }
235 | }
236 | // 将NFA节点列表转化为DFA节点,如果已经有对应的DFA节点就返回,否则创建一个新的DFA节点
237 | DFAState nextDFAstate = dfaGraph.getOrBuild(nextStates);
238 | if (!finishedStates.contains(nextDFAstate) && !addedNextStates.contains(nextDFAstate)) {
239 | queue.add(nextDFAstate);
240 | addedNextStates.add(nextDFAstate); // 对queue里的数据做去重
241 | curState.addNext(edge, nextDFAstate);
242 | }
243 | }
244 | }
245 | finishedStates.add(curState);
246 | }
247 | return dfaGraph;
248 | }
249 |
250 | private static void checkRepeat(Reader reader, NFAGraph newNFAGraph) {
251 | char nextCh = reader.peak();
252 | switch (nextCh) {
253 | case '*': {
254 | newNFAGraph.repeatStar();
255 | reader.next();
256 | break;
257 | } case '+': {
258 | newNFAGraph.repeatPlus();
259 | reader.next();
260 | break;
261 | } case '?' : {
262 | newNFAGraph.addSToE();
263 | reader.next();
264 | break;
265 | } case '{' : {
266 | // 暂未支持{}指定重复次数
267 | break;
268 | } default : {
269 | return;
270 | }
271 | }
272 | }
273 |
274 | /**
275 | * 获取[]中表示的字符集,只支持字母 数字
276 | * */
277 | private static String getCharSetMatch(Reader reader) {
278 | String charSet = "";
279 | char ch;
280 | while ((ch = reader.next()) != ']') {
281 | charSet += ch;
282 | }
283 | return charSet;
284 | }
285 |
286 | private static int[] getRange(Reader reader) {
287 | String rangeStr = "";
288 | char ch;
289 | while ((ch = reader.next()) != '}') {
290 | if (ch == ' ') {
291 | continue;
292 | }
293 | rangeStr += ch;
294 | }
295 | int[] res = new int[2];
296 | if (!rangeStr.contains(",")) {
297 | res[0] = Integer.parseInt(rangeStr);
298 | res[1] = res[0];
299 | } else {
300 | String[] se = rangeStr.split(",", -1);
301 | res[0] = Integer.parseInt(se[0]);
302 | if (se[1].length() == 0) {
303 | res[1] = Integer.MAX_VALUE;
304 | } else {
305 | res[1] = Integer.parseInt(se[1]);
306 | }
307 | }
308 | return res;
309 | }
310 |
311 | // 获取Epsilon可达节点列表
312 | private static Set getNextEStates(State curState, Set stateSet) {
313 | if (!curState.next.containsKey(Constant.EPSILON)) {
314 | return Collections.emptySet();
315 | }
316 | Set res = new HashSet<>();
317 | for (State state : curState.next.get(Constant.EPSILON)) {
318 | if (stateSet.contains(state)) {
319 | continue;
320 | }
321 | res.add(state);
322 | res.addAll(getNextEStates(state, stateSet));
323 | stateSet.add(state);
324 | }
325 | return res;
326 | }
327 |
328 | public boolean isMatch(String text) {
329 | return isMatch(text, 0);
330 | }
331 |
332 | public boolean isMatch(String text, int mode) {
333 | State start = nfaGraph.start;
334 | if (mode == 1) {
335 | start = dfaGraph.start;
336 | }
337 | return isMatch(text, 0, start);
338 | }
339 |
340 | /**
341 | * 匹配过程就是根据输入遍历图的过程, 这里DFA和NFA用了同样的代码, 但实际上因为DFA的特性是不会产生回溯的,
342 | * 所以DFA可以换成非递归的形式
343 | */
344 | private boolean isMatch(String text, int pos, State curState) {
345 | if (pos == text.length()) {
346 | for (State nextState : curState.next.getOrDefault(Constant.EPSILON, Collections.emptySet())) {
347 | if (isMatch(text, pos, nextState)) {
348 | return true;
349 | }
350 | }
351 | if (curState.isEndState()) {
352 | return true;
353 | }
354 | return false;
355 | }
356 |
357 | for (Map.Entry> entry : curState.next.entrySet()) {
358 | String edge = entry.getKey();
359 | // 这个if和else的先后顺序决定了是贪婪匹配还是非贪婪匹配
360 | if (Constant.EPSILON.equals(edge)) {
361 | // 如果是DFA模式,不会有EPSILON边,所以不会进这
362 | for (State nextState : entry.getValue()) {
363 | if (isMatch(text, pos, nextState)) {
364 | return true;
365 | }
366 | }
367 | } else {
368 | MatchStrategy matchStrategy = MatchStrategyManager.getStrategy(edge);
369 | if (!matchStrategy.isMatch(text.charAt(pos), edge)) {
370 | continue;
371 | }
372 | // 遍历匹配策略
373 | for (State nextState : entry.getValue()) {
374 | // 如果是DFA匹配模式,entry.getValue()虽然是set,但里面只会有一个元素,所以不需要回溯
375 | if (nextState instanceof DFAState) {
376 | return isMatch(text, pos + 1, nextState);
377 | }
378 | if (isMatch(text, pos + 1, nextState)) {
379 | return true;
380 | }
381 | }
382 | }
383 | }
384 | return false;
385 | }
386 |
387 | public boolean isDfaMatch(String text) {
388 | return isDfaMatch(text, 0, dfaGraph.start);
389 | }
390 |
391 | private boolean isDfaMatch(String text, int pos, State startState) {
392 | State curState = startState;
393 | while (pos < text.length()) {
394 | boolean canContinue = false;
395 | for (Map.Entry> entry : curState.next.entrySet()) {
396 | String edge = entry.getKey();
397 | MatchStrategy matchStrategy = MatchStrategyManager.getStrategy(edge);
398 | if (matchStrategy.isMatch(text.charAt(pos), edge)) {
399 | curState = entry.getValue().stream().findFirst().orElse(null);
400 | pos++;
401 | canContinue = true;
402 | break;
403 | }
404 | }
405 | if (!canContinue) {
406 | return false;
407 | }
408 | }
409 | return curState.isEndState();
410 | }
411 |
412 | public List match(String text) {
413 | return match(text, 0);
414 | }
415 |
416 | public List match(String text, int mod) {
417 | int s = 0;
418 | int e = -1;
419 | List res = new LinkedList<>();
420 | while (s != text.length()) {
421 | e = getMatchEnd(text, s, dfaGraph.start);
422 | if (e != -1) {
423 | res.add(text.substring(s, e));
424 | s = e;
425 | } else {
426 | s++;
427 | }
428 | }
429 | return res;
430 | }
431 |
432 | // 获取正则表达式在字符串中能匹配到的结尾的位置
433 | private int getMatchEnd(String text, int pos, State curState) {
434 | int end = -1;
435 | if (curState.isEndState()) {
436 | return pos;
437 | }
438 |
439 | if (pos == text.length()) {
440 | for (State nextState : curState.next.getOrDefault(Constant.EPSILON, Collections.emptySet())) {
441 | end = getMatchEnd(text, pos, nextState);
442 | if (end != -1) {
443 | return end;
444 | }
445 | }
446 | }
447 |
448 | for (Map.Entry> entry : curState.next.entrySet()) {
449 | String edge = entry.getKey();
450 | if (Constant.EPSILON.equals(edge)) {
451 | for (State nextState : entry.getValue()) {
452 | end = getMatchEnd(text, pos, nextState);
453 | if (end != -1) {
454 | return end;
455 | }
456 | }
457 | } else {
458 | MatchStrategy matchStrategy = MatchStrategyManager.getStrategy(edge);
459 | if (!matchStrategy.isMatch(text.charAt(pos), edge)) {
460 | continue;
461 | }
462 | // 遍历匹配策略
463 | for (State nextState : entry.getValue()) {
464 | end = getMatchEnd(text, pos + 1, nextState);
465 | if (end != -1) {
466 | return end;
467 | }
468 | }
469 | }
470 | }
471 | return -1;
472 | }
473 | // todo, 使用hopcraft算法将dfa最小化
474 | private DFAGraph hopcroft(DFAGraph dfaGraph){
475 | return new DFAGraph();
476 | }
477 | }
478 |
--------------------------------------------------------------------------------
/src/main/java/xyz/xindoo/re/RegexTest.java:
--------------------------------------------------------------------------------
1 | package xyz.xindoo.re;
2 |
3 | import org.openjdk.jmh.annotations.Benchmark;
4 | import org.openjdk.jmh.annotations.Fork;
5 | import org.openjdk.jmh.annotations.Measurement;
6 | import org.openjdk.jmh.annotations.Threads;
7 | import org.openjdk.jmh.annotations.Warmup;
8 | import org.openjdk.jmh.runner.Runner;
9 | import org.openjdk.jmh.runner.options.Options;
10 | import org.openjdk.jmh.runner.options.OptionsBuilder;
11 |
12 | import java.util.List;
13 |
14 | public class RegexTest {
15 |
16 | // private static Regex regex = Regex.compile("a(b|c)*");
17 | // private static String[] strs = {"ac", "acc", "a", "a bcccdb", "ab", "abcd", "a3abcd", "a33333defd", "aabcabcabcabcabcabcdb",
18 | // "abbbbbbbbb", "acccccccbad", "acccccccccccccccccccccccccb", "abbbbbbbbbbbbbbbc"};
19 | //
20 | // @Benchmark
21 | // @Measurement(iterations = 2)
22 | // @Threads(1)
23 | // @Fork(0)
24 | // @Warmup(iterations = 0)
25 | // public void nfa() {
26 | // for (String str : strs) {
27 | // regex.isMatch(str);
28 | // }
29 | // }
30 | //
31 | // @Benchmark
32 | // @Measurement(iterations = 2)
33 | // @Threads(1)
34 | // @Fork(0)
35 | // @Warmup(iterations = 0)
36 | // public void dfaRecursion() {
37 | // for (String str : strs) {
38 | // regex.isMatch(str, 1);
39 | // }
40 | // }
41 | //
42 | // @Benchmark
43 | // @Measurement(iterations = 2)
44 | // @Threads(1)
45 | // @Fork(0)
46 | // @Warmup(iterations = 0)
47 | // public void dfaNonRecursion() {
48 | // for (String str : strs) {
49 | // regex.isDfaMatch(str);
50 | // }
51 | // }
52 |
53 | public static void main(String[] args) {
54 | test();
55 | // Options options = new OptionsBuilder().include(RegexTest.class.getSimpleName()).build();
56 | // try {
57 | // new Runner(options).run();
58 | // } catch (Exception e) {
59 | // System.out.println(e.fillInStackTrace());
60 | // } finally {
61 | // System.out.println("finshed");
62 | // }
63 | }
64 |
65 | private static void test() {
66 | String str = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab";
67 | Regex regex1 = Regex.compile("a*aaaaaaaaaaaaaaaaaaaaaab");
68 | System.out.println(regex1.isDfaMatch(str));
69 | System.out.println("_________________");
70 | System.out.println(regex1.isMatch(str));
71 | System.out.println("_________________");
72 | // Regex regex = Regex.compile("a(b|c)*");
73 | // List res = regex.match("aabacabbbcaccc");
74 | // regex.printNfa();
75 | // System.out.println("");
76 | // regex.printDfa();
77 | //
78 | // System.out.println(regex.isMatch("ac"));
79 | // System.out.println(regex.isMatch("acc"));
80 | // System.out.println(regex.isMatch("a"));
81 | // System.out.println(regex.isMatch("a bcccdb"));
82 | // System.out.println(regex.isMatch("ab"));
83 | // System.out.println(regex.isMatch("abcd"));
84 | // System.out.println(regex.isMatch("a3abcd"));
85 | // System.out.println(regex.isMatch("a33333defd"));
86 | // System.out.println(regex.isMatch("aabcabcabcabcabcabcdb"));
87 | //
88 | // System.out.println("*********");
89 | // System.out.println(regex.isDfaMatch("ac"));
90 | // System.out.println(regex.isDfaMatch("acc"));
91 | // System.out.println(regex.isDfaMatch("a"));
92 | // System.out.println(regex.isDfaMatch("a bcccdb"));
93 | // System.out.println(regex.isDfaMatch("ab"));
94 | // System.out.println(regex.isDfaMatch("abcd"));
95 | // System.out.println(regex.isDfaMatch("a3abcd"));
96 | // System.out.println(regex.isDfaMatch("a33333defd"));
97 | // System.out.println(regex.isDfaMatch("aabcabcabcabcabcabcdb"));
98 | //
99 | // System.out.println("*********");
100 | // System.out.println(regex.isMatch("ac", 1));
101 | // System.out.println(regex.isMatch("acc", 1));
102 | // System.out.println(regex.isMatch("a", 1));
103 | // System.out.println(regex.isMatch("a bcccdb", 1));
104 | // System.out.println(regex.isMatch("ab", 1));
105 | // System.out.println(regex.isMatch("abcd", 1));
106 | // System.out.println(regex.isMatch("a3abcd", 1));
107 | // System.out.println(regex.isMatch("a33333defd", 1));
108 | // System.out.println(regex.isMatch("aabcabcabcabcabcabcdb", 1));
109 | }
110 | }
111 |
--------------------------------------------------------------------------------
/src/main/java/xyz/xindoo/re/common/Constant.java:
--------------------------------------------------------------------------------
1 | package xyz.xindoo.re.common;
2 |
/**
 * Reserved edge labels and strategy keys shared by the graph builders and the matcher.
 */
public interface Constant {
    /** Label of a transition that consumes no input. */
    public static final String EPSILON = "Epsilon";

    /** Key under which the single-character strategy is registered. */
    public static final String CHAR = "char";

    /** Key under which the character-set strategy is registered. */
    public static final String CHARSET = "charSet";
}
8 |
--------------------------------------------------------------------------------
/src/main/java/xyz/xindoo/re/common/Reader.java:
--------------------------------------------------------------------------------
1 | package xyz.xindoo.re.common;
2 |
/**
 * Forward-only cursor over the characters of a pattern.
 *
 * <p>{@link #peak()} and {@link #next()} return the NUL character once the input is
 * exhausted instead of throwing; use {@link #hasNext()} to tell a real NUL from the end.
 */
public class Reader {
    private int cur = 0;
    private final char[] chars;

    /**
     * @param regex the pattern text to read; must not be {@code null}
     */
    public Reader(String regex) {
        this.chars = regex.toCharArray();
    }

    /** Returns the current character without consuming it, or NUL at the end of input. */
    public char peak() {
        return cur < chars.length ? chars[cur] : '\0';
    }

    /** Consumes and returns the current character, or returns NUL at the end of input. */
    public char next() {
        return cur < chars.length ? chars[cur++] : '\0';
    }

    /** Returns {@code true} while unread characters remain. */
    public boolean hasNext() {
        return cur < chars.length;
    }

    /**
     * Reads the body of a group whose opening parenthesis has already been consumed, up to
     * and including the matching closing parenthesis.
     *
     * <p>Nested groups are kept intact. A backslash escape is copied through as a unit, so an
     * escaped parenthesis neither opens nor closes a group. If the closing parenthesis is
     * missing, the rest of the input is returned.
     *
     * @param reader the reader to consume from (callers pass this same instance)
     * @return the group body without the enclosing parentheses
     */
    public String getSubRegex(Reader reader) {
        int depth = 1;
        StringBuilder body = new StringBuilder();
        while (reader.hasNext()) {
            char ch = reader.next();
            if (ch == '\\') {
                // Keep the escape pair together; the body is parsed again later.
                body.append(ch);
                if (reader.hasNext()) {
                    body.append(reader.next());
                }
                continue;
            }
            if (ch == '(') {
                depth++;
            } else if (ch == ')') {
                depth--;
                if (depth == 0) {
                    break;
                }
            }
            body.append(ch);
        }
        return body.toString();
    }

    /**
     * Consumes and returns everything that is left in {@code reader}.
     *
     * @param reader the reader to drain (callers pass this same instance)
     */
    public String getRemainRegex(Reader reader) {
        StringBuilder rest = new StringBuilder();
        while (reader.hasNext()) {
            rest.append(reader.next());
        }
        return rest.toString();
    }
}
54 |
--------------------------------------------------------------------------------
/src/main/java/xyz/xindoo/re/common/State.java:
--------------------------------------------------------------------------------
1 | package xyz.xindoo.re.common;
2 |
3 | import java.util.HashMap;
4 | import java.util.HashSet;
5 | import java.util.Map;
6 | import java.util.Set;
7 |
8 | public class State {
9 | protected static int idCnt = 0;
10 | protected int id;
11 | protected StateType stateType;
12 |
13 | public State() {
14 | this.id = idCnt++;
15 | this.stateType = StateType.GENERAL;
16 | }
17 |
18 | public Map> next = new HashMap<>();
19 |
20 | public void addNext(String edge, State nfaState) {
21 | Set set = next.get(edge);
22 | if (set == null) {
23 | set = new HashSet<>();
24 | next.put(edge, set);
25 | }
26 | set.add(nfaState);
27 | }
28 |
29 | public void setStateType(StateType stateType) {
30 | this.stateType = stateType;
31 | }
32 |
33 | public boolean isEndState() {
34 | return stateType == StateType.END;
35 | }
36 |
37 | public int getId() {
38 | return this.id;
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/src/main/java/xyz/xindoo/re/common/StateType.java:
--------------------------------------------------------------------------------
1 | package xyz.xindoo.re.common;
2 |
/**
 * Kind of a graph state.
 */
public enum StateType {
    /** An ordinary state. */
    GENERAL,

    /** A state for which {@code State.isEndState()} reports {@code true}. */
    END
}
6 |
--------------------------------------------------------------------------------
/src/main/java/xyz/xindoo/re/dfa/DFAGraph.java:
--------------------------------------------------------------------------------
1 | package xyz.xindoo.re.dfa;
2 |
3 | import xyz.xindoo.re.common.State;
4 |
5 | import java.util.HashMap;
6 | import java.util.Map;
7 | import java.util.Set;
8 |
9 | public class DFAGraph {
10 |
11 | private Map nfaStates2dfaState = new HashMap<>();
12 | public DFAState start = new DFAState();
13 |
14 | // 这里用map保存NFAState结合是已有对应的DFAState, 有就直接拿出来用
15 | public DFAState getOrBuild(Set states) {
16 | String allStateIds = "";
17 | int[] ids = states.stream()
18 | .mapToInt(state -> state.getId())
19 | .sorted()
20 | .toArray();
21 | for (int id : ids) {
22 | allStateIds += "#";
23 | allStateIds += id;
24 | }
25 | if (!nfaStates2dfaState.containsKey(allStateIds)) {
26 | DFAState dfaState = new DFAState(allStateIds, states);
27 | nfaStates2dfaState.put(allStateIds, dfaState);
28 | }
29 | return nfaStates2dfaState.get(allStateIds);
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/src/main/java/xyz/xindoo/re/dfa/DFAState.java:
--------------------------------------------------------------------------------
1 | package xyz.xindoo.re.dfa;
2 |
3 | import xyz.xindoo.re.common.State;
4 | import xyz.xindoo.re.common.StateType;
5 |
6 | import java.util.HashSet;
7 | import java.util.Objects;
8 | import java.util.Set;
9 |
10 | public class DFAState extends State {
11 | public Set nfaStates = new HashSet<>();
12 | // 保存对应NFAState的id,一个DFAState可能是多个NFAState的集合,所以拼接成String
13 | private String allStateIds;
14 | public DFAState() {
15 | this.stateType = StateType.GENERAL;
16 | }
17 |
18 | public DFAState(String allStateIds, Set states) {
19 | this.allStateIds = allStateIds;
20 | this.nfaStates.addAll(states);
21 |
22 | for (State state : states) {
23 | if (state.isEndState()) { // 如果有任意节点是终止态,新建的DFA节点就是终止态
24 | this.stateType = StateType.END;
25 | }
26 | }
27 | }
28 |
29 | public String getAllStateIds() {
30 | return allStateIds;
31 | }
32 |
33 | @Override
34 | public boolean equals(Object o) {
35 | if (this == o) {
36 | return true;
37 | }
38 | return allStateIds.equals(((DFAState)o).allStateIds);
39 | }
40 |
41 | @Override
42 | public int hashCode() {
43 | return Objects.hash(allStateIds);
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/src/main/java/xyz/xindoo/re/nfa/NFAGraph.java:
--------------------------------------------------------------------------------
1 | package xyz.xindoo.re.nfa;
2 |
3 | import xyz.xindoo.re.common.Constant;
4 |
5 | public class NFAGraph {
6 | public NFAState start;
7 | public NFAState end;
8 |
9 | public NFAGraph(NFAState start, NFAState end) {
10 | this.start = start;
11 | this.end = end;
12 | }
13 |
14 | // |
15 | public void addParallelGraph(NFAGraph NFAGraph) {
16 | NFAState newStart = new NFAState();
17 | NFAState newEnd = new NFAState();
18 | newStart.addNext(Constant.EPSILON, this.start);
19 | newStart.addNext(Constant.EPSILON, NFAGraph.start);
20 | this.end.addNext(Constant.EPSILON, newEnd);
21 | NFAGraph.end.addNext(Constant.EPSILON, newEnd);
22 | this.start = newStart;
23 | this.end = newEnd;
24 | }
25 |
26 | //
27 | public void addSeriesGraph(NFAGraph NFAGraph) {
28 | this.end.addNext(Constant.EPSILON, NFAGraph.start);
29 | this.end = NFAGraph.end;
30 | }
31 |
32 | // * 重复0-n次
33 | public void repeatStar() {
34 | repeatPlus();
35 | addSToE(); // 重复0
36 | }
37 |
38 | // ? 重复0次哦
39 | public void addSToE() {
40 | start.addNext(Constant.EPSILON, end);
41 | }
42 |
43 | // + 重复1-n次
44 | public void repeatPlus() {
45 | NFAState newStart = new NFAState();
46 | NFAState newEnd = new NFAState();
47 | newStart.addNext(Constant.EPSILON, this.start);
48 | end.addNext(Constant.EPSILON, newEnd);
49 | end.addNext(Constant.EPSILON, start);
50 | this.start = newStart;
51 | this.end = newEnd;
52 | }
53 |
54 | }
55 |
--------------------------------------------------------------------------------
/src/main/java/xyz/xindoo/re/nfa/NFAState.java:
--------------------------------------------------------------------------------
1 | package xyz.xindoo.re.nfa;
2 |
3 | import xyz.xindoo.re.common.State;
4 |
5 | public class NFAState extends State {
6 |
7 | }
8 |
--------------------------------------------------------------------------------
/src/main/java/xyz/xindoo/re/nfa/strategy/CharMatchStrategy.java:
--------------------------------------------------------------------------------
1 | package xyz.xindoo.re.nfa.strategy;
2 |
3 | public class CharMatchStrategy extends MatchStrategy{
4 |
5 | @Override
6 | public boolean isMatch(char c, String edge) {
7 | return edge.charAt(0) == c;
8 | }
9 | }
10 |
--------------------------------------------------------------------------------
/src/main/java/xyz/xindoo/re/nfa/strategy/CharSetMatchStrategy.java:
--------------------------------------------------------------------------------
1 | package xyz.xindoo.re.nfa.strategy;
2 |
3 | public class CharSetMatchStrategy extends MatchStrategy {
4 | @Override
5 | public boolean isMatch(char c, String charSet) {
6 | boolean res = false;
7 | for (int i = 0; i < charSet.length(); i++) {
8 | if (charSet.charAt(0) == '^') {
9 | continue;
10 | }
11 | if ('-' == charSet.charAt(i)) {
12 | return c >= charSet.charAt(i-1) && c <= charSet.charAt(i+1);
13 | }
14 | if (c == charSet.charAt(i)) {
15 | res = true;
16 | break;
17 | }
18 | }
19 | if (charSet.charAt(0) == '^') {
20 | return !res;
21 | }
22 | return res;
23 | }
24 | }
25 |
--------------------------------------------------------------------------------
/src/main/java/xyz/xindoo/re/nfa/strategy/DigitalMatchStrategy.java:
--------------------------------------------------------------------------------
1 | package xyz.xindoo.re.nfa.strategy;
2 |
3 | public class DigitalMatchStrategy extends MatchStrategy {
4 | private boolean isReverse;
5 |
6 | public DigitalMatchStrategy(boolean isReverse) {
7 | this.isReverse = isReverse;
8 | }
9 |
10 | @Override
11 | public boolean isMatch(char c, String edge) {
12 | boolean res = c >= '0' && c <= '9';
13 | if (isReverse) {
14 | return !res;
15 | }
16 | return res;
17 | }
18 | }
19 |
--------------------------------------------------------------------------------
/src/main/java/xyz/xindoo/re/nfa/strategy/DotMatchStrategy.java:
--------------------------------------------------------------------------------
1 | package xyz.xindoo.re.nfa.strategy;
2 |
3 | public class DotMatchStrategy extends MatchStrategy {
4 | @Override
5 | public boolean isMatch(char c, String edge) {
6 | return c != '\n' && c != '\r';
7 | }
8 | }
9 |
--------------------------------------------------------------------------------
/src/main/java/xyz/xindoo/re/nfa/strategy/EpsilonMatchStrategy.java:
--------------------------------------------------------------------------------
1 | package xyz.xindoo.re.nfa.strategy;
2 |
3 | public class EpsilonMatchStrategy extends MatchStrategy {
4 | @Override
5 | public boolean isMatch(char c, String edge) {
6 | return true;
7 | }
8 | }
9 |
--------------------------------------------------------------------------------
/src/main/java/xyz/xindoo/re/nfa/strategy/MatchStrategy.java:
--------------------------------------------------------------------------------
1 | package xyz.xindoo.re.nfa.strategy;
2 |
/**
 * Base type of all edge-matching strategies. A strategy decides whether one input character
 * may travel along an edge with a given label. This base implementation matches nothing.
 */
public class MatchStrategy {
    /** Set by subclasses that also implement the complement of their class. */
    protected boolean isReverse = false;

    /**
     * @param c    the input character
     * @param edge the label of the edge being tested
     * @return {@code false}; subclasses override this
     */
    public boolean isMatch(char c, String edge) {
        return false;
    }
}
10 |
--------------------------------------------------------------------------------
/src/main/java/xyz/xindoo/re/nfa/strategy/MatchStrategyManager.java:
--------------------------------------------------------------------------------
1 | package xyz.xindoo.re.nfa.strategy;
2 |
3 | import xyz.xindoo.re.common.Constant;
4 |
5 | import java.util.HashMap;
6 | import java.util.Map;
7 |
8 | public class MatchStrategyManager {
9 | private static Map matchStrategyMap;
10 |
11 | static {
12 | matchStrategyMap = new HashMap<>();
13 | matchStrategyMap.put("\\d", new DigitalMatchStrategy(false));
14 | matchStrategyMap.put("\\D", new DigitalMatchStrategy(true));
15 | matchStrategyMap.put("\\w", new WMatchStrategy(false));
16 | matchStrategyMap.put("\\W", new WMatchStrategy(true));
17 | matchStrategyMap.put("\\s", new SpaceMatchStrategy(false));
18 | matchStrategyMap.put("\\S", new SpaceMatchStrategy(true));
19 | matchStrategyMap.put(".", new DotMatchStrategy());
20 | matchStrategyMap.put(Constant.EPSILON, new EpsilonMatchStrategy());
21 | matchStrategyMap.put(Constant.CHAR, new CharMatchStrategy());
22 | matchStrategyMap.put(Constant.CHARSET, new CharSetMatchStrategy());
23 | }
24 |
25 | public static MatchStrategy getStrategy(String key) {
26 | // 特殊字符的匹配
27 | if (matchStrategyMap.containsKey(key)) {
28 | return matchStrategyMap.get(key);
29 | }
30 | // 单字符和字符集的匹配
31 | if (key.length() == 1) {
32 | return matchStrategyMap.get(Constant.CHAR);
33 | } else {
34 | return matchStrategyMap.get(Constant.CHARSET);
35 | }
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/src/main/java/xyz/xindoo/re/nfa/strategy/SpaceMatchStrategy.java:
--------------------------------------------------------------------------------
1 | package xyz.xindoo.re.nfa.strategy;
2 |
3 | public class SpaceMatchStrategy extends MatchStrategy {
4 | private boolean isReverse;
5 |
6 | public SpaceMatchStrategy(boolean isReverse) {
7 | this.isReverse = isReverse;
8 | }
9 |
10 | @Override
11 | public boolean isMatch(char c, String edge) {
12 | boolean res = (c == '\f' || c == '\n' || c == '\r' || c == '\t' || c == ' ');
13 | if (isReverse) {
14 | return !res;
15 | }
16 | return res;
17 | }
18 | }
19 |
--------------------------------------------------------------------------------
/src/main/java/xyz/xindoo/re/nfa/strategy/WMatchStrategy.java:
--------------------------------------------------------------------------------
1 | package xyz.xindoo.re.nfa.strategy;
2 |
3 | /**
4 | * 匹配 \w和\W
5 | */
6 | public class WMatchStrategy extends MatchStrategy {
7 |
8 | public WMatchStrategy(boolean isReverse) {
9 | this.isReverse = isReverse;
10 | }
11 |
12 | @Override
13 | public boolean isMatch(char c, String edge) {
14 | boolean res = c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' || c >= '0' && c <= '9';
15 | if (isReverse) {
16 | return !res;
17 | }
18 | return res;
19 | }
20 | }
21 |
--------------------------------------------------------------------------------