├── .gitignore ├── LICENSE ├── README.md ├── pom.xml └── src └── main └── java └── xyz └── xindoo └── re ├── Regex.java ├── RegexTest.java ├── common ├── Constant.java ├── Reader.java ├── State.java └── StateType.java ├── dfa ├── DFAGraph.java └── DFAState.java └── nfa ├── NFAGraph.java ├── NFAState.java └── strategy ├── CharMatchStrategy.java ├── CharSetMatchStrategy.java ├── DigitalMatchStrategy.java ├── DotMatchStrategy.java ├── EpsilonMatchStrategy.java ├── MatchStrategy.java ├── MatchStrategyManager.java ├── SpaceMatchStrategy.java └── WMatchStrategy.java /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled class file 2 | *.class 3 | 4 | # Log file 5 | *.log 6 | 7 | # BlueJ files 8 | *.ctxt 9 | 10 | # Mobile Tools for Java (J2ME) 11 | .mtj.tmp/ 12 | 13 | # Package Files # 14 | *.jar 15 | *.war 16 | *.nar 17 | *.ear 18 | *.zip 19 | *.tar.gz 20 | *.rar 21 | *.iml 22 | /.idea 23 | /target 24 | *.log.* 25 | *.log 26 | .DS_Store 27 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 28 | hs_err_pid* 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 xindoo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # regex 2 | 3 | 4 | 最近学习编译原理,基于NFA实现了正则表达式,代码刚写完,具体内容参考博客[https://blog.csdn.net/xindoo/article/details/105875239](https://blog.csdn.net/xindoo/article/details/105875239),欢迎查阅。 5 | 已实现NFA转DFA,详见博客[从0到1打造正则表达式执行引擎(二)](https://xindoo.blog.csdn.net/article/details/106458165). 6 | 7 | 目前还是demo,算是刚把引擎的骨架搭建起来,后续继续完善代码。 8 | 9 | ## 是什么不是什么? 10 | 这个引擎不会是一个可以用在生产环境的项目,但会是一个了解正则引擎背后工作原理的项目。 11 | 12 | ## 现状 13 | 目前支持的语义 14 | 基本语义: . ? 
* + () | 15 | 字符集合: [] 16 | 非打印字符: \d \D \s \S \w \W 17 | 支持DFA和NFA双引擎 18 | 19 | ## Todo 20 | - [ ] 支持`{}`限定符 21 | - [ ] 支持 `^ $ \b` 等定位符 22 | - [x] 实现DFA引擎 23 | - [ ] DFA最小化(Hopcroft算法) 24 | - [ ] 支持捕获和引用 -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | xyz.xindoo 8 | regex 9 | 0.0.2-SNAPSHOT 10 | 11 | 12 | org.openjdk.jmh 13 | jmh-generator-annprocess 14 | 1.21 15 | 16 | 17 | 18 | 19 | 20 | 21 | org.apache.maven.plugins 22 | maven-compiler-plugin 23 | 24 | 8 25 | 8 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /src/main/java/xyz/xindoo/re/Regex.java: -------------------------------------------------------------------------------- 1 | package xyz.xindoo.re; 2 | 3 | import xyz.xindoo.re.common.Constant; 4 | import xyz.xindoo.re.common.Reader; 5 | import xyz.xindoo.re.common.State; 6 | import xyz.xindoo.re.common.StateType; 7 | import xyz.xindoo.re.dfa.DFAGraph; 8 | import xyz.xindoo.re.dfa.DFAState; 9 | import xyz.xindoo.re.nfa.NFAGraph; 10 | import xyz.xindoo.re.nfa.NFAState; 11 | import xyz.xindoo.re.nfa.strategy.MatchStrategy; 12 | import xyz.xindoo.re.nfa.strategy.MatchStrategyManager; 13 | 14 | import java.util.ArrayDeque; 15 | import java.util.Collections; 16 | import java.util.HashSet; 17 | import java.util.LinkedList; 18 | import java.util.List; 19 | import java.util.Map; 20 | import java.util.Queue; 21 | import java.util.Set; 22 | 23 | public class Regex { 24 | private NFAGraph nfaGraph; 25 | private DFAGraph dfaGraph; 26 | 27 | public static Regex compile(String regex) { 28 | if (regex == null || regex.length() == 0) { 29 | return null; 30 | } 31 | NFAGraph nfaGraph = regex2nfa(regex); 32 | nfaGraph.end.setStateType(StateType.END); // 将NFA的end节点标记为终止态 33 | DFAGraph dfaGraph = convertNfa2Dfa(nfaGraph); 34 | return new Regex(nfaGraph, dfaGraph); 35 | } 
36 | 37 | private Regex(NFAGraph nfaGraph, DFAGraph dfaGraph) { 38 | this.nfaGraph = nfaGraph; 39 | // printNfa(); 40 | this.dfaGraph = dfaGraph; 41 | // printDfa(); 42 | } 43 | 44 | /** 45 | * 有向图的广度优先遍历 46 | */ 47 | public void printNfa() { 48 | Queue queue = new ArrayDeque<>(); 49 | Set addedStates = new HashSet<>(); 50 | queue.add(nfaGraph.start); 51 | addedStates.add(nfaGraph.start.getId()); 52 | while (!queue.isEmpty()) { 53 | State curState = queue.poll(); 54 | for (Map.Entry> entry : curState.next.entrySet()) { 55 | String key = entry.getKey(); 56 | Set nexts = entry.getValue(); 57 | for (State next : nexts) { 58 | System.out.printf("%2d->%2d %s\n", curState.getId(), next.getId(), key); 59 | if (!addedStates.contains(next.getId())) { 60 | queue.add(next); 61 | addedStates.add(next.getId()); 62 | } 63 | } 64 | } 65 | } 66 | } 67 | 68 | public void printDfa() { 69 | Queue queue = new ArrayDeque<>(); 70 | Set addedStates = new HashSet<>(); 71 | queue.add(dfaGraph.start); 72 | addedStates.add(dfaGraph.start.getAllStateIds()); 73 | while (!queue.isEmpty()) { 74 | State curState = queue.poll(); 75 | for (Map.Entry> entry : curState.next.entrySet()) { 76 | String key = entry.getKey(); 77 | Set nexts = entry.getValue(); 78 | for (State next : nexts) { 79 | System.out.printf("%s -> %s %s \n", ((DFAState)curState).getAllStateIds(),((DFAState)next).getAllStateIds(), key); 80 | if (!addedStates.contains(((DFAState)next).getAllStateIds())) { 81 | queue.add(next); 82 | addedStates.add(((DFAState)next).getAllStateIds()); 83 | } 84 | } 85 | } 86 | } 87 | } 88 | 89 | private static NFAGraph regex2nfa(String regex) { 90 | Reader reader = new Reader(regex); 91 | NFAGraph nfaGraph = null; 92 | while (reader.hasNext()) { 93 | char ch = reader.next(); 94 | String edge = null; 95 | switch (ch) { 96 | // 子表达式特殊处理 97 | case '(' : { 98 | String subRegex = reader.getSubRegex(reader); 99 | NFAGraph newNFAGraph = regex2nfa(subRegex); 100 | checkRepeat(reader, newNFAGraph); 101 | if 
(nfaGraph == null) { 102 | nfaGraph = newNFAGraph; 103 | } else { 104 | nfaGraph.addSeriesGraph(newNFAGraph); 105 | } 106 | break; 107 | } 108 | // 或表达式特殊处理 109 | case '|' : { 110 | String remainRegex = reader.getRemainRegex(reader); 111 | NFAGraph newNFAGraph = regex2nfa(remainRegex); 112 | if (nfaGraph == null) { 113 | nfaGraph = newNFAGraph; 114 | } else { 115 | nfaGraph.addParallelGraph(newNFAGraph); 116 | } 117 | break; 118 | } 119 | case '[' : { 120 | edge = getCharSetMatch(reader); 121 | break; 122 | } 123 | // 暂时未支持零宽断言 124 | case '^' : { 125 | break; 126 | } 127 | // 暂未支持 128 | case '$' : { 129 | break; 130 | } 131 | case '.' : { 132 | edge = "."; 133 | break; 134 | } 135 | // 处理特殊占位符 136 | case '\\' : { 137 | char nextCh = reader.next(); 138 | switch (nextCh) { 139 | case 'd': { 140 | edge = "\\d"; 141 | break; 142 | } 143 | case 'D': { 144 | edge = "\\D"; 145 | break; 146 | } 147 | case 'w': { 148 | edge = "\\w"; 149 | break; 150 | } 151 | case 'W': { 152 | edge = "\\W"; 153 | break; 154 | } 155 | case 's': { 156 | edge = "\\s"; 157 | break; 158 | } 159 | case 'S': { 160 | edge = "\\S"; 161 | break; 162 | } 163 | // 转义后的字符匹配 164 | default:{ 165 | edge = String.valueOf(nextCh); 166 | break; 167 | } 168 | } 169 | break; 170 | } 171 | 172 | default : { // 处理普通字符 173 | edge = String.valueOf(ch); 174 | break; 175 | } 176 | } 177 | if (edge != null) { 178 | NFAState start = new NFAState(); 179 | NFAState end = new NFAState(); 180 | start.addNext(edge, end); 181 | NFAGraph newNFAGraph = new NFAGraph(start, end); 182 | checkRepeat(reader, newNFAGraph); 183 | if (nfaGraph == null) { 184 | nfaGraph = newNFAGraph; 185 | } else { 186 | nfaGraph.addSeriesGraph(newNFAGraph); 187 | } 188 | } 189 | } 190 | return nfaGraph; 191 | } 192 | 193 | /** 194 | * 使用子集构造法把nfa转成dfa,具体可以参考博客 https://blog.csdn.net/xindoo/article/details/106458165 195 | */ 196 | private static DFAGraph convertNfa2Dfa(NFAGraph nfaGraph) { 197 | DFAGraph dfaGraph = new DFAGraph(); 198 | Set startStates 
= new HashSet<>(); 199 | // 用NFA图的起始节点构造DFA的起始节点 200 | startStates.addAll(getNextEStates(nfaGraph.start, new HashSet<>())); 201 | if (startStates.size() == 0) { 202 | startStates.add(nfaGraph.start); 203 | } 204 | dfaGraph.start = dfaGraph.getOrBuild(startStates); 205 | Queue queue = new LinkedList<>(); 206 | Set finishedStates = new HashSet<>(); 207 | // 如果BFS的方式从已找到的起始节点遍历并构建DFA 208 | queue.add(dfaGraph.start); 209 | 210 | while (!queue.isEmpty()) { 211 | // 对当前节点已添加的边做去重,不放到queue和next里. 212 | Set addedNextStates = new HashSet<>(); 213 | DFAState curState = queue.poll(); 214 | for (State nfaState : curState.nfaStates) { 215 | Set nextStates = new HashSet<>(); 216 | Set finishedEdges = new HashSet<>(); 217 | finishedEdges.add(Constant.EPSILON); 218 | for (String edge : nfaState.next.keySet()) { 219 | if (finishedEdges.contains(edge)) { 220 | continue; 221 | } 222 | finishedEdges.add(edge); 223 | Set efinishedState = new HashSet<>(); 224 | for (State state : curState.nfaStates) { 225 | Set edgeStates = state.next.getOrDefault(edge, Collections.emptySet()); 226 | nextStates.addAll(edgeStates); 227 | for (State eState : edgeStates) { 228 | // 添加E可达节点 229 | if (efinishedState.contains(eState)) { 230 | continue; 231 | } 232 | nextStates.addAll(getNextEStates(eState, efinishedState)); 233 | efinishedState.add(eState); 234 | } 235 | } 236 | // 将NFA节点列表转化为DFA节点,如果已经有对应的DFA节点就返回,否则创建一个新的DFA节点 237 | DFAState nextDFAstate = dfaGraph.getOrBuild(nextStates); 238 | if (!finishedStates.contains(nextDFAstate) && !addedNextStates.contains(nextDFAstate)) { 239 | queue.add(nextDFAstate); 240 | addedNextStates.add(nextDFAstate); // 对queue里的数据做去重 241 | curState.addNext(edge, nextDFAstate); 242 | } 243 | } 244 | } 245 | finishedStates.add(curState); 246 | } 247 | return dfaGraph; 248 | } 249 | 250 | private static void checkRepeat(Reader reader, NFAGraph newNFAGraph) { 251 | char nextCh = reader.peak(); 252 | switch (nextCh) { 253 | case '*': { 254 | newNFAGraph.repeatStar(); 255 | 
reader.next(); 256 | break; 257 | } case '+': { 258 | newNFAGraph.repeatPlus(); 259 | reader.next(); 260 | break; 261 | } case '?' : { 262 | newNFAGraph.addSToE(); 263 | reader.next(); 264 | break; 265 | } case '{' : { 266 | // 暂未支持{}指定重复次数 267 | break; 268 | } default : { 269 | return; 270 | } 271 | } 272 | } 273 | 274 | /** 275 | * 获取[]中表示的字符集,只支持字母 数字 276 | * */ 277 | private static String getCharSetMatch(Reader reader) { 278 | String charSet = ""; 279 | char ch; 280 | while ((ch = reader.next()) != ']') { 281 | charSet += ch; 282 | } 283 | return charSet; 284 | } 285 | 286 | private static int[] getRange(Reader reader) { 287 | String rangeStr = ""; 288 | char ch; 289 | while ((ch = reader.next()) != '}') { 290 | if (ch == ' ') { 291 | continue; 292 | } 293 | rangeStr += ch; 294 | } 295 | int[] res = new int[2]; 296 | if (!rangeStr.contains(",")) { 297 | res[0] = Integer.parseInt(rangeStr); 298 | res[1] = res[0]; 299 | } else { 300 | String[] se = rangeStr.split(",", -1); 301 | res[0] = Integer.parseInt(se[0]); 302 | if (se[1].length() == 0) { 303 | res[1] = Integer.MAX_VALUE; 304 | } else { 305 | res[1] = Integer.parseInt(se[1]); 306 | } 307 | } 308 | return res; 309 | } 310 | 311 | // 获取Epsilon可达节点列表 312 | private static Set getNextEStates(State curState, Set stateSet) { 313 | if (!curState.next.containsKey(Constant.EPSILON)) { 314 | return Collections.emptySet(); 315 | } 316 | Set res = new HashSet<>(); 317 | for (State state : curState.next.get(Constant.EPSILON)) { 318 | if (stateSet.contains(state)) { 319 | continue; 320 | } 321 | res.add(state); 322 | res.addAll(getNextEStates(state, stateSet)); 323 | stateSet.add(state); 324 | } 325 | return res; 326 | } 327 | 328 | public boolean isMatch(String text) { 329 | return isMatch(text, 0); 330 | } 331 | 332 | public boolean isMatch(String text, int mode) { 333 | State start = nfaGraph.start; 334 | if (mode == 1) { 335 | start = dfaGraph.start; 336 | } 337 | return isMatch(text, 0, start); 338 | } 339 | 340 | 
/** 341 | * 匹配过程就是根据输入遍历图的过程, 这里DFA和NFA用了同样的代码, 但实际上因为DFA的特性是不会产生回溯的, 342 | * 所以DFA可以换成非递归的形式 343 | */ 344 | private boolean isMatch(String text, int pos, State curState) { 345 | if (pos == text.length()) { 346 | for (State nextState : curState.next.getOrDefault(Constant.EPSILON, Collections.emptySet())) { 347 | if (isMatch(text, pos, nextState)) { 348 | return true; 349 | } 350 | } 351 | if (curState.isEndState()) { 352 | return true; 353 | } 354 | return false; 355 | } 356 | 357 | for (Map.Entry> entry : curState.next.entrySet()) { 358 | String edge = entry.getKey(); 359 | // 这个if和else的先后顺序决定了是贪婪匹配还是非贪婪匹配 360 | if (Constant.EPSILON.equals(edge)) { 361 | // 如果是DFA模式,不会有EPSILON边,所以不会进这 362 | for (State nextState : entry.getValue()) { 363 | if (isMatch(text, pos, nextState)) { 364 | return true; 365 | } 366 | } 367 | } else { 368 | MatchStrategy matchStrategy = MatchStrategyManager.getStrategy(edge); 369 | if (!matchStrategy.isMatch(text.charAt(pos), edge)) { 370 | continue; 371 | } 372 | // 遍历匹配策略 373 | for (State nextState : entry.getValue()) { 374 | // 如果是DFA匹配模式,entry.getValue()虽然是set,但里面只会有一个元素,所以不需要回溯 375 | if (nextState instanceof DFAState) { 376 | return isMatch(text, pos + 1, nextState); 377 | } 378 | if (isMatch(text, pos + 1, nextState)) { 379 | return true; 380 | } 381 | } 382 | } 383 | } 384 | return false; 385 | } 386 | 387 | public boolean isDfaMatch(String text) { 388 | return isDfaMatch(text, 0, dfaGraph.start); 389 | } 390 | 391 | private boolean isDfaMatch(String text, int pos, State startState) { 392 | State curState = startState; 393 | while (pos < text.length()) { 394 | boolean canContinue = false; 395 | for (Map.Entry> entry : curState.next.entrySet()) { 396 | String edge = entry.getKey(); 397 | MatchStrategy matchStrategy = MatchStrategyManager.getStrategy(edge); 398 | if (matchStrategy.isMatch(text.charAt(pos), edge)) { 399 | curState = entry.getValue().stream().findFirst().orElse(null); 400 | pos++; 401 | canContinue = true; 402 | break; 
403 | } 404 | } 405 | if (!canContinue) { 406 | return false; 407 | } 408 | } 409 | return curState.isEndState(); 410 | } 411 | 412 | public List match(String text) { 413 | return match(text, 0); 414 | } 415 | 416 | public List match(String text, int mod) { 417 | int s = 0; 418 | int e = -1; 419 | List res = new LinkedList<>(); 420 | while (s != text.length()) { 421 | e = getMatchEnd(text, s, dfaGraph.start); 422 | if (e != -1) { 423 | res.add(text.substring(s, e)); 424 | s = e; 425 | } else { 426 | s++; 427 | } 428 | } 429 | return res; 430 | } 431 | 432 | // 获取正则表达式在字符串中能匹配到的结尾的位置 433 | private int getMatchEnd(String text, int pos, State curState) { 434 | int end = -1; 435 | if (curState.isEndState()) { 436 | return pos; 437 | } 438 | 439 | if (pos == text.length()) { 440 | for (State nextState : curState.next.getOrDefault(Constant.EPSILON, Collections.emptySet())) { 441 | end = getMatchEnd(text, pos, nextState); 442 | if (end != -1) { 443 | return end; 444 | } 445 | } 446 | } 447 | 448 | for (Map.Entry> entry : curState.next.entrySet()) { 449 | String edge = entry.getKey(); 450 | if (Constant.EPSILON.equals(edge)) { 451 | for (State nextState : entry.getValue()) { 452 | end = getMatchEnd(text, pos, nextState); 453 | if (end != -1) { 454 | return end; 455 | } 456 | } 457 | } else { 458 | MatchStrategy matchStrategy = MatchStrategyManager.getStrategy(edge); 459 | if (!matchStrategy.isMatch(text.charAt(pos), edge)) { 460 | continue; 461 | } 462 | // 遍历匹配策略 463 | for (State nextState : entry.getValue()) { 464 | end = getMatchEnd(text, pos + 1, nextState); 465 | if (end != -1) { 466 | return end; 467 | } 468 | } 469 | } 470 | } 471 | return -1; 472 | } 473 | // todo, 使用hopcraft算法将dfa最小化 474 | private DFAGraph hopcroft(DFAGraph dfaGraph){ 475 | return new DFAGraph(); 476 | } 477 | } 478 | -------------------------------------------------------------------------------- /src/main/java/xyz/xindoo/re/RegexTest.java: 
-------------------------------------------------------------------------------- 1 | package xyz.xindoo.re; 2 | 3 | import org.openjdk.jmh.annotations.Benchmark; 4 | import org.openjdk.jmh.annotations.Fork; 5 | import org.openjdk.jmh.annotations.Measurement; 6 | import org.openjdk.jmh.annotations.Threads; 7 | import org.openjdk.jmh.annotations.Warmup; 8 | import org.openjdk.jmh.runner.Runner; 9 | import org.openjdk.jmh.runner.options.Options; 10 | import org.openjdk.jmh.runner.options.OptionsBuilder; 11 | 12 | import java.util.List; 13 | 14 | public class RegexTest { 15 | 16 | // private static Regex regex = Regex.compile("a(b|c)*"); 17 | // private static String[] strs = {"ac", "acc", "a", "a bcccdb", "ab", "abcd", "a3abcd", "a33333defd", "aabcabcabcabcabcabcdb", 18 | // "abbbbbbbbb", "acccccccbad", "acccccccccccccccccccccccccb", "abbbbbbbbbbbbbbbc"}; 19 | // 20 | // @Benchmark 21 | // @Measurement(iterations = 2) 22 | // @Threads(1) 23 | // @Fork(0) 24 | // @Warmup(iterations = 0) 25 | // public void nfa() { 26 | // for (String str : strs) { 27 | // regex.isMatch(str); 28 | // } 29 | // } 30 | // 31 | // @Benchmark 32 | // @Measurement(iterations = 2) 33 | // @Threads(1) 34 | // @Fork(0) 35 | // @Warmup(iterations = 0) 36 | // public void dfaRecursion() { 37 | // for (String str : strs) { 38 | // regex.isMatch(str, 1); 39 | // } 40 | // } 41 | // 42 | // @Benchmark 43 | // @Measurement(iterations = 2) 44 | // @Threads(1) 45 | // @Fork(0) 46 | // @Warmup(iterations = 0) 47 | // public void dfaNonRecursion() { 48 | // for (String str : strs) { 49 | // regex.isDfaMatch(str); 50 | // } 51 | // } 52 | 53 | public static void main(String[] args) { 54 | test(); 55 | // Options options = new OptionsBuilder().include(RegexTest.class.getSimpleName()).build(); 56 | // try { 57 | // new Runner(options).run(); 58 | // } catch (Exception e) { 59 | // System.out.println(e.fillInStackTrace()); 60 | // } finally { 61 | // System.out.println("finshed"); 62 | // } 63 | } 64 | 65 | 
private static void test() { 66 | String str = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"; 67 | Regex regex1 = Regex.compile("a*aaaaaaaaaaaaaaaaaaaaaab"); 68 | System.out.println(regex1.isDfaMatch(str)); 69 | System.out.println("_________________"); 70 | System.out.println(regex1.isMatch(str)); 71 | System.out.println("_________________"); 72 | // Regex regex = Regex.compile("a(b|c)*"); 73 | // List res = regex.match("aabacabbbcaccc"); 74 | // regex.printNfa(); 75 | // System.out.println(""); 76 | // regex.printDfa(); 77 | // 78 | // System.out.println(regex.isMatch("ac")); 79 | // System.out.println(regex.isMatch("acc")); 80 | // System.out.println(regex.isMatch("a")); 81 | // System.out.println(regex.isMatch("a bcccdb")); 82 | // System.out.println(regex.isMatch("ab")); 83 | // System.out.println(regex.isMatch("abcd")); 84 | // System.out.println(regex.isMatch("a3abcd")); 85 | // System.out.println(regex.isMatch("a33333defd")); 86 | // System.out.println(regex.isMatch("aabcabcabcabcabcabcdb")); 87 | // 88 | // System.out.println("*********"); 89 | // System.out.println(regex.isDfaMatch("ac")); 90 | // System.out.println(regex.isDfaMatch("acc")); 91 | // System.out.println(regex.isDfaMatch("a")); 92 | // System.out.println(regex.isDfaMatch("a bcccdb")); 93 | // System.out.println(regex.isDfaMatch("ab")); 94 | // System.out.println(regex.isDfaMatch("abcd")); 95 | // System.out.println(regex.isDfaMatch("a3abcd")); 96 | // System.out.println(regex.isDfaMatch("a33333defd")); 97 | // System.out.println(regex.isDfaMatch("aabcabcabcabcabcabcdb")); 98 | // 99 | // System.out.println("*********"); 100 | // System.out.println(regex.isMatch("ac", 1)); 101 | // System.out.println(regex.isMatch("acc", 1)); 102 | // System.out.println(regex.isMatch("a", 1)); 103 | // System.out.println(regex.isMatch("a bcccdb", 1)); 104 | // System.out.println(regex.isMatch("ab", 1)); 105 | // System.out.println(regex.isMatch("abcd", 1)); 106 | // System.out.println(regex.isMatch("a3abcd", 
/**
 * Cursor over the characters of a pattern string.
 *
 * <p>Reading past the end is not an error: {@link #peak()} and {@link #next()} return
 * {@code '\0'} once the input is exhausted, so callers that loop on a terminator must also
 * check {@link #hasNext()}.
 */
public class Reader {
    private int cur = 0;
    private final char[] chars;

    public Reader(String regex) {
        this.chars = regex.toCharArray();
    }

    /**
     * Returns the next character without consuming it, or {@code '\0'} at end of input.
     * The name is a historical misspelling of "peek" that is kept for existing callers.
     */
    public char peak() {
        if (cur == chars.length) {
            return '\0';
        }
        return chars[cur];
    }

    /** Correctly spelled alias of {@link #peak()}. */
    public char peek() {
        return peak();
    }

    /** Consumes and returns the next character, or returns {@code '\0'} at end of input. */
    public char next() {
        if (cur == chars.length) {
            return '\0';
        }
        return chars[cur++];
    }

    /** Returns whether at least one character is still unread. */
    public boolean hasNext() {
        return cur < chars.length;
    }

    /**
     * Reads the body of a group whose opening parenthesis has already been consumed.
     *
     * <p>Consumes up to and including the matching closing parenthesis and returns the text in
     * between. Parentheses that are escaped with a backslash or that appear inside a
     * {@code [...]} character set are copied verbatim and do not affect nesting. If the input
     * ends before the group is closed, everything read so far is returned.
     *
     * @param reader the reader to consume from (callers pass this same instance)
     * @return the group body without the enclosing parentheses
     */
    public String getSubRegex(Reader reader) {
        int depth = 1;
        boolean inCharSet = false;
        StringBuilder regex = new StringBuilder();
        while (reader.hasNext()) {
            char ch = reader.next();
            if (inCharSet) {
                // A set ends at the first ']' and nothing inside it is grouping syntax.
                if (ch == ']') {
                    inCharSet = false;
                }
                regex.append(ch);
                continue;
            }
            if (ch == '\\') {
                // Copy the escape and the escaped character together.
                regex.append(ch);
                if (reader.hasNext()) {
                    regex.append(reader.next());
                }
                continue;
            }
            if (ch == '[') {
                inCharSet = true;
            } else if (ch == '(') {
                depth++;
            } else if (ch == ')') {
                depth--;
                if (depth == 0) {
                    break;
                }
            }
            regex.append(ch);
        }
        return regex.toString();
    }

    /**
     * Consumes and returns everything that is still unread.
     *
     * @param reader the reader to consume from (callers pass this same instance)
     */
    public String getRemainRegex(Reader reader) {
        StringBuilder regex = new StringBuilder();
        while (reader.hasNext()) {
            regex.append(reader.next());
        }
        return regex.toString();
    }
}
-------------------------------------------------------------------------------- 1 | package xyz.xindoo.re.common; 2 | 3 | import java.util.HashMap; 4 | import java.util.HashSet; 5 | import java.util.Map; 6 | import java.util.Set; 7 | 8 | public class State { 9 | protected static int idCnt = 0; 10 | protected int id; 11 | protected StateType stateType; 12 | 13 | public State() { 14 | this.id = idCnt++; 15 | this.stateType = StateType.GENERAL; 16 | } 17 | 18 | public Map> next = new HashMap<>(); 19 | 20 | public void addNext(String edge, State nfaState) { 21 | Set set = next.get(edge); 22 | if (set == null) { 23 | set = new HashSet<>(); 24 | next.put(edge, set); 25 | } 26 | set.add(nfaState); 27 | } 28 | 29 | public void setStateType(StateType stateType) { 30 | this.stateType = stateType; 31 | } 32 | 33 | public boolean isEndState() { 34 | return stateType == StateType.END; 35 | } 36 | 37 | public int getId() { 38 | return this.id; 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/xyz/xindoo/re/common/StateType.java: -------------------------------------------------------------------------------- 1 | package xyz.xindoo.re.common; 2 | 3 | public enum StateType { 4 | GENERAL, END 5 | } 6 | -------------------------------------------------------------------------------- /src/main/java/xyz/xindoo/re/dfa/DFAGraph.java: -------------------------------------------------------------------------------- 1 | package xyz.xindoo.re.dfa; 2 | 3 | import xyz.xindoo.re.common.State; 4 | 5 | import java.util.HashMap; 6 | import java.util.Map; 7 | import java.util.Set; 8 | 9 | public class DFAGraph { 10 | 11 | private Map nfaStates2dfaState = new HashMap<>(); 12 | public DFAState start = new DFAState(); 13 | 14 | // 这里用map保存NFAState结合是已有对应的DFAState, 有就直接拿出来用 15 | public DFAState getOrBuild(Set states) { 16 | String allStateIds = ""; 17 | int[] ids = states.stream() 18 | .mapToInt(state -> state.getId()) 19 | .sorted() 20 
| .toArray(); 21 | for (int id : ids) { 22 | allStateIds += "#"; 23 | allStateIds += id; 24 | } 25 | if (!nfaStates2dfaState.containsKey(allStateIds)) { 26 | DFAState dfaState = new DFAState(allStateIds, states); 27 | nfaStates2dfaState.put(allStateIds, dfaState); 28 | } 29 | return nfaStates2dfaState.get(allStateIds); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/main/java/xyz/xindoo/re/dfa/DFAState.java: -------------------------------------------------------------------------------- 1 | package xyz.xindoo.re.dfa; 2 | 3 | import xyz.xindoo.re.common.State; 4 | import xyz.xindoo.re.common.StateType; 5 | 6 | import java.util.HashSet; 7 | import java.util.Objects; 8 | import java.util.Set; 9 | 10 | public class DFAState extends State { 11 | public Set nfaStates = new HashSet<>(); 12 | // 保存对应NFAState的id,一个DFAState可能是多个NFAState的集合,所以拼接成String 13 | private String allStateIds; 14 | public DFAState() { 15 | this.stateType = StateType.GENERAL; 16 | } 17 | 18 | public DFAState(String allStateIds, Set states) { 19 | this.allStateIds = allStateIds; 20 | this.nfaStates.addAll(states); 21 | 22 | for (State state : states) { 23 | if (state.isEndState()) { // 如果有任意节点是终止态,新建的DFA节点就是终止态 24 | this.stateType = StateType.END; 25 | } 26 | } 27 | } 28 | 29 | public String getAllStateIds() { 30 | return allStateIds; 31 | } 32 | 33 | @Override 34 | public boolean equals(Object o) { 35 | if (this == o) { 36 | return true; 37 | } 38 | return allStateIds.equals(((DFAState)o).allStateIds); 39 | } 40 | 41 | @Override 42 | public int hashCode() { 43 | return Objects.hash(allStateIds); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/main/java/xyz/xindoo/re/nfa/NFAGraph.java: -------------------------------------------------------------------------------- 1 | package xyz.xindoo.re.nfa; 2 | 3 | import xyz.xindoo.re.common.Constant; 4 | 5 | public class NFAGraph { 6 | public 
NFAState start; 7 | public NFAState end; 8 | 9 | public NFAGraph(NFAState start, NFAState end) { 10 | this.start = start; 11 | this.end = end; 12 | } 13 | 14 | // | 15 | public void addParallelGraph(NFAGraph NFAGraph) { 16 | NFAState newStart = new NFAState(); 17 | NFAState newEnd = new NFAState(); 18 | newStart.addNext(Constant.EPSILON, this.start); 19 | newStart.addNext(Constant.EPSILON, NFAGraph.start); 20 | this.end.addNext(Constant.EPSILON, newEnd); 21 | NFAGraph.end.addNext(Constant.EPSILON, newEnd); 22 | this.start = newStart; 23 | this.end = newEnd; 24 | } 25 | 26 | // 27 | public void addSeriesGraph(NFAGraph NFAGraph) { 28 | this.end.addNext(Constant.EPSILON, NFAGraph.start); 29 | this.end = NFAGraph.end; 30 | } 31 | 32 | // * 重复0-n次 33 | public void repeatStar() { 34 | repeatPlus(); 35 | addSToE(); // 重复0 36 | } 37 | 38 | // ? 重复0次哦 39 | public void addSToE() { 40 | start.addNext(Constant.EPSILON, end); 41 | } 42 | 43 | // + 重复1-n次 44 | public void repeatPlus() { 45 | NFAState newStart = new NFAState(); 46 | NFAState newEnd = new NFAState(); 47 | newStart.addNext(Constant.EPSILON, this.start); 48 | end.addNext(Constant.EPSILON, newEnd); 49 | end.addNext(Constant.EPSILON, start); 50 | this.start = newStart; 51 | this.end = newEnd; 52 | } 53 | 54 | } 55 | -------------------------------------------------------------------------------- /src/main/java/xyz/xindoo/re/nfa/NFAState.java: -------------------------------------------------------------------------------- 1 | package xyz.xindoo.re.nfa; 2 | 3 | import xyz.xindoo.re.common.State; 4 | 5 | public class NFAState extends State { 6 | 7 | } 8 | -------------------------------------------------------------------------------- /src/main/java/xyz/xindoo/re/nfa/strategy/CharMatchStrategy.java: -------------------------------------------------------------------------------- 1 | package xyz.xindoo.re.nfa.strategy; 2 | 3 | public class CharMatchStrategy extends MatchStrategy{ 4 | 5 | @Override 6 | public boolean 
isMatch(char c, String edge) { 7 | return edge.charAt(0) == c; 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /src/main/java/xyz/xindoo/re/nfa/strategy/CharSetMatchStrategy.java: -------------------------------------------------------------------------------- 1 | package xyz.xindoo.re.nfa.strategy; 2 | 3 | public class CharSetMatchStrategy extends MatchStrategy { 4 | @Override 5 | public boolean isMatch(char c, String charSet) { 6 | boolean res = false; 7 | for (int i = 0; i < charSet.length(); i++) { 8 | if (charSet.charAt(0) == '^') { 9 | continue; 10 | } 11 | if ('-' == charSet.charAt(i)) { 12 | return c >= charSet.charAt(i-1) && c <= charSet.charAt(i+1); 13 | } 14 | if (c == charSet.charAt(i)) { 15 | res = true; 16 | break; 17 | } 18 | } 19 | if (charSet.charAt(0) == '^') { 20 | return !res; 21 | } 22 | return res; 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/main/java/xyz/xindoo/re/nfa/strategy/DigitalMatchStrategy.java: -------------------------------------------------------------------------------- 1 | package xyz.xindoo.re.nfa.strategy; 2 | 3 | public class DigitalMatchStrategy extends MatchStrategy { 4 | private boolean isReverse; 5 | 6 | public DigitalMatchStrategy(boolean isReverse) { 7 | this.isReverse = isReverse; 8 | } 9 | 10 | @Override 11 | public boolean isMatch(char c, String edge) { 12 | boolean res = c >= '0' && c <= '9'; 13 | if (isReverse) { 14 | return !res; 15 | } 16 | return res; 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /src/main/java/xyz/xindoo/re/nfa/strategy/DotMatchStrategy.java: -------------------------------------------------------------------------------- 1 | package xyz.xindoo.re.nfa.strategy; 2 | 3 | public class DotMatchStrategy extends MatchStrategy { 4 | @Override 5 | public boolean isMatch(char c, String edge) { 6 | return c != '\n' && c != '\r'; 7 | } 8 | } 9 
/**
 * Base type for edge-matching strategies.
 *
 * <p>A strategy decides whether one input character satisfies an edge label. This base
 * implementation accepts nothing; subclasses override {@link #isMatch(char, String)}.
 */
public class MatchStrategy {
    /** Set by subclasses that implement a negated class; {@code false} unless assigned. */
    protected boolean isReverse;

    /**
     * @param c    the input character
     * @param edge the label of the edge being tested
     * @return always {@code false} in this base implementation
     */
    public boolean isMatch(char c, String edge) {
        return false;
    }
}
matchStrategyMap.put(Constant.CHARSET, new CharSetMatchStrategy()); 23 | } 24 | 25 | public static MatchStrategy getStrategy(String key) { 26 | // 特殊字符的匹配 27 | if (matchStrategyMap.containsKey(key)) { 28 | return matchStrategyMap.get(key); 29 | } 30 | // 单字符和字符集的匹配 31 | if (key.length() == 1) { 32 | return matchStrategyMap.get(Constant.CHAR); 33 | } else { 34 | return matchStrategyMap.get(Constant.CHARSET); 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/main/java/xyz/xindoo/re/nfa/strategy/SpaceMatchStrategy.java: -------------------------------------------------------------------------------- 1 | package xyz.xindoo.re.nfa.strategy; 2 | 3 | public class SpaceMatchStrategy extends MatchStrategy { 4 | private boolean isReverse; 5 | 6 | public SpaceMatchStrategy(boolean isReverse) { 7 | this.isReverse = isReverse; 8 | } 9 | 10 | @Override 11 | public boolean isMatch(char c, String edge) { 12 | boolean res = (c == '\f' || c == '\n' || c == '\r' || c == '\t' || c == ' '); 13 | if (isReverse) { 14 | return !res; 15 | } 16 | return res; 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /src/main/java/xyz/xindoo/re/nfa/strategy/WMatchStrategy.java: -------------------------------------------------------------------------------- 1 | package xyz.xindoo.re.nfa.strategy; 2 | 3 | /** 4 | * 匹配 \w和\W 5 | */ 6 | public class WMatchStrategy extends MatchStrategy { 7 | 8 | public WMatchStrategy(boolean isReverse) { 9 | this.isReverse = isReverse; 10 | } 11 | 12 | @Override 13 | public boolean isMatch(char c, String edge) { 14 | boolean res = c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' || c >= '0' && c <= '9'; 15 | if (isReverse) { 16 | return !res; 17 | } 18 | return res; 19 | } 20 | } 21 | --------------------------------------------------------------------------------