├── .gitignore ├── LICENSE ├── README.md ├── benchmark ├── pom.xml └── src │ └── main │ ├── java │ └── com │ │ └── github │ │ └── s4ke │ │ └── moar │ │ └── benchmark │ │ ├── HashMapBench.java │ │ ├── JavaPatternBench.java │ │ ├── Main.java │ │ ├── MoaPatternBench.java │ │ └── Regex.java │ └── resources │ └── 128sonnets.txt ├── cli ├── exactMatch-test.txt ├── multiline-test.txt ├── pom.xml ├── regex-test.txt └── src │ └── main │ └── java │ └── com │ └── github │ └── s4ke │ └── moar │ └── cli │ └── Main.java ├── documentation ├── img │ ├── CharSeq.png │ ├── EdgeGraph.png │ ├── Edges.png │ ├── MatchInfo.png │ ├── Moa.png │ ├── MoaMatcher.png │ ├── MoaPattern.png │ ├── Regex_Interface.png │ ├── Regex_interface_DSL.png │ ├── Variable.png │ └── state_interface.png ├── implementation.md └── implementation.pdf ├── engine ├── pom.xml └── src │ ├── main │ ├── antlr4 │ │ └── com │ │ │ └── github │ │ │ └── s4ke │ │ │ └── moar │ │ │ └── regex │ │ │ └── parser │ │ │ └── Regex.g4 │ └── java │ │ └── com │ │ └── github │ │ └── s4ke │ │ └── moar │ │ ├── MoaMatcher.java │ │ ├── MoaPattern.java │ │ ├── NonDeterministicException.java │ │ ├── moa │ │ ├── Moa.java │ │ ├── MoaMatcherImpl.java │ │ ├── edgegraph │ │ │ ├── ActionType.java │ │ │ ├── CurStateHolder.java │ │ │ ├── EdgeGraph.java │ │ │ └── MemoryAction.java │ │ └── states │ │ │ ├── BasicState.java │ │ │ ├── BoundState.java │ │ │ ├── MatchInfo.java │ │ │ ├── SetState.java │ │ │ ├── State.java │ │ │ ├── Variable.java │ │ │ └── VariableState.java │ │ ├── regex │ │ ├── Binding.java │ │ ├── BoundConstants.java │ │ ├── BoundaryRegex.java │ │ ├── CharacterClassesUtils.java │ │ ├── Choice.java │ │ ├── Concat.java │ │ ├── EdgeContributor.java │ │ ├── Epsilon.java │ │ ├── Plus.java │ │ ├── Primitive.java │ │ ├── Reference.java │ │ ├── Regex.java │ │ ├── SetRegex.java │ │ ├── StateContributor.java │ │ ├── Symbol.java │ │ ├── VariableOccurence.java │ │ └── parser │ │ │ ├── RegexCompiler.java │ │ │ ├── RegexGroupNameListener.java │ │ │ └── 
RegexTreeListener.java │ │ ├── strings │ │ ├── CodePointSet.java │ │ └── EfficientString.java │ │ └── util │ │ ├── Accessor.java │ │ ├── CharSeq.java │ │ ├── ExecStack.java │ │ ├── IntCharSeq.java │ │ ├── Perf.java │ │ └── RangeRep.java │ └── test │ └── java │ └── com │ └── github │ └── s4ke │ └── moar │ ├── regex │ ├── FullTextTest.java │ ├── JavaMoaEqualityTest.java │ ├── MatchReplaceTest.java │ ├── MoaWithDSLTest.java │ ├── ParserTest.java │ ├── TestUtil.java │ └── VSJavaPattern.java │ └── util │ ├── GenericMatcher.java │ ├── GenericMoaMatcher.java │ └── PatternMatcher.java ├── json ├── pom.xml └── src │ ├── main │ └── java │ │ └── com │ │ └── github │ │ └── s4ke │ │ └── moar │ │ └── json │ │ └── MoarJSONSerializer.java │ └── test │ └── java │ └── com │ └── github │ └── s4ke │ └── moar │ └── json │ └── MoarJSONSerializerTest.java ├── lucene ├── pom.xml └── src │ ├── main │ └── java │ │ └── com │ │ └── github │ │ └── s4ke │ │ └── moar │ │ └── lucene │ │ └── query │ │ ├── ByteCharSeq.java │ │ └── MoarQuery.java │ └── test │ └── java │ └── com │ └── github │ └── s4ke │ └── moar │ └── lucene │ └── query │ └── test │ ├── BaseLuceneTest.java │ ├── MoarQueryPerfTest.java │ └── MoarQueryTest.java ├── pom.xml └── presentation ├── README.md ├── automata.pdf ├── automata.tex └── presentation.md /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | 3 | # Mobile Tools for Java (J2ME) 4 | .mtj.tmp/ 5 | 6 | # Package Files # 7 | *.jar 8 | *.war 9 | *.ear 10 | 11 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 12 | hs_err_pid* 13 | 14 | *.iml 15 | .idea/* 16 | 17 | target/ 18 | 19 | lucene_dir/ 20 | 21 | _config.yml 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Martin Braun 4 | 5 | Permission is 
hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # moar 2 | Deterministic Regular Expressions with Backreferences. Uses Memory Occurrence Automata to match the input. 3 | 4 | ANTLR is used for Pattern compilation. 5 | 6 | ## Why? 7 | Java's Patterns are not deterministic and might get you into trouble. None of the deterministic alternatives support backreferences. This library does. It is, however, not a drop-in replacement for Java's patterns, as some things cannot be expressed while preserving determinism. 8 | 9 | ## Technical documentation 10 | 11 | The technical documentation can be found in the [documentation folder](documentation/implementation.md). 12 | 13 | ## Theoretical Background 14 | 15 | I held a talk at university covering the basics of this library as well. 
It can be found in the [presentation folder](presentation/presentation.md). 16 | 17 | A paper explaining the theoretical background can be found [here](http://ddfy.de/sci/det.pdf). 18 | 19 | ## Supported Syntax 20 | 21 | While somewhat similar to Java's Pattern syntax, moar's supported syntax might differ in some cases. The full supported Pattern syntax can easily be seen in the ANTLR [grammar]( 22 | https://github.com/s4ke/moar/blob/master/engine/src/main/antlr4/com/github/s4ke/moar/regex/parser/Regex.g4): 23 | 24 | ## Examples 25 | 26 | ```Java 27 | MoaPattern pattern = MoaPattern.compile("^Deterministic|OrNot$"); 28 | MoaMatcher matcher = pattern.matcher("Deterministic"); 29 | if ( matcher.matches() ) { 30 | System.out.println("yay"); 31 | } 32 | ``` 33 | 34 | Or this cool language: 35 | 36 | ```Java 37 | MoaPattern pattern = MoaPattern.compile("((?<y>\\k<x>)(?<x>\\k<y>a))+"); 38 | MoaMatcher matcher = pattern.matcher("aaaa"); 39 | if( matcher.matches() ) { 40 | System.out.println("yay again."); 41 | } 42 | ``` 43 | -------------------------------------------------------------------------------- /benchmark/pom.xml: -------------------------------------------------------------------------------- 1 | 31 | 32 | 34 | 35 | moar-parent 36 | com.github.s4ke 37 | 1.0-SNAPSHOT 38 | 39 | 4.0.0 40 | 41 | moar-benchmark 42 | jar 43 | 44 | moar-benchmark 45 | 46 | 50 | 51 | 52 | 53 | 54 | com.github.s4ke 55 | moar-engine 56 | ${project.version} 57 | 58 | 59 | 60 | com.github.s4ke 61 | moar-json 62 | ${project.version} 63 | 64 | 65 | 66 | com.google.re2j 67 | re2j 68 | 1.1 69 | 70 | 71 | 72 | commons-cli 73 | commons-cli 74 | 1.3.1 75 | 76 | 77 | 78 | org.openjdk.jmh 79 | jmh-core 80 | ${jmh.version} 81 | 82 | 83 | org.openjdk.jmh 84 | jmh-generator-annprocess 85 | ${jmh.version} 86 | provided 87 | 88 | 89 | 90 | 91 | UTF-8 92 | 93 | 96 | 1.17.2 97 | 98 | 101 | 1.8 102 | 103 | 106 | benchmarks 107 | 108 | 109 | 110 | 111 | 112 | 113 | org.apache.maven.plugins 114 | maven-compiler-plugin 
115 | 3.1 116 | 117 | ${javac.target} 118 | ${javac.target} 119 | ${javac.target} 120 | 121 | 122 | 123 | org.apache.maven.plugins 124 | maven-shade-plugin 125 | 2.2 126 | 127 | 128 | package 129 | 130 | shade 131 | 132 | 133 | ${uberjar.name} 134 | 135 | 137 | com.github.s4ke.moar.benchmark.Main 138 | 139 | 140 | 141 | 142 | 146 | *:* 147 | 148 | META-INF/*.SF 149 | META-INF/*.DSA 150 | META-INF/*.RSA 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | maven-clean-plugin 163 | 2.5 164 | 165 | 166 | maven-deploy-plugin 167 | 2.8.1 168 | 169 | 170 | maven-install-plugin 171 | 2.5.1 172 | 173 | 174 | maven-jar-plugin 175 | 2.4 176 | 177 | 178 | maven-javadoc-plugin 179 | 2.9.1 180 | 181 | 182 | maven-resources-plugin 183 | 2.6 184 | 185 | 186 | maven-site-plugin 187 | 3.3 188 | 189 | 190 | maven-source-plugin 191 | 2.2.1 192 | 193 | 194 | maven-surefire-plugin 195 | 2.17 196 | 197 | 198 | 199 | 200 | 201 | 202 | -------------------------------------------------------------------------------- /benchmark/src/main/java/com/github/s4ke/moar/benchmark/HashMapBench.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 
15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | */ 24 | package com.github.s4ke.moar.benchmark; 25 | 26 | import java.util.HashMap; 27 | import java.util.Map; 28 | import java.util.Random; 29 | 30 | import org.openjdk.jmh.annotations.Benchmark; 31 | import org.openjdk.jmh.annotations.Scope; 32 | import org.openjdk.jmh.annotations.Setup; 33 | import org.openjdk.jmh.annotations.State; 34 | 35 | /** 36 | * @author Martin Braun 37 | */ 38 | @State(Scope.Benchmark) 39 | public class HashMapBench { 40 | 41 | private int[] arr; 42 | private Map map; 43 | private static final int ARR_SIZE = 4; 44 | 45 | private static final int GET_COUNT = 10_000; 46 | 47 | @Setup 48 | public void setup() { 49 | Random random = new Random(42); 50 | this.arr = new int[ARR_SIZE]; 51 | this.map = new HashMap<>(); 52 | for(int i = 0; i < ARR_SIZE; ++i) { 53 | int val = random.nextInt(); 54 | this.arr[i] = val; 55 | this.map.put(i, val); 56 | } 57 | } 58 | 59 | int hack = 0; 60 | 61 | @Benchmark 62 | public void benchArray() { 63 | hack = 0; 64 | Random random = new Random( 43 ); 65 | for(int i = 0; i < GET_COUNT; ++i) { 66 | hack += this.arr[random.nextInt(ARR_SIZE)]; 67 | } 68 | } 69 | 70 | @Benchmark 71 | public void benchMap() { 72 | hack = 0; 73 | Random random = new Random( 43 ); 74 | for(int i = 0; i < GET_COUNT; ++i) { 75 | hack += this.map.get(random.nextInt(ARR_SIZE)); 76 | } 77 | } 78 | 79 | } 80 | -------------------------------------------------------------------------------- 
/benchmark/src/main/java/com/github/s4ke/moar/benchmark/JavaPatternBench.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | */ 24 | package com.github.s4ke.moar.benchmark; 25 | 26 | import java.io.BufferedReader; 27 | import java.io.IOException; 28 | import java.io.InputStreamReader; 29 | import java.util.ArrayList; 30 | import java.util.List; 31 | import java.util.regex.Matcher; 32 | import java.util.regex.Pattern; 33 | 34 | import org.openjdk.jmh.annotations.Benchmark; 35 | import org.openjdk.jmh.annotations.Scope; 36 | import org.openjdk.jmh.annotations.Setup; 37 | import org.openjdk.jmh.annotations.State; 38 | import org.openjdk.jmh.annotations.TearDown; 39 | 40 | @State(Scope.Benchmark) 41 | public class JavaPatternBench { 42 | 43 | private List patterns = new ArrayList<>(); 44 | private String sonnets; 45 | private final Pattern twoPowerOfN = Pattern.compile( Regex.TWO_TO_POWER_OF_N_MOA ); 46 | 47 | @Setup 48 | public void setup() { 49 | for ( String str : Regex.REGEX_TO_BENCH ) { 50 | patterns.add( Pattern.compile( str, Pattern.DOTALL ) ); 51 | } 52 | try (BufferedReader reader = new BufferedReader( new InputStreamReader( Main.class.getResourceAsStream( 53 | "/128sonnets.txt" ) ) )) { 54 | StringBuilder builder = new StringBuilder(); 55 | String str; 56 | while ( (str = reader.readLine()) != null ) { 57 | builder.append( str ).append( "\n" ); 58 | } 59 | this.sonnets = builder.toString(); 60 | } 61 | catch (IOException e) { 62 | throw new AssertionError( e ); 63 | } 64 | 65 | } 66 | 67 | int matches = 0; 68 | 69 | @Benchmark 70 | public void benchJavaPattern() { 71 | this.matches = 0; 72 | for ( Pattern pattern : this.patterns ) { 73 | Matcher matcher = pattern.matcher( sonnets ); 74 | while ( matcher.find() ) { 75 | ++matches; 76 | } 77 | } 78 | } 79 | 80 | @TearDown 81 | public void tearDown() { 82 | System.out.println(this.matches); 83 | } 84 | 85 | @Benchmark 86 | public void benchJavaPatternEasy() { 87 | for ( Pattern pattern : this.patterns ) { 88 | for ( String easy : Regex.EASY_MATCHES ) { 89 | if ( !pattern.matcher( easy ).matches() ) { 90 | //throw new 
AssertionError( pattern + " did not match " + easy ); 91 | } 92 | } 93 | } 94 | } 95 | 96 | //this will never work -.- 97 | //Java Patterns do not support stuff like this 98 | //@Benchmark 99 | public void benchMoaTwoPowerN() { 100 | this.matches = 0; 101 | for ( int i = 0; i < Regex.TWO_TO_POWER_OF_N_AND_OTHERS.length; ++i ) { 102 | if ( this.twoPowerOfN.matcher( Regex.TWO_TO_POWER_OF_N_AND_OTHERS[i] ).matches() ) { 103 | ++this.matches; 104 | } 105 | } 106 | if(this.matches == 0) { 107 | throw new AssertionError(); 108 | } 109 | } 110 | 111 | } 112 | -------------------------------------------------------------------------------- /benchmark/src/main/java/com/github/s4ke/moar/benchmark/Main.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | */ 24 | package com.github.s4ke.moar.benchmark; 25 | 26 | 27 | import java.io.IOException; 28 | 29 | import org.apache.commons.cli.ParseException; 30 | import org.openjdk.jmh.runner.Runner; 31 | import org.openjdk.jmh.runner.RunnerException; 32 | import org.openjdk.jmh.runner.options.OptionsBuilder; 33 | 34 | /** 35 | * @author Martin Braun 36 | */ 37 | public class Main { 38 | 39 | public static void main(String[] args) throws ParseException, IOException, RunnerException { 40 | org.openjdk.jmh.runner.options.Options opt = new OptionsBuilder() 41 | .include( JavaPatternBench.class.getSimpleName() ) 42 | .include( MoaPatternBench.class.getSimpleName() ) 43 | .include( HashMapBench.class.getSimpleName() ) 44 | .warmupIterations( 15 ) 45 | .measurementIterations( 100 ) 46 | .forks( 1 ) 47 | .jvmArgs( "-ea" ) 48 | .shouldFailOnError( false ) // switch to "true" to fail the complete run 49 | .build(); 50 | 51 | new Runner( opt ).run(); 52 | } 53 | 54 | } 55 | 56 | -------------------------------------------------------------------------------- /benchmark/src/main/java/com/github/s4ke/moar/benchmark/MoaPatternBench.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 
15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | */ 24 | package com.github.s4ke.moar.benchmark; 25 | 26 | import java.io.BufferedReader; 27 | import java.io.IOException; 28 | import java.io.InputStreamReader; 29 | import java.util.ArrayList; 30 | import java.util.List; 31 | 32 | import com.github.s4ke.moar.MoaMatcher; 33 | import com.github.s4ke.moar.MoaPattern; 34 | import com.github.s4ke.moar.util.CharSeq; 35 | import com.github.s4ke.moar.util.IntCharSeq; 36 | import org.openjdk.jmh.annotations.Benchmark; 37 | import org.openjdk.jmh.annotations.Scope; 38 | import org.openjdk.jmh.annotations.Setup; 39 | import org.openjdk.jmh.annotations.State; 40 | import org.openjdk.jmh.annotations.TearDown; 41 | 42 | /** 43 | * @author Martin Braun 44 | */ 45 | @State(Scope.Benchmark) 46 | public class MoaPatternBench { 47 | 48 | private List patterns = new ArrayList<>(); 49 | private CharSeq sonnets; 50 | private CharSeq[] easyMatches; 51 | private final MoaPattern twoPowerOfN = MoaPattern.compile( Regex.TWO_TO_POWER_OF_N_MOA ); 52 | 53 | @Setup 54 | public void setup() { 55 | for ( String str : Regex.REGEX_TO_BENCH ) { 56 | patterns.add( MoaPattern.compile( str ) ); 57 | } 58 | try (BufferedReader reader = new BufferedReader( new InputStreamReader( Main.class.getResourceAsStream( 59 | "/128sonnets.txt" ) ) )) { 60 | StringBuilder builder = new StringBuilder(); 61 | String str; 62 | while ( (str = reader.readLine()) != null ) { 63 | builder.append( str ).append( "\n" ); 64 | } 65 | this.sonnets = new IntCharSeq( 
builder.toString() ); 66 | } 67 | catch (IOException e) { 68 | throw new AssertionError( e ); 69 | } 70 | 71 | this.easyMatches = new CharSeq[Regex.EASY_MATCHES.length]; 72 | for(int i = 0; i < Regex.EASY_MATCHES.length; ++i) { 73 | this.easyMatches[i] = new IntCharSeq( Regex.EASY_MATCHES[i] ); 74 | } 75 | 76 | } 77 | 78 | int matches = 0; 79 | 80 | @TearDown 81 | public void tearDown() { 82 | System.out.println(this.matches); 83 | } 84 | 85 | @Benchmark 86 | public void benchMoaPattern() { 87 | this.matches = 0; 88 | for ( MoaPattern pattern : this.patterns ) { 89 | MoaMatcher matcher = pattern.matcher( this.sonnets ); 90 | while ( matcher.nextMatch() ) { 91 | ++matches; 92 | } 93 | } 94 | } 95 | 96 | @Benchmark 97 | public void benchMoaPatternEasy() { 98 | for ( MoaPattern pattern : this.patterns ) { 99 | for ( CharSeq easy : this.easyMatches ) { 100 | if ( !pattern.matcher( easy ).matches() ) { 101 | //throw new AssertionError( pattern + " did not match " + easy ); 102 | } 103 | } 104 | } 105 | } 106 | 107 | @Benchmark 108 | public void benchMoaTwoPowerN() { 109 | this.matches = 0; 110 | for(int i = 0; i < Regex.TWO_TO_POWER_OF_N_AND_OTHERS_CHARSEQ.length; ++i) { 111 | if(this.twoPowerOfN.matcher( Regex.TWO_TO_POWER_OF_N_AND_OTHERS_CHARSEQ[i] ).matches()) { 112 | ++this.matches; 113 | } 114 | } 115 | if(this.matches == 0) { 116 | throw new AssertionError(); 117 | } 118 | } 119 | 120 | } 121 | -------------------------------------------------------------------------------- /benchmark/src/main/java/com/github/s4ke/moar/benchmark/Regex.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, 
merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | */ 24 | package com.github.s4ke.moar.benchmark; 25 | 26 | import com.github.s4ke.moar.util.CharSeq; 27 | import com.github.s4ke.moar.util.IntCharSeq; 28 | 29 | /** 30 | * @author Martin Braun 31 | */ 32 | public class Regex { 33 | 34 | public static final String[] REGEX_TO_BENCH = new String[] { 35 | "th(e)\\1+", 36 | "fairest", 37 | "from", 38 | "beauty", 39 | "foe", 40 | "f((riend)|(oe))", 41 | "[A-Z]([a-z])+", 42 | "shall besiege", 43 | "(c)?old" 44 | }; 45 | 46 | public static final String[] EASY_MATCHES = new String[] { 47 | "the", 48 | "thee", 49 | "fairest", 50 | "from", 51 | "beauty", 52 | "foe", 53 | "friend", 54 | "Asdfwqekadkweiqkdkqew", 55 | "shall besiege", 56 | "cold", 57 | "old" 58 | }; 59 | 60 | public static final String[] TWO_TO_POWER_OF_N_AND_OTHERS = new String[128]; 61 | public static final CharSeq[] TWO_TO_POWER_OF_N_AND_OTHERS_CHARSEQ = new CharSeq[128]; 62 | static { 63 | StringBuilder builder = new StringBuilder( "a" ); 64 | //so that we start at 2 a's in arr[0] 65 | for(int i = 0; i < TWO_TO_POWER_OF_N_AND_OTHERS.length; ++i) { 66 | String curStr = builder.append( "a" ).toString(); 67 | TWO_TO_POWER_OF_N_AND_OTHERS[i] 
= curStr; 68 | TWO_TO_POWER_OF_N_AND_OTHERS_CHARSEQ[i] = new IntCharSeq( curStr ); 69 | } 70 | } 71 | 72 | public static final String TWO_TO_POWER_OF_N_MOA = "(a(\\1)+)"; 73 | 74 | } 75 | -------------------------------------------------------------------------------- /cli/exactMatch-test.txt: -------------------------------------------------------------------------------- 1 | test -------------------------------------------------------------------------------- /cli/multiline-test.txt: -------------------------------------------------------------------------------- 1 | This is a test. 2 | 3 | This is yet another test. 4 | 5 | This also. -------------------------------------------------------------------------------- /cli/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | moar-parent 7 | com.github.s4ke 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | moar-cli 13 | 14 | 15 | 16 | com.github.s4ke 17 | moar-engine 18 | ${project.version} 19 | 20 | 21 | 22 | com.github.s4ke 23 | moar-json 24 | ${project.version} 25 | 26 | 27 | 28 | commons-cli 29 | commons-cli 30 | 1.3.1 31 | 32 | 33 | 34 | 35 | 36 | 37 | maven-assembly-plugin 38 | 2.6 39 | 40 | 41 | jar-with-dependencies 42 | 43 | 44 | 45 | com.github.s4ke.moar.cli.Main 46 | 47 | 48 | 49 | 50 | 51 | make-assembly 52 | 53 | package 54 | 55 | 56 | single 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /cli/regex-test.txt: -------------------------------------------------------------------------------- 1 | "test 2 | 3 | test 4 | 5 | anotherTest -------------------------------------------------------------------------------- /documentation/img/CharSeq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moar-regex/moar/c406bddfbf78c00830fbbf1235290c20007f3325/documentation/img/CharSeq.png 
-------------------------------------------------------------------------------- /documentation/img/EdgeGraph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moar-regex/moar/c406bddfbf78c00830fbbf1235290c20007f3325/documentation/img/EdgeGraph.png -------------------------------------------------------------------------------- /documentation/img/Edges.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moar-regex/moar/c406bddfbf78c00830fbbf1235290c20007f3325/documentation/img/Edges.png -------------------------------------------------------------------------------- /documentation/img/MatchInfo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moar-regex/moar/c406bddfbf78c00830fbbf1235290c20007f3325/documentation/img/MatchInfo.png -------------------------------------------------------------------------------- /documentation/img/Moa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moar-regex/moar/c406bddfbf78c00830fbbf1235290c20007f3325/documentation/img/Moa.png -------------------------------------------------------------------------------- /documentation/img/MoaMatcher.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moar-regex/moar/c406bddfbf78c00830fbbf1235290c20007f3325/documentation/img/MoaMatcher.png -------------------------------------------------------------------------------- /documentation/img/MoaPattern.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moar-regex/moar/c406bddfbf78c00830fbbf1235290c20007f3325/documentation/img/MoaPattern.png -------------------------------------------------------------------------------- 
/documentation/img/Regex_Interface.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moar-regex/moar/c406bddfbf78c00830fbbf1235290c20007f3325/documentation/img/Regex_Interface.png -------------------------------------------------------------------------------- /documentation/img/Regex_interface_DSL.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moar-regex/moar/c406bddfbf78c00830fbbf1235290c20007f3325/documentation/img/Regex_interface_DSL.png -------------------------------------------------------------------------------- /documentation/img/Variable.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moar-regex/moar/c406bddfbf78c00830fbbf1235290c20007f3325/documentation/img/Variable.png -------------------------------------------------------------------------------- /documentation/img/state_interface.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moar-regex/moar/c406bddfbf78c00830fbbf1235290c20007f3325/documentation/img/state_interface.png -------------------------------------------------------------------------------- /documentation/implementation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moar-regex/moar/c406bddfbf78c00830fbbf1235290c20007f3325/documentation/implementation.pdf -------------------------------------------------------------------------------- /engine/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | moar-engine 8 | 9 | 10 | moar-parent 11 | com.github.s4ke 12 | 1.0-SNAPSHOT 13 | ../pom.xml 14 | 15 | 16 | 17 | UTF-8 18 | 19 | 4.5.3 20 | 21 | 22 | 23 | 24 | 25 | org.apache.maven.plugins 26 | maven-compiler-plugin 27 | 3.2 28 | 29 | 
grammar Regex;

/**
 * Grammar for parsing Perl/Java-style Regexes
 * after: http://www.cs.sfu.ca/~cameron/Teaching/384/99-3/regexp-plg.html
 * but with left recursion eliminated
 *
 * NOTE(review): the `union` rule below is still written with direct left
 * recursion (`union '|' concatenation`) - confirm whether the sentence above
 * is stale.
 */

// entry rule: either an empty input or one union that may be wrapped in a
// start boundary and an end boundary
regex:
    EOF
    | startBoundary? union endBoundary? EOF;

// start boundary: the START token ('^') or the escape sequence ESC 'G'
startBoundary :
    START
    | prevMatch;
prevMatch : ESC 'G';

// end boundary: the EOS token ('$') or the escape sequence ESC 'z'
endBoundary :
    EOS
    | endOfInput;
endOfInput : ESC 'z';

// alternatives separated by '|'
union :
    concatenation
    | union '|' concatenation;

// one or more basicRegex in sequence (right-recursive)
concatenation :
    basicRegex
    | basicRegex concatenation;

// an elementaryRegex, optionally followed by exactly one of '*', '+' or '?'
basicRegex :
    star
    | plus
    | orEpsilon
    | elementaryRegex;

star :
    elementaryRegex '*';
plus :
    elementaryRegex '+';
orEpsilon:
    elementaryRegex '?';

elementaryRegex :
    backRef
    | group
    | set
    | charOrEscaped
    | stockSets
    | ANY;

// groups: "(...)" and "(?<name>...)" are capturing, "(?:...)" is non-capturing;
// the group content (union) is optional in both forms
group :
    '(' (capturingGroup | nonCapturingGroup) ')';
capturingGroup : ('?' '<' groupName '>')? union?;
nonCapturingGroup: '?' ':' union?;
groupName : character+;

// back references: by number (ESC number) or by name (ESC 'k' '<' name '>')
backRef :
    ESC number
    | ESC 'k' '<' groupName '>';

// character sets: "[...]" and the negated form "[^...]"
set :
    positiveSet
    | negativeSet;
positiveSet : '[' setItems ']';
negativeSet : '[^' setItems ']';
setItems :
    setItem
    | setItem setItems;
setItem :
    range
    | charOrEscaped;
range :
    charOrEscaped '-' charOrEscaped;

//should these be handled in the TreeListener?
//if so, we could patch stuff easily without changing
//the grammar
stockSets:
    whiteSpace
    | nonWhiteSpace
    | digit
    | nonDigit
    | wordCharacter
    | nonWordCharacter;
whiteSpace : ESC 's';
nonWhiteSpace : ESC 'S';
digit : ESC 'd';
nonDigit : ESC 'D';
wordCharacter : ESC 'w';
nonWordCharacter : ESC 'W';

// a single character: plain, escaped, or a utf32 sequence enclosed in
// UTF_32_MARKER tokens ('~')
charOrEscaped :
    character
    | escapeSeq
    | UTF_32_MARKER utf32 UTF_32_MARKER;
// this odd separation of unused chars and "used chars" is due to ANTLR
// processing in two phases. At first, only the token rules (in CAPS) are
// processed and then the parser rules are used. For normal grammars (for
// programming languages) this is fine, but in our case this means this extra
// (and ugly) work.
// Due to this, every single char that is to be matched must be tokenized
// (the ones that are not part of a NAMED token rule are just implicitly made
// into their own token rule). Every "non special" char
// (the ones that are not explicitly mentioned) is therefore tokenized
// into UNUSED_CHARS.
// This approach is by far easier than a hand written parser, though.
character : (UNUSED_CHARS | ZERO | ONE_TO_NINE | 's' | 'S' | 'd' | 'D' | 'w' | 'W' | 'k' | 'z' | 'G' | ':' | '<' | '>' );
escapeSeq : ESC escapee;
// the tokens that may follow ESC inside an escapeSeq
escapee : '[' | ']' | '(' | ')'
    | ESC | ANY | EOS | START | UTF_32_MARKER
    | '*' | '+' | '?'
    | '-' ;
utf32 : (character | escapeSeq)+;

// decimal number without a leading zero (used by numbered back references)
number : ONE_TO_NINE (ZERO | ONE_TO_NINE)*;

ZERO : '0';
ONE_TO_NINE : [1-9];
ESC : '\\';
ANY : '.';
EOS : '$';
START : '^';
UTF_32_MARKER : '~';

// every char that is not listed here becomes an UNUSED_CHARS token
UNUSED_CHARS :
    ~('0' .. '9'
    | '[' | ']' | '(' | ')'
    | '\\' | '.' | '$' | '^'
    | '*' | '+' | '?'
    | ':'
    | 's' | 'S' | 'd' | 'D' | 'w' | 'W' | 'k' | 'z' | 'G'
    | '~');
23 | */ 24 | package com.github.s4ke.moar; 25 | 26 | import com.github.s4ke.moar.util.CharSeq; 27 | 28 | /** 29 | * Similar to Java Patterns this encapsulates the 30 | * automaton state during matching. These objects 31 | * are usually much more lightweight, but can still 32 | * be reused for minimum object creation overhead. 33 | * 34 | * @author Martin Braun 35 | */ 36 | public interface MoaMatcher { 37 | 38 | /** 39 | * same as {@link MoaMatcher#reuse(CharSeq)} but with a {@link com.github.s4ke.moar.util.IntCharSeq} to represent 40 | * basic Java CharSequences 41 | * @param str the CharSequence that this object is to be used with 42 | * @return this (for chaining purposes) 43 | */ 44 | MoaMatcher reuse(CharSequence str); 45 | 46 | /** 47 | * reuse this instance without recreating it 48 | * @param seq the CharSeq that this object is to be used with 49 | * @return this (for chaining purposes) 50 | */ 51 | MoaMatcher reuse(CharSeq seq); 52 | 53 | /** 54 | * replaces the first match of the underlying {@link MoaPattern} with the given String 55 | * and returns the result 56 | * @param replacement the String to replace the match with 57 | * @return the resulting String 58 | */ 59 | String replaceFirst(String replacement); 60 | 61 | /** 62 | * replaces all matches of the underlying {@link MoaPattern} with the given String 63 | * and returns the result 64 | * @param replacement the string to replace the matches with 65 | * @return the resulting string 66 | */ 67 | String replaceAll(String replacement); 68 | 69 | /** 70 | * @return the start of the current match 71 | */ 72 | int getStart(); 73 | 74 | /** 75 | * @return the end of the current match (exclusive) 76 | */ 77 | int getEnd(); 78 | 79 | /** 80 | * goes through the input and searches for the next match 81 | * (maximum length) the underlying {@link MoaPattern} can produce. 
82 | * The next call of this method will start at the end of the last match 83 | * (if any) 84 | * 85 | * @return true iff there was a match 86 | */ 87 | boolean nextMatch(); 88 | 89 | /** 90 | * treats the whole input atomically and tries to match the underlying {@link MoaPattern} against it 91 | */ 92 | boolean matches(); 93 | 94 | /** 95 | * @param occurence the variable occurence index to return (1-based) 96 | * @return the contents of the variable as a String 97 | * @throws IllegalArgumentException if Variable is non-existent 98 | */ 99 | String getVariableContent(int occurence); 100 | 101 | /** 102 | * @param name the name of the variable to return 103 | * @return the contents of the variable as a String 104 | * @throws IllegalArgumentException if Variable is non-existent 105 | */ 106 | String getVariableContent(String name); 107 | 108 | } 109 | -------------------------------------------------------------------------------- /engine/src/main/java/com/github/s4ke/moar/MoaPattern.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | */ 24 | package com.github.s4ke.moar; 25 | 26 | import com.github.s4ke.moar.moa.Moa; 27 | import com.github.s4ke.moar.regex.Regex; 28 | import com.github.s4ke.moar.regex.parser.RegexCompiler; 29 | import com.github.s4ke.moar.util.Accessor; 30 | import com.github.s4ke.moar.util.CharSeq; 31 | 32 | /** 33 | * Deterministic Regexes with BackReferences using {@link Moa}'s. 34 | * Works with CodePoints so that matching against UTF-32 Strings is 35 | * possible. 36 | * 37 | * @author Martin Braun 38 | */ 39 | public final class MoaPattern { 40 | 41 | private final Moa moa; 42 | private final String regex; 43 | 44 | private MoaPattern(Moa moa, String regex) { 45 | this.moa = moa; 46 | this.regex = regex; 47 | } 48 | 49 | /** 50 | * @return the underlying regex as a String or null if no String representation is available 51 | */ 52 | public String getRegex() { 53 | return this.regex; 54 | } 55 | 56 | /** 57 | * compiles the given Regex String into a {@link MoaPattern}. 58 | * 59 | * @param regexStr the Regex String to parse into a {@link MoaPattern} 60 | * 61 | * @return the {@link MoaPattern} that represents the given Regex String 62 | */ 63 | public static MoaPattern compile(String regexStr) { 64 | try { 65 | return new MoaPattern( RegexCompiler.compile( regexStr ).toMoa(), regexStr ); 66 | } 67 | catch (NonDeterministicException e) { 68 | throw new NonDeterministicException( "The regex \"" + regexStr + "\" is not deterministic", e ); 69 | } 70 | } 71 | 72 | /** 73 | * compiles the given Regex into a {@link MoaPattern}. 
74 | * 75 | * @param regex the Regex to compile into a {@link MoaPattern} 76 | * 77 | * @return the {@link MoaPattern} that represents the given Regex 78 | */ 79 | public static MoaPattern compile(Regex regex) { 80 | return new MoaPattern( regex.toMoa(), regex.toString() ); 81 | } 82 | 83 | /** 84 | * constructs a {@link MoaPattern} from a manually built {@link Moa} 85 | * 86 | * @param moa the underlying Moa to use with this {@link MoaPattern} 87 | * @param regex the Regex String for description purposes 88 | * 89 | * @return the {@link MoaPattern} that uses the given {@link Moa} 90 | */ 91 | public static MoaPattern build(Moa moa, String regex) { 92 | return new MoaPattern( moa, regex ); 93 | } 94 | 95 | /** 96 | * same as {@link MoaPattern#matcher(CharSequence)} but with native Java CharSequences 97 | * 98 | * @param str the CharSequence to match against 99 | * 100 | * @return the resulting {@link MoaMatcher} 101 | */ 102 | public MoaMatcher matcher(CharSequence str) { 103 | return this.moa.matcher( str ); 104 | } 105 | 106 | /** 107 | * constructs a {@link MoaMatcher} that uses the {@link Moa} represented by this object 108 | * for matching against the given {@link CharSeq} 109 | * 110 | * @param seq the CharSeq to match against 111 | * 112 | * @return the resulting {@link MoaMatcher} 113 | */ 114 | public MoaMatcher matcher(CharSeq seq) { 115 | return this.moa.matcher( seq ); 116 | } 117 | 118 | /** 119 | * EXPERTS-ONLY 120 | * direct access to the underlying {@link Moa} 121 | * 122 | * @param accessor the accessor Function 123 | */ 124 | public void accessMoa(Accessor accessor) { 125 | accessor.access( this.moa ); 126 | } 127 | 128 | @Override 129 | public String toString() { 130 | return this.regex; 131 | } 132 | 133 | } 134 | -------------------------------------------------------------------------------- /engine/src/main/java/com/github/s4ke/moar/NonDeterministicException.java: 
-------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
/**
 * Signals that non determinism was detected, either while compiling or while
 * running the {@link com.github.s4ke.moar.moa.Moa}.
 *
 * @author Martin Braun
 */
public class NonDeterministicException extends RuntimeException {

    /**
     * Creates the exception with a message and the underlying cause.
     *
     * @param message description of the problem
     * @param cause the exception that led to this one
     */
    public NonDeterministicException(String message, Throwable cause) {
        super( message, cause );
    }

    /**
     * Creates the exception with a message only.
     *
     * @param message description of the problem
     */
    public NonDeterministicException(String message) {
        super( message );
    }
}
23 | */ 24 | package com.github.s4ke.moar.moa; 25 | 26 | import java.util.Collections; 27 | import java.util.HashMap; 28 | import java.util.HashSet; 29 | import java.util.Map; 30 | import java.util.Set; 31 | 32 | import com.github.s4ke.moar.MoaMatcher; 33 | import com.github.s4ke.moar.NonDeterministicException; 34 | import com.github.s4ke.moar.moa.edgegraph.ActionType; 35 | import com.github.s4ke.moar.moa.edgegraph.EdgeGraph; 36 | import com.github.s4ke.moar.moa.edgegraph.MemoryAction; 37 | import com.github.s4ke.moar.moa.states.State; 38 | import com.github.s4ke.moar.moa.states.Variable; 39 | import com.github.s4ke.moar.util.CharSeq; 40 | import com.github.s4ke.moar.util.IntCharSeq; 41 | 42 | 43 | /** 44 | * Basic implementation of a Memory Occurence Automaton with support 45 | * for backreferences using Variable States. 46 | * 47 | * @author Martin Braun 48 | */ 49 | public final class Moa { 50 | 51 | public static final State SRC = EdgeGraph.SRC; 52 | public static final State SNK = EdgeGraph.SNK; 53 | 54 | private Map vars = new HashMap<>(); 55 | private EdgeGraph edges = new EdgeGraph(); 56 | 57 | private boolean frozen = false; 58 | 59 | public void freeze() { 60 | this.frozen = true; 61 | this.edges.freeze(); 62 | if ( !this.edges.isDeterministic() ) { 63 | throw new NonDeterministicException( "this moa is not deterministic" ); 64 | } 65 | } 66 | 67 | public void checkNotFrozen() { 68 | if ( this.frozen ) { 69 | throw new IllegalStateException( "this Moa is frozen" ); 70 | } 71 | this.edges.checkNotFrozen(); 72 | } 73 | 74 | public void checkFrozen() { 75 | if ( !this.frozen ) { 76 | throw new IllegalStateException( "this Moa is not frozen" ); 77 | } 78 | } 79 | 80 | public EdgeGraph getEdges() { 81 | this.checkFrozen(); 82 | return this.edges; 83 | } 84 | 85 | public void setEdges(EdgeGraph edges) { 86 | this.checkNotFrozen(); 87 | this.edges = edges; 88 | } 89 | 90 | public static Set f(Set a1, Set a2) { 91 | Set ret = new HashSet<>(); 92 | Set 
variablesHandled = new HashSet<>(); 93 | for ( MemoryAction ma : a2 ) { 94 | if ( ma.actionType == ActionType.OPEN || ma.actionType == ActionType.RESET ) { 95 | ret.add( ma ); 96 | variablesHandled.add( ma.variable ); 97 | } 98 | } 99 | for ( MemoryAction ma : a1 ) { 100 | if ( !variablesHandled.contains( ma.variable ) ) { 101 | if ( ma.actionType == ActionType.OPEN || ma.actionType == ActionType.RESET || ma.actionType == ActionType.CLOSE ) { 102 | ret.add( ma ); 103 | } 104 | } 105 | } 106 | return ret; 107 | } 108 | 109 | public void setVariables(Map variables) { 110 | this.checkNotFrozen(); 111 | this.vars = variables; 112 | } 113 | 114 | public Map getVars() { 115 | return Collections.unmodifiableMap( this.vars ); 116 | } 117 | 118 | public MoaMatcher matcher(CharSeq charSeq) { 119 | this.checkFrozen(); 120 | Map varCopy = new HashMap<>( this.vars.size() ); 121 | for ( Map.Entry entry : this.vars.entrySet() ) { 122 | varCopy.put( entry.getKey(), new Variable( entry.getValue() ) ); 123 | } 124 | return new MoaMatcherImpl( this.edges, varCopy, charSeq ); 125 | } 126 | 127 | public MoaMatcher matcher(CharSequence str) { 128 | return this.matcher( new IntCharSeq( str ) ); 129 | } 130 | 131 | public boolean check(CharSequence str) { 132 | return this.matcher( str ).matches(); 133 | } 134 | 135 | public boolean check(CharSeq charSeq) { 136 | return this.matcher( charSeq ).matches(); 137 | } 138 | 139 | @Override 140 | public boolean equals(Object o) { 141 | if ( this == o ) { 142 | return true; 143 | } 144 | if ( o == null || getClass() != o.getClass() ) { 145 | return false; 146 | } 147 | 148 | Moa moa = (Moa) o; 149 | 150 | if ( frozen != moa.frozen ) { 151 | return false; 152 | } 153 | if ( vars != null ? !vars.equals( moa.vars ) : moa.vars != null ) { 154 | return false; 155 | } 156 | return !(edges != null ? !edges.equals( moa.edges ) : moa.edges != null); 157 | 158 | } 159 | 160 | @Override 161 | public int hashCode() { 162 | int result = vars != null ? 
vars.hashCode() : 0; 163 | result = 31 * result + (edges != null ? edges.hashCode() : 0); 164 | result = 31 * result + (frozen ? 1 : 0); 165 | return result; 166 | } 167 | } 168 | -------------------------------------------------------------------------------- /engine/src/main/java/com/github/s4ke/moar/moa/edgegraph/ActionType.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | */ 24 | package com.github.s4ke.moar.moa.edgegraph; 25 | 26 | import com.github.s4ke.moar.moa.states.Variable; 27 | 28 | /** 29 | * @author Martin Braun 30 | */ 31 | public enum ActionType { 32 | OPEN { 33 | @Override 34 | public void act(String variableName, Variable val) { 35 | val.open(); 36 | //important so we don't merge stuff like this x{a}x{b} into something like x{ab} 37 | val.contents.reset(); 38 | } 39 | 40 | @Override 41 | public String toString(String variableName) { 42 | return String.format( "o(%s)", variableName ); 43 | } 44 | }, 45 | CLOSE { 46 | @Override 47 | public void act(String variableName, Variable val) { 48 | val.close(); 49 | } 50 | 51 | @Override 52 | public String toString(String variableName) { 53 | return String.format( "c(%s)", variableName ); 54 | } 55 | }, 56 | RESET { 57 | @Override 58 | public void act(String variableName, Variable val) { 59 | val.close(); 60 | val.contents.reset(); 61 | } 62 | 63 | @Override 64 | public String toString(String variableName) { 65 | return String.format( "r(%s)", variableName ); 66 | } 67 | }; 68 | 69 | public static ActionType fromString(String str) { 70 | switch ( str ) { 71 | case "o": 72 | return OPEN; 73 | case "c": 74 | return CLOSE; 75 | case "r": 76 | return RESET; 77 | default: 78 | throw new IllegalArgumentException( "unrecognized ActionType String representation: " + str ); 79 | } 80 | } 81 | 82 | public abstract void act(String variableName, Variable val); 83 | 84 | public abstract String toString(String variableName); 85 | } 86 | -------------------------------------------------------------------------------- /engine/src/main/java/com/github/s4ke/moar/moa/edgegraph/CurStateHolder.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the 
"Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | */ 24 | package com.github.s4ke.moar.moa.edgegraph; 25 | 26 | import com.github.s4ke.moar.moa.states.State; 27 | 28 | /** 29 | * @author Martin Braun 30 | */ 31 | public interface CurStateHolder { 32 | 33 | State getState(); 34 | 35 | void setState(State state); 36 | } 37 | -------------------------------------------------------------------------------- /engine/src/main/java/com/github/s4ke/moar/moa/edgegraph/MemoryAction.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the 
following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | */ 24 | package com.github.s4ke.moar.moa.edgegraph; 25 | 26 | import java.util.Map; 27 | 28 | import com.github.s4ke.moar.moa.states.Variable; 29 | 30 | /** 31 | * @author Martin Braun 32 | */ 33 | public class MemoryAction { 34 | 35 | public static final MemoryAction NO_OP = null; 36 | 37 | public final ActionType actionType; 38 | public final String variable; 39 | 40 | public MemoryAction(ActionType actionType, String variable) { 41 | this.actionType = actionType; 42 | this.variable = variable; 43 | } 44 | 45 | public void act(Map variables) { 46 | Variable val = variables.get( this.variable ); 47 | if ( val == null ) { 48 | throw new AssertionError( "variable with name " + this.variable + " not found" ); 49 | } 50 | this.actionType.act( this.variable, val ); 51 | } 52 | 53 | @Override 54 | public String toString() { 55 | return "var=" + this.variable + 56 | "action=" + this.actionType; 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /engine/src/main/java/com/github/s4ke/moar/moa/states/BasicState.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this 
software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | */ 24 | package com.github.s4ke.moar.moa.states; 25 | 26 | import java.util.Map; 27 | 28 | import com.github.s4ke.moar.strings.EfficientString; 29 | 30 | /** 31 | * Basic implementation of {@link State} that represents 32 | * a String as a token. Currently this is expected to be a String 33 | * of length 1.
34 | * 35 | * @author Martin Braun 36 | */ 37 | public class BasicState implements State { 38 | 39 | public final int idx; 40 | public final EfficientString string; 41 | 42 | public BasicState(int idx, String string) { 43 | this.idx = idx; 44 | this.string = new EfficientString( string ); 45 | } 46 | 47 | public EfficientString getToken() { 48 | return this.string; 49 | } 50 | 51 | @Override 52 | public int getIdx() { 53 | return this.idx; 54 | } 55 | 56 | @Override 57 | public EfficientString getEdgeString(Map variables) { 58 | return this.string; 59 | } 60 | 61 | @Override 62 | public boolean canConsume(EfficientString string) { 63 | throw new UnsupportedOperationException(); 64 | } 65 | 66 | @Override 67 | public boolean canConsume(MatchInfo matchInfo) { 68 | throw new UnsupportedOperationException(); 69 | } 70 | 71 | @Override 72 | public boolean isStatic() { 73 | return true; 74 | } 75 | 76 | @Override 77 | public boolean isSet() { 78 | return false; 79 | } 80 | 81 | @Override 82 | public boolean isVariable() { 83 | return false; 84 | } 85 | 86 | @Override 87 | public boolean isBound() { 88 | return false; 89 | } 90 | 91 | @Override 92 | public String toString() { 93 | return "BasicState{" + 94 | "idx=" + idx + 95 | ", string='" + string + '\'' + 96 | '}'; 97 | } 98 | 99 | @Override 100 | public boolean equals(Object o) { 101 | if ( this == o ) { 102 | return true; 103 | } 104 | if ( o == null || getClass() != o.getClass() ) { 105 | return false; 106 | } 107 | 108 | BasicState that = (BasicState) o; 109 | 110 | if ( idx != that.idx ) { 111 | return false; 112 | } 113 | return !(string != null ? 
!string.equals( that.string ) : that.string != null); 114 | 115 | } 116 | 117 | } 118 | -------------------------------------------------------------------------------- /engine/src/main/java/com/github/s4ke/moar/moa/states/BoundState.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | */ 24 | package com.github.s4ke.moar.moa.states; 25 | 26 | import java.util.Map; 27 | import java.util.function.Function; 28 | 29 | import com.github.s4ke.moar.strings.EfficientString; 30 | 31 | /** 32 | * Implementation of {@link State} that represents boundary matches like the start of input or the end of input. 
33 | * 34 | * @author Martin Braun 35 | */ 36 | public class BoundState implements State { 37 | 38 | private final int idx; 39 | private final String boundHandled; 40 | private final Function condition; 41 | 42 | public BoundState(int idx, String boundHandled, Function condition) { 43 | this.idx = idx; 44 | this.condition = condition; 45 | this.boundHandled = boundHandled; 46 | } 47 | 48 | public String getBoundHandled() { 49 | return this.boundHandled; 50 | } 51 | 52 | @Override 53 | public int getIdx() { 54 | return this.idx; 55 | } 56 | 57 | @Override 58 | public EfficientString getEdgeString(Map variables) { 59 | throw new UnsupportedOperationException(); 60 | } 61 | 62 | @Override 63 | public boolean canConsume(EfficientString string) { 64 | throw new UnsupportedOperationException(); 65 | } 66 | 67 | @Override 68 | public boolean canConsume(MatchInfo matchInfo) { 69 | return this.condition.apply( matchInfo ); 70 | } 71 | 72 | @Override 73 | public boolean isStatic() { 74 | return false; 75 | } 76 | 77 | @Override 78 | public boolean isSet() { 79 | return false; 80 | } 81 | 82 | @Override 83 | public boolean isVariable() { 84 | return false; 85 | } 86 | 87 | @Override 88 | public boolean isBound() { 89 | return true; 90 | } 91 | 92 | @Override 93 | public boolean equals(Object o) { 94 | if ( this == o ) { 95 | return true; 96 | } 97 | if ( o == null || getClass() != o.getClass() ) { 98 | return false; 99 | } 100 | 101 | BoundState that = (BoundState) o; 102 | 103 | if ( idx != that.idx ) { 104 | return false; 105 | } 106 | return !(boundHandled != null ? 
!boundHandled.equals( that.boundHandled ) : that.boundHandled != null); 107 | 108 | } 109 | 110 | @Override 111 | public String toString() { 112 | return "BoundState{" + 113 | "idx=" + idx + 114 | ", boundHandled='" + boundHandled + '\'' + 115 | ", condition=" + condition + 116 | '}'; 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /engine/src/main/java/com/github/s4ke/moar/moa/states/MatchInfo.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | */ 24 | package com.github.s4ke.moar.moa.states; 25 | 26 | import com.github.s4ke.moar.strings.EfficientString; 27 | import com.github.s4ke.moar.util.CharSeq; 28 | 29 | /** 30 | * @author Martin Braun 31 | */ 32 | public class MatchInfo { 33 | 34 | private EfficientString string; 35 | private CharSeq wholeString; 36 | private int pos = 0; 37 | private int lastMatch = -1; 38 | 39 | public int getPos() { 40 | return pos; 41 | } 42 | 43 | public void setPos(int pos) { 44 | this.pos = pos; 45 | } 46 | 47 | public EfficientString getString() { 48 | return string; 49 | } 50 | 51 | public CharSeq getWholeString() { 52 | return wholeString; 53 | } 54 | 55 | public void setWholeString(CharSeq wholeString) { 56 | this.wholeString = wholeString; 57 | } 58 | 59 | public void setString(EfficientString string) { 60 | this.string = string; 61 | } 62 | 63 | public int getLastMatch() { 64 | return this.lastMatch; 65 | } 66 | 67 | public void setLastMatch(int lastMatch) { 68 | this.lastMatch = lastMatch; 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /engine/src/main/java/com/github/s4ke/moar/moa/states/SetState.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 
15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | */ 24 | package com.github.s4ke.moar.moa.states; 25 | 26 | import java.util.Map; 27 | 28 | import com.github.s4ke.moar.strings.CodePointSet; 29 | import com.github.s4ke.moar.strings.EfficientString; 30 | 31 | /** 32 | * Implementation of a {@link State} that represents a whole range of characters. These 33 | * are represented by a {@link CodePointSet}. With this we don't have to create that 34 | * many states if we just want to allow every character that is part of a set (like [a-z], 1 state instead of 26) 35 | * 36 | * @author Martin Braun 37 | */ 38 | public class SetState implements State { 39 | 40 | public final int idx; 41 | public final int length; 42 | public final CodePointSet criterion; 43 | public final String stringRepresentation; 44 | 45 | public SetState(int idx, int length, CodePointSet criterion, String stringRepresentation) { 46 | this.idx = idx; 47 | this.length = length; 48 | this.criterion = criterion; 49 | this.stringRepresentation = stringRepresentation; 50 | } 51 | 52 | public String getStringRepresentation() { 53 | return this.stringRepresentation; 54 | } 55 | 56 | @Override 57 | public int getIdx() { 58 | return this.idx; 59 | } 60 | 61 | @Override 62 | public EfficientString getEdgeString(Map variables) { 63 | throw new UnsupportedOperationException(); 64 | } 65 | 66 | @Override 67 | public boolean canConsume(EfficientString string) { 68 | if ( string.codePointLength() == 0 ) { 69 | return false; 70 | } 71 | if ( string.codePointLength() > 1 ) 
{ 72 | throw new AssertionError( "string's codePointLength was greater than 1" ); 73 | } 74 | return this.criterion.intersects( string.codePoint( 0 ) ); 75 | } 76 | 77 | @Override 78 | public boolean canConsume(MatchInfo matchInfo) { 79 | throw new UnsupportedOperationException(); 80 | } 81 | 82 | @Override 83 | public boolean isStatic() { 84 | return false; 85 | } 86 | 87 | @Override 88 | public boolean isSet() { 89 | return true; 90 | } 91 | 92 | @Override 93 | public boolean isVariable() { 94 | return false; 95 | } 96 | 97 | @Override 98 | public boolean isBound() { 99 | return false; 100 | } 101 | 102 | @Override 103 | public String toString() { 104 | if(this.stringRepresentation != null) { 105 | return this.stringRepresentation; 106 | } else { 107 | return this.criterion.toString(); 108 | } 109 | } 110 | 111 | @Override 112 | public boolean equals(Object o) { 113 | if ( this == o ) { 114 | return true; 115 | } 116 | if ( o == null || getClass() != o.getClass() ) { 117 | return false; 118 | } 119 | 120 | SetState setState = (SetState) o; 121 | 122 | if ( idx != setState.idx ) { 123 | return false; 124 | } 125 | if ( length != setState.length ) { 126 | return false; 127 | } 128 | return !(stringRepresentation != null ? 
129 | !stringRepresentation.equals( setState.stringRepresentation ) : 130 | setState.stringRepresentation != null); 131 | 132 | } 133 | 134 | } 135 | -------------------------------------------------------------------------------- /engine/src/main/java/com/github/s4ke/moar/moa/states/State.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | */ 24 | package com.github.s4ke.moar.moa.states; 25 | 26 | import java.util.Map; 27 | 28 | import com.github.s4ke.moar.strings.EfficientString; 29 | 30 | /** 31 | * Interface that represents the States in a Memory Occurrence Automaton 32 | * 33 | * @author Martin Braun 34 | */ 35 | public interface State { 36 | 37 | /** 38 | * @return the idx of this state in the underlying {@link com.github.s4ke.moar.moa.edgegraph.EdgeGraph}. 
39 | * This is an implementation detail that is needed for faster access. 40 | */ 41 | int getIdx(); 42 | 43 | /** 44 | * can only be used if either {@link State#isVariable()} or {@link State#isStatic()} returns true 45 | * 46 | * @param variables the current state of the variables 47 | * 48 | * @return the string that has to be read if the MOA is allowed to go to this state 49 | */ 50 | EfficientString getEdgeString(Map variables); 51 | 52 | /** 53 | * can only be used if either {@link State#isSet()} or {@link State#isStatic()} returns true 54 | * 55 | * @param string the {@link EfficientString} to check 56 | * 57 | * @return true if this State can "consume" the input 58 | */ 59 | boolean canConsume(EfficientString string); 60 | 61 | /** 62 | * can only be used if {@link State#isBound()} returns true. 63 | * 64 | * @param matchInfo the current Matching State 65 | * 66 | * @return true if this State can "consume" the input 67 | */ 68 | boolean canConsume(MatchInfo matchInfo); 69 | 70 | /** 71 | * @return true if this is a {@link BasicState} 72 | */ 73 | boolean isStatic(); 74 | 75 | /** 76 | * @return true if this is a {@link SetState} 77 | */ 78 | boolean isSet(); 79 | 80 | /** 81 | * @return true if this is a {@link VariableState} 82 | */ 83 | boolean isVariable(); 84 | 85 | /** 86 | * @return true if this is a {@link BoundState} 87 | */ 88 | boolean isBound(); 89 | 90 | } 91 | -------------------------------------------------------------------------------- /engine/src/main/java/com/github/s4ke/moar/moa/states/Variable.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, 
distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | */ 24 | package com.github.s4ke.moar.moa.states; 25 | 26 | import com.github.s4ke.moar.strings.EfficientString; 27 | 28 | /** 29 | * This represents a Variable in a Memory Occurence Automaton 30 | * 31 | * @author Martin Braun 32 | */ 33 | public class Variable { 34 | 35 | public final EfficientString contents = new EfficientString(); 36 | public final String name; 37 | private boolean open = false; 38 | private int occurenceInRegex = -1; 39 | 40 | /** 41 | * does not copy state 42 | */ 43 | public Variable(Variable variable) { 44 | this( variable.name ); 45 | this.occurenceInRegex = variable.occurenceInRegex; 46 | } 47 | 48 | public Variable(String name) { 49 | this.name = name; 50 | } 51 | 52 | public String getName() { 53 | return this.name; 54 | } 55 | 56 | public void open() { 57 | this.open = true; 58 | } 59 | 60 | public boolean isOpen() { 61 | return this.open; 62 | } 63 | 64 | public void close() { 65 | this.open = false; 66 | } 67 | 68 | public int getOccurenceInRegex() { 69 | return this.occurenceInRegex; 70 | } 71 | 72 | public void setOccurenceInRegex(int occurenceInRegex) { 73 | this.occurenceInRegex = occurenceInRegex; 74 | } 75 | 76 | @Override 
77 | public String toString() { 78 | return "Variable{" + 79 | "contents=" + contents + 80 | ", name='" + name + '\'' + 81 | ", open=" + open + 82 | '}'; 83 | } 84 | 85 | public EfficientString getEdgeString() { 86 | return this.contents; 87 | } 88 | 89 | public String getContents() { 90 | return this.contents.toString(); 91 | } 92 | 93 | public void reset() { 94 | this.contents.reset(); 95 | } 96 | 97 | public boolean canConsume() { 98 | return this.isOpen(); 99 | } 100 | 101 | public void consume(EfficientString str) { 102 | if ( !this.canConsume() ) { 103 | throw new IllegalStateException( "cannot consume at the moment!" ); 104 | } 105 | this.contents.appendOrOverwrite( str ); 106 | } 107 | 108 | @Override 109 | public boolean equals(Object o) { 110 | if ( this == o ) { 111 | return true; 112 | } 113 | if ( o == null || getClass() != o.getClass() ) { 114 | return false; 115 | } 116 | 117 | Variable variable = (Variable) o; 118 | 119 | if ( open != variable.open ) { 120 | return false; 121 | } 122 | if ( occurenceInRegex != variable.occurenceInRegex ) { 123 | return false; 124 | } 125 | return !(name != null ? !name.equals( variable.name ) : variable.name != null); 126 | 127 | } 128 | 129 | @Override 130 | public int hashCode() { 131 | int result = name != null ? name.hashCode() : 0; 132 | result = 31 * result + (open ? 
1 : 0); 133 | result = 31 * result + occurenceInRegex; 134 | return result; 135 | } 136 | } 137 | -------------------------------------------------------------------------------- /engine/src/main/java/com/github/s4ke/moar/moa/states/VariableState.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | */ 24 | package com.github.s4ke.moar.moa.states; 25 | 26 | import java.util.Map; 27 | 28 | import com.github.s4ke.moar.strings.EfficientString; 29 | 30 | /** 31 | * This represents a Variable State in a Memory Occurence Automaton 32 | * 33 | * @author Martin Braun 34 | */ 35 | public class VariableState implements State { 36 | 37 | private final int idx; 38 | private final String variableName; 39 | 40 | public VariableState(int idx, String variableName) { 41 | this.idx = idx; 42 | this.variableName = variableName; 43 | } 44 | 45 | public String getVariableName() { 46 | return this.variableName; 47 | } 48 | 49 | @Override 50 | public int getIdx() { 51 | return this.idx; 52 | } 53 | 54 | @Override 55 | public EfficientString getEdgeString(Map variables) { 56 | return variables.get( this.variableName ).getEdgeString(); 57 | } 58 | 59 | @Override 60 | public boolean canConsume(EfficientString string) { 61 | throw new UnsupportedOperationException(); 62 | } 63 | 64 | @Override 65 | public boolean canConsume(MatchInfo matchInfo) { 66 | throw new UnsupportedOperationException(); 67 | } 68 | 69 | @Override 70 | public boolean isStatic() { 71 | return false; 72 | } 73 | 74 | @Override 75 | public boolean isSet() { 76 | return false; 77 | } 78 | 79 | @Override 80 | public boolean isVariable() { 81 | return true; 82 | } 83 | 84 | @Override 85 | public boolean isBound() { 86 | return false; 87 | } 88 | 89 | @Override 90 | public String toString() { 91 | return "VariableState{" + 92 | "idx=" + idx + 93 | ", variableName=" + variableName + 94 | '}'; 95 | } 96 | 97 | @Override 98 | public boolean equals(Object o) { 99 | if ( this == o ) { 100 | return true; 101 | } 102 | if ( o == null || getClass() != o.getClass() ) { 103 | return false; 104 | } 105 | 106 | VariableState that = (VariableState) o; 107 | 108 | if ( idx != that.idx ) { 109 | return false; 110 | } 111 | return !(variableName != null ? 
!variableName.equals( that.variableName ) : that.variableName != null); 112 | 113 | } 114 | 115 | } 116 | -------------------------------------------------------------------------------- /engine/src/main/java/com/github/s4ke/moar/regex/Binding.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | */ 24 | package com.github.s4ke.moar.regex; 25 | 26 | import java.util.Map; 27 | import java.util.Set; 28 | import java.util.function.Supplier; 29 | 30 | import com.github.s4ke.moar.NonDeterministicException; 31 | import com.github.s4ke.moar.moa.Moa; 32 | import com.github.s4ke.moar.moa.edgegraph.ActionType; 33 | import com.github.s4ke.moar.moa.edgegraph.EdgeGraph; 34 | import com.github.s4ke.moar.moa.edgegraph.MemoryAction; 35 | import com.github.s4ke.moar.moa.states.State; 36 | import com.github.s4ke.moar.moa.states.Variable; 37 | 38 | /** 39 | * @author Martin Braun 40 | */ 41 | final class Binding implements Regex { 42 | 43 | private final String name; 44 | private final Regex regex; 45 | 46 | Binding(String name, Regex regex) { 47 | this.name = name; 48 | this.regex = regex; 49 | } 50 | 51 | @Override 52 | public String toString() { 53 | return "(?<" + this.name + ">" + this.regex.toString() + ")"; 54 | } 55 | 56 | @Override 57 | public Regex copy() { 58 | return new Binding( this.name, this.regex.copy() ); 59 | } 60 | 61 | @Override 62 | public void contributeStates( 63 | Map variables, 64 | Set states, 65 | Map> selfRelevant, 66 | Supplier idxSupplier) { 67 | this.regex.contributeStates( variables, states, selfRelevant, idxSupplier ); 68 | } 69 | 70 | @Override 71 | public void contributeEdges( 72 | EdgeGraph edgeGraph, 73 | Map variables, 74 | Set states, 75 | Map> selfRelevant) { 76 | if ( !variables.containsKey( this.name ) ) { 77 | Variable var = new Variable( name ); 78 | variables.put( name, var ); 79 | } 80 | 81 | this.regex.contributeEdges( edgeGraph, variables, states, selfRelevant ); 82 | 83 | for ( EdgeGraph.Edge edge : edgeGraph.getEdges( Moa.SRC ) ) { 84 | if ( edge.destination != Moa.SNK.getIdx() ) { 85 | edge.memoryAction.add( new MemoryAction( ActionType.OPEN, this.name ) ); 86 | } 87 | else { 88 | edge.memoryAction.add( new MemoryAction( ActionType.RESET, this.name ) ); 89 | } 90 | } 91 | edgeGraph.getStates().stream().filter( state -> 
state != Moa.SRC ).forEach( 92 | state -> { 93 | edgeGraph.getEdges( state ).stream().filter( edge -> edge.destination == Moa.SNK.getIdx() ).forEach( 94 | edge -> { 95 | edge.memoryAction.add( new MemoryAction( ActionType.CLOSE, this.name ) ); 96 | } 97 | ); 98 | } 99 | ); 100 | } 101 | 102 | @Override 103 | public void calculateVariableOccurences(Map variables, Supplier varIdxSupplier) { 104 | Variable variable = variables.get( this.name ); 105 | variable.setOccurenceInRegex( varIdxSupplier.get() ); 106 | 107 | this.regex.calculateVariableOccurences( variables, varIdxSupplier ); 108 | } 109 | 110 | } 111 | -------------------------------------------------------------------------------- /engine/src/main/java/com/github/s4ke/moar/regex/BoundConstants.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | */ 24 | package com.github.s4ke.moar.regex; 25 | 26 | import java.util.Arrays; 27 | import java.util.Set; 28 | import java.util.function.Function; 29 | import java.util.stream.Collectors; 30 | 31 | import com.github.s4ke.moar.moa.states.MatchInfo; 32 | import com.github.s4ke.moar.strings.EfficientString; 33 | import com.github.s4ke.moar.util.CharSeq; 34 | 35 | /** 36 | * Constants for the supported boundary checks 37 | * 38 | * @author Martin Braun 39 | */ 40 | public final class BoundConstants { 41 | 42 | private BoundConstants() { 43 | //can't touch this! 44 | } 45 | 46 | public static final String START_OF_LINE = "^"; 47 | public static final String END_OF_LINE = "$"; 48 | public static final String END_OF_INPUT = "\\z"; 49 | public static final String END_OF_LAST_MATCH = "\\G"; 50 | 51 | public static Function getFN(String boundIdent) { 52 | switch ( boundIdent ) { 53 | case START_OF_LINE: 54 | return CARET_FN; 55 | case END_OF_LINE: 56 | return DOLLAR_FN; 57 | case END_OF_INPUT: 58 | return END_OF_INPUT_FN; 59 | case END_OF_LAST_MATCH: 60 | return END_OF_LAST_MATCH_FN; 61 | default: 62 | throw new IllegalArgumentException( "boundIdent " + boundIdent + " not found." 
); 63 | } 64 | } 65 | 66 | public static Function CARET_FN = (mi) -> { 67 | // Perl does not match ^ at end of input even after newline 68 | if ( mi.getPos() > mi.getWholeString().codePointLength() - 1 ) { 69 | return false; 70 | } 71 | if ( mi.getPos() == 0 ) { 72 | return true; 73 | } 74 | for ( EfficientString eff : BoundConstants.LINE_BREAK_CHARS ) { 75 | int length = eff.codePointLength(); 76 | CharSeq whole = mi.getWholeString(); 77 | //zero-based position 78 | if ( mi.getPos() >= length ) { 79 | boolean eq = true; 80 | int charPos = 0; 81 | for ( int i = length; i > 0; --i ) { 82 | if ( whole.codePoint( mi.getPos() - i ) != eff.codePoint( charPos++ ) ) { 83 | eq = false; 84 | break; 85 | } 86 | } 87 | if ( eq ) { 88 | return true; 89 | } 90 | } 91 | } 92 | return false; 93 | }; 94 | 95 | public static final Function DOLLAR_FN = (mi) -> { 96 | //we are at the end, so match the dollar sign 97 | if ( mi.getPos() == mi.getWholeString().codePointLength() ) { 98 | return true; 99 | } 100 | //check if the following stuff is the end of input 101 | for ( EfficientString eff : BoundConstants.LINE_BREAK_CHARS ) { 102 | int length = eff.codePointLength(); 103 | CharSeq whole = mi.getWholeString(); 104 | //zero-based position 105 | if ( mi.getPos() + length <= mi.getWholeString().codePointLength() ) { 106 | boolean eq = true; 107 | for ( int i = 0; i < length; ++i ) { 108 | if ( whole.codePoint( mi.getPos() + i ) != eff.codePoint( i ) ) { 109 | eq = false; 110 | break; 111 | } 112 | } 113 | if ( eq ) { 114 | return true; 115 | } 116 | } 117 | } 118 | return false; 119 | }; 120 | 121 | public static final Function END_OF_INPUT_FN = (mi) -> mi.getPos() == mi.getWholeString() 122 | .codePointLength(); 123 | 124 | public static final Function END_OF_LAST_MATCH_FN = (mi) -> 125 | mi.getLastMatch() == -1 || mi.getPos() == mi.getLastMatch(); 126 | 127 | public static final Set LINE_BREAK_CHARS = Arrays.asList( 128 | "\n", 129 | "\r\n", 130 | "\u2029", 131 | //Java pattern 
logic... 132 | String.valueOf( (char) ('\u2029' - 1) ), 133 | "\u0085" 134 | ).stream().map( EfficientString::new ).collect( 135 | Collectors.toSet() 136 | ); 137 | 138 | } 139 | -------------------------------------------------------------------------------- /engine/src/main/java/com/github/s4ke/moar/regex/BoundaryRegex.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | */ 24 | package com.github.s4ke.moar.regex; 25 | 26 | import java.util.HashMap; 27 | import java.util.Map; 28 | import java.util.Set; 29 | import java.util.function.Function; 30 | import java.util.function.Supplier; 31 | 32 | import com.github.s4ke.moar.moa.Moa; 33 | import com.github.s4ke.moar.moa.edgegraph.EdgeGraph; 34 | import com.github.s4ke.moar.moa.edgegraph.MemoryAction; 35 | import com.github.s4ke.moar.moa.states.BoundState; 36 | import com.github.s4ke.moar.moa.states.MatchInfo; 37 | import com.github.s4ke.moar.moa.states.State; 38 | import com.github.s4ke.moar.moa.states.Variable; 39 | 40 | /** 41 | * @author Martin Braun 42 | */ 43 | class BoundaryRegex implements Regex { 44 | 45 | private static final String SELF_RELEVANT_KEY = ""; 46 | 47 | private final String boundHandled; 48 | private final Function matchDescriptor; 49 | 50 | public BoundaryRegex(String boundHandled, Function matchDescriptor) { 51 | this.boundHandled = boundHandled; 52 | this.matchDescriptor = matchDescriptor; 53 | } 54 | 55 | @Override 56 | public void contributeEdges( 57 | EdgeGraph edgeGraph, 58 | Map variables, 59 | Set states, 60 | Map> selfRelevant) { 61 | State state = selfRelevant.get( this ).get( SELF_RELEVANT_KEY ); 62 | edgeGraph.addEdgeWithDeterminismCheck( Moa.SRC, new EdgeGraph.Edge( MemoryAction.NO_OP, state ), this ); 63 | edgeGraph.addEdgeWithDeterminismCheck( state, new EdgeGraph.Edge( MemoryAction.NO_OP, Moa.SNK ), this ); 64 | } 65 | 66 | @Override 67 | public void contributeStates( 68 | Map variables, 69 | Set states, 70 | Map> selfRelevant, 71 | Supplier idxSupplier) { 72 | State state = new BoundState( idxSupplier.get(), this.boundHandled, this.matchDescriptor ); 73 | states.add( state ); 74 | states.add( Moa.SRC ); 75 | states.add( Moa.SNK ); 76 | selfRelevant.put( this, new HashMap<>() ); 77 | selfRelevant.get( this ).put( SELF_RELEVANT_KEY, state ); 78 | } 79 | 80 | @Override 81 | public void calculateVariableOccurences( 82 | Map variables, Supplier 
varIdxSupplier) { 83 | 84 | } 85 | 86 | @Override 87 | public Regex copy() { 88 | return new BoundaryRegex( this.boundHandled, this.matchDescriptor ); 89 | } 90 | 91 | @Override 92 | public String toString() { 93 | if ( this.boundHandled != null ) { 94 | return this.boundHandled; 95 | } 96 | else { 97 | return this.matchDescriptor.toString(); 98 | } 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /engine/src/main/java/com/github/s4ke/moar/regex/CharacterClassesUtils.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | */ 24 | package com.github.s4ke.moar.regex; 25 | 26 | import java.util.Arrays; 27 | import java.util.HashSet; 28 | import java.util.Set; 29 | import java.util.stream.Collectors; 30 | 31 | import com.github.s4ke.moar.strings.CodePointSet; 32 | import com.github.s4ke.moar.util.RangeRep; 33 | import com.google.common.collect.RangeSet; 34 | import com.google.common.collect.TreeRangeSet; 35 | 36 | /** 37 | * Utilities for Character Classes 38 | * 39 | * @author Martin Braun 40 | */ 41 | public class CharacterClassesUtils { 42 | 43 | private static final Set WHITE_SPACE_CHARS = new HashSet<>( 44 | Arrays.asList( 45 | new Integer[] { 46 | (int) ' ', 47 | (int) '\t', 48 | (int) '\n', 49 | 0x0B, 50 | (int) '\f', 51 | (int) '\r' 52 | } 53 | ) 54 | ); 55 | 56 | private static final Set DIGITS = 57 | Arrays.asList( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ) 58 | .stream() 59 | .map( String::valueOf ) 60 | .map( (str) -> str.codePointAt( 0 ) ).collect( Collectors.toSet() ); 61 | 62 | private CharacterClassesUtils() { 63 | //can't touch this! 
64 | } 65 | 66 | public static final String ANY = "."; 67 | public static final String WHITE_SPACE = "\\s"; 68 | public static final String NON_WHITE_SPACE = "\\S"; 69 | public static final String DIGIT = "\\d"; 70 | public static final String NON_DIGIT = "\\D"; 71 | public static final String WORD_CHARACTER = "\\w"; 72 | public static final String NON_WORD_CHARACTER = "\\W"; 73 | 74 | public static CodePointSet getFn(String identifier) { 75 | switch ( identifier ) { 76 | case ANY: 77 | return ANY_FN; 78 | case WHITE_SPACE: 79 | return WHITE_SPACE_FN; 80 | case NON_WHITE_SPACE: 81 | return NON_WHITE_SPACE_FN; 82 | case DIGIT: 83 | return DIGIT_FN; 84 | case NON_DIGIT: 85 | return NON_DIGIT_FN; 86 | case WORD_CHARACTER: 87 | return WORD_CHARACTER_FN; 88 | case NON_WORD_CHARACTER: 89 | return NON_WORD_CHARACTER_FN; 90 | default: 91 | throw new IllegalArgumentException( "unrecognized character class identifier: " + identifier ); 92 | } 93 | } 94 | 95 | public static final CodePointSet ANY_FN = CodePointSet.range( RangeRep.of( Integer.MIN_VALUE, Integer.MAX_VALUE ) ); 96 | 97 | public static final CodePointSet WHITE_SPACE_FN = CodePointSet.set( WHITE_SPACE_CHARS ); 98 | 99 | public static final CodePointSet NON_WHITE_SPACE_FN = WHITE_SPACE_FN.negative(); 100 | 101 | public static final CodePointSet DIGIT_FN = CodePointSet.set( DIGITS ); 102 | 103 | public static final CodePointSet NON_DIGIT_FN = DIGIT_FN.negative(); 104 | 105 | public static final CodePointSet WORD_CHARACTER_FN = CodePointSet.range( 106 | fromTo( 107 | 'a', 108 | 'z' 109 | ), fromTo( 'A', 'Z' ), fromTo( '0', '9' ), fromTo( '_', '_' ) 110 | ); 111 | 112 | public static final CodePointSet NON_WORD_CHARACTER_FN = WORD_CHARACTER_FN.negative(); 113 | 114 | static RangeRep fromTo(int from, int to) { 115 | return RangeRep.of( from, to ); 116 | } 117 | 118 | public static CodePointSet positiveFn(Set ranges) { 119 | RangeSet rangeSet = TreeRangeSet.create(); 120 | for ( RangeRep rangeRep : ranges ) { 121 | 
rangeSet.addAll( rangeRep.getRangeSet() ); 122 | } 123 | return CodePointSet.range( RangeRep.of( rangeSet ) ); 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /engine/src/main/java/com/github/s4ke/moar/regex/Choice.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | */ 24 | package com.github.s4ke.moar.regex; 25 | 26 | import java.util.Map; 27 | import java.util.Set; 28 | import java.util.function.Supplier; 29 | 30 | import com.github.s4ke.moar.moa.edgegraph.EdgeGraph; 31 | import com.github.s4ke.moar.moa.states.State; 32 | import com.github.s4ke.moar.moa.states.Variable; 33 | 34 | /** 35 | * @author Martin Braun 36 | */ 37 | final class Choice implements Regex { 38 | 39 | private final Regex fst; 40 | private final Regex snd; 41 | 42 | public Choice(Regex fst, Regex snd) { 43 | this.fst = fst; 44 | this.snd = snd; 45 | } 46 | 47 | @Override 48 | public String toString() { 49 | return this.fst.toString() + "|" + this.snd.toString(); 50 | } 51 | 52 | @Override 53 | public Regex copy() { 54 | return new Choice( this.fst.copy(), this.snd.copy() ); 55 | } 56 | 57 | @Override 58 | public void contributeStates( 59 | Map variables, 60 | Set states, 61 | Map> selfRelevant, 62 | Supplier idxSupplier) { 63 | this.fst.contributeStates( variables, states, selfRelevant, idxSupplier ); 64 | this.snd.contributeStates( variables, states, selfRelevant, idxSupplier ); 65 | } 66 | 67 | @Override 68 | public void contributeEdges( 69 | EdgeGraph edgeGraph, 70 | Map variables, 71 | Set states, 72 | Map> selfRelevant) { 73 | EdgeGraph eg1 = new EdgeGraph(); 74 | for ( State state : states ) { 75 | eg1.addState( state ); 76 | } 77 | this.fst.contributeEdges( eg1, variables, states, selfRelevant ); 78 | 79 | EdgeGraph eg2 = new EdgeGraph(); 80 | for ( State state : states ) { 81 | eg2.addState( state ); 82 | } 83 | this.snd.contributeEdges( eg2, variables, states, selfRelevant ); 84 | 85 | for ( State state : states ) { 86 | edgeGraph.addEdgesWithDeterminismCheck( state, eg1.getEdges( state ) ); 87 | edgeGraph.addEdgesWithDeterminismCheck( state, eg2.getEdges( state ) ); 88 | } 89 | } 90 | 91 | @Override 92 | public void calculateVariableOccurences(Map variables, Supplier varIdxSupplier) { 93 | this.fst.calculateVariableOccurences( variables, 
varIdxSupplier ); 94 | this.snd.calculateVariableOccurences( variables, varIdxSupplier ); 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /engine/src/main/java/com/github/s4ke/moar/regex/Concat.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | */ 24 | package com.github.s4ke.moar.regex; 25 | 26 | import java.util.Collection; 27 | import java.util.HashMap; 28 | import java.util.HashSet; 29 | import java.util.List; 30 | import java.util.Map; 31 | import java.util.Set; 32 | import java.util.function.Supplier; 33 | 34 | import com.github.s4ke.moar.moa.Moa; 35 | import com.github.s4ke.moar.moa.edgegraph.EdgeGraph; 36 | import com.github.s4ke.moar.moa.states.State; 37 | import com.github.s4ke.moar.moa.states.Variable; 38 | 39 | /** 40 | * @author Martin Braun 41 | */ 42 | final class Concat implements Regex { 43 | 44 | private final Regex fst; 45 | private final Regex snd; 46 | 47 | Concat(Regex fst, Regex snd) { 48 | this.fst = fst; 49 | this.snd = snd; 50 | } 51 | 52 | @Override 53 | public String toString() { 54 | return this.fst.toString() + this.snd.toString(); 55 | } 56 | 57 | @Override 58 | public Regex copy() { 59 | return new Concat( this.fst.copy(), this.snd.copy() ); 60 | } 61 | 62 | 63 | @Override 64 | public void contributeStates( 65 | Map variables, 66 | Set states, 67 | Map> selfRelevant, 68 | Supplier idxSupplier) { 69 | this.fst.contributeStates( variables, states, selfRelevant, idxSupplier ); 70 | this.snd.contributeStates( variables, states, selfRelevant, idxSupplier ); 71 | } 72 | 73 | @Override 74 | public void contributeEdges( 75 | EdgeGraph edgeGraph, 76 | Map variables, 77 | Set states, 78 | Map> selfRelevant) { 79 | EdgeGraph eg1 = new EdgeGraph(); 80 | for ( State state : states ) { 81 | eg1.addState( state ); 82 | } 83 | this.fst.contributeEdges( eg1, variables, states, selfRelevant ); 84 | 85 | EdgeGraph eg2 = new EdgeGraph(); 86 | for ( State state : states ) { 87 | eg2.addState( state ); 88 | } 89 | this.snd.contributeEdges( eg2, variables, states, selfRelevant ); 90 | 91 | Map> snkEdges = new HashMap<>(); 92 | Set srcEdges = new HashSet<>(); 93 | 94 | for ( State state : states ) { 95 | Collection edges = eg1.getEdges( state ); 96 | for ( EdgeGraph.Edge edge : edges ) { 97 | 
if ( !edge.destination.equals( Moa.SNK.getIdx() ) ) { 98 | edgeGraph.addEdgeWithDeterminismCheck( state, edge, this ); 99 | } 100 | else { 101 | snkEdges.computeIfAbsent( state, (key) -> new HashSet<>() ).add( edge ); 102 | } 103 | } 104 | } 105 | 106 | for ( State state : states ) { 107 | Collection edges = eg2.getEdges( state ); 108 | for ( EdgeGraph.Edge edge : edges ) { 109 | if ( state.getIdx() != (Moa.SRC.getIdx()) ) { 110 | edgeGraph.addEdgeWithDeterminismCheck( state, edge, this ); 111 | } 112 | else { 113 | srcEdges.add( edge ); 114 | } 115 | } 116 | } 117 | 118 | for ( Map.Entry> snkEdgeEntry : snkEdges.entrySet() ) { 119 | State toSnkState = snkEdgeEntry.getKey(); 120 | for ( EdgeGraph.Edge snkEdge : snkEdgeEntry.getValue() ) { 121 | for ( EdgeGraph.Edge srcEdge : srcEdges ) { 122 | Integer fromSrcState = srcEdge.destination; 123 | edgeGraph.addEdgeWithDeterminismCheck( 124 | toSnkState, new EdgeGraph.Edge( 125 | Moa.f( 126 | snkEdge.memoryAction, 127 | srcEdge.memoryAction 128 | ), fromSrcState 129 | ), 130 | this 131 | ); 132 | } 133 | } 134 | } 135 | } 136 | 137 | @Override 138 | public void calculateVariableOccurences(Map variables, Supplier varIdxSupplier) { 139 | this.fst.calculateVariableOccurences( variables, varIdxSupplier ); 140 | this.snd.calculateVariableOccurences( variables, varIdxSupplier ); 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /engine/src/main/java/com/github/s4ke/moar/regex/EdgeContributor.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | 
copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | */ 24 | package com.github.s4ke.moar.regex; 25 | 26 | import java.util.Map; 27 | import java.util.Set; 28 | 29 | import com.github.s4ke.moar.moa.edgegraph.EdgeGraph; 30 | import com.github.s4ke.moar.moa.states.State; 31 | import com.github.s4ke.moar.moa.states.Variable; 32 | 33 | /** 34 | * @author Martin Braun 35 | */ 36 | interface EdgeContributor { 37 | 38 | void contributeEdges( 39 | EdgeGraph edgeGraph, Map variables, Set states, 40 | Map> selfRelevant); 41 | 42 | } 43 | -------------------------------------------------------------------------------- /engine/src/main/java/com/github/s4ke/moar/regex/Epsilon.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the 
following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | */ 24 | package com.github.s4ke.moar.regex; 25 | 26 | import java.util.Map; 27 | import java.util.Set; 28 | import java.util.function.Supplier; 29 | 30 | import com.github.s4ke.moar.moa.edgegraph.EdgeGraph; 31 | import com.github.s4ke.moar.moa.edgegraph.MemoryAction; 32 | import com.github.s4ke.moar.moa.Moa; 33 | import com.github.s4ke.moar.moa.states.State; 34 | import com.github.s4ke.moar.moa.states.Variable; 35 | 36 | /** 37 | * @author Martin Braun 38 | */ 39 | final class Epsilon implements Regex { 40 | 41 | private Epsilon() { 42 | 43 | } 44 | 45 | public static final Epsilon INSTANCE = new Epsilon(); 46 | 47 | @Override 48 | public String toString() { 49 | return "{epsilon}"; 50 | } 51 | 52 | @Override 53 | public Regex copy() { 54 | return this; 55 | } 56 | 57 | @Override 58 | public void contributeStates( 59 | Map variables, 60 | Set states, 61 | Map> selfRelevant, 62 | Supplier idxSupplier) { 63 | states.add( Moa.SRC ); 64 | states.add( Moa.SNK ); 65 | } 66 | 67 | @Override 68 | public void contributeEdges( 69 | EdgeGraph edgeGraph, 70 | Map variables, 71 | Set states, 72 | Map> selfRelevant) { 73 | edgeGraph.addEdgeWithDeterminismCheck( Moa.SRC, new EdgeGraph.Edge( MemoryAction.NO_OP, Moa.SNK ), this ); 74 | } 75 | 76 | @Override 77 | public void calculateVariableOccurences(Map variables, Supplier 
varIdxSupplier) { 78 | //no-op 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /engine/src/main/java/com/github/s4ke/moar/regex/Plus.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | */ 24 | package com.github.s4ke.moar.regex; 25 | 26 | import java.util.ArrayList; 27 | import java.util.Collection; 28 | import java.util.List; 29 | import java.util.Map; 30 | import java.util.Set; 31 | import java.util.function.Supplier; 32 | 33 | import com.github.s4ke.moar.moa.edgegraph.EdgeGraph; 34 | import com.github.s4ke.moar.moa.Moa; 35 | import com.github.s4ke.moar.moa.states.State; 36 | import com.github.s4ke.moar.moa.states.Variable; 37 | 38 | /** 39 | * @author Martin Braun 40 | */ 41 | final class Plus implements Regex { 42 | 43 | private final Regex regex; 44 | 45 | Plus(Regex regex) { 46 | this.regex = regex; 47 | } 48 | 49 | @Override 50 | public String toString() { 51 | return this.regex.toString() + "+"; 52 | } 53 | 54 | @Override 55 | public Regex copy() { 56 | return new Plus( this.regex.copy() ); 57 | } 58 | 59 | @Override 60 | public void contributeStates( 61 | Map variables, 62 | Set states, 63 | Map> selfRelevant, 64 | Supplier idxSupplier) { 65 | this.regex.contributeStates( variables, states, selfRelevant, idxSupplier ); 66 | } 67 | 68 | @Override 69 | public void contributeEdges( 70 | EdgeGraph edgeGraph, 71 | Map variables, 72 | Set states, 73 | Map> selfRelevant) { 74 | this.regex.contributeEdges( edgeGraph, variables, states, selfRelevant ); 75 | 76 | Collection srcEdges = edgeGraph.getEdges( Moa.SRC ); 77 | 78 | //used to avoid concurrent modification 79 | List> addActions = new ArrayList<>(); 80 | 81 | for ( State toSnkState : states ) { 82 | for ( EdgeGraph.Edge snkEdge : edgeGraph.getEdges( toSnkState ) ) { 83 | if ( snkEdge.destination.equals( Moa.SNK.getIdx() ) ) { 84 | //now we are a real SNK edge 85 | for ( EdgeGraph.Edge srcEdge : srcEdges ) { 86 | Integer fromSrcState = srcEdge.destination; 87 | Supplier val = () -> { 88 | edgeGraph.addEdgeWithDeterminismCheck( 89 | toSnkState, new EdgeGraph.Edge( 90 | Moa.f( 91 | snkEdge.memoryAction, 92 | srcEdge.memoryAction 93 | ), fromSrcState 94 | ), 95 | this 96 | ); 97 | return 
null; 98 | }; 99 | addActions.add( val ); 100 | } 101 | } 102 | } 103 | } 104 | 105 | addActions.forEach( Supplier::get ); 106 | } 107 | 108 | @Override 109 | public void calculateVariableOccurences(Map variables, Supplier varIdxSupplier) { 110 | this.regex.calculateVariableOccurences( variables, varIdxSupplier ); 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /engine/src/main/java/com/github/s4ke/moar/regex/Primitive.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | */ 24 | package com.github.s4ke.moar.regex; 25 | 26 | import java.util.HashMap; 27 | import java.util.Map; 28 | import java.util.Set; 29 | import java.util.function.Supplier; 30 | 31 | import com.github.s4ke.moar.moa.states.BasicState; 32 | import com.github.s4ke.moar.moa.edgegraph.EdgeGraph; 33 | import com.github.s4ke.moar.moa.edgegraph.MemoryAction; 34 | import com.github.s4ke.moar.moa.Moa; 35 | import com.github.s4ke.moar.moa.states.State; 36 | import com.github.s4ke.moar.moa.states.Variable; 37 | 38 | /** 39 | * @author Martin Braun 40 | */ 41 | final class Primitive implements Regex { 42 | 43 | private Symbol symbol; 44 | 45 | Primitive(Symbol symbol) { 46 | this.symbol = symbol; 47 | } 48 | 49 | @Override 50 | public String toString() { 51 | return this.symbol.toString(); 52 | } 53 | 54 | @Override 55 | public Regex copy() { 56 | return new Primitive( new Symbol( this.symbol.symbol ) ); 57 | } 58 | 59 | @Override 60 | public void contributeStates( 61 | Map variables, 62 | Set states, 63 | Map> selfRelevant, 64 | Supplier idxSupplier) { 65 | State state = new BasicState( idxSupplier.get(), this.symbol.symbol ); 66 | states.add( state ); 67 | states.add( Moa.SRC ); 68 | states.add( Moa.SNK ); 69 | selfRelevant.put( this, new HashMap<>() ); 70 | selfRelevant.get( this ).put( this.symbol.symbol, state ); 71 | } 72 | 73 | @Override 74 | public void contributeEdges( 75 | EdgeGraph edgeGraph, 76 | Map variables, 77 | Set states, 78 | Map> selfRelevant) { 79 | State state = selfRelevant.get( this ).get( this.symbol.symbol ); 80 | edgeGraph.addEdgeWithDeterminismCheck( Moa.SRC, new EdgeGraph.Edge( MemoryAction.NO_OP, state ), this ); 81 | edgeGraph.addEdgeWithDeterminismCheck( state, new EdgeGraph.Edge( MemoryAction.NO_OP, Moa.SNK ), this ); 82 | } 83 | 84 | @Override 85 | public void calculateVariableOccurences(Map variables, Supplier varIdxSupplier) { 86 | //no-op 87 | } 88 | } 89 | 
-------------------------------------------------------------------------------- /engine/src/main/java/com/github/s4ke/moar/regex/Reference.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | */ 24 | package com.github.s4ke.moar.regex; 25 | 26 | import java.util.HashMap; 27 | import java.util.Map; 28 | import java.util.Set; 29 | import java.util.function.Supplier; 30 | 31 | import com.github.s4ke.moar.moa.Moa; 32 | import com.github.s4ke.moar.moa.edgegraph.EdgeGraph; 33 | import com.github.s4ke.moar.moa.edgegraph.MemoryAction; 34 | import com.github.s4ke.moar.moa.states.State; 35 | import com.github.s4ke.moar.moa.states.Variable; 36 | import com.github.s4ke.moar.moa.states.VariableState; 37 | 38 | /** 39 | * @author Martin Braun 40 | */ 41 | final class Reference implements Regex { 42 | 43 | private final String reference; 44 | 45 | Reference(String reference) { 46 | this.reference = reference; 47 | } 48 | 49 | @Override 50 | public String toString() { 51 | return "\\k<" + this.reference + ">"; 52 | } 53 | 54 | @Override 55 | public Regex copy() { 56 | return this; 57 | } 58 | 59 | @Override 60 | public void contributeStates( 61 | Map variables, 62 | Set states, 63 | Map> selfRelevant, 64 | Supplier idxSupplier) { 65 | if ( !variables.containsKey( this.reference ) ) { 66 | Variable var = new Variable( this.reference ); 67 | variables.put( this.reference, var ); 68 | } 69 | states.add( Moa.SRC ); 70 | states.add( Moa.SNK ); 71 | 72 | VariableState varState = new VariableState( idxSupplier.get(), this.reference ); 73 | states.add( varState ); 74 | selfRelevant.put( this, new HashMap<>() ); 75 | selfRelevant.get( this ).put( this.reference, varState ); 76 | } 77 | 78 | @Override 79 | public void contributeEdges( 80 | EdgeGraph edgeGraph, 81 | Map variables, 82 | Set states, 83 | Map> selfRelevant) { 84 | VariableState varState = (VariableState) selfRelevant.get( this ).get( this.reference ); 85 | edgeGraph.addEdgeWithDeterminismCheck( Moa.SRC, new EdgeGraph.Edge( MemoryAction.NO_OP, varState ), this ); 86 | edgeGraph.addEdgeWithDeterminismCheck( varState, new EdgeGraph.Edge( MemoryAction.NO_OP, Moa.SNK ), this ); 87 | } 88 | 89 | @Override 90 | public 
void calculateVariableOccurences(Map variables, Supplier varIdxSupplier) { 91 | //no-op 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /engine/src/main/java/com/github/s4ke/moar/regex/Regex.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | */ 24 | package com.github.s4ke.moar.regex; 25 | 26 | import java.util.HashMap; 27 | import java.util.HashSet; 28 | import java.util.Map; 29 | import java.util.Set; 30 | import java.util.concurrent.atomic.AtomicInteger; 31 | import java.util.function.Supplier; 32 | 33 | import com.github.s4ke.moar.NonDeterministicException; 34 | import com.github.s4ke.moar.moa.Moa; 35 | import com.github.s4ke.moar.moa.edgegraph.EdgeGraph; 36 | import com.github.s4ke.moar.moa.states.State; 37 | import com.github.s4ke.moar.moa.states.Variable; 38 | import com.github.s4ke.moar.strings.CodePointSet; 39 | import com.github.s4ke.moar.util.RangeRep; 40 | 41 | import static com.github.s4ke.moar.regex.CharacterClassesUtils.NON_WORD_CHARACTER_FN; 42 | 43 | /** 44 | * Interface representing a Regex in Java Code. This interface serves two purposes: 45 | *
    46 | *
  1. representation of parsed Regex Strings
  2. 47 | *
  3. DSL-style creation of Regexes in Java Code
  4. 48 | *
49 | *
50 | *
51 | * The DSL style for the Regex (a|b)c looks like this: 52 | * {@code Regex.str("a").or("b").and("c")}. 53 | * 54 | * @author Martin Braun 55 | */ 56 | public interface Regex extends StateContributor, EdgeContributor, VariableOccurence { 57 | 58 | Regex CARET = new BoundaryRegex( 59 | BoundConstants.START_OF_LINE, BoundConstants.CARET_FN 60 | ); 61 | 62 | Regex DOLLAR = new BoundaryRegex( 63 | BoundConstants.END_OF_LINE, BoundConstants.DOLLAR_FN 64 | ); 65 | 66 | Regex END_OF_INPUT = new BoundaryRegex( 67 | BoundConstants.END_OF_INPUT, 68 | BoundConstants.END_OF_INPUT_FN 69 | ); 70 | 71 | Regex END_OF_LAST_MATCH = new BoundaryRegex( 72 | BoundConstants.END_OF_LAST_MATCH, BoundConstants.END_OF_LAST_MATCH_FN 73 | ); 74 | 75 | static Regex caret() { 76 | return CARET; 77 | } 78 | 79 | static Regex dollar_() { 80 | return DOLLAR; 81 | } 82 | 83 | static Regex end_() { 84 | return END_OF_INPUT; 85 | } 86 | 87 | static Regex endOfLastMatch() { 88 | return END_OF_LAST_MATCH; 89 | } 90 | 91 | static Regex reference(String reference) { 92 | return new Reference( reference ); 93 | } 94 | 95 | static Regex eps() { 96 | return Epsilon.INSTANCE; 97 | } 98 | 99 | static Regex str(String str) { 100 | Regex ret = null; 101 | int[] codePoints = str.codePoints().toArray(); 102 | for ( int codePoint : codePoints ) { 103 | String codePointStr = new String( new int[] {codePoint}, 0, 1 ); 104 | if ( ret == null ) { 105 | ret = new Primitive( new Symbol( codePointStr ) ); 106 | } 107 | else { 108 | ret = ret.and( new Primitive( new Symbol( codePointStr ) ) ); 109 | } 110 | } 111 | if ( ret == null ) { 112 | return Regex.eps(); 113 | } 114 | return ret; 115 | } 116 | 117 | static Regex set(int from, int to) { 118 | return new SetRegex( 119 | CodePointSet.range( RangeRep.of( from, to ) ), 120 | "[" + RangeRep.of( from, to ).toString() + "]" 121 | ); 122 | } 123 | 124 | static Regex set(RangeRep... 
ranges) { 125 | if ( ranges.length == 0 ) { 126 | throw new IllegalArgumentException(); 127 | } 128 | StringBuilder stringRepresentation = new StringBuilder(); 129 | stringRepresentation.append( "[" ); 130 | for ( RangeRep range : ranges ) { 131 | range.append( stringRepresentation ); 132 | } 133 | stringRepresentation.append( "]" ); 134 | return new SetRegex( 135 | CodePointSet.range( ranges ), 136 | stringRepresentation.toString() 137 | ); 138 | } 139 | 140 | static Regex negativeSet(final RangeRep... ranges) { 141 | if ( ranges.length == 0 ) { 142 | throw new IllegalArgumentException( "ranges.length was equal to zero" ); 143 | } 144 | StringBuilder stringRepresentation = new StringBuilder(); 145 | stringRepresentation.append( "[^" ); 146 | for ( RangeRep range : ranges ) { 147 | range.append( stringRepresentation ); 148 | } 149 | stringRepresentation.append( "]" ); 150 | return new SetRegex( 151 | CodePointSet.range( ranges ).negative(), 152 | stringRepresentation.toString() 153 | ); 154 | } 155 | 156 | static Regex any_() { 157 | return new SetRegex( CharacterClassesUtils.ANY_FN, CharacterClassesUtils.ANY ); 158 | } 159 | 160 | static Regex whiteSpace() { 161 | return new SetRegex( CharacterClassesUtils.WHITE_SPACE_FN, CharacterClassesUtils.WHITE_SPACE ); 162 | } 163 | 164 | static Regex nonWhiteSpace() { 165 | return new SetRegex( CharacterClassesUtils.NON_WHITE_SPACE_FN, CharacterClassesUtils.NON_WHITE_SPACE ); 166 | } 167 | 168 | static Regex digit() { 169 | return new SetRegex( CharacterClassesUtils.DIGIT_FN, CharacterClassesUtils.DIGIT ); 170 | } 171 | 172 | static Regex nonDigit() { 173 | return new SetRegex( CharacterClassesUtils.NON_DIGIT_FN, CharacterClassesUtils.NON_DIGIT ); 174 | } 175 | 176 | static Regex wordCharacter() { 177 | return new SetRegex( 178 | CharacterClassesUtils.WORD_CHARACTER_FN 179 | , CharacterClassesUtils.WORD_CHARACTER 180 | ); 181 | } 182 | 183 | static Regex nonWordCharacter() { 184 | return new SetRegex( 185 | 
NON_WORD_CHARACTER_FN, CharacterClassesUtils.NON_WORD_CHARACTER 186 | ); 187 | } 188 | 189 | static Regex set(String from, String to) { 190 | return set( from.codePointAt( 0 ), to.codePointAt( 0 ) ); 191 | } 192 | 193 | default Regex or(Regex other) { 194 | return new Choice( this.copy(), other.copy() ); 195 | } 196 | 197 | default Regex or(String other) { 198 | return new Choice( this.copy(), str( other ) ); 199 | } 200 | 201 | default Regex and(Regex other) { 202 | return new Concat( this.copy(), other.copy() ); 203 | } 204 | 205 | default Regex and(String other) { 206 | return new Concat( this.copy(), str( other ) ); 207 | } 208 | 209 | default Regex plus() { 210 | return new Plus( this.copy() ); 211 | } 212 | 213 | default Regex star() { 214 | return this.plus().or( Regex.eps() ); 215 | } 216 | 217 | default Regex bind(String name) { 218 | return new Binding( name, this.copy() ); 219 | } 220 | 221 | default Regex dollar() { 222 | return this.and( dollar_() ); 223 | } 224 | 225 | default Regex end() { 226 | return this.and( end_() ); 227 | } 228 | 229 | default Regex any() { 230 | return this.and( any_() ); 231 | } 232 | 233 | //TODO: this can be done with a Stack and some clever handling 234 | //instead of recursion (or with a Trampoline) 235 | default Moa toMoa() { 236 | Moa moa = new Moa(); 237 | Map variables = new HashMap<>(); 238 | Set states = new HashSet<>(); 239 | Map> selfRelevant = new HashMap<>(); 240 | AtomicInteger stateIdxStart = new AtomicInteger( 2 ); 241 | Supplier idxSupplier = stateIdxStart::getAndIncrement; 242 | this.contributeStates( variables, states, selfRelevant, idxSupplier ); 243 | EdgeGraph edgeGraph = new EdgeGraph(); 244 | for ( State state : states ) { 245 | edgeGraph.addState( state ); 246 | } 247 | this.contributeEdges( edgeGraph, variables, states, selfRelevant ); 248 | //we start at 1 for variables just like Java Regexes 249 | AtomicInteger varIdxStart = new AtomicInteger( 1 ); 250 | this.calculateVariableOccurences( 
variables, varIdxStart::getAndIncrement ); 251 | moa.setVariables( variables ); 252 | moa.setEdges( edgeGraph ); 253 | moa.freeze(); 254 | return moa; 255 | } 256 | 257 | Regex copy(); 258 | 259 | String toString(); 260 | 261 | } 262 | -------------------------------------------------------------------------------- /engine/src/main/java/com/github/s4ke/moar/regex/SetRegex.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | */ 24 | package com.github.s4ke.moar.regex; 25 | 26 | import java.util.HashMap; 27 | import java.util.Map; 28 | import java.util.Set; 29 | import java.util.function.Function; 30 | import java.util.function.Supplier; 31 | 32 | import com.github.s4ke.moar.moa.Moa; 33 | import com.github.s4ke.moar.moa.edgegraph.EdgeGraph; 34 | import com.github.s4ke.moar.moa.edgegraph.MemoryAction; 35 | import com.github.s4ke.moar.moa.states.SetState; 36 | import com.github.s4ke.moar.moa.states.State; 37 | import com.github.s4ke.moar.moa.states.Variable; 38 | import com.github.s4ke.moar.strings.CodePointSet; 39 | import com.github.s4ke.moar.strings.EfficientString; 40 | 41 | /** 42 | * @author Martin Braun 43 | */ 44 | class SetRegex implements Regex { 45 | 46 | private static final String SELF_RELEVANT_KEY = ""; 47 | 48 | private final CodePointSet setDescriptor; 49 | private final String stringRepresentation; 50 | 51 | public SetRegex(CodePointSet setDescriptor, String stringRepresentation) { 52 | this.setDescriptor = setDescriptor; 53 | this.stringRepresentation = stringRepresentation; 54 | } 55 | 56 | @Override 57 | public Regex copy() { 58 | return new SetRegex( this.setDescriptor, this.stringRepresentation ); 59 | } 60 | 61 | @Override 62 | public void contributeEdges( 63 | EdgeGraph edgeGraph, 64 | Map variables, 65 | Set states, 66 | Map> selfRelevant) { 67 | State state = selfRelevant.get( this ).get( SELF_RELEVANT_KEY ); 68 | edgeGraph.addEdgeWithDeterminismCheck( Moa.SRC, new EdgeGraph.Edge( MemoryAction.NO_OP, state ), this ); 69 | edgeGraph.addEdgeWithDeterminismCheck( state, new EdgeGraph.Edge( MemoryAction.NO_OP, Moa.SNK ), this ); 70 | } 71 | 72 | @Override 73 | public void contributeStates( 74 | Map variables, 75 | Set states, 76 | Map> selfRelevant, 77 | Supplier idxSupplier) { 78 | //we default to length 1 79 | State state = new SetState( idxSupplier.get(), 1, this.setDescriptor, stringRepresentation ); 80 | 81 | states.add( state ); 82 | states.add( Moa.SRC ); 
83 | states.add( Moa.SNK ); 84 | selfRelevant.put( this, new HashMap<>() ); 85 | selfRelevant.get( this ).put( SELF_RELEVANT_KEY, state ); 86 | } 87 | 88 | @Override 89 | public void calculateVariableOccurences( 90 | Map variables, Supplier varIdxSupplier) { 91 | 92 | } 93 | 94 | @Override 95 | public String toString() { 96 | if ( this.stringRepresentation != null ) { 97 | return this.stringRepresentation; 98 | } 99 | else { 100 | return this.setDescriptor.toString(); 101 | } 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /engine/src/main/java/com/github/s4ke/moar/regex/StateContributor.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | */ 24 | package com.github.s4ke.moar.regex; 25 | 26 | import java.util.Map; 27 | import java.util.Set; 28 | import java.util.function.Supplier; 29 | 30 | import com.github.s4ke.moar.moa.states.State; 31 | import com.github.s4ke.moar.moa.states.Variable; 32 | 33 | /** 34 | * @author Martin Braun 35 | */ 36 | interface StateContributor { 37 | 38 | void contributeStates( 39 | Map variables, Set states, 40 | Map> selfRelevant, Supplier idxSupplier); 41 | 42 | } 43 | -------------------------------------------------------------------------------- /engine/src/main/java/com/github/s4ke/moar/regex/Symbol.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
/**
 * Immutable holder for a single symbol string.
 *
 * @author Martin Braun
 */
final class Symbol {

    /** The wrapped symbol, exactly as passed to the constructor. */
    public final String symbol;

    /**
     * @param symbol the symbol text to wrap; stored as-is without validation
     */
    Symbol(String symbol) {
        this.symbol = symbol;
    }

    /**
     * @return the wrapped symbol itself (not a copy or a decorated form)
     */
    @Override
    public String toString() {
        return symbol;
    }
}
23 | */ 24 | package com.github.s4ke.moar.regex; 25 | 26 | import java.util.Map; 27 | import java.util.function.Supplier; 28 | 29 | import com.github.s4ke.moar.moa.states.Variable; 30 | 31 | /** 32 | * @author Martin Braun 33 | */ 34 | interface VariableOccurence { 35 | 36 | void calculateVariableOccurences(Map variables, Supplier varIdxSupplier); 37 | 38 | } 39 | -------------------------------------------------------------------------------- /engine/src/main/java/com/github/s4ke/moar/regex/parser/RegexCompiler.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | */ 24 | package com.github.s4ke.moar.regex.parser; 25 | 26 | import com.github.s4ke.moar.regex.Regex; 27 | import org.antlr.v4.runtime.ANTLRInputStream; 28 | import org.antlr.v4.runtime.BaseErrorListener; 29 | import org.antlr.v4.runtime.CommonToken; 30 | import org.antlr.v4.runtime.CommonTokenStream; 31 | import org.antlr.v4.runtime.RecognitionException; 32 | import org.antlr.v4.runtime.Recognizer; 33 | import org.antlr.v4.runtime.tree.ParseTreeWalker; 34 | 35 | /** 36 | * ANTLR based Regex compiler (end users should probably use {@link com.github.s4ke.moar.MoaPattern#compile(String)} instead of manually using this class) 37 | * 38 | * @author Martin Braun 39 | */ 40 | public final class RegexCompiler { 41 | 42 | private static RegexParser regexParser(String regexStr) { 43 | RegexLexer lexer = new RegexLexer( new ANTLRInputStream( regexStr ) ); 44 | RegexParser parser = new RegexParser( new CommonTokenStream( lexer ) ); 45 | parser.setBuildParseTree( true ); 46 | return parser; 47 | } 48 | 49 | public static Regex compile(String regexStr) { 50 | StringBuilder additionalMessage = new StringBuilder(); 51 | RegexParser parser = regexParser( regexStr ); 52 | parser.getErrorListeners().clear(); 53 | parser.addErrorListener( 54 | new BaseErrorListener() { 55 | @Override 56 | public void syntaxError( 57 | Recognizer recognizer, 58 | Object offendingSymbol, 59 | int line, 60 | int charPositionInLine, 61 | String msg, 62 | RecognitionException e) { 63 | additionalMessage.append( "SyntaxEception in Regex: \"" ) 64 | .append( regexStr ) 65 | .append( "\": " ) 66 | .append( msg ); 67 | if ( offendingSymbol instanceof CommonToken ) { 68 | CommonToken token = (CommonToken) offendingSymbol; 69 | if ( token.getText().equals( "*" ) || token.getText().equals( "+" ) || token.getText() 70 | .equals( "?" 
) ) { 71 | additionalMessage.append( ", dangling metacharacter: '" ) 72 | .append( ((CommonToken) offendingSymbol).getText() ) 73 | .append( "' at line " ) 74 | .append( token.getLine() ) 75 | .append( ", pos " ) 76 | .append( token.getCharPositionInLine() ); 77 | } 78 | } 79 | } 80 | } 81 | ); 82 | RegexParser.RegexContext regexTree = parser.regex(); 83 | if ( parser.getNumberOfSyntaxErrors() > 0 ) { 84 | throw new IllegalArgumentException( "malformed regex found : " + regexStr + "\n" + additionalMessage.toString() ); 85 | } 86 | ParseTreeWalker walker = new ParseTreeWalker(); 87 | RegexGroupNameListener nameListener = new RegexGroupNameListener(); 88 | walker.walk( nameListener, regexTree ); 89 | 90 | RegexTreeListener listener = new RegexTreeListener( nameListener.getGroupNames() ); 91 | walker.walk( listener, regexTree ); 92 | 93 | return listener.finalRegex(); 94 | } 95 | 96 | } 97 | -------------------------------------------------------------------------------- /engine/src/main/java/com/github/s4ke/moar/regex/parser/RegexGroupNameListener.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 
15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | */ 24 | package com.github.s4ke.moar.regex.parser; 25 | 26 | import java.util.HashMap; 27 | import java.util.Map; 28 | 29 | /** 30 | * This is meant to be used in a first pass while converting the Regex in order 31 | * to get the correct groupNames for the \\number references that can occur, 32 | * before the actual group is known 33 | * 34 | * @author Martin Braun 35 | */ 36 | public class RegexGroupNameListener extends RegexBaseListener implements RegexListener { 37 | 38 | private int groupCount = 0; 39 | private final Map groupNames = new HashMap<>(); 40 | 41 | @Override 42 | public void enterCapturingGroup(RegexParser.CapturingGroupContext ctx) { 43 | int groupIdx = ++this.groupCount; 44 | String regexName; 45 | if ( ctx.groupName() == null ) { 46 | regexName = String.valueOf( groupIdx ); 47 | } 48 | else { 49 | regexName = ctx.groupName().getText(); 50 | } 51 | this.groupNames.put( groupIdx, regexName ); 52 | } 53 | 54 | public Map getGroupNames() { 55 | return groupNames; 56 | } 57 | 58 | 59 | } 60 | -------------------------------------------------------------------------------- /engine/src/main/java/com/github/s4ke/moar/strings/CodePointSet.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 
8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | */ 24 | package com.github.s4ke.moar.strings; 25 | 26 | import java.util.AbstractSet; 27 | import java.util.Iterator; 28 | import java.util.Set; 29 | 30 | import com.github.s4ke.moar.util.RangeRep; 31 | import com.google.common.collect.TreeRangeSet; 32 | 33 | /** 34 | * @author Martin Braun 35 | */ 36 | public class CodePointSet { 37 | 38 | private final Set set; 39 | private final RangeRep range; 40 | 41 | private CodePointSet(Set set, RangeRep ranges) { 42 | this.set = set; 43 | this.range = ranges; 44 | } 45 | 46 | public static CodePointSet range(RangeRep ranges) { 47 | return new CodePointSet( null, ranges ); 48 | } 49 | 50 | public static CodePointSet range(RangeRep... 
ranges) { 51 | TreeRangeSet rangeSet = TreeRangeSet.create(); 52 | for ( RangeRep rangeRep : ranges ) { 53 | rangeSet.addAll( rangeRep.getRangeSet() ); 54 | } 55 | return new CodePointSet( null, RangeRep.of( rangeSet ) ); 56 | } 57 | 58 | public static CodePointSet set(Set set) { 59 | return new CodePointSet( set, null ); 60 | } 61 | 62 | public CodePointSet negative() { 63 | RangeRep negativeRange = null; 64 | if ( this.range != null ) { 65 | negativeRange = this.range.negative(); 66 | } 67 | Set negativeSet = null; 68 | if ( this.set != null ) { 69 | negativeSet = new AbstractSet() { 70 | @Override 71 | public Iterator iterator() { 72 | throw new UnsupportedOperationException(); 73 | } 74 | 75 | @Override 76 | public int size() { 77 | throw new UnsupportedOperationException(); 78 | } 79 | 80 | @Override 81 | public boolean contains(Object o) { 82 | return !CodePointSet.this.set.contains( o ); 83 | } 84 | }; 85 | } 86 | return new CodePointSet( negativeSet, negativeRange ); 87 | } 88 | 89 | public boolean intersects(int codePoint) { 90 | if ( this.range != null ) { 91 | return this.range.intersects( codePoint ); 92 | } 93 | else if ( this.set != null ) { 94 | return this.set.contains( codePoint ); 95 | } 96 | else { 97 | throw new AssertionError(); 98 | } 99 | } 100 | 101 | public boolean intersects(CodePointSet other) { 102 | if ( this.set != null ) { 103 | if ( other.set != null ) { 104 | return intersects( this.set, other.set ); 105 | } 106 | else if ( other.range != null ) { 107 | for ( Integer val : this.set ) { 108 | if ( other.intersects( val ) ) { 109 | return true; 110 | } 111 | } 112 | return false; 113 | } 114 | else { 115 | throw new AssertionError( "wtf" ); 116 | } 117 | } 118 | else if ( this.range != null ) { 119 | if ( other.set != null ) { 120 | for ( Integer val : other.set ) { 121 | if ( this.intersects( val ) ) { 122 | return true; 123 | } 124 | } 125 | return false; 126 | } 127 | else if ( other.range != null ) { 128 | return 
this.range.intersects( other.range ); 129 | } 130 | else { 131 | throw new AssertionError( "wtf" ); 132 | } 133 | } 134 | else { 135 | throw new AssertionError( "wtf" ); 136 | } 137 | } 138 | 139 | private static boolean intersects(Set fst, Set snd) { 140 | for ( Integer f : fst ) { 141 | if ( snd.contains( f ) ) { 142 | return true; 143 | } 144 | } 145 | for ( Integer s : snd ) { 146 | if ( fst.contains( s ) ) { 147 | return true; 148 | } 149 | } 150 | return false; 151 | } 152 | 153 | 154 | } 155 | -------------------------------------------------------------------------------- /engine/src/main/java/com/github/s4ke/moar/strings/EfficientString.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | */ 24 | package com.github.s4ke.moar.strings; 25 | 26 | import com.github.s4ke.moar.util.CharSeq; 27 | import com.github.s4ke.moar.util.IntCharSeq; 28 | 29 | /** 30 | *

31 | * A String representation implementation that allows us to 32 | * create SubSequences without 33 | * having to reallocate arrays. 34 | *

35 | *

36 | * This helps to reduce the amount of time spent 37 | * building String objects and also reduces the amount 38 | * of memory used compared to basic Strings. 39 | *

40 | * 41 | * @author Martin Braun 42 | */ 43 | public class EfficientString implements Comparable { 44 | 45 | private CharSeq underlying; 46 | private int start; 47 | private int end; 48 | 49 | public EfficientString() { 50 | this.underlying = null; 51 | this.start = 0; 52 | this.end = 0; 53 | } 54 | 55 | public EfficientString(CharSeq underlying, int start, int end) { 56 | this.underlying = underlying; 57 | this.start = start; 58 | this.end = end; 59 | } 60 | 61 | public EfficientString(CharSeq underlying) { 62 | this.underlying = underlying; 63 | this.start = 0; 64 | this.end = underlying.codePointLength(); 65 | } 66 | 67 | public EfficientString(CharSequence underlying) { 68 | this( new IntCharSeq( underlying ) ); 69 | } 70 | 71 | public void update(EfficientString underlying, int start, int end) { 72 | this.underlying = underlying.underlying; 73 | this.start = start; 74 | this.end = end; 75 | } 76 | 77 | public void appendOrOverwrite(EfficientString substring) { 78 | if ( this.underlying == null ) { 79 | this.update( substring, substring.start, substring.end ); 80 | } 81 | else { 82 | if ( this.end != substring.start || this.underlying != substring.underlying ) { 83 | this.update( substring, substring.start, substring.end ); 84 | } 85 | this.end = substring.end; 86 | } 87 | } 88 | 89 | public void update(CharSeq underlying, int start, int end) { 90 | this.underlying = underlying; 91 | this.start = start; 92 | this.end = end; 93 | } 94 | 95 | public void reset() { 96 | this.underlying = null; 97 | this.start = 0; 98 | this.end = 0; 99 | } 100 | 101 | @Override 102 | public boolean equals(Object o) { 103 | if ( this == o ) { 104 | return true; 105 | } 106 | 107 | //these checks are important so we can have our special char behaviour 108 | //for i.e. 
^ and $ 109 | if ( o == null || getClass() != o.getClass() ) { 110 | return false; 111 | } 112 | 113 | EfficientString efficientString = (EfficientString) o; 114 | 115 | return this.equalTo( efficientString ); 116 | } 117 | 118 | public boolean equalTo(EfficientString str) { 119 | int ownLength = this.codePointLength(); 120 | if ( ownLength != str.codePointLength() ) { 121 | return false; 122 | } 123 | for ( int i = 0; i < ownLength; ++i ) { 124 | if ( this.codePoint( i ) != str.codePoint( i ) ) { 125 | return false; 126 | } 127 | } 128 | return true; 129 | } 130 | 131 | @Override 132 | public int hashCode() { 133 | int result = 0; 134 | for ( int i = 0; i < this.codePointLength(); ++i ) { 135 | result = 31 * result + Integer.hashCode( this.codePoint( i ) ); 136 | } 137 | result = 31 * result + Integer.hashCode( this.codePointLength() ); 138 | return result; 139 | } 140 | 141 | public int codePointLength() { 142 | return this.end - start; 143 | } 144 | 145 | /** 146 | * @param index the nth codepoint 147 | */ 148 | public int codePoint(int index) { 149 | if ( this.underlying == null ) { 150 | throw new IndexOutOfBoundsException(); 151 | } 152 | return this.underlying.codePoint( this.start + index ); 153 | } 154 | 155 | public EfficientString subSequence(int start, int end) { 156 | if ( end - start > this.codePointLength() ) { 157 | throw new IndexOutOfBoundsException(); 158 | } 159 | return new EfficientString( this.underlying, this.start + start, this.end + end ); 160 | } 161 | 162 | @Override 163 | public String toString() { 164 | if ( this.underlying == null ) { 165 | return ""; 166 | } 167 | if ( this.codePointLength() == this.underlying.codePointLength() ) { 168 | return this.underlying.toString(); 169 | } 170 | return this.underlying.subSequence( this.start, this.end ); 171 | } 172 | 173 | @Override 174 | public int compareTo(EfficientString o) { 175 | //similar to Java's String comparison fn 176 | 177 | int len1 = this.codePointLength(); 178 | int len2 = 
o.codePointLength(); 179 | int lim = Math.min( len1, len2 ); 180 | 181 | int k = 0; 182 | while ( k < lim ) { 183 | int c1 = this.codePoint( k ); 184 | int c2 = o.codePoint( k ); 185 | if ( c1 != c2 ) { 186 | return c1 - c2; 187 | } 188 | k++; 189 | } 190 | return len1 - len2; 191 | } 192 | } 193 | -------------------------------------------------------------------------------- /engine/src/main/java/com/github/s4ke/moar/util/Accessor.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
/**
 * Callback that is handed an object of type {@code T} to work with.
 *
 * @param <T> the type of the object passed to {@link #access(Object)}
 *
 * @author Martin Braun
 */
@FunctionalInterface
public interface Accessor<T> {

    /**
     * @param moa the object to access
     */
    void access(T moa);

}
/**
 * Minimal read-only view of a text that is addressed by code point.
 *
 * @author Martin Braun
 */
public interface CharSeq {

    /**
     * @return the number of code points in this sequence
     */
    int codePointLength();

    /**
     * @param index the nth codepoint
     *
     * @return the code point at that position
     */
    int codePoint(int index);

    /**
     * @param start first position of the requested part
     * @param end end position of the requested part
     *
     * @return the requested part as a {@link String}
     */
    String subSequence(int start, int end);

    /**
     * @return the whole sequence as a {@link String}
     */
    @Override
    String toString();
}
/**
 * A LIFO stack of functions that is executed by threading a value through
 * them: the function added last runs first and its result is the input of the
 * next one.
 *
 * @param <X> the type of the value passed from function to function
 *
 * @author Martin Braun
 */
public class ExecStack<X> {

    // ArrayDeque instead of the legacy, synchronized java.util.Stack; fully
    // qualified so that no additional import line is required
    private final java.util.Deque<Function<X, X>> stack = new java.util.ArrayDeque<>();

    /**
     * Pushes a function on top of the stack.
     *
     * @param supplier the function to run; must not be {@code null}
     *
     * @throws NullPointerException if {@code supplier} is {@code null}
     */
    public void add(Function<X, X> supplier) {
        this.stack.push( supplier );
    }

    /**
     * Pops and applies every function, starting with the most recently added
     * one. The stack is empty afterwards.
     *
     * @param start the value handed to the first function
     *
     * @return the result of the last function, or {@code start} itself if the
     * stack was empty
     */
    public X exec(X start) {
        X res = start;
        while ( !this.stack.isEmpty() ) {
            res = this.stack.pop().apply( res );
        }
        return res;
    }

}
23 | */ 24 | package com.github.s4ke.moar.util; 25 | 26 | /** 27 | * @author Martin Braun 28 | */ 29 | public class IntCharSeq implements CharSeq { 30 | private final int[] codePoints; 31 | 32 | public IntCharSeq(CharSequence seq) { 33 | this.codePoints = seq.codePoints().toArray(); 34 | } 35 | 36 | @Override 37 | public int codePointLength() { 38 | return this.codePoints.length; 39 | } 40 | 41 | /** 42 | * @param index the nth codepoint 43 | */ 44 | @Override 45 | public int codePoint(int index) { 46 | return this.codePoints[index]; 47 | } 48 | 49 | @Override 50 | public String subSequence(int start, int end) { 51 | if ( end - start > 0 ) { 52 | int[] codePointArr = new int[end - start]; 53 | System.arraycopy(this.codePoints, start, codePointArr, 0, end - start); 54 | return new String( codePointArr, 0, end - start ); 55 | } 56 | else { 57 | return ""; 58 | } 59 | } 60 | 61 | @Override 62 | public String toString() { 63 | return new String( this.codePoints, 0, this.codePoints.length ); 64 | } 65 | 66 | } 67 | -------------------------------------------------------------------------------- /engine/src/main/java/com/github/s4ke/moar/util/Perf.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 
/**
 * Tiny stopwatch based on {@link System#nanoTime()} that can optionally
 * print the measured duration to standard out.
 *
 * @author Martin Braun
 */
public class Perf {

	/** whether {@link #report(String)} prints anything */
	private final boolean reportingEnabled;

	/** timestamp taken by {@link #pre()} */
	private long startNanos;
	/** timestamp taken by {@link #after()} */
	private long endNanos;

	/**
	 * @param report {@code true} if {@link #report(String)} should print,
	 * {@code false} to make it a no-op
	 */
	public Perf(boolean report) {
		this.reportingEnabled = report;
	}

	/**
	 * Records the start timestamp.
	 */
	public void pre() {
		this.startNanos = System.nanoTime();
	}

	/**
	 * Records the end timestamp.
	 */
	public void after() {
		this.endNanos = System.nanoTime();
	}

	/**
	 * @return the nanoseconds between the last {@link #pre()} and the last
	 * {@link #after()} call
	 */
	public long diff() {
		return this.endNanos - this.startNanos;
	}

	/**
	 * Prints the measured duration if reporting was enabled in the
	 * constructor.
	 *
	 * @param name label that is printed in front of the duration
	 */
	public void report(String name) {
		if ( !this.reportingEnabled ) {
			return;
		}
		System.out.println( name + " took " + this.diff() + "ns" );
	}

}
notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | */ 24 | package com.github.s4ke.moar.util; 25 | 26 | import com.google.common.collect.Range; 27 | import com.google.common.collect.RangeSet; 28 | import com.google.common.collect.TreeRangeSet; 29 | 30 | /** 31 | * @author Martin Braun 32 | */ 33 | public class RangeRep { 34 | 35 | private final RangeSet rangeSet; 36 | 37 | private RangeRep(int from, int to) { 38 | this.rangeSet = TreeRangeSet.create(); 39 | this.rangeSet.add( Range.closed( from, to ) ); 40 | } 41 | 42 | private RangeRep(RangeSet rangeSet) { 43 | this.rangeSet = rangeSet; 44 | } 45 | 46 | public static RangeRep of(RangeSet rangeSet) { 47 | return new RangeRep( rangeSet ); 48 | } 49 | 50 | public static RangeRep of(int from, int to) { 51 | return new RangeRep( from, to ); 52 | } 53 | 54 | public RangeRep negative() { 55 | return of( this.rangeSet.complement() ); 56 | } 57 | 58 | public RangeSet getRangeSet() { 59 | return this.rangeSet; 60 | } 61 | 62 | public boolean intersects(RangeRep range) { 63 | return !intersect( this.rangeSet, range.rangeSet ).isEmpty(); 64 | } 65 | 66 | public boolean intersects(int value) { 67 | return this.rangeSet.contains( value ); 68 | } 69 | 70 | public StringBuilder append(StringBuilder builder) { 71 | //this is only ever called for things in the normal char range so we dont 72 | //have to check whether the the range is valid 73 | for ( Range range : this.rangeSet.asRanges() ) { 74 | 
if(range.lowerEndpoint().equals( range.upperEndpoint() )) { 75 | builder = builder.appendCodePoint( (Integer) range.lowerEndpoint() ); 76 | } else { 77 | builder = builder.appendCodePoint( (Integer) range.lowerEndpoint() ).append( "-" ).appendCodePoint( 78 | (Integer) range.upperEndpoint() 79 | ); 80 | } 81 | } 82 | return builder; 83 | } 84 | 85 | @Override 86 | public boolean equals(Object o) { 87 | if ( this == o ) { 88 | return true; 89 | } 90 | if ( o == null || getClass() != o.getClass() ) { 91 | return false; 92 | } 93 | 94 | RangeRep rangeRep = (RangeRep) o; 95 | 96 | return !(rangeSet != null ? !rangeSet.equals( rangeRep.rangeSet ) : rangeRep.rangeSet != null); 97 | 98 | } 99 | 100 | @Override 101 | public int hashCode() { 102 | return rangeSet != null ? rangeSet.hashCode() : 0; 103 | } 104 | 105 | @Override 106 | public String toString() { 107 | return this.append( new StringBuilder() ).toString(); 108 | } 109 | 110 | static RangeSet intersect(RangeSet a, RangeSet b) { 111 | RangeSet copy = TreeRangeSet.create( a ); 112 | copy.removeAll( b.complement() ); 113 | return copy; 114 | } 115 | 116 | } 117 | -------------------------------------------------------------------------------- /engine/src/test/java/com/github/s4ke/moar/regex/FullTextTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or 
substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | */ 24 | package com.github.s4ke.moar.regex; 25 | 26 | import com.github.s4ke.moar.MoaMatcher; 27 | import com.github.s4ke.moar.moa.Moa; 28 | 29 | import org.junit.Test; 30 | 31 | import static org.junit.Assert.assertEquals; 32 | import static org.junit.Assert.assertFalse; 33 | import static org.junit.Assert.assertTrue; 34 | 35 | /** 36 | * @author Martin Braun 37 | */ 38 | public class FullTextTest { 39 | 40 | @Test 41 | public void testMultiLine() { 42 | { 43 | Regex regex = Regex.str( "toast" ).or( "or is it?" 
).bind( "x" ); 44 | Moa moa = regex.toMoa(); 45 | MoaMatcher matcher = moa.matcher( "toast is not a beverage\nno wait, or is it?\nb" ); 46 | int matchCount = 0; 47 | while ( matcher.nextMatch() ) { 48 | ++matchCount; 49 | assertTrue( moa.matcher( matcher.getVariableContent( 1 ) ).matches() ); 50 | } 51 | assertEquals( 2, matchCount ); 52 | } 53 | } 54 | 55 | @Test 56 | public void testCoolLanguage() { 57 | Regex regex = Regex.reference( "x" ) 58 | .bind( "y" ) 59 | .and( Regex.reference( "y" ).and( "a" ).bind( "x" ) ) 60 | .plus().bind( "all" ); 61 | System.out.println( regex.toString() ); 62 | Moa moa = regex.toMoa(); 63 | { 64 | MoaMatcher matcher = moa.matcher( "aaaa" ); 65 | assertTrue( matcher.nextMatch() ); 66 | assertEquals( "aaaa", matcher.getVariableContent( "all" ) ); 67 | } 68 | { 69 | MoaMatcher matcher = moa.matcher( "aaaaa" ); 70 | assertFalse( moa.check( "aaaaa" ) ); 71 | assertTrue( matcher.nextMatch() ); 72 | assertEquals( "aaaa", matcher.getVariableContent( "all" ) ); 73 | } 74 | } 75 | 76 | } 77 | -------------------------------------------------------------------------------- /engine/src/test/java/com/github/s4ke/moar/regex/JavaMoaEqualityTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 
15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | */ 24 | package com.github.s4ke.moar.regex; 25 | 26 | import java.util.regex.Matcher; 27 | import java.util.regex.Pattern; 28 | 29 | import com.github.s4ke.moar.MoaMatcher; 30 | import com.github.s4ke.moar.MoaPattern; 31 | 32 | import org.junit.Test; 33 | 34 | import static org.junit.Assert.assertEquals; 35 | import static org.junit.Assert.assertTrue; 36 | 37 | /** 38 | * @author Martin Braun 39 | */ 40 | public class JavaMoaEqualityTest { 41 | 42 | private static final String someSonnet = " 1\n" + 43 | " From fairest creatures we desire increase,\n" + 44 | " That thereby beauty's rose might never die,\n" + 45 | " But as the riper should by time decease,\n" + 46 | " His tender heir might bear his memory:\n" + 47 | " But thou contracted to thine own bright eyes,\n" + 48 | " Feed'st thy light's flame with self-substantial fuel,\n" + 49 | " Making a famine where abundance lies,\n" + 50 | " Thy self thy foe, to thy sweet self too cruel:\n" + 51 | " Thou that art now the world's fresh ornament,\n" + 52 | " And only herald to the gaudy spring,\n" + 53 | " Within thine own bud buriest thy content,\n" + 54 | " And tender churl mak'st waste in niggarding:\n" + 55 | " Pity the world, or else this glutton be,\n" + 56 | " To eat the world's due, by the grave and thee."; 57 | 58 | public static final String[] REGEX_TO_CHECK = new String[] { 59 | "th(e)\\1+", 60 | "fairest", 61 | "from", 62 | "beauty", 63 | "foe", 64 | "f((riend)|(oe))", 65 | "[A-Z]([a-z])+", 66 | "shall besiege", 
67 | "(c)?old" 68 | }; 69 | 70 | @Test 71 | public void testSimple() { 72 | assertTrue(Pattern.compile( "th(e)\\1+" ).matcher( " thee." ).find()); 73 | assertTrue(MoaPattern.compile( "th(e)\\1+" ).matcher( " thee." ).nextMatch()); 74 | 75 | assertTrue(Pattern.compile( "th(e)\\1+" ).matcher( " thethee" ).find()); 76 | assertTrue(MoaPattern.compile( "th(e)\\1+" ).matcher( " thethee" ).nextMatch()); 77 | 78 | assertTrue(Pattern.compile( "th(e)\\1*" ).matcher( " thethee" ).find()); 79 | assertTrue(MoaPattern.compile( "th(e)\\1*" ).matcher( " thethee" ).nextMatch()); 80 | 81 | assertTrue(Pattern.compile( "th(e)\\1+" ).matcher( someSonnet ).find()); 82 | assertTrue(MoaPattern.compile( "th(e)\\1+" ).matcher( someSonnet ).nextMatch()); 83 | 84 | assertTrue(Pattern.compile( "th(e)\\1*" ).matcher( someSonnet ).find()); 85 | assertTrue(MoaPattern.compile( "th(e)\\1*" ).matcher( someSonnet ).nextMatch()); 86 | } 87 | 88 | @Test 89 | public void testWeird() { 90 | assertTrue(Pattern.compile( "th(e)\\1ater" ).matcher( " theeater" ).find()); 91 | assertTrue(MoaPattern.compile( "th(e)\\1ater" ).matcher( " theeater" ).nextMatch()); 92 | } 93 | 94 | @Test 95 | public void testEqualityFullText() { 96 | int matchCountFirst = 0; 97 | for ( String str : REGEX_TO_CHECK ) { 98 | Pattern pattern = Pattern.compile( str ); 99 | Matcher matcher = pattern.matcher( someSonnet ); 100 | while ( matcher.find() ) { 101 | ++matchCountFirst; 102 | } 103 | } 104 | 105 | int matchCountSnd = 0; 106 | for ( String str : REGEX_TO_CHECK ) { 107 | MoaPattern pattern = MoaPattern.compile( str ); 108 | MoaMatcher matcher = pattern.matcher( someSonnet ); 109 | while ( matcher.nextMatch() ) { 110 | ++matchCountSnd; 111 | } 112 | } 113 | 114 | assertEquals( matchCountFirst, matchCountSnd ); 115 | } 116 | 117 | 118 | } 119 | -------------------------------------------------------------------------------- /engine/src/test/java/com/github/s4ke/moar/regex/MatchReplaceTest.java: 
-------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | */ 24 | package com.github.s4ke.moar.regex; 25 | 26 | import java.util.regex.Matcher; 27 | import java.util.regex.Pattern; 28 | 29 | import com.github.s4ke.moar.MoaMatcher; 30 | import com.github.s4ke.moar.moa.Moa; 31 | 32 | import org.junit.Test; 33 | 34 | import static junit.framework.Assert.assertEquals; 35 | 36 | /** 37 | * @author Martin Braun 38 | */ 39 | public class MatchReplaceTest { 40 | 41 | @Test 42 | public void testReplaceFirst() { 43 | //check if the a previous match changes the outcome 44 | //of replaceFirst 45 | { 46 | Pattern p = Pattern.compile( "a" ); 47 | Matcher matcher = p.matcher( "aa" ); 48 | matcher.replaceFirst( "b" ); 49 | String res = matcher.replaceFirst( "b" ); 50 | assertEquals( "ba", res ); 51 | } 52 | //it does not. 53 | 54 | //now check the same for the GenericMoaMatcher 55 | { 56 | Regex regex = Regex.str( "a" ); 57 | Moa moa = regex.toMoa(); 58 | MoaMatcher moaMatcher = moa.matcher( "aa" ); 59 | moaMatcher.replaceFirst( "b" ); 60 | String res = moaMatcher.replaceFirst( "b" ); 61 | assertEquals( "ba", res ); 62 | } 63 | } 64 | 65 | @Test 66 | public void testReplaceAll() { 67 | { 68 | Moa moa = Regex.str( "aa" ).toMoa(); 69 | MoaMatcher matcher = moa.matcher( "aabaabaabaabaa" ); 70 | assertEquals( "bbbb", matcher.replaceAll( "" ) ); 71 | assertEquals( "ccbccbccbccbcc", matcher.replaceAll( "cc" ) ); 72 | } 73 | } 74 | 75 | } 76 | -------------------------------------------------------------------------------- /engine/src/test/java/com/github/s4ke/moar/regex/TestUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, 
sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | */ 24 | package com.github.s4ke.moar.regex; 25 | 26 | import com.github.s4ke.moar.NonDeterministicException; 27 | import com.github.s4ke.moar.moa.Moa; 28 | 29 | import org.junit.Assert; 30 | 31 | import static org.junit.Assert.fail; 32 | 33 | /** 34 | * @author Martin Braun 35 | */ 36 | public class TestUtil { 37 | public static String repeat(String str, int times) { 38 | String ret = ""; 39 | for ( int i = 0; i < times; ++i ) { 40 | ret += str; 41 | } 42 | return ret; 43 | } 44 | 45 | private static void assertMatch(boolean shouldMatch, Regex regex, String input) { 46 | Assert.assertEquals( shouldMatch, regex.toMoa().check( input ) ); 47 | } 48 | 49 | public static void assertMatch(boolean shouldMatch, Moa moa, String input) { 50 | Assert.assertEquals( shouldMatch, moa.check( input ) ); 51 | } 52 | 53 | public static void assertNonDet(Regex regex) { 54 | try { 55 | regex.toMoa(); 56 | fail( "regex " + regex + " was not recognized as non-deterministic" ); 57 | } 58 | catch (NonDeterministicException e) { 59 | System.out.println( "successfully got Exception while building the MOA: " + e.getMessage() ); 60 | } 61 | } 62 | 63 | public static void assertDet(Regex regex) { 64 
| regex.toMoa(); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /engine/src/test/java/com/github/s4ke/moar/regex/VSJavaPattern.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | */ 24 | package com.github.s4ke.moar.regex; 25 | 26 | import java.util.regex.Pattern; 27 | 28 | import com.github.s4ke.moar.moa.Moa; 29 | import com.github.s4ke.moar.util.GenericMatcher; 30 | import com.github.s4ke.moar.util.GenericMoaMatcher; 31 | import com.github.s4ke.moar.util.PatternMatcher; 32 | 33 | import org.junit.Test; 34 | 35 | import static org.junit.Assert.assertEquals; 36 | import static org.junit.Assert.assertTrue; 37 | 38 | /** 39 | * @author Martin Braun 40 | */ 41 | public class VSJavaPattern { 42 | 43 | @Test 44 | public void testSimple() { 45 | time( gen( Regex.str( "a" ) ), "a", true ); 46 | time( gen( Pattern.compile( "a" ) ), "a", true ); 47 | System.out.println(); 48 | 49 | time( gen( Regex.str( "test" ) ), "test", true ); 50 | time( gen( Pattern.compile( "test" ) ), "test", true ); 51 | System.out.println( "---------------------" ); 52 | } 53 | 54 | @Test 55 | public void testBackRef() { 56 | String testStr = "aaaaaaaaaaaaaaaaaaa|aaaaaaaaaaaaaaaaaaa"; 57 | 58 | time( 59 | gen( 60 | Regex.str( "a" ) 61 | .plus() 62 | .bind( "x" ) 63 | .and( "|" ) 64 | .and( Regex.reference( "x" ) ) 65 | ), testStr 66 | , true 67 | ); 68 | time( 69 | gen( 70 | Pattern.compile( "(a+)\\|\\1" ) 71 | ), testStr, true 72 | ); 73 | } 74 | 75 | private GenericMatcher gen(Object obj) { 76 | if ( obj instanceof Moa ) { 77 | return new GenericMoaMatcher( (Moa) obj ); 78 | } 79 | if ( obj instanceof Regex ) { 80 | return new GenericMoaMatcher( (Regex) obj ); 81 | } 82 | if ( obj instanceof Pattern ) { 83 | return new PatternMatcher( (Pattern) obj ); 84 | } 85 | return null; 86 | } 87 | 88 | public void time(GenericMatcher matcher, String string, boolean expectedResult) { 89 | for ( int i = 0; i < 1000000; ++i ) { 90 | assertEquals( expectedResult, matcher.check( string ) ); 91 | } 92 | //warm up 93 | long totalDiff = 0; 94 | for ( int i = 0; i < 100000; ++i ) { 95 | long pre = System.nanoTime(); 96 | assertEquals( expectedResult, matcher.check( string ) ); 97 | 
long after = System.nanoTime(); 98 | long diff = after - pre; 99 | totalDiff += diff; 100 | } 101 | System.out.println( matcher + " took " + totalDiff / 100000 + "ns" ); 102 | } 103 | 104 | } 105 | -------------------------------------------------------------------------------- /engine/src/test/java/com/github/s4ke/moar/util/GenericMatcher.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
/**
 * Common facade over the different matcher implementations so that tests
 * can treat them uniformly.
 *
 * @author Martin Braun
 */
public interface GenericMatcher {

	/**
	 * @return a description of the wrapped matcher, used when printing
	 * results
	 */
	@Override
	String toString();

	/**
	 * Checks the given input against the wrapped pattern.
	 *
	 * @param str the input to check
	 * @return the implementation's verdict for {@code str}
	 */
	boolean check(String str);

}
23 | */ 24 | package com.github.s4ke.moar.util; 25 | 26 | import com.github.s4ke.moar.moa.Moa; 27 | import com.github.s4ke.moar.regex.Regex; 28 | 29 | /** 30 | * @author Martin Braun 31 | */ 32 | public class GenericMoaMatcher implements GenericMatcher { 33 | 34 | private final Moa moa; 35 | 36 | public GenericMoaMatcher(Moa moa) { 37 | this.moa = moa; 38 | } 39 | 40 | public GenericMoaMatcher(Regex regex) { 41 | this.moa = regex.toMoa(); 42 | } 43 | 44 | @Override 45 | public boolean check(String str) { 46 | return this.moa.check( str ); 47 | } 48 | 49 | @Override 50 | public String toString() { 51 | return "GenericMoaMatcher{" + 52 | "moa=" + moa + 53 | '}'; 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /engine/src/test/java/com/github/s4ke/moar/util/PatternMatcher.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | */ 24 | package com.github.s4ke.moar.util; 25 | 26 | import java.util.regex.Matcher; 27 | import java.util.regex.Pattern; 28 | 29 | /** 30 | * @author Martin Braun 31 | */ 32 | public class PatternMatcher implements GenericMatcher { 33 | 34 | private final Pattern pattern; 35 | 36 | public PatternMatcher(Pattern pattern) { 37 | this.pattern = pattern; 38 | } 39 | 40 | @Override 41 | public boolean check(String str) { 42 | return this.pattern.matcher( str ).matches(); 43 | } 44 | 45 | @Override 46 | public String toString() { 47 | return "PatternMatcher{" + 48 | "pattern=" + pattern + 49 | '}'; 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /json/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | moar-parent 7 | com.github.s4ke 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | moar-json 13 | 14 | 15 | 16 | com.github.s4ke 17 | moar-engine 18 | ${project.version} 19 | 20 | 21 | 22 | org.json 23 | json 24 | 20160212 25 | 26 | 27 | junit 28 | junit 29 | 4.12 30 | test 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /json/src/test/java/com/github/s4ke/moar/json/MoarJSONSerializerTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, 
sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | */ 24 | package com.github.s4ke.moar.json; 25 | 26 | import com.github.s4ke.moar.MoaPattern; 27 | 28 | import org.junit.Test; 29 | 30 | import static org.junit.Assert.assertEquals; 31 | import static org.junit.Assert.assertTrue; 32 | 33 | /** 34 | * @author Martin Braun 35 | */ 36 | public class MoarJSONSerializerTest { 37 | 38 | @Test 39 | public void testJSONSerialization() { 40 | MoaPattern pattern = MoaPattern.compile( "^(?[a-z]b[^b]\\w)\\k.$" ); 41 | String jsonString = MoarJSONSerializer.toJSON( pattern ); 42 | System.out.println( jsonString ); 43 | assertTrue( pattern.matcher( "abcdabcde" ).matches() ); 44 | 45 | MoaPattern fromJSON = MoarJSONSerializer.fromJSON( jsonString ); 46 | System.out.println( "\n" + MoarJSONSerializer.toJSON( fromJSON ) ); 47 | assertTrue( fromJSON.matcher( "abcdabcde" ).matches() ); 48 | 49 | assertEquals( jsonString, MoarJSONSerializer.toJSON( fromJSON ) ); 50 | } 51 | 52 | //TODO: maybe test JSON serialization of MOAs generated by the DSL? 
53 | 54 | } 55 | -------------------------------------------------------------------------------- /lucene/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | moar-parent 7 | com.github.s4ke 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | moar-lucene 13 | 14 | 15 | 16 | org.apache.lucene 17 | lucene-core 18 | 6.1.0 19 | 20 | 21 | 22 | org.apache.lucene 23 | lucene-analyzers-common 24 | 6.1.0 25 | test 26 | 27 | 28 | 29 | com.github.s4ke 30 | moar-engine 31 | ${project.version} 32 | 33 | 34 | 35 | junit 36 | junit 37 | 4.12 38 | test 39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /lucene/src/main/java/com/github/s4ke/moar/lucene/query/ByteCharSeq.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | */ 24 | package com.github.s4ke.moar.lucene.query; 25 | 26 | import com.github.s4ke.moar.util.CharSeq; 27 | import org.apache.lucene.util.BytesRef; 28 | import org.apache.lucene.util.UnicodeUtil; 29 | 30 | /** 31 | * @author Martin Braun 32 | */ 33 | public class ByteCharSeq implements CharSeq { 34 | 35 | private final BytesRef contents; 36 | private final byte[] tmpByte = new byte[1]; 37 | private final char[] tmpChar = new char[1]; 38 | 39 | public ByteCharSeq(BytesRef contents) { 40 | this.contents = contents; 41 | } 42 | 43 | @Override 44 | public int codePointLength() { 45 | return this.contents.length; 46 | } 47 | 48 | @Override 49 | public int codePoint(int index) { 50 | //FIXME: is this the correct behaviour? 
51 | this.tmpByte[0] = this.contents.bytes[index]; 52 | UnicodeUtil.UTF8toUTF16( this.tmpByte, 0, 1, this.tmpChar ); 53 | return this.tmpChar[0] & 0xFFFF; 54 | } 55 | 56 | @Override 57 | public String subSequence(int start, int end) { 58 | return new String( this.contents.bytes, start, end ); 59 | } 60 | 61 | @Override 62 | public String toString() { 63 | return new String( this.contents.bytes, 0, this.contents.length ); 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /lucene/src/main/java/com/github/s4ke/moar/lucene/query/MoarQuery.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | */ 24 | package com.github.s4ke.moar.lucene.query; 25 | 26 | import java.io.IOException; 27 | 28 | import com.github.s4ke.moar.MoaMatcher; 29 | import com.github.s4ke.moar.MoaPattern; 30 | import com.github.s4ke.moar.util.CharSeq; 31 | import org.apache.lucene.index.FilteredTermsEnum; 32 | import org.apache.lucene.index.Terms; 33 | import org.apache.lucene.index.TermsEnum; 34 | import org.apache.lucene.search.MultiTermQuery; 35 | import org.apache.lucene.util.AttributeSource; 36 | import org.apache.lucene.util.BytesRef; 37 | 38 | /** 39 | * @author Martin Braun 40 | */ 41 | public class MoarQuery extends MultiTermQuery { 42 | 43 | private final MoaPattern moaPattern; 44 | 45 | public MoarQuery(String field, MoaPattern moaPattern) { 46 | super( field ); 47 | this.moaPattern = moaPattern; 48 | } 49 | 50 | 51 | @Override 52 | public String toString(String s) { 53 | return this.moaPattern.toString(); 54 | } 55 | 56 | 57 | @Override 58 | protected TermsEnum getTermsEnum( 59 | Terms terms, AttributeSource atts) throws IOException { 60 | MoaMatcher matcher = this.moaPattern.matcher( "" ); 61 | TermsEnum termsEnum = terms.iterator(); 62 | return new MoarTermsEnum( matcher, termsEnum ); 63 | } 64 | 65 | private static class MoarTermsEnum extends FilteredTermsEnum { 66 | 67 | private final MoaMatcher matcher; 68 | 69 | private MoarTermsEnum(MoaMatcher matcher, TermsEnum termsEnum) throws IOException { 70 | super( termsEnum ); 71 | this.matcher = matcher; 72 | this.setInitialSeekTerm( termsEnum.next() ); 73 | } 74 | 75 | @Override 76 | protected AcceptStatus accept(BytesRef term) throws IOException { 77 | CharSeq byteCharSeq = new ByteCharSeq( term ); 78 | if ( matcher.reuse( byteCharSeq ).matches() ) { 79 | return AcceptStatus.YES; 80 | } 81 | return AcceptStatus.NO; 82 | } 83 | } 84 | 85 | @Override 86 | public boolean equals(Object o) { 87 | if ( this == o ) { 88 | return true; 89 | } 90 | if ( o == null || getClass() != o.getClass() ) { 91 | return false; 92 | } 93 | 
94 | MoarQuery moarQuery = (MoarQuery) o; 95 | 96 | return !(moaPattern != null ? !moaPattern.equals( moarQuery.moaPattern ) : moarQuery.moaPattern != null); 97 | } 98 | 99 | @Override 100 | public int hashCode() { 101 | int result = 0; 102 | result = 31 * result + (moaPattern != null ? moaPattern.hashCode() : 0); 103 | return result; 104 | } 105 | 106 | } 107 | -------------------------------------------------------------------------------- /lucene/src/test/java/com/github/s4ke/moar/lucene/query/test/BaseLuceneTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | */ 24 | 25 | package com.github.s4ke.moar.lucene.query.test; 26 | 27 | import java.io.IOException; 28 | import java.util.Arrays; 29 | import java.util.List; 30 | import java.util.Random; 31 | 32 | import org.apache.lucene.analysis.core.WhitespaceAnalyzer; 33 | import org.apache.lucene.document.Document; 34 | import org.apache.lucene.document.Field; 35 | import org.apache.lucene.document.FieldType; 36 | import org.apache.lucene.index.DirectoryReader; 37 | import org.apache.lucene.index.IndexOptions; 38 | import org.apache.lucene.index.IndexReader; 39 | import org.apache.lucene.index.IndexWriter; 40 | import org.apache.lucene.index.IndexWriterConfig; 41 | import org.apache.lucene.search.IndexSearcher; 42 | import org.apache.lucene.search.Query; 43 | import org.apache.lucene.search.TopDocs; 44 | import org.apache.lucene.store.Directory; 45 | 46 | import static org.junit.Assert.assertEquals; 47 | 48 | /** 49 | * @author Martin Braun 50 | */ 51 | public class BaseLuceneTest { 52 | 53 | public static final List WORDS = Arrays.asList( 54 | "toast", 55 | "marmalade", 56 | "peanutbutter", 57 | "jelly", 58 | "moar", 59 | "lucene", 60 | "regex", 61 | "hello", 62 | "bye", 63 | "bread", 64 | "baguette", 65 | "pizza", 66 | "kebap", 67 | "chili", 68 | "pepperoni", 69 | "space" 70 | ); 71 | 72 | public static final int WORD_COUNT_PER_DOCUMENT = 50; 73 | public static final int BACK_REF_DOC_COUNT = 1000; 74 | 75 | protected Directory d; 76 | 77 | public Document createDocument() { 78 | return new Document(); 79 | } 80 | 81 | public static final FieldType ID_FIELD_TYPE; 82 | 83 | static { 84 | FieldType idFieldType = new FieldType(); 85 | idFieldType.setStored( true ); 86 | idFieldType.setTokenized( false ); 87 | idFieldType.setIndexOptions( IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS ); 88 | ID_FIELD_TYPE = idFieldType; 89 | } 90 | 91 | public static final FieldType TAGS_FIELD_TYPE; 92 | 93 | static { 94 | FieldType tagsFieldType = new FieldType(); 95 | 
tagsFieldType.setStored( false ); 96 | tagsFieldType.setTokenized( true ); 97 | tagsFieldType.setStoreTermVectors( false ); 98 | tagsFieldType.setIndexOptions( IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS ); 99 | TAGS_FIELD_TYPE = tagsFieldType; 100 | } 101 | 102 | protected Random random; 103 | 104 | public void setup(Directory directory, Random random) { 105 | this.d = directory; 106 | if ( random == null ) { 107 | this.random = new Random(); 108 | } 109 | else { 110 | this.random = random; 111 | } 112 | } 113 | 114 | public void writeSingleDoc(Document document) throws IOException { 115 | try (IndexWriter iw = new IndexWriter( this.d, this.getIwc() )) { 116 | iw.addDocument( document ); 117 | iw.commit(); 118 | } 119 | } 120 | 121 | public String randomString(int words) { 122 | StringBuilder ret = new StringBuilder(); 123 | for ( int i = 0; i < words; ++i ) { 124 | ret.append( WORDS.get( this.random.nextInt( WORDS.size() ) ) ).append( " " ); 125 | } 126 | return ret.toString(); 127 | } 128 | 129 | public String repeat(String str, int count) { 130 | StringBuilder builder = new StringBuilder(); 131 | for ( int i = 0; i < count; ++i ) { 132 | builder.append( str ); 133 | } 134 | return builder.toString(); 135 | } 136 | 137 | 138 | public void clearIndex() throws IOException { 139 | System.out.println( "clearing index" ); 140 | { 141 | WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(); 142 | 143 | IndexWriterConfig iwc = new IndexWriterConfig( analyzer ); 144 | try (IndexWriter iw = new IndexWriter( this.d, iwc )) { 145 | iw.deleteAll(); 146 | } 147 | } 148 | } 149 | 150 | public void assertHits(Query query, int hitCount) throws IOException { 151 | try (IndexReader ir = DirectoryReader.open( d )) { 152 | IndexSearcher searcher = new IndexSearcher( ir ); 153 | TopDocs td = searcher.search( query, 10 ); 154 | assertEquals( "hitCount didn't match expected hit count", hitCount, td.totalHits ); 155 | } 156 | } 157 | 158 | public void setupBackRefData() 
throws IOException { 159 | this.clearIndex(); 160 | 161 | System.out.println( "writing into index" ); 162 | WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(); 163 | 164 | IndexWriterConfig iwc = new IndexWriterConfig( analyzer ); 165 | try (IndexWriter iw = new IndexWriter( this.d, iwc )) { 166 | 167 | for ( int i = 0; i < BACK_REF_DOC_COUNT; ++i ) { 168 | Document doc = createDocument(); 169 | int repeatCount = random.nextInt( 100 ) + 1; 170 | Field idField = new Field( "id", String.valueOf( i ), ID_FIELD_TYPE ); 171 | Field field = new Field( 172 | "tag", randomString( WORD_COUNT_PER_DOCUMENT ) + " " + repeat( 173 | "a", 174 | repeatCount 175 | ) + "b" + repeat( "a", repeatCount ), TAGS_FIELD_TYPE 176 | ); 177 | doc.add( field ); 178 | doc.add( idField ); 179 | iw.addDocument( doc ); 180 | if ( i % 100 == 0 ) { 181 | System.out.println( i ); 182 | } 183 | 184 | } 185 | iw.commit(); 186 | } 187 | System.out.println( "finished setting up index data" ); 188 | } 189 | 190 | public IndexWriterConfig getIwc() { 191 | WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(); 192 | return new IndexWriterConfig( analyzer ); 193 | } 194 | } 195 | -------------------------------------------------------------------------------- /lucene/src/test/java/com/github/s4ke/moar/lucene/query/test/MoarQueryPerfTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission 
notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | */ 24 | package com.github.s4ke.moar.lucene.query.test; 25 | 26 | import java.io.IOException; 27 | import java.nio.file.Paths; 28 | import java.util.Random; 29 | 30 | import com.github.s4ke.moar.MoaPattern; 31 | import com.github.s4ke.moar.lucene.query.MoarQuery; 32 | import com.github.s4ke.moar.util.Perf; 33 | import org.apache.lucene.document.Document; 34 | import org.apache.lucene.document.Field; 35 | import org.apache.lucene.index.DirectoryReader; 36 | import org.apache.lucene.index.IndexReader; 37 | import org.apache.lucene.index.IndexWriter; 38 | import org.apache.lucene.index.Term; 39 | import org.apache.lucene.search.IndexSearcher; 40 | import org.apache.lucene.search.RegexpQuery; 41 | import org.apache.lucene.search.TopDocs; 42 | import org.apache.lucene.store.FSDirectory; 43 | 44 | import org.junit.After; 45 | import org.junit.Before; 46 | import org.junit.Test; 47 | 48 | /** 49 | * @author Martin Braun 50 | */ 51 | public class MoarQueryPerfTest extends BaseLuceneTest { 52 | 53 | @Before 54 | public void setup() throws IOException { 55 | this.setup(FSDirectory.open( Paths.get( "lucene_dir", "moarquery_perf" ) ), new Random(1231233471)); 56 | } 57 | 58 | private void setupComparisonData() throws IOException { 59 | this.clearIndex(); 60 | 61 | System.out.println( "writing into index" ); 62 | try (IndexWriter iw = new IndexWriter( this.d, this.getIwc() )) { 63 | 64 | for ( int 
i = 0; i < 1000; ++i ) { 65 | Document doc = createDocument(); 66 | Field idField = new Field( "id", String.valueOf( i ), ID_FIELD_TYPE ); 67 | Field field = new Field( "tag", randomString( WORD_COUNT_PER_DOCUMENT ), TAGS_FIELD_TYPE ); 68 | doc.add( field ); 69 | doc.add( idField ); 70 | iw.addDocument( doc ); 71 | if ( i % 100 == 0 ) { 72 | System.out.println( i ); 73 | } 74 | 75 | } 76 | iw.commit(); 77 | } 78 | System.out.println( "finished setting up index data" ); 79 | } 80 | 81 | @Test 82 | public void testComparison() throws IOException { 83 | this.setupComparisonData(); 84 | 85 | try (IndexReader ir = DirectoryReader.open( d )) { 86 | IndexSearcher is = new IndexSearcher( ir ); 87 | Perf perf = new Perf( true ); 88 | 89 | for ( int i = 0; i < 1000; ++i ) { 90 | String wordOfChoice = WORDS.get( this.random.nextInt( WORDS.size() ) ); 91 | wordOfChoice = wordOfChoice.substring( 0, this.random.nextInt( wordOfChoice.length() - 1 ) + 1 ); 92 | wordOfChoice += ".*"; 93 | System.out.println( wordOfChoice ); 94 | { 95 | perf.pre(); 96 | MoaPattern pattern = MoaPattern.compile( wordOfChoice ); 97 | MoarQuery tq = new MoarQuery( "tag", pattern ); 98 | 99 | TopDocs td = is.search( tq, 10 ); 100 | System.out.println( td.totalHits + " moar query hits" ); 101 | perf.after(); 102 | perf.report( "searching with moar" ); 103 | } 104 | 105 | { 106 | RegexpQuery regexpQuery = new RegexpQuery( new Term( "tag", wordOfChoice ) ); 107 | perf.pre(); 108 | TopDocs td = is.search( 109 | regexpQuery 110 | , 10 111 | ); 112 | System.out.println( td.totalHits + " regexp query hits" ); 113 | perf.after(); 114 | perf.report( "searching with regexp" ); 115 | } 116 | } 117 | } 118 | } 119 | 120 | @After 121 | public void tearDown() throws IOException { 122 | this.d.close(); 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /lucene/src/test/java/com/github/s4ke/moar/lucene/query/test/MoarQueryTest.java: 
-------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Martin Braun 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | */ 24 | package com.github.s4ke.moar.lucene.query.test; 25 | 26 | import java.io.IOException; 27 | import java.nio.file.Paths; 28 | import java.util.Random; 29 | 30 | import com.github.s4ke.moar.MoaMatcher; 31 | import com.github.s4ke.moar.MoaPattern; 32 | import com.github.s4ke.moar.lucene.query.MoarQuery; 33 | import org.apache.lucene.document.Document; 34 | import org.apache.lucene.document.Field; 35 | import org.apache.lucene.index.IndexWriter; 36 | import org.apache.lucene.search.MatchAllDocsQuery; 37 | import org.apache.lucene.store.FSDirectory; 38 | 39 | import org.junit.After; 40 | import org.junit.Before; 41 | import org.junit.Test; 42 | 43 | /** 44 | * @author Martin Braun 45 | */ 46 | public class MoarQueryTest extends BaseLuceneTest { 47 | 48 | private static final String UNIQUE = "unique"; 49 | 50 | @Before 51 | public void setup() throws IOException { 52 | this.setup( FSDirectory.open( Paths.get( "lucene_dir", "moarquery" ) ), new Random( 123273472 ) ); 53 | } 54 | 55 | private void setupData() throws IOException { 56 | System.out.println( "clearing index" ); 57 | { 58 | try (IndexWriter iw = new IndexWriter( this.d, this.getIwc() )) { 59 | iw.deleteAll(); 60 | } 61 | } 62 | 63 | System.out.println( "writing into index" ); 64 | try (IndexWriter iw = new IndexWriter( this.d, this.getIwc() )) { 65 | 66 | 67 | { 68 | Document doc = createDocument(); 69 | Field idField = new Field( "id", String.valueOf( -1 ), ID_FIELD_TYPE ); 70 | Field field = new Field( "tag", UNIQUE, TAGS_FIELD_TYPE ); 71 | doc.add( field ); 72 | doc.add( idField ); 73 | iw.addDocument( doc ); 74 | } 75 | 76 | for ( int i = 0; i < 100; ++i ) { 77 | Document doc = createDocument(); 78 | Field idField = new Field( "id", String.valueOf( i ), ID_FIELD_TYPE ); 79 | Field field = new Field( "tag", randomString( WORD_COUNT_PER_DOCUMENT ), TAGS_FIELD_TYPE ); 80 | doc.add( field ); 81 | doc.add( idField ); 82 | iw.addDocument( doc ); 83 | if ( i % 10 == 0 ) { 84 | System.out.println( i ); 
85 | } 86 | 87 | } 88 | iw.commit(); 89 | } 90 | System.out.println( "finished setting up index data" ); 91 | } 92 | 93 | @Test 94 | public void testBasics() throws IOException { 95 | this.setupData(); 96 | MoaPattern pattern = MoaPattern.compile( UNIQUE ); 97 | MoarQuery tq = new MoarQuery( "tag", pattern ); 98 | this.assertHits( tq, 1 ); 99 | } 100 | 101 | @Test 102 | public void testBackRef() throws IOException { 103 | this.setupBackRefData(); 104 | { 105 | //add one document that should not match 106 | Document doc = createDocument(); 107 | doc.add( new Field( "id", String.valueOf( -1 ), ID_FIELD_TYPE ) ); 108 | doc.add( new Field( "tag", UNIQUE, TAGS_FIELD_TYPE ) ); 109 | this.writeSingleDoc( doc ); 110 | } 111 | 112 | MoaPattern pattern = MoaPattern.compile( "(a*)b\\1" ); 113 | 114 | MoarQuery tq = new MoarQuery( "tag", pattern ); 115 | this.assertHits( new MatchAllDocsQuery(), BACK_REF_DOC_COUNT + 1 ); 116 | this.assertHits( tq, BACK_REF_DOC_COUNT ); 117 | } 118 | 119 | @After 120 | public void tearDown() throws IOException { 121 | this.d.close(); 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 4.0.0 7 | com.github.s4ke 8 | 1.0-SNAPSHOT 9 | moar-parent 10 | pom 11 | moar-parent 12 | 13 | 14 | engine 15 | cli 16 | json 17 | lucene 18 | benchmark 19 | 20 | 21 | 22 | UTF-8 23 | 24 | 25 | 26 | 27 | 28 | org.apache.maven.plugins 29 | maven-compiler-plugin 30 | 3.2 31 | 32 | 1.8 33 | 1.8 34 | 35 | 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /presentation/README.md: -------------------------------------------------------------------------------- 1 | The presentation can be viewed at: 2 | 3 | https://docs.google.com/presentation/d/1P7KmQEj813LKLg0eL1LIWcRD-PHiB3UeOA4MCHcBj20/edit?usp=sharing 4 | 
-------------------------------------------------------------------------------- /presentation/automata.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/moar-regex/moar/c406bddfbf78c00830fbbf1235290c20007f3325/presentation/automata.pdf -------------------------------------------------------------------------------- /presentation/presentation.md: -------------------------------------------------------------------------------- 1 | The presentation can be viewed on [Google Docs](https://docs.google.com/presentation/d/1P7KmQEj813LKLg0eL1LIWcRD-PHiB3UeOA4MCHcBj20/edit?usp=sharing) 2 | --------------------------------------------------------------------------------