├── .gitignore ├── LICENSE ├── README.markdown ├── build.gradle └── src ├── main └── java │ └── net │ └── codebox │ └── readableregex │ ├── CharacterTokenPart.java │ ├── Quantifier.java │ ├── RegExBuilder.java │ ├── Token.java │ └── Utils.java └── test └── java └── net └── codebox └── readableregex └── TestRegExBuilder.java /.gitignore: -------------------------------------------------------------------------------- 1 | .gradle 2 | .idea 3 | build 4 | *.iml 5 | out 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Rob Dawson 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.markdown: -------------------------------------------------------------------------------- 1 | Readable Regex 2 | ============== 3 | 4 | This library provides a way to make complex regular expressions in Java code more readable. 5 | 6 | The best way to explain what it does is to show some examples: 7 | 8 | // Matches a single digit 9 | RegExBuilder.build(anyDigit()); // "[0-9]" 10 | 11 | // Matches exactly 2 digits 12 | RegExBuilder.build(exactly(2).of(anyDigit())); // "[0-9]{2}" 13 | 14 | // Matches between 2 and 4 letters 15 | RegExBuilder.build(between(2,4).of(anyLetter())); // "[a-zA-Z]{2,4}" 16 | 17 | Characters that have special meaning within a regular expression are escaped automatically: 18 | 19 | // Matches one or more occurrences of the text 'Ho?' 20 | RegExBuilder.build(oneOrMore().of("Ho?")); // "(Ho\?)+" 21 | 22 | // Matches anything except an opening or closing square bracket, or a backslash 23 | RegExBuilder.build( 24 | anyCharacterExcept( 25 | characters('[', ']','\\') 26 | ) 27 | ); // "[^[\\]\\\\]" 28 | 29 | Readability is greatly improved for more complex expressions: 30 | 31 | // More or less validates the format of an email address 32 | // [_\\-A-Za-z0-9]+(\\.[_\\-A-Za-z0-9]+)*@[A-Za-z0-9]+(\\.[A-Za-z0-9]+)*\\.[a-zA-Z]{2,} 33 | RegExBuilder.build( 34 | oneOrMore().of( 35 | anyOneOf( 36 | characters('_','-'), range('A','Z'), range('a','z'), range('0','9') 37 | ) 38 | ), 39 | zeroOrMore().of( 40 | text("."), 41 | oneOrMore().of( 42 | anyOneOf( 43 | characters('_','-'), range('A','Z'), range('a','z'), range('0','9') 44 | ) 45 | ) 46 | ), 47 | text("@"), 48 | oneOrMore().of( 49 | anyOneOf( 50 | range('A','Z'), range('a','z'), range('0','9') 51 | ) 52 | ), 53 | zeroOrMore().of( 54 | text("."), 55 | oneOrMore().of( 56 | anyOneOf( 57 | range('A','Z'), range('a','z'), range('0','9') 58 | ) 59 | ) 60 | ), 61 | text("."), 62 | atLeast(2).of( 63 | anyLetter() 64 | 
) 65 | ); 66 | 67 | All classes in the library are immutable, and therefore instances are re-usable and thread-safe. -------------------------------------------------------------------------------- /build.gradle: -------------------------------------------------------------------------------- 1 | group = "net.codebox" 2 | version = "1.0.0" 3 | 4 | apply plugin: 'java' 5 | apply plugin: 'maven' 6 | apply plugin: 'signing' 7 | 8 | repositories { 9 | mavenCentral() 10 | } 11 | 12 | dependencies { /* both libraries are referenced only from src/test, so neither is a compile-scope dependency */ 13 | testCompile group: 'junit', name: 'junit', version: '4.11' 14 | 15 | testCompile group: 'org.hamcrest', name: 'hamcrest-all', version: '1.3' 16 | } 17 | 18 | signing { 19 | sign configurations.archives 20 | } 21 | 22 | task javadocJar(type: Jar) { 23 | classifier = 'javadoc' 24 | from javadoc 25 | } 26 | 27 | task sourcesJar(type: Jar) { 28 | classifier = 'sources' 29 | from sourceSets.main.allSource 30 | } 31 | 32 | artifacts { 33 | archives javadocJar, sourcesJar 34 | } 35 | 36 | uploadArchives { 37 | repositories { 38 | mavenDeployer { 39 | beforeDeployment { MavenDeployment deployment -> signing.signPom(deployment) } 40 | 41 | repository(url: "https://oss.sonatype.org/service/local/staging/deploy/maven2") { 42 | authentication(userName: ossrhUsername, password: ossrhPassword) 43 | } 44 | 45 | snapshotRepository(url: "https://oss.sonatype.org/content/repositories/snapshots") { 46 | authentication(userName: ossrhUsername, password: ossrhPassword) 47 | } 48 | 49 | pom.project { 50 | name 'readable-regex' 51 | packaging 'jar' 52 | artifactId 'readable-regex' 53 | description 'This library provides a way to make complex regular expressions in Java code more readable.' 
54 | url 'https://github.com/codebox/readable-regex' 55 | 56 | scm { 57 | connection 'scm:git@github.com:codebox/readable-regex.git' 58 | developerConnection 'scm:git@github.com:codebox/readable-regex.git' 59 | url 'https://github.com/codebox/readable-regex' 60 | } 61 | 62 | licenses { 63 | license { 64 | name 'MIT License' 65 | url 'https://opensource.org/licenses/MIT' 66 | } 67 | } 68 | 69 | developers { 70 | developer { 71 | id 'codebox' 72 | name 'Rob Dawson' 73 | email 'rob@codebox.net' 74 | } 75 | } 76 | } 77 | } 78 | } 79 | } -------------------------------------------------------------------------------- /src/main/java/net/codebox/readableregex/CharacterTokenPart.java: -------------------------------------------------------------------------------- 1 | package net.codebox.readableregex; 2 | 3 | /** 4 | * Class representing part of a 'character token' in a regular expression. A character token 5 | * matches a single character, and is written using a pair of square brackets enclosing values 6 | * that determine which characters will be matched. 7 | * 8 | * Character tokens are composed of one or more parts, with each part representing either a list, 9 | * or a range, of characters. 
10 | */ 11 | public abstract class CharacterTokenPart{ 12 | protected final boolean isFirstPartInToken, isLastPartInToken; 13 | 14 | protected CharacterTokenPart(final boolean isFirstPartInToken, final boolean isLastPartInToken) { 15 | this.isFirstPartInToken = isFirstPartInToken; 16 | this.isLastPartInToken = isLastPartInToken; 17 | } 18 | /* Each of these returns a copy of the part with the corresponding position flag set to true. */ 19 | protected abstract CharacterTokenPart copyAndSetFirstPartInToken(); 20 | protected abstract CharacterTokenPart copyAndSetLastPartInToken(); 21 | /* Part that holds an explicit list of characters. */ 22 | private static class SimpleCharacterTokenPart extends CharacterTokenPart{ 23 | private final Character[] characters; 24 | protected SimpleCharacterTokenPart(final Character[] characters, final boolean isFirstPartInToken, 25 | final boolean isLastPartInToken) { 26 | super(isFirstPartInToken, isLastPartInToken); 27 | this.characters = characters; 28 | } 29 | /* Renders the characters one after another, escaping each according to its position in the token. */ 30 | @Override 31 | public String toString() { 32 | final int charactersCount = characters.length; 33 | final StringBuilder sb = new StringBuilder(); 34 | for (int i = 0; i < charactersCount; i++) { 35 | final boolean isFirstCharInPart = (i==0); 36 | final boolean isLastCharInPart = (i==charactersCount-1); 37 | /* A character is first/last in the whole token only when its part is also first/last. */ 38 | final boolean isFirstCharInToken = isFirstCharInPart && isFirstPartInToken; 39 | final boolean isLastCharInToken = isLastCharInPart && isLastPartInToken; 40 | 41 | final String escapedChar = escapeCharacter(characters[i], isFirstCharInToken, isLastCharInToken); 42 | 43 | sb.append(escapedChar); 44 | } 45 | return sb.toString(); 46 | } 47 | 48 | @Override 49 | protected CharacterTokenPart copyAndSetFirstPartInToken() { 50 | return new SimpleCharacterTokenPart(characters, true, isLastPartInToken); 51 | } 52 | @Override 53 | protected CharacterTokenPart copyAndSetLastPartInToken() { 54 | return new SimpleCharacterTokenPart(characters, isFirstPartInToken, true); 55 | } 56 | } 57 | 58 | /** 59 | * Constructs a CharacterTokenPart instance that represents a list of characters. 
60 | * 61 | * @param characters the characters that will be matched by this part of the token 62 | * 63 | * @return a CharacterTokenPart instance that represents a list of characters 64 | */ 65 | public static CharacterTokenPart characters(final Character... characters){ 66 | return new SimpleCharacterTokenPart(characters, false, false); 67 | } 68 | /* Part that holds an inclusive from-to range of characters. */ 69 | private static class RangeCharacterTokenPart extends CharacterTokenPart{ 70 | private final char from, to; /* assigned once in the constructor and never changed */ 71 | protected RangeCharacterTokenPart(final char from, final char to, final boolean isFirstPartInToken, 72 | final boolean isLastPartInToken) { 73 | super(isFirstPartInToken, isLastPartInToken); 74 | this.from = from; 75 | this.to = to; 76 | } 77 | /* Renders "from-to"; only 'from' can be the first character of the token. */ 78 | @Override 79 | public String toString() { 80 | return escapeCharacter(from, isFirstPartInToken, false) + "-" + escapeCharacter(to, false, isLastPartInToken); 81 | } 82 | 83 | @Override 84 | protected CharacterTokenPart copyAndSetFirstPartInToken() { 85 | return new RangeCharacterTokenPart(from, to, true, isLastPartInToken); 86 | } 87 | @Override 88 | protected CharacterTokenPart copyAndSetLastPartInToken() { 89 | return new RangeCharacterTokenPart(from, to, isFirstPartInToken, true); 90 | } 91 | } 92 | 93 | /** 94 | * Constructs a CharacterTokenPart instance that represents a range of characters. 
95 | * 96 | * @param from first character in the range of characters that will be matched 97 | * @param to last character in the range of characters that will be matched 98 | * 99 | * @return a CharacterTokenPart instance that represents a range of characters 100 | */ 101 | public static CharacterTokenPart range(final char from, final char to) { 102 | return new RangeCharacterTokenPart(from, to, false, false); 103 | } 104 | /* Escapes c for use inside square brackets; the set of escaped characters depends on whether c is the first character of the token. */ 105 | private static String escapeCharacter(final char c, final boolean isFirstCharInToken, final boolean isLastCharInToken){ 106 | final char[] specialCharacters; 107 | /* NOTE(review): isLastCharInToken is never read in this method - confirm whether that is intended. */ 108 | if (isFirstCharInToken){ 109 | specialCharacters = new char[]{'\\', ']', '^'}; 110 | /* in first position '^' is escaped and '-' is left as-is; elsewhere it is the other way round */ 111 | } else { 112 | specialCharacters = new char[]{'\\', ']', '-'}; 113 | } 114 | /* NOTE(review): '[' is in neither list; java.util.regex supports nested classes such as [a-d[m-p]], so an unescaped '[' may not be taken literally - confirm. */ 115 | return Utils.escapeSpecial(c, specialCharacters); 116 | } 117 | 118 | } -------------------------------------------------------------------------------- /src/main/java/net/codebox/readableregex/Quantifier.java: -------------------------------------------------------------------------------- 1 | package net.codebox.readableregex; 2 | 3 | /** 4 | * Class representing a 'quantifier' which specifies how many times a token must appear in order 5 | * to constitute a match. 6 | */ 7 | public class Quantifier { 8 | private final String baseText, suffix; 9 | 10 | private Quantifier(String baseText, final String suffix){ 11 | this.baseText = baseText; 12 | this.suffix = suffix; 13 | } 14 | 15 | /** 16 | * Applies the quantifier represented by this instance to the specified sequence of tokens. 17 | * 18 | * @param tokens the series of tokens to be associated with the current instance 19 | * 20 | * @return a new Token instance which has been associated with the current instance 21 | */ 22 | public Token of(final Token... 
tokens){ 23 | if (tokens.length == 1){ 24 | // A single token may or may not require brackets 25 | return new Token(tokens[0], this); 26 | 27 | } else { 28 | // Multiple tokens will always require brackets 29 | return of(new Token(RegExBuilder.build(tokens), true, null)); 30 | } 31 | } 32 | 33 | /** 34 | * Convenience method which creates a text Token from the specified String, and then associates 35 | * it with the current instance. 36 | * 37 | * @param text the text to be converted into a Token 38 | * @return a text Token which has been associated with the current instance 39 | */ 40 | public Token of(final String text){ 41 | return of(Token.text(text)); 42 | } 43 | 44 | /** 45 | * Creates a new greedy Quantifier based on the current instance. 46 | * 47 | * @return a new greedy Quantifier 48 | */ 49 | public Quantifier greedy(){ 50 | return new Quantifier(baseText, ""); 51 | } 52 | 53 | /** 54 | * Creates a new lazy Quantifier based on the current instance. 55 | * 56 | * @return a new lazy Quantifier 57 | */ 58 | public Quantifier lazy(){ 59 | return new Quantifier(baseText, "?"); 60 | } 61 | 62 | /** 63 | * Creates a new possessive Quantifier based on the current instance. 64 | * 65 | * @return a new possessive Quantifier 66 | */ 67 | public Quantifier possessive(){ 68 | return new Quantifier(baseText, "+"); 69 | } 70 | 71 | @Override 72 | public String toString() { 73 | return appendSuffix(baseText, suffix); 74 | } 75 | 76 | private static String appendSuffix(String txt, String suffix){ 77 | return txt + (suffix == null ? "" : suffix); 78 | } 79 | 80 | /** 81 | * Creates a new Quantifier for matching one or more of the associated Tokens. 82 | * 83 | * @return Quantifier for matching one or more of the associated Tokens 84 | */ 85 | public static Quantifier oneOrMore(){ 86 | return new Quantifier("+", ""); 87 | } 88 | 89 | /** 90 | * Creates a new Quantifier for matching zero or one of the associated Tokens. 
91 | * 92 | * @return Quantifier for matching zero or one of the associated Tokens 93 | */ 94 | public static Quantifier zeroOrOne(){ 95 | return new Quantifier("?", ""); 96 | } 97 | 98 | /** 99 | * Creates a new Quantifier for matching zero or more of the associated Tokens. 100 | * 101 | * @return Quantifier for matching zero or more of the associated Tokens 102 | */ 103 | public static Quantifier zeroOrMore(){ 104 | return new Quantifier("*", ""); 105 | } 106 | 107 | /** 108 | * Creates a new Quantifier for matching any number of Tokens that falls within the specified range. 109 | * 110 | * @param from the minimum number of tokens that will be matched 111 | * @param to the maximum number of tokens that will be matched 112 | * @throws IllegalArgumentException if either value is negative, or if from is larger than to 113 | * @return Quantifier for matching any number of Tokens that falls within the specified range 114 | */ 115 | public static Quantifier between(final int from, final int to){ 116 | if (from < 0 || to < 0) { 117 | throw new IllegalArgumentException("A negative value was passed to the between() method, values must be >= 0"); 118 | } 119 | if (from > to){ 120 | throw new IllegalArgumentException( 121 | String.format("The 'from' argument value of %d was larger than the 'to' argument value of %d", from, to)); 122 | } 123 | return new Quantifier("{" + from + "," + to + "}", ""); 124 | } 125 | 126 | /** 127 | * Creates a new Quantifier for matching the specified number of Tokens. 128 | * 129 | * @param count the number of occurrences of the token to be matched 130 | * @throws IllegalArgumentException if count is negative 131 | * @return Quantifier for matching the specified number of Tokens 132 | */ 133 | public static Quantifier exactly(final int count){ 134 | if (count < 0) { 135 | throw new IllegalArgumentException("A negative value was passed to the exactly() method, value must be >= 0"); 136 | } 137 | return new Quantifier("{" + count + "}", ""); 138 | } 139 | 140 | /** 141 | * Creates a new Quantifier for matching at least the specified number of Tokens. 
142 | * 143 | * @param count the minimum number of occurrences of the token to be matched 144 | * @throws IllegalArgumentException if count is negative 145 | * @return Quantifier for matching at least the specified number of Tokens 146 | */ 147 | public static Quantifier atLeast(final int count){ 148 | if (count < 0) { 149 | throw new IllegalArgumentException("A negative value was passed to the atLeast() method, value must be >= 0"); 150 | } 151 | return new Quantifier("{" + count + ",}", ""); 152 | } 153 | 154 | } 155 | -------------------------------------------------------------------------------- /src/main/java/net/codebox/readableregex/RegExBuilder.java: -------------------------------------------------------------------------------- 1 | package net.codebox.readableregex; 2 | 3 | /** 4 | * Used to construct strings of text that can be compiled into Pattern objects representing 5 | * regular expressions. 6 | */ 7 | public class RegExBuilder { 8 | private final Token[] tokens; 9 | 10 | /** 11 | * Initialises the instance. 12 | * The array is copied, so later changes made to it by the caller do not affect this instance. 13 | * @param tokens a series of Token instances, which will be used to construct the text of 14 | * the regular expression 15 | */ 16 | public RegExBuilder(final Token... tokens) { 17 | this.tokens = tokens.clone(); 18 | } 19 | 20 | @Override 21 | public String toString() { 22 | return Utils.appendObjects(tokens); 23 | } 24 | 25 | /** 26 | * Convenience method which instantiates a RegExBuilder instance using the specified list of 27 | * Token objects, and invokes its toString() method, and returns the result. 28 | * 29 | * @param tokens a series of Token instances, which will be used to construct the text of 30 | * the regular expression 31 | * 32 | * @return the text of the regular expression 33 | */ 34 | public static String build(final Token... 
tokens) { 35 | return new RegExBuilder(tokens).toString(); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/main/java/net/codebox/readableregex/Token.java: -------------------------------------------------------------------------------- 1 | package net.codebox.readableregex; 2 | 3 | import java.util.regex.Pattern; 4 | 5 | /** 6 | * Class representing part of a regular expression. 7 | */ 8 | public class Token { 9 | private Quantifier quantifier; 10 | private boolean requiresBrackets; 11 | private String tokenString; 12 | 13 | protected Token(String tokenString, boolean requiresBrackets, Quantifier quantifier) { 14 | this.tokenString = tokenString; 15 | this.requiresBrackets = requiresBrackets; 16 | this.quantifier = quantifier; 17 | } 18 | 19 | protected Token(Token token, Quantifier quantifier) { 20 | this.tokenString = token.tokenString; 21 | this.requiresBrackets = token.requiresBrackets; 22 | this.quantifier = quantifier; 23 | } 24 | 25 | private static char[] SPECIAL_CHARS = new char[]{'\\', '[', '^', '$', '.', '|', '?', '*', '+', '(', ')', '{', '}'}; 26 | 27 | 28 | @Override 29 | public final String toString(){ 30 | if (quantifier == null){ 31 | return tokenString; 32 | } else { 33 | if (requiresBrackets){ 34 | return "(" + tokenString + ")" + quantifier.toString(); 35 | } else { 36 | return tokenString + quantifier.toString(); 37 | } 38 | } 39 | } 40 | 41 | /** 42 | * Encloses the specified series of tokens in a pair of brackets, creating a capturing group. 43 | * 44 | * @param tokens the tokens to be enclosed 45 | * 46 | * @return a new Token instance 47 | */ 48 | public static Token groupOf(final Token... tokens) { 49 | return new Token( 50 | "(" + RegExBuilder.build(tokens) + ")", false, null 51 | ); 52 | } 53 | 54 | /** 55 | * Encloses the specified series of tokens in a pair of brackets, creating a non-capturing group. 
56 | * 57 | * @param tokens the tokens to be enclosed 58 | * 59 | * @return a new Token instance 60 | */ 61 | public static Token nonCapturingGroup(final Token... tokens) { 62 | return new Token( 63 | "(?:" + RegExBuilder.build(tokens) + ")", false, null 64 | ); 65 | } 66 | 67 | /** 68 | * Encloses the specified series of tokens in a pair of brackets, creating an independent non-capturing group. 69 | * 70 | * @param tokens the tokens to be enclosed 71 | * 72 | * @return a new Token instance 73 | */ 74 | public static Token independentNonCapturingGroup(final Token... tokens) { 75 | return new Token( 76 | "(?>" + RegExBuilder.build(tokens) + ")", false, null 77 | ); 78 | } 79 | 80 | /** 81 | * Creates a token that specifies a zero-width positive lookahead. 82 | * 83 | * @param tokens the tokens to be used to define the lookahead 84 | * 85 | * @return a new Token instance 86 | */ 87 | public static Token positiveLookAhead(final Token... tokens) { 88 | return new Token( 89 | "(?=" + RegExBuilder.build(tokens) + ")", false, null 90 | ); 91 | } 92 | 93 | /** 94 | * Creates a token that specifies a zero-width negative lookahead. 95 | * 96 | * @param tokens the tokens to be used to define the lookahead 97 | * 98 | * @return a new Token instance 99 | */ 100 | public static Token negativeLookAhead(final Token... tokens) { 101 | return new Token( 102 | "(?!" + RegExBuilder.build(tokens) + ")", false, null 103 | ); 104 | } 105 | 106 | /** 107 | * Creates a token that specifies a zero-width positive look-behind. 108 | * 109 | * @param tokens the tokens to be used to define the look-behind 110 | * 111 | * @return a new Token instance 112 | */ 113 | public static Token positiveLookBehind(final Token... tokens) { 114 | return new Token( 115 | "(?<=" + RegExBuilder.build(tokens) + ")", false, null 116 | ); 117 | } 118 | 119 | /** 120 | * Creates a token that specifies a zero-width negative look-behind. 
121 | * 122 | * @param tokens the tokens to be used to define the look-behind 123 | * 124 | * @return a new Token instance 125 | */ 126 | public static Token negativeLookBehind(final Token... tokens) { 127 | return new Token( 128 | "(? 1, null 296 | ); 297 | } 298 | 299 | /** 300 | * Creates a token that matches any single character that does not correspond to any of 301 | * the specified CharacterTokenPart instances. 302 | * 303 | * @param characterTokens an array of CharacterTokenPart objects which must not be matched 304 | * 305 | * @return a new Token instance 306 | */ 307 | public static Token anyCharacterExcept(final CharacterTokenPart... characterTokens){ 308 | final String tokenString; 309 | setFirstAndLastParts(characterTokens); 310 | String txt = Utils.appendObjects(characterTokens); 311 | tokenString = "[^" + txt + "]"; 312 | 313 | return new Token(tokenString, false, null); 314 | } 315 | 316 | /** 317 | * Creates a token that matches any single character that corresponds to any of the 318 | * specified CharacterTokenPart instances. 319 | * 320 | * @param characterTokens an array of CharacterTokenPart objects, one of which must be matched 321 | * 322 | * @return a new Token instance 323 | */ 324 | public static Token anyOneOf(final CharacterTokenPart... characterTokens){ 325 | final String tokenString; 326 | setFirstAndLastParts(characterTokens); 327 | String txt = Utils.appendObjects(characterTokens); 328 | tokenString = "[" + txt + "]"; 329 | 330 | return new Token(tokenString, false, null); 331 | } 332 | 333 | private static void setFirstAndLastParts(final CharacterTokenPart... 
characterTokens){ 334 | final int count = characterTokens.length; 335 | for(int i=0; i 0){ 359 | sb.append("|"); 360 | } 361 | sb.append(token.toString()); 362 | } 363 | 364 | final boolean requiresBrackets; 365 | if (tokens.length > 1){ 366 | requiresBrackets = true; 367 | } else { 368 | requiresBrackets = tokens[0].requiresBrackets; 369 | } 370 | 371 | return new Token(sb.toString(), requiresBrackets, null); 372 | } 373 | 374 | /** 375 | * An alias for the 'or' method. 376 | * 377 | * @param tokens an array of Tokens, one of which must be matched 378 | * 379 | * @return a new Token instance 380 | */ 381 | public static Token oneOf(final Token... tokens) { 382 | return or(tokens); 383 | } 384 | } 385 | -------------------------------------------------------------------------------- /src/main/java/net/codebox/readableregex/Utils.java: -------------------------------------------------------------------------------- 1 | package net.codebox.readableregex; 2 | 3 | /** 4 | * Contains utility methods for the RegExBuilder library. 5 | */ 6 | public class Utils { 7 | /** 8 | * Escapes the characters contained in the String argument. 9 | * 10 | * @param text the text to be checked for characters that require escaping 11 | * @param specialCharacters the list of characters that require escaping 12 | * 13 | * @return the resulting String 14 | */ 15 | public static String escapeSpecial(final String text, final char[] specialCharacters){ 16 | String tempText = text; 17 | for(char specialCharacter : specialCharacters){ 18 | tempText = tempText.replace("" + specialCharacter, "\\" + specialCharacter); 19 | } 20 | return tempText; 21 | } 22 | 23 | /** 24 | * Escapes the character argument if it is one of the specified 'special characters'. 
25 | * 26 | * @param c the character to be escaped 27 | * @param specialCharacters the list of characters that require escaping 28 | * 29 | * @return a String containing the character value, escaped if required 30 | */ 31 | public static String escapeSpecial(char c, char[] specialCharacters){ 32 | return escapeSpecial("" + c, specialCharacters); 33 | } 34 | 35 | /** 36 | * Constructs a String by concatenating together the values returned by the toString() methods 37 | * of each of the specified objects. 38 | * 39 | * @param <T> type parameter to keep the compiler happy 40 | * @param objects array of objects used to construct the string 41 | * 42 | * @return the resulting String 43 | */ 44 | public static <T> String appendObjects(T... objects){ 45 | final StringBuilder sb = new StringBuilder(); 46 | 47 | for(T object : objects){ 48 | sb.append(object.toString()); 49 | } 50 | return sb.toString(); 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /src/test/java/net/codebox/readableregex/TestRegExBuilder.java: -------------------------------------------------------------------------------- 1 | package net.codebox.readableregex; 2 | 3 | import org.junit.Test; 4 | 5 | import static net.codebox.readableregex.CharacterTokenPart.characters; 6 | import static net.codebox.readableregex.CharacterTokenPart.range; 7 | import static net.codebox.readableregex.Token.*; 8 | import static net.codebox.readableregex.Quantifier.*; 9 | import static org.hamcrest.CoreMatchers.is; 10 | import static org.junit.Assert.assertThat; 11 | import static org.junit.Assert.fail; 12 | 13 | public class TestRegExBuilder { 14 | @Test 15 | public void whenEmptyBuilder_thenEmptyText(){ 16 | assertThat(RegExBuilder.build(), is("")); 17 | } 18 | 19 | @Test 20 | public void whenOnlyPlainText_thenRegexIsCorrect(){ 21 | assertThat(RegExBuilder.build(text("A")), is("A")); 22 | assertThat(RegExBuilder.build(text("ABC")), is("ABC")); 23 | 
assertThat(RegExBuilder.build(text("ABC"), text("123")), is("ABC123")); 24 | } 25 | 26 | @Test 27 | public void whenMatchAnything_thenRegexIsCorrect(){ 28 | assertThat(RegExBuilder.build(anything()), is(".*")); 29 | assertThat(RegExBuilder.build(anything(), anything()), is(".*.*")); 30 | } 31 | 32 | @Test 33 | public void whenMatchAnyDigit_thenRegexIsCorrect(){ 34 | assertThat(RegExBuilder.build(anyDigit()), is("[0-9]")); 35 | assertThat(RegExBuilder.build(anyDigit(), anyDigit()), is("[0-9][0-9]")); 36 | } 37 | 38 | @Test 39 | public void whenMatchAnyLetter_thenRegexIsCorrect(){ 40 | assertThat(RegExBuilder.build(anyLetter()), is("[a-zA-Z]")); 41 | assertThat(RegExBuilder.build(anyLetter(), anyLetter()), is("[a-zA-Z][a-zA-Z]")); 42 | } 43 | 44 | // 'One or More' quantifier 45 | @Test 46 | public void whenMatchUsesOneOrMoreQuantifier_thenRegexIsCorrect(){ 47 | assertThat(RegExBuilder.build(oneOrMore().of("x")), is("x+")); 48 | assertThat(RegExBuilder.build(oneOrMore().of(text("xyz"))), is("(xyz)+")); 49 | assertThat(RegExBuilder.build(oneOrMore().of(anyDigit(), text("xyz"), anything())), is("([0-9]xyz.*)+")); 50 | } 51 | 52 | @Test 53 | public void whenMatchUsesOneOrMoreGreedyQuantifier_thenRegexIsCorrect(){ 54 | assertThat(RegExBuilder.build(oneOrMore().greedy().of("x")), is("x+")); 55 | assertThat(RegExBuilder.build(oneOrMore().greedy().of(text("xyz"))), is("(xyz)+")); 56 | assertThat(RegExBuilder.build(oneOrMore().greedy().of(anyDigit(), text("xyz"), anything())), is("([0-9]xyz.*)+")); 57 | } 58 | 59 | @Test 60 | public void whenMatchUsesOneOrMorePossessiveQuantifier_thenRegexIsCorrect(){ 61 | assertThat(RegExBuilder.build(oneOrMore().possessive().of("x")), is("x++")); 62 | assertThat(RegExBuilder.build(oneOrMore().possessive().of(text("xyz"))), is("(xyz)++")); 63 | assertThat(RegExBuilder.build(oneOrMore().possessive().of(anyDigit(), text("xyz"), anything())), is("([0-9]xyz.*)++")); 64 | } 65 | 66 | @Test 67 | public void 
whenMatchUsesOneOrMoreLazyQuantifier_thenRegexIsCorrect(){ 68 | assertThat(RegExBuilder.build(oneOrMore().lazy().of("x")), is("x+?")); 69 | assertThat(RegExBuilder.build(oneOrMore().lazy().of(text("xyz"))), is("(xyz)+?")); 70 | assertThat(RegExBuilder.build(oneOrMore().lazy().of(anyDigit(), text("xyz"), anything())), is("([0-9]xyz.*)+?")); 71 | } 72 | 73 | // 'Zero or More' quantifier 74 | @Test 75 | public void whenMatchUsesZeroOrMoreQuantifier_thenRegexIsCorrect(){ 76 | assertThat(RegExBuilder.build(zeroOrMore().of("x")), is("x*")); 77 | assertThat(RegExBuilder.build(zeroOrMore().of(text("xyz"))), is("(xyz)*")); 78 | assertThat(RegExBuilder.build(zeroOrMore().of(anyDigit(), text("xyz"), anything())), is("([0-9]xyz.*)*")); 79 | } 80 | 81 | @Test 82 | public void whenMatchUsesZeroOrMoreGreedyQuantifier_thenRegexIsCorrect(){ 83 | assertThat(RegExBuilder.build(zeroOrMore().greedy().of("x")), is("x*")); 84 | assertThat(RegExBuilder.build(zeroOrMore().greedy().of(text("xyz"))), is("(xyz)*")); 85 | assertThat(RegExBuilder.build(zeroOrMore().greedy().of(anyDigit(), text("xyz"), anything())), is("([0-9]xyz.*)*")); 86 | } 87 | 88 | @Test 89 | public void whenMatchUsesZeroOrMorePossessiveQuantifier_thenRegexIsCorrect(){ 90 | assertThat(RegExBuilder.build(zeroOrMore().possessive().of("x")), is("x*+")); 91 | assertThat(RegExBuilder.build(zeroOrMore().possessive().of(text("xyz"))), is("(xyz)*+")); 92 | assertThat(RegExBuilder.build(zeroOrMore().possessive().of(anyDigit(), text("xyz"), anything())), is("([0-9]xyz.*)*+")); 93 | } 94 | 95 | @Test 96 | public void whenMatchUsesZeroOrMoreLazyQuantifier_thenRegexIsCorrect(){ 97 | assertThat(RegExBuilder.build(zeroOrMore().lazy().of("x")), is("x*?")); 98 | assertThat(RegExBuilder.build(zeroOrMore().lazy().of(text("xyz"))), is("(xyz)*?")); 99 | assertThat(RegExBuilder.build(zeroOrMore().lazy().of(anyDigit(), text("xyz"), anything())), is("([0-9]xyz.*)*?")); 100 | } 101 | 102 | // 'Between' quantifier 103 | 
@Test(expected=IllegalArgumentException.class) 104 | public void whenNegativeFromValuePassedToBetweenMethod_thenExceptionIsThrown(){ 105 | between(-1, 4); 106 | } 107 | 108 | @Test(expected=IllegalArgumentException.class) 109 | public void whenNegativeToValuePassedToBetweenMethod_thenExceptionIsThrown(){ 110 | between(1, -4); 111 | } 112 | 113 | @Test(expected=IllegalArgumentException.class) 114 | public void whenFromValueLargerThanToValueIsPassedToBetweenMethod_thenExceptionIsThrown(){ 115 | between(4, 1); 116 | } 117 | 118 | @Test 119 | public void whenMatchUsesBetweenQuantifier_thenRegexIsCorrect(){ 120 | assertThat(RegExBuilder.build(between(2, 4).of("x")), is("x{2,4}")); 121 | assertThat(RegExBuilder.build(between(2, 4).of(text("xyz"))), is("(xyz){2,4}")); 122 | assertThat(RegExBuilder.build(between(2, 4).of(anyDigit(), text("xyz"), anything())), is("([0-9]xyz.*){2,4}")); 123 | } 124 | 125 | @Test 126 | public void whenMatchUsesBetweenGreedyQuantifier_thenRegexIsCorrect(){ 127 | assertThat(RegExBuilder.build(between(2, 4).greedy().of("x")), is("x{2,4}")); 128 | assertThat(RegExBuilder.build(between(2, 4).greedy().of(text("xyz"))), is("(xyz){2,4}")); 129 | assertThat(RegExBuilder.build(between(2, 4).greedy().of(anyDigit(), text("xyz"), anything())), is("([0-9]xyz.*){2,4}")); 130 | } 131 | 132 | @Test 133 | public void whenMatchUsesBetweenPossessiveQuantifier_thenRegexIsCorrect(){ 134 | assertThat(RegExBuilder.build(between(2, 4).possessive().of("x")), is("x{2,4}+")); 135 | assertThat(RegExBuilder.build(between(2, 4).possessive().of(text("xyz"))), is("(xyz){2,4}+")); 136 | assertThat(RegExBuilder.build(between(2, 4).possessive().of(anyDigit(), text("xyz"), anything())), is("([0-9]xyz.*){2,4}+")); 137 | 138 | } 139 | 140 | @Test 141 | public void whenMatchUsesBetweenLazyQuantifier_thenRegexIsCorrect(){ 142 | assertThat(RegExBuilder.build(between(2, 4).lazy().of("x")), is("x{2,4}?")); 143 | assertThat(RegExBuilder.build(between(2, 4).lazy().of(text("xyz"))), 
is("(xyz){2,4}?")); 144 | assertThat(RegExBuilder.build(between(2, 4).lazy().of(anyDigit(), text("xyz"), anything())), is("([0-9]xyz.*){2,4}?")); 145 | } 146 | 147 | // 'Zero or One' quantifier 148 | @Test 149 | public void whenMatchUsesZeroOrOneQuantifier_thenRegexIsCorrect(){ 150 | assertThat(RegExBuilder.build(zeroOrOne().of("x")), is("x?")); 151 | assertThat(RegExBuilder.build(zeroOrOne().of(text("xyz"))), is("(xyz)?")); 152 | assertThat(RegExBuilder.build(zeroOrOne().of(anyDigit(), text("xyz"), anything())), is("([0-9]xyz.*)?")); 153 | } 154 | 155 | @Test 156 | public void whenMatchUsesZeroOrOneGreedyQuantifier_thenRegexIsCorrect(){ 157 | assertThat(RegExBuilder.build(zeroOrOne().greedy().of("x")), is("x?")); 158 | assertThat(RegExBuilder.build(zeroOrOne().greedy().of(text("xyz"))), is("(xyz)?")); 159 | assertThat(RegExBuilder.build(zeroOrOne().greedy().of(anyDigit(), text("xyz"), anything())), is("([0-9]xyz.*)?")); 160 | } 161 | 162 | @Test 163 | public void whenMatchUsesZeroOrOnePossessiveQuantifier_thenRegexIsCorrect(){ 164 | assertThat(RegExBuilder.build(zeroOrOne().possessive().of("x")), is("x?+")); 165 | assertThat(RegExBuilder.build(zeroOrOne().possessive().of(text("xyz"))), is("(xyz)?+")); 166 | assertThat(RegExBuilder.build(zeroOrOne().possessive().of(anyDigit(), text("xyz"), anything())), is("([0-9]xyz.*)?+")); 167 | } 168 | 169 | @Test 170 | public void whenMatchUsesZeroOrOneLazyQuantifier_thenRegexIsCorrect(){ 171 | assertThat(RegExBuilder.build(zeroOrOne().lazy().of("x")), is("x??")); 172 | assertThat(RegExBuilder.build(zeroOrOne().lazy().of(text("xyz"))), is("(xyz)??")); 173 | assertThat(RegExBuilder.build(zeroOrOne().lazy().of(anyDigit(), text("xyz"), anything())), is("([0-9]xyz.*)??")); 174 | } 175 | 176 | // 'Exactly' quantifier 177 | @Test(expected=IllegalArgumentException.class) 178 | public void whenNegativeValuePassedToExactlyMethod_thenRegexIsCorrect(){ 179 | exactly(-1); 180 | } 181 | 182 | @Test 183 | public void 
whenMatchUsesExactlyQuantifier_thenRegexIsCorrect(){ 184 | assertThat(RegExBuilder.build(exactly(4).of("x")), is("x{4}")); 185 | assertThat(RegExBuilder.build(exactly(4).of(text("xyz"))), is("(xyz){4}")); 186 | assertThat(RegExBuilder.build(exactly(4).of(anyDigit(), text("xyz"), anything())), is("([0-9]xyz.*){4}")); 187 | } 188 | 189 | @Test 190 | public void whenMatchUsesExactlyGreedyQuantifier_thenRegexIsCorrect(){ 191 | assertThat(RegExBuilder.build(exactly(4).greedy().of("x")), is("x{4}")); 192 | assertThat(RegExBuilder.build(exactly(4).greedy().of(text("xyz"))), is("(xyz){4}")); 193 | assertThat(RegExBuilder.build(exactly(4).greedy().of(anyDigit(), text("xyz"), anything())), is("([0-9]xyz.*){4}")); 194 | } 195 | 196 | @Test 197 | public void whenMatchUsesExactlyPossessiveQuantifier_thenRegexIsCorrect(){ 198 | assertThat(RegExBuilder.build(exactly(4).possessive().of("x")), is("x{4}+")); 199 | assertThat(RegExBuilder.build(exactly(4).possessive().of(text("xyz"))), is("(xyz){4}+")); 200 | assertThat(RegExBuilder.build(exactly(4).possessive().of(anyDigit(), text("xyz"), anything())), is("([0-9]xyz.*){4}+")); 201 | } 202 | 203 | @Test 204 | public void whenMatchUsesExactlyLazyQuantifier_thenRegexIsCorrect(){ 205 | assertThat(RegExBuilder.build(exactly(4).lazy().of("x")), is("x{4}?")); 206 | assertThat(RegExBuilder.build(exactly(4).lazy().of(text("xyz"))), is("(xyz){4}?")); 207 | assertThat(RegExBuilder.build(exactly(4).lazy().of(anyDigit(), text("xyz"), anything())), is("([0-9]xyz.*){4}?")); 208 | } 209 | 210 | // 'At Least' quantifier 211 | @Test(expected=IllegalArgumentException.class) 212 | public void whenNegativeValuePassedToAtLeastMethod_thenRegexIsCorrect(){ 213 | atLeast(-1); 214 | } 215 | 216 | @Test 217 | public void whenMatchUsesAtLeastQuantifier_thenRegexIsCorrect(){ 218 | assertThat(RegExBuilder.build(atLeast(4).of("x")), is("x{4,}")); 219 | assertThat(RegExBuilder.build(atLeast(4).of(text("xyz"))), is("(xyz){4,}")); 220 | 
assertThat(RegExBuilder.build(atLeast(4).of(anyDigit(), text("xyz"), anything())), is("([0-9]xyz.*){4,}")); 221 | } 222 | 223 | @Test 224 | public void whenMatchUsesAtLeastGreedyQuantifier_thenRegexIsCorrect(){ 225 | assertThat(RegExBuilder.build(atLeast(4).greedy().of("x")), is("x{4,}")); 226 | assertThat(RegExBuilder.build(atLeast(4).greedy().of(text("xyz"))), is("(xyz){4,}")); 227 | assertThat(RegExBuilder.build(atLeast(4).greedy().of(anyDigit(), text("xyz"), anything())), is("([0-9]xyz.*){4,}")); 228 | } 229 | 230 | @Test 231 | public void whenMatchUsesAtLeastPossessiveQuantifier_thenRegexIsCorrect(){ 232 | assertThat(RegExBuilder.build(atLeast(4).possessive().of("x")), is("x{4,}+")); 233 | assertThat(RegExBuilder.build(atLeast(4).possessive().of(text("xyz"))), is("(xyz){4,}+")); 234 | assertThat(RegExBuilder.build(atLeast(4).possessive().of(anyDigit(), text("xyz"), anything())), is("([0-9]xyz.*){4,}+")); 235 | } 236 | 237 | @Test 238 | public void whenMatchUsesAtLeastLazyQuantifier_thenRegexIsCorrect(){ 239 | assertThat(RegExBuilder.build(atLeast(4).lazy().of("x")), is("x{4,}?")); 240 | assertThat(RegExBuilder.build(atLeast(4).lazy().of(text("xyz"))), is("(xyz){4,}?")); 241 | assertThat(RegExBuilder.build(atLeast(4).lazy().of(anyDigit(), text("xyz"), anything())), is("([0-9]xyz.*){4,}?")); 242 | } 243 | 244 | 245 | @Test 246 | public void whenOrOperatorUsedWithoutQuantifier_thenRegexIsCorrect(){ 247 | assertThat(RegExBuilder.build(or(text("A"), text("BCD"), text("E"))), is("A|BCD|E")); 248 | assertThat(RegExBuilder.build(or(text("A"))), is("A")); 249 | assertThat(RegExBuilder.build(or(text("ABC"))), is("ABC")); 250 | } 251 | 252 | @Test 253 | public void whenOrOperatorUsedWithQuantifier_thenRegexIsCorrect(){ 254 | assertThat(RegExBuilder.build(exactly(4).of(or(text("ABC"), text("D"), text("E")))), is("(ABC|D|E){4}")); 255 | assertThat(RegExBuilder.build(exactly(4).of(or(text("A")))), is("A{4}")); 256 | 
assertThat(RegExBuilder.build(exactly(4).of(or(text("ABC")))), is("(ABC){4}")); 257 | } 258 | 259 | @Test 260 | public void whenOneOfOperatorUsed_thenRegexIsCorrect(){ 261 | assertThat(RegExBuilder.build(oneOf(text("ABC"), text("123"))), is("ABC|123")); 262 | } 263 | 264 | @Test 265 | public void whenMatchAnyOneCharacter_thenRegexIsCorrect(){ 266 | assertThat(RegExBuilder.build(anyOneOf(characters('a','e','i','o','u'))), is("[aeiou]")); 267 | } 268 | 269 | @Test 270 | public void whenMatchAnyRangeOfCharacters_thenRegexIsCorrect(){ 271 | assertThat(RegExBuilder.build(anyOneOf(range('a','e'), range('v','z'))), is("[a-ev-z]")); 272 | } 273 | 274 | @Test 275 | public void whenMatchAnyCharacterExcept_thenRegexIsCorrect(){ 276 | assertThat(RegExBuilder.build(anyCharacterExcept(range('a','e'), characters('x','y','z'))), is("[^a-exyz]")); 277 | } 278 | 279 | @Test 280 | public void whenSpecialCharactersUsedInText_thenRegexIsCorrect(){ 281 | assertThat(RegExBuilder.build(text("\\")), is("\\\\")); 282 | assertThat(RegExBuilder.build(text("[")), is("\\[")); 283 | assertThat(RegExBuilder.build(text("^")), is("\\^")); 284 | assertThat(RegExBuilder.build(text("$")), is("\\$")); 285 | assertThat(RegExBuilder.build(text(".")), is("\\.")); 286 | assertThat(RegExBuilder.build(text("|")), is("\\|")); 287 | assertThat(RegExBuilder.build(text("?")), is("\\?")); 288 | assertThat(RegExBuilder.build(text("*")), is("\\*")); 289 | assertThat(RegExBuilder.build(text("+")), is("\\+")); 290 | assertThat(RegExBuilder.build(text("(")), is("\\(")); 291 | assertThat(RegExBuilder.build(text(")")), is("\\)")); 292 | assertThat(RegExBuilder.build(text("{")), is("\\{")); 293 | assertThat(RegExBuilder.build(text("}")), is("\\}")); 294 | } 295 | 296 | @Test 297 | public void whenSpecialCharactersUsedInCharacterMatcher_thenRegexIsCorrect(){ 298 | assertThat(RegExBuilder.build(anyOneOf(characters('\\'))), is("[\\\\]")); 299 | assertThat(RegExBuilder.build(anyOneOf(characters('-'))), is("[-]")); 300 | 
assertThat(RegExBuilder.build(anyOneOf(characters(']'))), is("[\\]]")); 301 | assertThat(RegExBuilder.build(anyOneOf(characters('^'))), is("[\\^]")); 302 | assertThat(RegExBuilder.build(anyOneOf(range('\\','-'))), is("[\\\\-\\-]")); 303 | assertThat(RegExBuilder.build(anyOneOf(range('^',']'))), is("[\\^-\\]]")); 304 | } 305 | 306 | // Literal Caret in character matcher 307 | @Test 308 | public void whenCaretUsedAtStartOfCharacterMatcher_thenRegexIsCorrect(){ 309 | assertThat(RegExBuilder.build(anyOneOf(characters('^'))), is("[\\^]")); 310 | assertThat(RegExBuilder.build(anyOneOf(characters('^', 'a', 'b'))), is("[\\^ab]")); 311 | assertThat(RegExBuilder.build(anyOneOf(characters('^'), characters('a', 'b'))), is("[\\^ab]")); 312 | assertThat(RegExBuilder.build(anyOneOf(characters('^', 'a'), characters('b'))), is("[\\^ab]")); 313 | assertThat(RegExBuilder.build(anyOneOf(characters('^', 'a', 'b'), characters('x', 'y'), range('0', '9'))), is("[\\^abxy0-9]")); 314 | } 315 | 316 | @Test 317 | public void whenCaretUsedInMiddleOfCharacterMatcher_thenRegexIsCorrect(){ 318 | assertThat(RegExBuilder.build(anyOneOf(characters('a', '^', 'b'))), is("[a^b]")); 319 | assertThat(RegExBuilder.build(anyOneOf(characters('a'), characters('^', 'b'))), is("[a^b]")); 320 | assertThat(RegExBuilder.build(anyOneOf(characters('a', '^'), characters('b'))), is("[a^b]")); 321 | } 322 | 323 | @Test 324 | public void whenCaretUsedAtEndOfCharacterMatcher_thenRegexIsCorrect(){ 325 | assertThat(RegExBuilder.build(anyOneOf(characters('a', 'b', '^'))), is("[ab^]")); 326 | assertThat(RegExBuilder.build(anyOneOf(characters('a'), characters('b', '^'))), is("[ab^]")); 327 | assertThat(RegExBuilder.build(anyOneOf(characters('a', 'b'), characters('^'))), is("[ab^]")); 328 | } 329 | 330 | // Literal Closing Bracket in character matcher 331 | @Test 332 | public void whenClosingBracketUsedAtStartOfCharacterMatcher_thenRegexIsCorrect(){ 333 | assertThat(RegExBuilder.build(anyOneOf(characters(']'))), is("[\\]]")); 
334 | assertThat(RegExBuilder.build(anyOneOf(characters(']', 'a', 'b'))), is("[\\]ab]")); 335 | assertThat(RegExBuilder.build(anyOneOf(characters(']'), characters('a', 'b'))), is("[\\]ab]")); 336 | assertThat(RegExBuilder.build(anyOneOf(characters(']', 'a'), characters('b'))), is("[\\]ab]")); 337 | assertThat(RegExBuilder.build(anyOneOf(characters(']', 'a', 'b'), characters('x', 'y'), range('0', '9'))), is("[\\]abxy0-9]")); 338 | } 339 | 340 | @Test 341 | public void whenClosingBracketUsedInMiddleOfCharacterMatcher_thenRegexIsCorrect(){ 342 | assertThat(RegExBuilder.build(anyOneOf(characters('a', ']', 'b'))), is("[a\\]b]")); 343 | assertThat(RegExBuilder.build(anyOneOf(characters('a'), characters(']', 'b'))), is("[a\\]b]")); 344 | assertThat(RegExBuilder.build(anyOneOf(characters('a', ']'), characters('b'))), is("[a\\]b]")); 345 | } 346 | 347 | @Test 348 | public void whenClosingBracketUsedAtEndOfCharacterMatcher_thenRegexIsCorrect(){ 349 | assertThat(RegExBuilder.build(anyOneOf(characters('a', 'b', ']'))), is("[ab\\]]")); 350 | assertThat(RegExBuilder.build(anyOneOf(characters('a'), characters('b', ']'))), is("[ab\\]]")); 351 | assertThat(RegExBuilder.build(anyOneOf(characters('a', 'b'), characters(']'))), is("[ab\\]]")); 352 | 353 | assertThat(RegExBuilder.build(anyCharacterExcept(characters('[', ']','\\'))), is("[^[\\]\\\\]")); 354 | } 355 | 356 | // Literal Closing Bracket in negative character matcher 357 | @Test 358 | public void whenClosingBracketUsedAtStartOfNegativeCharacterMatcher_thenRegexIsCorrect(){ 359 | assertThat(RegExBuilder.build(anyCharacterExcept(characters(']'))), is("[^\\]]")); 360 | assertThat(RegExBuilder.build(anyCharacterExcept(characters(']', 'a', 'b'))), is("[^\\]ab]")); 361 | assertThat(RegExBuilder.build(anyCharacterExcept(characters(']'), characters('a', 'b'))), is("[^\\]ab]")); 362 | assertThat(RegExBuilder.build(anyCharacterExcept(characters(']', 'a'), characters('b'))), is("[^\\]ab]")); 363 | 
assertThat(RegExBuilder.build(anyCharacterExcept(characters(']', 'a', 'b'), characters('x', 'y'), range('0', '9'))), is("[^\\]abxy0-9]")); 364 | } 365 | 366 | @Test 367 | public void whenClosingBracketUsedInMiddleOfNegativeCharacterMatcher_thenRegexIsCorrect(){ 368 | assertThat(RegExBuilder.build(anyCharacterExcept(characters('a', ']', 'b'))), is("[^a\\]b]")); 369 | assertThat(RegExBuilder.build(anyCharacterExcept(characters('a'), characters(']', 'b'))), is("[^a\\]b]")); 370 | assertThat(RegExBuilder.build(anyCharacterExcept(characters('a', ']'), characters('b'))), is("[^a\\]b]")); 371 | } 372 | 373 | @Test 374 | public void whenClosingBracketUsedAtEndOfNegativeCharacterMatcher_thenRegexIsCorrect(){ 375 | assertThat(RegExBuilder.build(anyCharacterExcept(characters('a', 'b', ']'))), is("[^ab\\]]")); 376 | assertThat(RegExBuilder.build(anyCharacterExcept(characters('a'), characters('b', ']'))), is("[^ab\\]]")); 377 | assertThat(RegExBuilder.build(anyCharacterExcept(characters('a', 'b'), characters(']'))), is("[^ab\\]]")); 378 | 379 | } 380 | 381 | // Literal Hyphen in character matcher 382 | @Test 383 | public void whenHyphenUsedAtStartOfCharacterMatcher_thenRegexIsCorrect(){ 384 | assertThat(RegExBuilder.build(anyOneOf(characters('-'))), is("[-]")); 385 | assertThat(RegExBuilder.build(anyOneOf(characters('-', 'a', 'b'))), is("[-ab]")); 386 | assertThat(RegExBuilder.build(anyOneOf(characters('-'), characters('a', 'b'))), is("[-ab]")); 387 | assertThat(RegExBuilder.build(anyOneOf(characters('-', 'a'), characters('b'))), is("[-ab]")); 388 | assertThat(RegExBuilder.build(anyOneOf(characters('-', 'a', 'b'), characters('x', 'y'), range('0', '9'))), is("[-abxy0-9]")); 389 | } 390 | 391 | @Test 392 | public void whenHyphenUsedInMiddleOfCharacterMatcher_thenRegexIsCorrect(){ 393 | assertThat(RegExBuilder.build(anyOneOf(characters('a', '-', 'b'))), is("[a\\-b]")); 394 | assertThat(RegExBuilder.build(anyOneOf(characters('a'), characters('-', 'b'))), is("[a\\-b]")); 395 | 
assertThat(RegExBuilder.build(anyOneOf(characters('a', '-'), characters('b'))), is("[a\\-b]")); 396 | } 397 | 398 | @Test 399 | public void whenHyphenUsedAtEndOfCharacterMatcher_thenRegexIsCorrect(){ 400 | assertThat(RegExBuilder.build(anyOneOf(characters('a', 'b', '-'))), is("[ab\\-]")); 401 | assertThat(RegExBuilder.build(anyOneOf(characters('a'), characters('b', '-'))), is("[ab\\-]")); 402 | assertThat(RegExBuilder.build(anyOneOf(characters('a', 'b'), characters('-'))), is("[ab\\-]")); 403 | } 404 | 405 | @Test 406 | public void whenTokensAreGrouped_thenRegexIsCorrect(){ 407 | assertThat(RegExBuilder.build(groupOf(text("ABC"))), is("(ABC)")); 408 | assertThat(RegExBuilder.build(groupOf(text("ABC"), anyDigit(), text("XYZ"))), is("(ABC[0-9]XYZ)")); 409 | } 410 | 411 | @Test 412 | public void whenBackReferencesToGroupsAreUsed_thenRegexIsCorrect(){ 413 | assertThat(RegExBuilder.build(group(1)), is("\\1")); 414 | assertThat(RegExBuilder.build(text("A"), group(1), group(2), text("B")), is("A\\1\\2B")); 415 | } 416 | 417 | @Test 418 | public void whenNonCapturingGroupsAreUsed_thenRegexIsCorrect(){ 419 | assertThat(RegExBuilder.build(nonCapturingGroup(text("ABC"))), is("(?:ABC)")); 420 | assertThat(RegExBuilder.build(nonCapturingGroup(text("ABC"), anyDigit(), text("XYZ"))), is("(?:ABC[0-9]XYZ)")); 421 | } 422 | 423 | @Test 424 | public void whenIndependentNonCapturingGroupsAreUsed_thenRegexIsCorrect(){ 425 | assertThat(RegExBuilder.build(independentNonCapturingGroup(text("ABC"))), is("(?>ABC)")); 426 | assertThat(RegExBuilder.build(independentNonCapturingGroup(text("ABC"), anyDigit(), text("XYZ"))), is("(?>ABC[0-9]XYZ)")); 427 | } 428 | 429 | @Test 430 | public void whenPositiveLookAheadIsUsed_thenRegexIsCorrect(){ 431 | assertThat(RegExBuilder.build(positiveLookAhead(text("ABC"))), is("(?=ABC)")); 432 | assertThat(RegExBuilder.build(positiveLookAhead(text("ABC"), anyDigit(), text("XYZ"))), is("(?=ABC[0-9]XYZ)")); 433 | } 434 | 435 | @Test 436 | public void 
whenNegativeLookAheadIsUsed_thenRegexIsCorrect(){ 437 | assertThat(RegExBuilder.build(negativeLookAhead(text("ABC"))), is("(?!ABC)")); 438 | assertThat(RegExBuilder.build(negativeLookAhead(text("ABC"), anyDigit(), text("XYZ"))), is("(?!ABC[0-9]XYZ)")); 439 | } 440 | 441 | @Test 442 | public void whenPositiveLookBehindIsUsed_thenRegexIsCorrect(){ 443 | assertThat(RegExBuilder.build(positiveLookBehind(text("ABC"))), is("(?<=ABC)")); 444 | assertThat(RegExBuilder.build(positiveLookBehind(text("ABC"), anyDigit(), text("XYZ"))), is("(?<=ABC[0-9]XYZ)")); 445 | } 446 | 447 | @Test 448 | public void whenNegativeLookBehindIsUsed_thenRegexIsCorrect(){ 449 | assertThat(RegExBuilder.build(negativeLookBehind(text("ABC"))), is("(?