├── .gitignore ├── bnd.bnd ├── .travis ├── deploy.sh └── settings.xml ├── src ├── test │ └── java │ │ └── ru │ │ └── lanwen │ │ └── verbalregex │ │ ├── matchers │ │ ├── EqualToRegexMatcher.java │ │ ├── TestMatchMatcher.java │ │ └── TestsExactMatcher.java │ │ ├── UsageLibTest.java │ │ ├── PredefinedCharClassesTest.java │ │ ├── NegativeCasesTest.java │ │ ├── RealWorldUnitTest.java │ │ └── BasicFunctionalityUnitTest.java └── main │ └── java │ └── ru │ └── lanwen │ └── verbalregex │ └── VerbalExpression.java ├── .travis.yml ├── LICENSE ├── README.md └── pom.xml /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | .idea 3 | *.iml 4 | .classpath 5 | .project 6 | .settings 7 | /target/ 8 | bin 9 | -------------------------------------------------------------------------------- /bnd.bnd: -------------------------------------------------------------------------------- 1 | Bundle-Name: ${project.name} 2 | Bundle-Version: ${project.version} 3 | Bundle-SymbolicName: ${project.groupId}.${project.artifactId} 4 | Export-Package: ru.lanwen.verbalregex 5 | Import-Package: * -------------------------------------------------------------------------------- /.travis/deploy.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | if [ ! 
-z "$TRAVIS_TAG" ] 3 | then 4 | echo "on a tag -> set pom.xml to $TRAVIS_TAG" 5 | mvn --settings .travis/settings.xml org.codehaus.mojo:versions-maven-plugin:2.1:set -DnewVersion=$TRAVIS_TAG 1>/dev/null 2>/dev/null 6 | else 7 | echo "not on a tag -> keep SNAPSHOT version in pom.xml" 8 | fi 9 | 10 | mvn clean deploy --settings .travis/settings.xml -DskipTests=true -B -U 11 | -------------------------------------------------------------------------------- /src/test/java/ru/lanwen/verbalregex/matchers/EqualToRegexMatcher.java: -------------------------------------------------------------------------------- 1 | package ru.lanwen.verbalregex.matchers; 2 | 3 | import org.hamcrest.FeatureMatcher; 4 | import org.hamcrest.Matcher; 5 | import ru.lanwen.verbalregex.VerbalExpression; 6 | 7 | import static org.hamcrest.CoreMatchers.equalTo; 8 | 9 | /** 10 | * User: lanwen 11 | * Date: 29.05.14 12 | * Time: 22:59 13 | */ 14 | public final class EqualToRegexMatcher { 15 | private EqualToRegexMatcher() { 16 | } 17 | 18 | public static Matcher equalToRegex(final VerbalExpression.Builder builder) { 19 | return new FeatureMatcher(equalTo(builder.build().toString()), "regex", "") { 20 | @Override 21 | protected String featureValueOf(VerbalExpression verbalExpression) { 22 | return verbalExpression.toString(); 23 | } 24 | }; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /.travis/settings.xml: -------------------------------------------------------------------------------- 1 | 5 | 6 | 7 | 8 | ossrh 9 | ${env.SONATYPE_USERNAME} 10 | ${env.SONATYPE_PASSWORD} 11 | 12 | 13 | 14 | 15 | ossrh 16 | 17 | true 18 | 19 | 20 | ${env.GPG_EXECUTABLE} 21 | ${env.GPG_PASSPHRASE} 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: java 2 | sudo: false 3 | 4 | jdk: 5 | - openjdk8 6 | 7 | 
install: 8 | - mvn --settings .travis/settings.xml install -DskipTests=true -Dgpg.skip -Dmaven.javadoc.skip=true -B -V 9 | 10 | before_install: 11 | - if [ ! -z "$GPG_SECRET_KEYS" ]; then echo $GPG_SECRET_KEYS | base64 --decode | $GPG_EXECUTABLE --import; fi 12 | - if [ ! -z "$GPG_OWNERTRUST" ]; then echo $GPG_OWNERTRUST | base64 --decode | $GPG_EXECUTABLE --import-ownertrust; fi 13 | 14 | after_success: 15 | - mvn clean cobertura:cobertura -Dcobertura.report.format=xml org.eluder.coveralls:coveralls-maven-plugin:4.3.0:report 16 | 17 | notifications: 18 | email: false 19 | 20 | deploy: 21 | - provider: script 22 | script: .travis/deploy.sh 23 | skip_cleanup: true 24 | on: 25 | repo: VerbalExpressions/JavaVerbalExpressions 26 | branch: master 27 | - provider: script 28 | script: .travis/deploy.sh 29 | skip_cleanup: true 30 | on: 31 | repo: VerbalExpressions/JavaVerbalExpressions 32 | tags: true 33 | 34 | 35 | cache: 36 | directories: 37 | - $HOME/.m2 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2013 VerbalExpressions 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /src/test/java/ru/lanwen/verbalregex/matchers/TestMatchMatcher.java: -------------------------------------------------------------------------------- 1 | package ru.lanwen.verbalregex.matchers; 2 | 3 | import org.hamcrest.Description; 4 | import org.hamcrest.Factory; 5 | import org.hamcrest.TypeSafeMatcher; 6 | import ru.lanwen.verbalregex.VerbalExpression; 7 | 8 | /** 9 | * User: lanwen 10 | * Date: 29.05.14 11 | * Time: 20:06 12 | */ 13 | public class TestMatchMatcher extends TypeSafeMatcher { 14 | 15 | private String toTest; 16 | 17 | private TestMatchMatcher(String toTest) { 18 | this.toTest = toTest; 19 | } 20 | 21 | @Override 22 | protected boolean matchesSafely(VerbalExpression verbalExpression) { 23 | return verbalExpression.test(toTest); 24 | } 25 | 26 | @Override 27 | public void describeTo(Description description) { 28 | description.appendText("regex should match to ").appendValue(toTest); 29 | } 30 | 31 | @Override 32 | protected void describeMismatchSafely(VerbalExpression item, Description mismatchDescription) { 33 | mismatchDescription.appendText(item.toString()).appendText(" don't matches this string"); 34 | } 35 | 36 | @Factory 37 | public static TestMatchMatcher matchesTo(String test) { 38 | return new TestMatchMatcher(test); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- 
/src/test/java/ru/lanwen/verbalregex/matchers/TestsExactMatcher.java: -------------------------------------------------------------------------------- 1 | package ru.lanwen.verbalregex.matchers; 2 | 3 | import org.hamcrest.Description; 4 | import org.hamcrest.Factory; 5 | import org.hamcrest.TypeSafeMatcher; 6 | import ru.lanwen.verbalregex.VerbalExpression; 7 | 8 | /** 9 | * User: lanwen 10 | * Date: 29.05.14 11 | * Time: 20:06 12 | */ 13 | public class TestsExactMatcher extends TypeSafeMatcher { 14 | 15 | private String toTest; 16 | 17 | private TestsExactMatcher(String toTest) { 18 | this.toTest = toTest; 19 | } 20 | 21 | @Override 22 | protected boolean matchesSafely(VerbalExpression verbalExpression) { 23 | return verbalExpression.testExact(toTest); 24 | } 25 | 26 | @Override 27 | public void describeTo(Description description) { 28 | description.appendText("regex should match exactly to ").appendValue(toTest); 29 | } 30 | 31 | @Override 32 | protected void describeMismatchSafely(VerbalExpression item, Description mismatchDescription) { 33 | mismatchDescription.appendText(item.toString()).appendText(" don't matches this string"); 34 | } 35 | 36 | @Factory 37 | public static TestsExactMatcher matchesExactly(String test) { 38 | return new TestsExactMatcher(test); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/test/java/ru/lanwen/verbalregex/UsageLibTest.java: -------------------------------------------------------------------------------- 1 | package ru.lanwen.verbalregex; 2 | 3 | import org.junit.Test; 4 | 5 | import static org.hamcrest.CoreMatchers.equalTo; 6 | import static org.hamcrest.core.IsNot.not; 7 | import static org.junit.Assert.assertThat; 8 | 9 | /** 10 | * User: lanwen 11 | * Date: 11.05.14 12 | * Time: 3:30 13 | */ 14 | public class UsageLibTest { 15 | 16 | 17 | @Test 18 | public void staticFabricsRetunSameAsConstructorExpressions() { 19 | VerbalExpression regexViaFactory = 
VerbalExpression.regex().anything().build(); 20 | VerbalExpression regexViaConstructor = new VerbalExpression.Builder().anything().build(); 21 | 22 | assertThat("Factory builder method produce not same as constructor regex", 23 | regexViaFactory.toString(), equalTo(regexViaConstructor.toString())); 24 | } 25 | 26 | @Test 27 | public void clonedBuilderEqualsOriginal() { 28 | VerbalExpression.Builder builder = VerbalExpression.regex().anything().addModifier('i'); 29 | VerbalExpression.Builder clonedBuilder = VerbalExpression.regex(builder); 30 | 31 | assertThat("Cloned builder changed after creating new one", 32 | builder.build().toString(), equalTo(clonedBuilder.build().toString())); 33 | } 34 | 35 | @Test 36 | public void clonedBuilderCantChangeOriginal() { 37 | VerbalExpression.Builder builder = VerbalExpression.regex().anything().addModifier('i'); 38 | VerbalExpression.Builder clonedBuilder = VerbalExpression.regex(builder).endOfLine(); 39 | 40 | assertThat("Cloned builder changed after creating new one", 41 | builder.build().toString(), not(clonedBuilder.build().toString())); 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /src/test/java/ru/lanwen/verbalregex/PredefinedCharClassesTest.java: -------------------------------------------------------------------------------- 1 | package ru.lanwen.verbalregex; 2 | 3 | import org.junit.Test; 4 | 5 | import static org.hamcrest.CoreMatchers.equalTo; 6 | import static org.hamcrest.CoreMatchers.is; 7 | import static org.hamcrest.CoreMatchers.not; 8 | import static org.hamcrest.MatcherAssert.assertThat; 9 | import static ru.lanwen.verbalregex.VerbalExpression.regex; 10 | import static ru.lanwen.verbalregex.matchers.TestMatchMatcher.matchesTo; 11 | 12 | /** 13 | * User: lanwen 14 | * Date: 13.05.14 15 | * Time: 16:26 16 | */ 17 | public class PredefinedCharClassesTest { 18 | 19 | public static final String LETTERS_NO_DIGITS = 
"qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM_"; 20 | public static final String DIGITS = "0123456789"; 21 | public static final String NON_LETTERS = ";'[]{}|?/"; 22 | public static final String SPACE = " \t\n\f\r"; 23 | 24 | @Test 25 | public void testWordChar() throws Exception { 26 | VerbalExpression regex = regex().wordChar().build(); 27 | 28 | assertThat("Not matches on letters", regex, matchesTo(LETTERS_NO_DIGITS + DIGITS)); 29 | assertThat("matches on non letters", regex, not(matchesTo((NON_LETTERS + SPACE)))); 30 | assertThat("Extracts wrong word chars", 31 | regex.getText(LETTERS_NO_DIGITS + DIGITS + NON_LETTERS + SPACE), equalTo(LETTERS_NO_DIGITS + DIGITS)); 32 | 33 | } 34 | 35 | @Test 36 | public void testNonWordChar() throws Exception { 37 | VerbalExpression regex = regex().nonWordChar().build(); 38 | 39 | assertThat("matches on letters", regex, not(matchesTo((LETTERS_NO_DIGITS + DIGITS)))); 40 | assertThat("Not matches on non letters", regex, matchesTo(NON_LETTERS + SPACE)); 41 | assertThat("Extracts wrong chars", 42 | regex.getText(LETTERS_NO_DIGITS + DIGITS + NON_LETTERS + SPACE), equalTo(NON_LETTERS + SPACE)); 43 | 44 | } 45 | 46 | @Test 47 | public void testSpace() throws Exception { 48 | VerbalExpression regex = regex().space().build(); 49 | 50 | assertThat("matches on letters", regex, not(matchesTo((LETTERS_NO_DIGITS + DIGITS + NON_LETTERS)))); 51 | assertThat("Not matches on space", regex, matchesTo(SPACE)); 52 | assertThat("Extracts wrong chars", 53 | regex.getText(LETTERS_NO_DIGITS + DIGITS + NON_LETTERS + SPACE), equalTo(SPACE)); 54 | 55 | } 56 | 57 | @Test 58 | public void testNonSpace() throws Exception { 59 | VerbalExpression regex = regex().nonSpace().build(); 60 | 61 | assertThat("Not matches on non space", regex, matchesTo(LETTERS_NO_DIGITS + DIGITS + NON_LETTERS)); 62 | assertThat("matches on space", regex, not(matchesTo((SPACE)))); 63 | assertThat("Extracts wrong chars", 64 | regex.getText(LETTERS_NO_DIGITS + DIGITS + 
NON_LETTERS + SPACE), not(SPACE)); 65 | 66 | } 67 | 68 | @Test 69 | public void testDigit() throws Exception { 70 | VerbalExpression regex = regex().digit().build(); 71 | 72 | assertThat("matches on letters", regex, not(matchesTo((LETTERS_NO_DIGITS + SPACE + NON_LETTERS)))); 73 | assertThat("Not matches on digits", regex, matchesTo(DIGITS)); 74 | assertThat("Extracts wrong chars", 75 | regex.getText(LETTERS_NO_DIGITS + DIGITS + NON_LETTERS + SPACE), is(DIGITS)); 76 | 77 | } 78 | 79 | @Test 80 | public void testNonDigit() throws Exception { 81 | VerbalExpression regex = regex().nonDigit().build(); 82 | 83 | assertThat("Not matches on letters", regex, matchesTo(LETTERS_NO_DIGITS + SPACE + NON_LETTERS)); 84 | assertThat("matches on digits", regex, not(matchesTo((DIGITS)))); 85 | assertThat("Extracts wrong chars", 86 | regex.getText(LETTERS_NO_DIGITS + DIGITS + NON_LETTERS + SPACE), not(DIGITS)); 87 | 88 | } 89 | 90 | @Test 91 | public void testWord() throws Exception { 92 | VerbalExpression regex = regex().word().build(); 93 | 94 | assertThat("not matches on word", regex, matchesTo(LETTERS_NO_DIGITS + DIGITS)); 95 | assertThat("matches on space and non letters", regex, not(matchesTo(SPACE + NON_LETTERS))); 96 | assertThat("extracts wrong chars", 97 | regex.getText(LETTERS_NO_DIGITS + DIGITS + NON_LETTERS + SPACE), is(LETTERS_NO_DIGITS + DIGITS)); 98 | 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /src/test/java/ru/lanwen/verbalregex/NegativeCasesTest.java: -------------------------------------------------------------------------------- 1 | package ru.lanwen.verbalregex; 2 | 3 | import org.junit.Test; 4 | 5 | import java.util.regex.PatternSyntaxException; 6 | 7 | import static org.hamcrest.CoreMatchers.containsString; 8 | import static org.hamcrest.CoreMatchers.equalTo; 9 | import static org.junit.Assert.assertThat; 10 | import static ru.lanwen.verbalregex.VerbalExpression.regex; 11 | import static 
ru.lanwen.verbalregex.matchers.TestMatchMatcher.matchesTo; 12 | 13 | /** 14 | * User: lanwen 15 | * Date: 11.05.14 16 | * Time: 3:37 17 | */ 18 | public class NegativeCasesTest { 19 | 20 | @Test(expected = IllegalStateException.class) 21 | public void testEndCaptureOnEmptyRegex() { 22 | regex().endCapture().build(); 23 | } 24 | 25 | @Test(expected = IndexOutOfBoundsException.class) 26 | public void shouldExceptionWhenTryGetMoreThanCapturedGroup() { 27 | String text = "abc"; 28 | VerbalExpression regex = regex().find("b").capture().find("c").build(); 29 | 30 | regex.getText(text, 2); 31 | } 32 | 33 | @Test(expected = IllegalArgumentException.class) 34 | public void shouldExceptionWhenTryGetByNonExistentCaptureName() { 35 | String text = "abc"; 36 | VerbalExpression regex = regex().find("b") 37 | .capture("test1").find("c").build(); 38 | 39 | regex.getText(text, "test2"); 40 | } 41 | 42 | @Test(expected = PatternSyntaxException.class) 43 | public void testRangeWithoutArgs() throws Exception { 44 | regex().startOfLine().range().build(); 45 | } 46 | 47 | @Test(expected = PatternSyntaxException.class) 48 | public void testRangeWithOneArg() throws Exception { 49 | regex().startOfLine().range("a").build(); 50 | } 51 | 52 | @Test 53 | public void rangeWithThreeArgsUsesOnlyFirstTwo() throws Exception { 54 | VerbalExpression regex = regex().startOfLine().range("a", "z", "A").build(); 55 | 56 | assertThat("Range with three args differs from expected", regex.toString(), equalTo("^[a-z]")); 57 | } 58 | 59 | @Test 60 | public void orWithNullMatchesAny() throws Exception { 61 | VerbalExpression regex = regex().startOfLine().then("a").or(null).build(); 62 | assertThat("regex don't matches writed letter", regex, matchesTo("a")); 63 | assertThat("or(null) should match any", regex, matchesTo("bcd")); 64 | 65 | assertThat("or(null) extract only first", regex.getText("abcd"), equalTo("a")); 66 | } 67 | 68 | @Test 69 | public void orAfterCaptureProduceEmptyGroup() throws Exception { 70 
| VerbalExpression regex = regex().startOfLine().then("a").capture().or("b").build(); 71 | 72 | assertThat(regex.toString(), containsString("()|")); 73 | 74 | assertThat("regex dont matches string abcd", regex.getText("abcd", 0), equalTo("a")); 75 | assertThat("regex dont extract a by first group", regex.getText("abcd", 1), equalTo("")); 76 | } 77 | 78 | @Test 79 | public void orAfterNamedCaptureProduceEmptyGroup() { 80 | String captureName = "test"; 81 | VerbalExpression regex = regex().startOfLine().then("a") 82 | .capture(captureName).or("b").build(); 83 | 84 | assertThat(regex.toString(), containsString("(?)|")); 85 | 86 | assertThat("regex don't matches string abcd", 87 | regex.getText("abcd", 0), equalTo("a")); 88 | assertThat("regex don't extract a by group named " + captureName, 89 | regex.getText("abcd", captureName), equalTo("")); 90 | } 91 | 92 | @Test 93 | public void multiplyWithNullOnCountEqualToWithOneAndMore() throws Exception { 94 | VerbalExpression regex = regex().multiple("some", null).build(); 95 | 96 | assertThat("Multiply with null should be equal to oneOrMore", 97 | regex.toString(), equalTo(regex().find("some").oneOrMore().build().toString())); 98 | } 99 | 100 | @Test 101 | public void multiplyWithMoreThan3ParamsOnCountEqualToWithOneAndMore() throws Exception { 102 | VerbalExpression regex = regex().multiple("some", 1, 2, 3).build(); 103 | 104 | assertThat("Multiply with 3 args should be equal to oneOrMore", 105 | regex.toString(), equalTo(regex().find("some").oneOrMore().build().toString())); 106 | } 107 | 108 | @Test(expected = java.util.regex.PatternSyntaxException.class) 109 | public void twoOpenCaptsWithOrThrowSyntaxException() throws Exception { 110 | VerbalExpression regex = regex().capt().capt().or("0").build(); 111 | String ignored = regex.toString(); 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | JavaVerbalExpressions 2 | ===================== 3 | [![release](http://github-release-version.herokuapp.com/github/VerbalExpressions/JavaVerbalExpressions/release.svg?style=flat)](https://github.com/VerbalExpressions/JavaVerbalExpressions/releases/latest) [![Maven Central](https://maven-badges.herokuapp.com/maven-central/ru.lanwen.verbalregex/java-verbal-expressions/badge.svg?style=flat)](https://maven-badges.herokuapp.com/maven-central/ru.lanwen.verbalregex/java-verbal-expressions) 4 | [](https://github.com/VerbalExpressions/JSVerbalExpressions) 5 | [![Coverage Status](https://coveralls.io/repos/VerbalExpressions/JavaVerbalExpressions/badge.svg)](https://coveralls.io/r/VerbalExpressions/JavaVerbalExpressions) 6 | 7 | VerbalExpressions is a Java library that helps to construct difficult regular expressions. 8 | 9 | 10 | 11 | ## Getting Started 12 | 13 | Maven Dependency: 14 | 15 | ```xml 16 | <dependency> 17 | <groupId>ru.lanwen.verbalregex</groupId> 18 | <artifactId>java-verbal-expressions</artifactId> 19 | <version>1.8</version> 20 | </dependency> 21 | ``` 22 | 23 | You can use a *SNAPSHOT* dependency by adding the following to `pom.xml`: 24 | ```xml 25 | <repositories> 26 | <repository> 27 | <id>ossrh</id> 28 | <url>https://oss.sonatype.org/content/repositories/snapshots</url> 29 | </repository> 30 | </repositories> 31 | ``` 32 | 33 | ## Examples 34 | ```java 35 | VerbalExpression testRegex = VerbalExpression.regex() 36 | .startOfLine().then("http").maybe("s") 37 | .then("://") 38 | .maybe("www.").anythingBut(" ") 39 | .endOfLine() 40 | .build(); 41 | 42 | // Create an example URL 43 | String url = "https://www.google.com"; 44 | 45 | // Use VerbalExpression's testExact() method to test if the entire string matches the regex 46 | testRegex.testExact(url); //True 47 | 48 | testRegex.toString(); // Outputs the regex used: 49 | // ^(?:http)(?:s)?(?:\:\/\/)(?:www\.)?(?:[^\ ]*)$ 50 | 51 | ``` 52 | 53 | ```java 54 | VerbalExpression testRegex = VerbalExpression.regex() 55 | .startOfLine().then("abc").or("def") 56 | .build(); 57 | 58 | String testString = "defzzz";
59 | 60 | //Use VerbalExpression's test() method to test if parts of the string match the regex 61 | testRegex.test(testString); // true 62 | testRegex.testExact(testString); // false 63 | testRegex.getText(testString); // returns: def 64 | ``` 65 | 66 | Builder can be cloned: 67 | ```java 68 | VerbalExpression regex = regex(regex().anything().addModifier('i')).endOfLine().build(); 69 | ``` 70 | 71 | Or can be used in another regex: 72 | ```java 73 | VerbalExpression.Builder digits = regex().capt().digit().oneOrMore().endCapt().tab(); 74 | VerbalExpression regex2 = regex().add(digits).add(digits).build(); 75 | ``` 76 | 77 | Feel free to use any predefined char groups: 78 | ```java 79 | regex().wordChar().nonWordChar() 80 | .space().nonSpace() 81 | .digit().nonDigit() 82 | ``` 83 | 84 | Define captures: 85 | ```java 86 | String text = "aaabcd"; 87 | VerbalExpression regex = regex() 88 | .find("a") 89 | .capture().find("b").anything().endCapture().then("cd").build(); 90 | 91 | regex.getText(text) // returns "abcd" 92 | regex.getText(text, 1) // returns "b" 93 | ``` 94 | 95 | ## More complex examples 96 | * [Parse long strings example](https://github.com/VerbalExpressions/JavaVerbalExpressions/wiki/Parse-long-strings-example) 97 | 98 | ## Other implementations 99 | You can view all implementations on [VerbalExpressions.github.io](http://VerbalExpressions.github.io) 100 | 101 | [ 102 | [Javascript](https://github.com/VerbalExpressions/JSVerbalExpressions) - 103 | [PHP](https://github.com/VerbalExpressions/PHPVerbalExpressions) - 104 | [Python](https://github.com/VerbalExpressions/PythonVerbalExpressions) - 105 | [C#](https://github.com/VerbalExpressions/CSharpVerbalExpressions) - 106 | [Objective-C](https://github.com/VerbalExpressions/ObjectiveCVerbalExpressions) - 107 | [Ruby](https://github.com/ryan-endacott/verbal_expressions) - 108 | [Groovy](https://github.com/VerbalExpressions/GroovyVerbalExpressions) - 109 |
[Haskell](https://github.com/VerbalExpressions/HaskellVerbalExpressions) - 110 | [C++](https://github.com/VerbalExpressions/CppVerbalExpressions) - ... ([moarr](https://github.com/VerbalExpressions)) ] 111 | 112 | ## Project released with travis 113 | 114 | With help of this tutorial: 115 | https://dracoblue.net/dev/uploading-snapshots-and-releases-to-maven-central-with-travis/ 116 | -------------------------------------------------------------------------------- /src/test/java/ru/lanwen/verbalregex/RealWorldUnitTest.java: -------------------------------------------------------------------------------- 1 | package ru.lanwen.verbalregex; 2 | 3 | import org.junit.Ignore; 4 | import org.junit.Test; 5 | 6 | import static org.hamcrest.CoreMatchers.equalTo; 7 | import static org.hamcrest.CoreMatchers.not; 8 | import static org.junit.Assert.assertEquals; 9 | import static org.junit.Assert.assertThat; 10 | import static ru.lanwen.verbalregex.VerbalExpression.regex; 11 | import static ru.lanwen.verbalregex.matchers.TestMatchMatcher.matchesTo; 12 | import static ru.lanwen.verbalregex.matchers.TestsExactMatcher.matchesExactly; 13 | 14 | 15 | public class RealWorldUnitTest { 16 | 17 | @Test 18 | public void testUrl() { 19 | VerbalExpression testRegex = new VerbalExpression.Builder() 20 | .startOfLine() 21 | .then("http") 22 | .maybe("s") 23 | .then("://") 24 | .maybe("www.") 25 | .anythingBut(" ") 26 | .endOfLine() 27 | .build(); 28 | 29 | // Create an example URL 30 | String testUrl = "https://www.google.com"; 31 | assertThat("Matches Google's url", testRegex, matchesTo(testUrl)); //True 32 | 33 | assertThat("Regex doesn't match same regex as in example", 34 | testRegex.toString(), 35 | equalTo("^(?:http)(?:s)?(?:\\:\\/\\/)(?:www\\.)?(?:[^\\ ]*)$")); 36 | } 37 | 38 | @Test 39 | public void testTelephoneNumber() { 40 | VerbalExpression regex = regex() 41 | .startOfLine() 42 | .then("+") 43 | .capture().range("0", "9").count(3).maybe("-").maybe(" ").endCapture() 44 | .count(3) 
45 | .endOfLine().build(); 46 | 47 | String phoneWithSpace = "+097 234 243"; 48 | String phoneWithoutSpace = "+097234243"; 49 | String phoneWithDash = "+097-234-243"; 50 | 51 | assertThat(regex, matchesExactly(phoneWithSpace)); 52 | assertThat(regex, matchesExactly(phoneWithoutSpace)); 53 | assertThat(regex, matchesExactly(phoneWithDash)); 54 | 55 | } 56 | 57 | @Test 58 | public void complexPatternWithMultiplyCaptures() throws Exception { 59 | String logLine = "3\t4\t1\thttp://localhost:20001\t1\t63528800\t0\t63528800\t1000000000\t0\t63528800\tSTR1"; 60 | 61 | VerbalExpression regex = regex() 62 | .capt().digit().oneOrMore().endCapture().tab() 63 | .capt().digit().oneOrMore().endCapture().tab() 64 | .capt().range("0", "1").count(1).endCapture().tab() 65 | .capt().find("http://localhost:20").digit().count(3).endCapture().tab() 66 | .capt().range("0", "1").count(1).endCapture().tab() 67 | .capt().digit().oneOrMore().endCapture().tab() 68 | .capt().range("0", "1").count(1).endCapture().tab() 69 | .capt().digit().oneOrMore().endCapture().tab() 70 | .capt().digit().oneOrMore().endCapture().tab() 71 | .capt().range("0", "1").count(1).endCapture().tab() 72 | .capt().digit().oneOrMore().endCapture().tab() 73 | .capt().find("STR").range("0", "2").count(1).endCapture().build(); 74 | 75 | assertThat(regex, matchesExactly(logLine)); 76 | 77 | VerbalExpression.Builder digits = regex().capt().digit().oneOrMore().endCapt().tab(); 78 | VerbalExpression.Builder range = regex().capt().range("0", "1").count(1).endCapt().tab(); 79 | VerbalExpression.Builder host = regex().capt().find("http://localhost:20").digit().count(3).endCapt().tab(); 80 | VerbalExpression.Builder fake = regex().capt().find("STR").range("0", "2").count(1); 81 | 82 | VerbalExpression regex2 = regex() 83 | .add(digits).add(digits) 84 | .add(range).add(host).add(range).add(digits).add(range) 85 | .add(digits).add(digits) 86 | .add(range).add(digits).add(fake).build(); 87 | 88 | assertThat(regex2, 
matchesExactly(logLine)); 89 | 90 | //(\\d+)\\t(\\d+)\\t([0-1]{1})\\t(http://localhost:20\\d{3})\\t([0-1]{1}) 91 | // \\t(\\d+)\\t([0-1]{1})\\t(\\d+)\\t(\\d+)\\t([0-1]{1})\\t(\\d+)\\t(STR[0-2]{1}) 92 | /* 93 | 3 4 1 http://localhost:20001 1 28800 0 528800 1000000000 0 528800 STR1 94 | 3 5 1 http://localhost:20002 1 28800 0 528800 1000020002 0 528800 STR2 95 | 4 6 0 http://localhost:20002 1 48800 0 528800 1000000000 0 528800 STR1 96 | 4 7 0 http://localhost:20003 1 48800 0 528800 1000020003 0 528800 STR2 97 | 5 8 1 http://localhost:20003 1 68800 0 528800 1000000000 0 528800 STR1 98 | 5 9 1 http://localhost:20004 1 28800 0 528800 1000020004 0 528800 STR2 99 | */ 100 | } 101 | 102 | @Test 103 | public void unusualRegex() throws Exception { 104 | assertThat(regex().add("[A-Z0-1!-|]").build().toString(), equalTo("[A-Z0-1!-|]")); 105 | 106 | } 107 | 108 | @Test 109 | @Ignore("Planned in 1.3") 110 | public void captureWithName() throws Exception { 111 | } 112 | 113 | @Test 114 | public void oneOfShouldFindEpisodeTitleOfStarWarsMovies() { 115 | VerbalExpression regex = VerbalExpression.regex() 116 | .find("Star Wars: ") 117 | .oneOf("The Phantom Menace", "Attack of the Clones", "Revenge of the Sith", 118 | "The Force Awakens", "A New Hope", "The Empire Strikes Back", "Return of the Jedi") 119 | .build(); 120 | assertThat(regex, matchesTo("Star Wars: The Empire Strikes Back")); 121 | assertThat(regex, matchesTo("Star Wars: Return of the Jedi")); 122 | } 123 | 124 | @Test 125 | public void captureAfterNewLineHasGroupNumberOne() throws Exception { 126 | 127 | final String lineBreak = "\n"; 128 | final String some = "some"; 129 | final String text = " text"; 130 | final VerbalExpression expression = VerbalExpression.regex().
131 | lineBreak() 132 | .capture().find(some).endCapture().then(text) 133 | .build(); 134 | 135 | assertThat(some, equalTo(expression.getText(lineBreak + some + text, 1))); 136 | } 137 | 138 | @Test 139 | public void captureAfterNewLineHasANamedGroup() { 140 | 141 | final String lineBreak = "\n"; 142 | final String some = "some"; 143 | final String text = " text"; 144 | final String captureName = "name"; 145 | final VerbalExpression expression = VerbalExpression.regex(). 146 | lineBreak() 147 | .capture(captureName).find(some).endCapture().then(text) 148 | .build(); 149 | 150 | assertThat(some, 151 | equalTo(expression.getText(lineBreak + some + text, captureName))); 152 | } 153 | 154 | @Test 155 | public void missingOptionalCaptureGroupReturnsEmptyStringNotStringContainingNullLiteral() { 156 | final VerbalExpression expression = VerbalExpression.regex(). 157 | startOfLine() 158 | .capture("optionalCapture") 159 | .oneOf("a", "b") 160 | .endCapture() 161 | .count(0, 1) 162 | .then("c") 163 | .endOfLine() 164 | .build(); 165 | final String testString = "c"; 166 | assertThat(expression, matchesExactly(testString)); 167 | assertThat(expression.getText("c", "optionalCapture"), equalTo("")); 168 | } 169 | } 170 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | 4 | ru.lanwen.verbalregex 5 | java-verbal-expressions 6 | 1.5-SNAPSHOT 7 | 8 | jar 9 | 10 | JavaVerbalExpressions 11 | VerbalExpressions is a Java library that helps to construct difficult regular expressions 12 | https://github.com/VerbalExpressions/JavaVerbalExpressions 13 | 14 | 15 | 16 | UTF-8 17 | -Xdoclint:none 18 | 19 | 20 | 21 | 22 | git@github.com:VerbalExpressions/JavaVerbalExpressions.git 23 | scm:git:git@github.com:VerbalExpressions/JavaVerbalExpressions.git 24 | scm:git:git@github.com:VerbalExpressions/JavaVerbalExpressions.git 25 | 26 | 27 | 28 | 29 | 30 | The 
MIT License (MIT) 31 | https://raw.githubusercontent.com/VerbalExpressions/JavaVerbalExpressions/master/LICENSE 32 | repo 33 | 34 | 35 | 36 | 37 | 38 | lanwen 39 | Kirill Merkushev 40 | lanwen+github@yandex.ru 41 | 42 | 43 | 44 | 45 | 46 | junit 47 | junit 48 | 4.11 49 | test 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | biz.aQute.bnd 58 | bnd-maven-plugin 59 | 3.3.0 60 | 61 | 62 | 63 | bnd-process 64 | 65 | 66 | 67 | 68 | 69 | org.apache.maven.plugins 70 | maven-jar-plugin 71 | 3.0.2 72 | 73 | 74 | ${project.build.outputDirectory}/META-INF/MANIFEST.MF 75 | 76 | 77 | 78 | 79 | maven-compiler-plugin 80 | 3.1 81 | true 82 | 83 | 1.7 84 | 1.7 85 | utf-8 86 | 87 | 88 | 89 | org.codehaus.mojo 90 | cobertura-maven-plugin 91 | 2.6 92 | 93 | 94 | 95 | clean 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | org.codehaus.mojo 107 | cobertura-maven-plugin 108 | 2.6 109 | 110 | 111 | 112 | 113 | 114 | 115 | ossrh 116 | https://oss.sonatype.org/content/repositories/snapshots 117 | 118 | 119 | ossrh 120 | https://oss.sonatype.org/service/local/staging/deploy/maven2/ 121 | 122 | 123 | 124 | 125 | 126 | deployment 127 | 128 | true 129 | 130 | 131 | 132 | 133 | org.apache.maven.plugins 134 | maven-gpg-plugin 135 | 1.5 136 | 137 | 138 | sign-artifacts 139 | verify 140 | 141 | sign 142 | 143 | 144 | 145 | 146 | 147 | org.sonatype.plugins 148 | nexus-staging-maven-plugin 149 | 1.6.3 150 | true 151 | 152 | ossrh 153 | https://oss.sonatype.org/ 154 | true 155 | 156 | 157 | 158 | 159 | org.apache.maven.plugins 160 | maven-source-plugin 161 | 2.2.1 162 | 163 | 164 | attach-sources 165 | 166 | jar-no-fork 167 | 168 | 169 | 170 | 171 | 172 | 173 | org.apache.maven.plugins 174 | maven-javadoc-plugin 175 | 2.9.1 176 | 177 | 178 | attach-javadocs 179 | 180 | jar 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | -------------------------------------------------------------------------------- /src/main/java/ru/lanwen/verbalregex/VerbalExpression.java: 
import static java.lang.String.valueOf;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Immutable wrapper around a compiled {@link Pattern} that is assembled through the
 * fluent {@link Builder}. Obtain a builder with {@link #regex()} or {@link #regex(Builder)}.
 */
public class VerbalExpression {

    private final Pattern pattern;

    /**
     * Fluent builder that accumulates a regular expression in three parts:
     * prefixes (e.g. {@code ^}), the source body, and suffixes (pending closing
     * braces and {@code $}). {@link #build()} concatenates them and compiles the result.
     */
    public static class Builder {

        /** Matches a single non-word character; compiled once because {@link #sanitize(String)} is hot. */
        private static final Pattern NON_WORD_CHAR = Pattern.compile("[\\W]");

        /** Maps a modifier symbol to the corresponding {@link Pattern} flag. */
        private static final Map<Character, Integer> SYMBOL_MAP;

        static {
            final Map<Character, Integer> symbols = new HashMap<>();
            symbols.put('d', Pattern.UNIX_LINES);
            symbols.put('i', Pattern.CASE_INSENSITIVE);
            symbols.put('x', Pattern.COMMENTS);
            symbols.put('m', Pattern.MULTILINE);
            symbols.put('s', Pattern.DOTALL);
            symbols.put('u', Pattern.UNICODE_CASE);
            symbols.put('U', Pattern.UNICODE_CHARACTER_CLASS);
            SYMBOL_MAP = Collections.unmodifiableMap(symbols);
        }

        private StringBuilder prefixes = new StringBuilder();
        private StringBuilder source = new StringBuilder();
        private StringBuilder suffixes = new StringBuilder();
        private int modifiers = Pattern.MULTILINE;

        /**
         * Package private. Use {@link #regex()} to build a new one
         *
         * @since 1.2
         */
        Builder() {
        }

        /**
         * Escapes every non-word character of the value with a backslash.
         * Used by every method that accepts a literal, except {@link #add(String)}
         *
         * @param pValue - the string for char escaping
         * @return sanitized string value
         */
        private String sanitize(final String pValue) {
            return NON_WORD_CHAR.matcher(pValue).replaceAll("\\\\$0");
        }

        /**
         * Counts occurrences of some substring in whole string
         * Same as org.apache.commons.lang3.StringUtils#countMatches(String, java.lang.String)
         * by effect. Used to count braces for {@link #or(String)} method
         *
         * @param where - where to find
         * @param what  - what needs to count matches (must not be empty)
         * @return 0 if nothing found, count of occurrences instead
         */
        private int countOccurrencesOf(String where, String what) {
            return (where.length() - where.replace(what, "").length()) / what.length();
        }

        /**
         * Compiles prefixes + source + suffixes with the collected modifiers.
         *
         * @return new VerbalExpression wrapping the compiled pattern
         * @throws java.util.regex.PatternSyntaxException if the assembled expression is not a valid regex
         */
        public VerbalExpression build() {
            final String expression = new StringBuilder(prefixes)
                    .append(source)
                    .append(suffixes)
                    .toString();
            return new VerbalExpression(Pattern.compile(expression, modifiers));
        }

        /**
         * Append literal expression
         * Everything added to the expression should go through this method
         * (keep in mind when creating your own methods).
         * All existing methods already use this, so for basic usage, you can just ignore this method.
         * <p/>
         * Example:
         * regex().add("\n.*").build() // produce exact "\n.*" regexp
         *
         * @param pValue - literal expression, not sanitized
         * @return this builder
         */
        public Builder add(final String pValue) {
            this.source.append(pValue);
            return this;
        }

        /**
         * Append a regex from builder and wrap it with unnamed group (?: ... )
         *
         * @param regex - VerbalExpression.Builder, that not changed
         * @return this builder
         * @since 1.2
         */
        public Builder add(final Builder regex) {
            return this.group().add(regex.build().toString()).endGr();
        }

        /**
         * Enable or disable the expression to start at the beginning of the line
         *
         * @param pEnable - enables or disables the line starting
         * @return this builder
         */
        public Builder startOfLine(final boolean pEnable) {
            if (pEnable) {
                this.prefixes.append("^");
            } else {
                // Disabling strips every previously added start-of-line anchor.
                this.prefixes = new StringBuilder(this.prefixes.toString().replace("^", ""));
            }
            return this;
        }

        /**
         * Mark the expression to start at the beginning of the line
         * Same as {@link #startOfLine(boolean)} with true arg
         *
         * @return this builder
         */
        public Builder startOfLine() {
            return startOfLine(true);
        }

        /**
         * Enable or disable the expression to end at the last character of the line
         *
         * @param pEnable - enables or disables the line ending
         * @return this builder
         */
        public Builder endOfLine(final boolean pEnable) {
            if (pEnable) {
                this.suffixes.append("$");
            } else {
                // Disabling strips every previously added end-of-line anchor.
                this.suffixes = new StringBuilder(this.suffixes.toString().replace("$", ""));
            }
            return this;
        }

        /**
         * Mark the expression to end at the last character of the line
         * Same as {@link #endOfLine(boolean)} with true arg
         *
         * @return this builder
         */
        public Builder endOfLine() {
            return endOfLine(true);
        }

        /**
         * Add a string to the expression
         *
         * @param pValue - the string to be looked for (sanitized)
         * @return this builder
         */
        public Builder then(final String pValue) {
            return this.add("(?:" + sanitize(pValue) + ")");
        }

        /**
         * Add a string to the expression
         * Syntax sugar for {@link #then(String)} - use it in case:
         * regex().find("string") // when it goes first
         *
         * @param value - the string to be looked for (sanitized)
         * @return this builder
         */
        public Builder find(final String value) {
            return this.then(value);
        }

        /**
         * Add a string to the expression that might appear once (or not)
         * Example:
         * The following matches all strings that contain http:// or https://
         * <pre>{@code
         * VerbalExpression regex = regex()
         *         .find("http")
         *         .maybe("s")
         *         .then("://")
         *         .anythingBut(" ").build();
         * regex.test("http://")  //true
         * regex.test("https://") //true
         * }</pre>
         *
         * @param pValue - the string to be looked for
         * @return this builder
         */
        public Builder maybe(final String pValue) {
            return this.then(pValue).add("?");
        }

        /**
         * Add a regex to the expression that might appear once (or not)
         * Example:
         * The following matches all names that have a prefix or not.
         * <pre>{@code
         * VerbalExpression.Builder namePrefix = regex().oneOf("Mr.", "Ms.");
         * VerbalExpression name = regex()
         *         .maybe(namePrefix)
         *         .space()
         *         .zeroOrMore()
         *         .word()
         *         .oneOrMore()
         *         .build();
         * name.test("Mr. Bond") //true
         * name.test("James")    //true
         * }</pre>
         *
         * @param regex - the builder whose expression is optional
         * @return this builder
         */
        public Builder maybe(final Builder regex) {
            return this.group().add(regex).endGr().add("?");
        }

        /**
         * Add expression that matches anything (includes empty string)
         *
         * @return this builder
         */
        public Builder anything() {
            return this.add("(?:.*)");
        }

        /**
         * Add expression that matches anything, but not passed argument
         *
         * @param pValue - the characters not to match
         * @return this builder
         */
        public Builder anythingBut(final String pValue) {
            return this.add("(?:[^" + sanitize(pValue) + "]*)");
        }

        /**
         * Add expression that matches something that might appear once (or more)
         *
         * @return this builder
         */
        public Builder something() {
            return this.add("(?:.+)");
        }

        /**
         * Add expression that matches one or more characters, none of which is in the passed argument
         *
         * @param pValue - the characters not to match
         * @return this builder
         */
        public Builder somethingButNot(final String pValue) {
            return this.add("(?:[^" + sanitize(pValue) + "]+)");
        }

        /**
         * Add line break expression; matches "\n", "\r\n" or "\r\r"
         *
         * @return this builder
         */
        public Builder lineBreak() {
            return this.add("(?:\\n|(?:\\r\\n)|(?:\\r\\r))");
        }

        /**
         * Shortcut for {@link #lineBreak()}
         *
         * @return this builder
         */
        public Builder br() {
            return this.lineBreak();
        }

        /**
         * Add expression to match a tab character ('\u0009')
         *
         * @return this builder
         */
        public Builder tab() {
            return this.add("(?:\\t)");
        }

        /**
         * Add word, same as [a-zA-Z_0-9]+
         *
         * @return this builder
         */
        public Builder word() {
            return this.add("(?:\\w+)");
        }


        /*
           --- Predefined character classes
         */

        /**
         * Add word character, same as [a-zA-Z_0-9]
         *
         * @return this builder
         */
        public Builder wordChar() {
            return this.add("(?:\\w)");
        }


        /**
         * Add non-word character: [^\w]
         *
         * @return this builder
         */
        public Builder nonWordChar() {
            return this.add("(?:\\W)");
        }

        /**
         * Add non-digit: [^0-9]
         *
         * @return this builder
         */
        public Builder nonDigit() {
            return this.add("(?:\\D)");
        }

        /**
         * Add same as [0-9]
         *
         * @return this builder
         */
        public Builder digit() {
            return this.add("(?:\\d)");
        }

        /**
         * Add whitespace character, same as [ \t\n\x0B\f\r]
         *
         * @return this builder
         */
        public Builder space() {
            return this.add("(?:\\s)");
        }

        /**
         * Add non-whitespace character: [^\s]
         *
         * @return this builder
         */
        public Builder nonSpace() {
            return this.add("(?:\\S)");
        }

        /**
         * Add word boundary: \b
         * <p/>
         * Example:
         * <pre>{@code
         * VerbalExpression regex = regex()
         *         .wordBoundary().find("abc").wordBoundary()
         *         .build();
         * regex.test("a abc"); // true
         * regex.test("a.abc"); // true
         * regex.test("aabc"); // false
         * }</pre>
         *
         * @return this builder
         */
        public Builder wordBoundary() {
            return this.add("(?:\\b)");
        }


        /*
           --- / end of predefined character classes
         */


        /**
         * Add a character class that matches any single char of the passed value
         *
         * @param pValue - every char of it can be matched (sanitized)
         * @return this builder
         */
        public Builder anyOf(final String pValue) {
            return this.add("[" + sanitize(pValue) + "]");
        }

        /**
         * Shortcut to {@link #anyOf(String)}
         *
         * @param value - CharSequence every char from can be matched
         * @return this builder
         */
        public Builder any(final String value) {
            return this.anyOf(value);
        }

        /**
         * Add expression to match a range (or multiple ranges)
         * Usage: .range(from, to [, from, to ... ])
         * Example: The following matches a hexadecimal number:
         * regex().range( "0", "9", "a", "f") // produce [0-9a-f]
         * A trailing argument without a pair is ignored.
         *
         * @param pArgs - pairs for range
         * @return this builder
         */
        public Builder range(final String... pArgs) {
            final StringBuilder charClass = new StringBuilder("[");
            // Walk the arguments pairwise; "second" always points at the upper bound of a pair.
            for (int second = 1; second < pArgs.length; second += 2) {
                charClass.append(sanitize(pArgs[second - 1]))
                        .append("-")
                        .append(sanitize(pArgs[second]));
            }
            charClass.append("]");
            return this.add(charClass.toString());
        }

        /**
         * Turn ON the flag mapped to the symbol (d, i, x, m, s, u, U); unknown symbols are ignored
         *
         * @param pModifier - modifier symbol
         * @return this builder
         */
        public Builder addModifier(final char pModifier) {
            final Integer flag = SYMBOL_MAP.get(pModifier);
            if (flag != null) {
                modifiers |= flag;
            }
            return this;
        }

        /**
         * Turn OFF the flag mapped to the symbol (d, i, x, m, s, u, U); unknown symbols are ignored
         *
         * @param pModifier - modifier symbol
         * @return this builder
         */
        public Builder removeModifier(final char pModifier) {
            final Integer flag = SYMBOL_MAP.get(pModifier);
            if (flag != null) {
                modifiers &= ~flag;
            }
            return this;
        }

        /**
         * Turn ON or OFF matching with ignoring case
         *
         * @param pEnable - true to ignore case, false to respect it
         * @return this builder
         */
        public Builder withAnyCase(final boolean pEnable) {
            return pEnable ? this.addModifier('i') : this.removeModifier('i');
        }

        /**
         * Turn ON matching with ignoring case
         * Example:
         * // matches "a"
         * // matches "A"
         * regex().find("a").withAnyCase()
         *
         * @return this builder
         */
        public Builder withAnyCase() {
            return withAnyCase(true);
        }

        /**
         * Enable or disable single-line search by removing or adding the MULTILINE flag
         *
         * @param pEnable - true removes the 'm' modifier, false adds it
         * @return this builder
         */
        public Builder searchOneLine(final boolean pEnable) {
            return pEnable ? this.removeModifier('m') : this.addModifier('m');
        }

        /**
         * Convenient method to show that string usage count is exact count, range count or simply one or more
         * Usage:
         * regex().multiple("abc")                                  // Produce (?:abc)+
         * regex().multiple("abc", null)                            // Produce (?:abc)+
         * regex().multiple("abc", (int)from)                       // Produce (?:abc){from}
         * regex().multiple("abc", (int)from, (int)to)              // Produce (?:abc){from,to}
         * regex().multiple("abc", (int)from, (int)to, (int)...)    // Produce (?:abc)+
         *
         * @param pValue - the string to be looked for
         * @param count  - (optional) if passed one or two numbers, it used to show count or range count
         * @return this builder
         * @see #oneOrMore()
         * @see #then(String)
         * @see #zeroOrMore()
         */
        public Builder multiple(final String pValue, final int... count) {
            if (count == null) {
                return this.then(pValue).oneOrMore();
            }
            switch (count.length) {
                case 1:
                    return this.then(pValue).count(count[0]);
                case 2:
                    return this.then(pValue).count(count[0], count[1]);
                default:
                    return this.then(pValue).oneOrMore();
            }
        }

        /**
         * Adds "+" char to regexp
         * Same effect as {@link #atLeast(int)} with "1" argument
         * Also, used by {@link #multiple(String, int...)} when second argument is null, or have length more than 2
         *
         * @return this builder
         * @since 1.2
         */
        public Builder oneOrMore() {
            return this.add("+");
        }

        /**
         * Adds "*" char to regexp, means zero or more times repeated
         * Same effect as {@link #atLeast(int)} with "0" argument
         *
         * @return this builder
         * @since 1.2
         */
        public Builder zeroOrMore() {
            return this.add("*");
        }

        /**
         * Add count of previous group
         * for example:
         * .find("w").count(3) // produce - (?:w){3}
         *
         * @param count - number of occurrences of previous group in expression
         * @return this Builder
         */
        public Builder count(final int count) {
            this.source.append("{").append(count).append("}");
            return this;
        }

        /**
         * Produce range count
         * for example:
         * .find("w").count(1, 3) // produce (?:w){1,3}
         *
         * @param from - minimal number of occurrences
         * @param to   - max number of occurrences
         * @return this Builder
         * @see #count(int)
         */
        public Builder count(final int from, final int to) {
            this.source.append("{").append(from).append(",").append(to).append("}");
            return this;
        }

        /**
         * Produce range count with only minimal number of occurrences
         * for example:
         * .find("w").atLeast(1) // produce (?:w){1,}
         *
         * @param from - minimal number of occurrences
         * @return this Builder
         * @see #count(int)
         * @see #oneOrMore()
         * @see #zeroOrMore()
         * @since 1.2
         */
        public Builder atLeast(final int from) {
            return this.add("{").add(valueOf(from)).add(",}");
        }

        /**
         * Add a alternative expression to be matched
         *
         * Issue #32
         *
         * @param pValue - the string to be looked for
         * @return this builder
         */
        public Builder or(final String pValue) {
            this.prefixes.append("(?:");

            int opened = countOccurrencesOf(this.prefixes.toString(), "(");
            int closed = countOccurrencesOf(this.suffixes.toString(), ")");

            // Balance the group opened in the prefixes with a closing brace in front of the suffixes.
            if (opened >= closed) {
                this.suffixes = new StringBuilder(")" + this.suffixes.toString());
            }

            this.add(")|(?:");
            if (pValue != null) {
                this.then(pValue);
            }
            return this;
        }

        /**
         * Adds an alternative expression to be matched
         * based on an array of values.
         * Note: the values are appended as raw regex fragments, they are not sanitized.
         *
         * @param pValues - the strings to be looked for
         * @return this builder
         * @since 1.3
         */
        public Builder oneOf(final String... pValues) {
            if (pValues != null && pValues.length > 0) {
                this.add("(?:");
                for (int i = 0; i < pValues.length; i++) {
                    this.add("(?:").add(pValues[i]).add(")");
                    if (i < pValues.length - 1) {
                        this.add("|");
                    }
                }
                this.add(")");
            }
            return this;
        }

        /**
         * Adds capture - open brace to current position and closed to suffixes
         *
         * @return this builder
         */
        public Builder capture() {
            return this.capture(null);
        }

        /**
         * Adds named-capture - open brace to current position and closed to suffixes
         * <p/>
         * <pre>Example:{@code
         * String text = "test@example.com";
         * VerbalExpression regex = regex()
         *         .find("@")
         *         .capture("domain").anything().build();
         * regex.getText(text, "domain"); // => "example.com"
         * }</pre>
         *
         * @param name - group name; null or blank produces an unnamed capture
         * @return this builder
         * @since 1.6
         */
        public Builder capture(final String name) {
            this.suffixes.append(")");

            if (name == null || name.trim().isEmpty()) {
                return this.add("(");
            }
            return this.add("(?<" + name + ">");
        }

        /**
         * Shortcut for {@link #capture()}
         *
         * @return this builder
         * @since 1.2
         */
        public Builder capt() {
            return this.capture();
        }

        /**
         * Shortcut for {@link #capture(String)}
         *
         * @param name - group name
         * @return this builder
         * @since 1.6
         */
        public Builder capt(final String name) {
            return this.capture(name);
        }

        /**
         * Same as {@link #capture()}, but don't save result
         * May be used to set count of duplicated captures, without creating a new saved capture
         * Example:
         * // Without group() - count(2) applies only to second capture
         * regex().group()
         * .capt().range("0", "1").endCapt().tab()
         * .capt().digit().count(5).endCapt()
         * .endGr().count(2);
         *
         * @return this builder
         * @since 1.2
         */
        public Builder group() {
            this.suffixes.append(")");
            return this.add("(?:");
        }

        /**
         * Close brace for previous capture and remove last closed brace from suffixes
         * Can be used to continue build regex after capture or to add multiple captures
         *
         * @return this builder
         * @throws IllegalStateException if the suffixes hold no pending closing brace
         */
        public Builder endCapture() {
            // Remove the last pending ")" rather than blindly the last character:
            // endOfLine() may have appended "$" after the brace, and truncating that
            // would lose the anchor and leave the expression unbalanced.
            final int lastClosingBrace = this.suffixes.lastIndexOf(")");
            if (lastClosingBrace == -1) {
                throw new IllegalStateException("Can't end capture (group) when it not started");
            }
            this.suffixes.deleteCharAt(lastClosingBrace);
            return this.add(")");
        }

        /**
         * Shortcut for {@link #endCapture()}
         *
         * @return this builder
         * @since 1.2
         */
        public Builder endCapt() {
            return this.endCapture();
        }

        /**
         * Closes current unnamed, non-capturing group
         * Shortcut for {@link #endCapture()}
         * Use it with {@link #group()} for prettify code
         * Example:
         * regex().group().maybe("word").count(2).endGr()
         *
         * @return this builder
         * @since 1.2
         */
        public Builder endGr() {
            return this.endCapture();
        }
    }

    /**
     * Use builder {@link #regex()} (or {@link #regex(Builder)})
     * to create new instance of VerbalExpression
     *
     * @param pattern - {@link java.util.regex.Pattern} that constructed by builder
     */
    private VerbalExpression(final Pattern pattern) {
        this.pattern = pattern;
    }

    /**
     * Test that full string matches regular expression
     *
     * @param pToTest - string to check match
     * @return true if matches exact string, false otherwise (including null input)
     */
    public boolean testExact(final String pToTest) {
        return pToTest != null && pattern.matcher(pToTest).matches();
    }

    /**
     * Test that full string contains regex
     *
     * @param pToTest - string to check match
     * @return true if string contains regex, false otherwise (including null input)
     */
    public boolean test(final String pToTest) {
        return pToTest != null && pattern.matcher(pToTest).find();
    }

    /**
     * Extract full string that matches regex
     * Same as {@link #getText(String, int)} for 0 group
     *
     * @param toTest - string to extract from
     * @return group 0, extracted from text
     */
    public String getText(final String toTest) {
        return getText(toTest, 0);
    }

    /**
     * Extract exact group from string; the values of every match are concatenated
     *
     * @param toTest - string to extract from
     * @param group  - group to extract
     * @return extracted group, empty string when nothing matched, the group did not
     * participate in the match, or the input is null
     * @since 1.1
     */
    public String getText(final String toTest, final int group) {
        if (toTest == null) {
            return "";
        }
        final Matcher matcher = pattern.matcher(toTest);
        final StringBuilder extracted = new StringBuilder();
        while (matcher.find()) {
            final String value = matcher.group(group);
            // An optional group that did not participate yields null - skip it instead of appending "null".
            if (value != null) {
                extracted.append(value);
            }
        }
        return extracted.toString();
    }

    /**
     * Extract exact named-group from string; the values of every match are concatenated
     * <p/>
     * For an example see {@link Builder#capture(String)}
     *
     * @param toTest - string to extract from
     * @param group  - name of the group to extract
     * @return extracted group, empty string when nothing matched, the group did not
     * participate in the match, or the input is null
     * @since 1.6
     */
    public String getText(final String toTest, final String group) {
        if (toTest == null) {
            return "";
        }
        final Matcher matcher = pattern.matcher(toTest);
        final StringBuilder extracted = new StringBuilder();
        while (matcher.find()) {
            final String value = matcher.group(group);
            // An optional group that did not participate yields null - skip it instead of appending "null".
            if (value != null) {
                extracted.append(value);
            }
        }
        return extracted.toString();
    }

    /**
     * Extract exact group from string and add it to list
     *
     * Example:
     * String text = "SampleHelloWorldString";
     * VerbalExpression regex = regex().capt().oneOf("Hello", "World").endCapt().maybe("String").build();
     * list = regex.getTextGroups(text, 0) //result: "Hello", "WorldString"
     * list = regex.getTextGroups(text, 1) //result: "Hello", "World"
     *
     * @param toTest - string to extract from
     * @param group  - group to extract
     * @return list of extracted groups, one entry per match (empty for null input)
     */
    public List<String> getTextGroups(final String toTest, final int group) {
        final List<String> groups = new ArrayList<>();
        if (toTest == null) {
            return groups;
        }
        final Matcher matcher = pattern.matcher(toTest);
        while (matcher.find()) {
            groups.add(matcher.group(group));
        }
        return groups;
    }

    @Override
    public String toString() {
        return pattern.pattern();
    }

    /**
     * Creates new instance of VerbalExpression builder from cloned builder
     *
     * @param pBuilder - instance to clone
     * @return new VerbalExpression.Builder copied from passed
     * @since 1.1
     */
    public static Builder regex(final Builder pBuilder) {
        final Builder copy = new Builder();

        // Append into the fresh builder's own buffers so the two builders never share state.
        copy.prefixes.append(pBuilder.prefixes);
        copy.source.append(pBuilder.source);
        copy.suffixes.append(pBuilder.suffixes);
        copy.modifiers = pBuilder.modifiers;

        return copy;
    }

    /**
     * Creates new instance of VerbalExpression builder
     *
     * @return new VerbalExpression.Builder
     * @since 1.1
     */
    public static Builder regex() {
        return new Builder();
    }
}
testRegex.testExact("what")); 44 | assertTrue("Not contain w", testRegex.testExact("that")); 45 | assertTrue("Not contain w", testRegex.testExact(" ")); 46 | assertFalse("Null object", testRegex.testExact(null)); 47 | } 48 | 49 | @Test 50 | public void testSomethingBut() { 51 | VerbalExpression testRegex = new VerbalExpression.Builder() 52 | .somethingButNot("a") 53 | .build(); 54 | 55 | assertFalse("Null string", testRegex.testExact(null)); 56 | assertFalse("empty string doesn't have something", testRegex.testExact("")); 57 | assertTrue("doesn't contain a", testRegex.testExact("b")); 58 | assertFalse("Contain a", testRegex.testExact("a")); 59 | } 60 | 61 | @Test 62 | public void testStartOfLine() { 63 | VerbalExpression testRegex = new VerbalExpression.Builder() 64 | .startOfLine() 65 | .then("a") 66 | .build(); 67 | 68 | assertFalse("Null string", testRegex.testExact(null)); 69 | assertFalse("empty string doesn't have something", testRegex.testExact("")); 70 | assertThat("Starts with a", testRegex, matchesTo("a")); 71 | assertThat("Starts with a", testRegex, matchesTo("ab")); 72 | assertThat("Doesn't start with a", testRegex, not(matchesTo("ba"))); 73 | } 74 | 75 | @Test 76 | public void testStartOfLineFalse() { 77 | VerbalExpression testRegex = regex() 78 | .startOfLine(false) 79 | .then("a") 80 | .build(); 81 | assertThat(testRegex, matchesTo("ba")); 82 | assertThat(testRegex, matchesTo("ab")); 83 | } 84 | 85 | @Test 86 | public void testRangeWithMultiplyRanges() throws Exception { 87 | VerbalExpression regex = regex().range("a", "z", "A", "Z").build(); 88 | 89 | assertThat("Regex with multi-range differs from expected", regex.toString(), equalTo("[a-zA-Z]")); 90 | assertThat("Regex don't matches letter", regex, matchesTo("b")); 91 | assertThat("Regex matches digit, but should match only letter", regex, not(matchesTo("1"))); 92 | } 93 | 94 | @Test 95 | public void testEndOfLine() { 96 | VerbalExpression testRegex = new VerbalExpression.Builder() 97 | .find("a") 
98 | .endOfLine() 99 | .build(); 100 | 101 | assertThat("Ends with a", testRegex, matchesTo("bba")); 102 | assertThat("Ends with a", testRegex, matchesTo("a")); 103 | assertThat("Ends with a", testRegex, not(matchesTo(null))); 104 | assertThat("Doesn't end with a", testRegex, not(matchesTo("ab"))); 105 | } 106 | 107 | 108 | @Test 109 | public void testEndOfLineIsFalse() { 110 | VerbalExpression testRegex = regex() 111 | .find("a") 112 | .endOfLine(false) 113 | .build(); 114 | assertThat(testRegex, matchesTo("ba")); 115 | assertThat(testRegex, matchesTo("ab")); 116 | } 117 | 118 | 119 | @Test 120 | public void testMaybe() { 121 | VerbalExpression testRegex = new VerbalExpression.Builder() 122 | .startOfLine() 123 | .then("a") 124 | .maybe("b") 125 | .build(); 126 | 127 | assertThat("Regex isn't correct", testRegex.toString(), equalTo("^(?:a)(?:b)?")); 128 | 129 | assertThat("Maybe has a 'b' after an 'a'", testRegex, matchesTo("acb")); 130 | assertThat("Maybe has a 'b' after an 'a'", testRegex, matchesTo("abc")); 131 | assertThat("Maybe has a 'b' after an 'a'", testRegex, not(matchesTo("cab"))); 132 | } 133 | 134 | @Test 135 | public void testAnyOf() { 136 | VerbalExpression testRegex = new VerbalExpression.Builder() 137 | .startOfLine() 138 | .then("a") 139 | .anyOf("xyz") 140 | .build(); 141 | 142 | assertThat("Has an x, y, or z after a", testRegex, matchesTo("ay")); 143 | assertThat("Doesn't have an x, y, or z after a", testRegex, not(matchesTo("abc"))); 144 | } 145 | 146 | 147 | @Test 148 | public void testAnySameAsAnyOf() { 149 | VerbalExpression any = regex().any("abc").build(); 150 | VerbalExpression anyOf = regex().anyOf("abc").build(); 151 | 152 | assertThat("any differs from anyOf", any.toString(), equalTo(anyOf.toString())); 153 | } 154 | 155 | @Test 156 | public void testOr() { 157 | VerbalExpression testRegex = new VerbalExpression.Builder() 158 | .startOfLine() 159 | .then("abc") 160 | .or("def") 161 | .build(); 162 | 163 | assertThat("Starts with abc 
or def", testRegex, matchesTo("defzzz")); 164 | assertThat("Doesn't start with abc or def", testRegex, not(matchesTo("xyzabc"))); 165 | } 166 | 167 | @Test 168 | public void testLineBreak() { 169 | VerbalExpression testRegex = new VerbalExpression.Builder() 170 | .startOfLine() 171 | .then("abc") 172 | .lineBreak() 173 | .then("def") 174 | .build(); 175 | 176 | assertThat("abc then line break then def", testRegex, matchesTo("abc\r\ndef")); 177 | assertThat("abc then line break then def", testRegex, matchesTo("abc\ndef")); 178 | assertThat("abc then line break then space then def", testRegex, not(matchesTo("abc\r\n def"))); 179 | } 180 | 181 | @Test 182 | public void testMacintoshLineBreak() { 183 | VerbalExpression testRegex = new VerbalExpression.Builder() 184 | .startOfLine() 185 | .then("abc") 186 | .lineBreak() 187 | .then("def") 188 | .build(); 189 | 190 | assertThat("abc then line break then def", testRegex, matchesTo("abc\r\rdef")); 191 | } 192 | 193 | @Test 194 | public void testBr() { 195 | VerbalExpression testRegexBr = new VerbalExpression.Builder() 196 | .startOfLine() 197 | .then("abc") 198 | .br() 199 | .then("def") 200 | .build(); 201 | 202 | VerbalExpression testRegexLineBr = new VerbalExpression.Builder() 203 | .startOfLine() 204 | .then("abc") 205 | .lineBreak() 206 | .then("def") 207 | .build(); 208 | 209 | assertThat(".br() differs from .lineBreak()", testRegexBr.toString(), equalTo(testRegexLineBr.toString())); 210 | } 211 | 212 | @Test 213 | public void testTab() { 214 | VerbalExpression testRegex = new VerbalExpression.Builder() 215 | .startOfLine() 216 | .tab() 217 | .then("abc") 218 | .build(); 219 | 220 | assertThat("tab then abc", testRegex, matchesTo("\tabc")); 221 | assertThat("no tab then abc", testRegex, not(matchesTo("abc"))); 222 | } 223 | 224 | @Test 225 | public void testWord() { 226 | VerbalExpression testRegex = new VerbalExpression.Builder() 227 | .startOfLine() 228 | .word() 229 | .build(); 230 | 231 | assertThat("word", 
testRegex, matchesTo("abc123")); 232 | assertThat("non-word", testRegex, not(matchesTo("@#"))); 233 | } 234 | 235 | @Test 236 | public void testMultipleNoRange() { 237 | VerbalExpression testRegexStringOnly = new VerbalExpression.Builder() 238 | .startOfLine() 239 | .multiple("abc") 240 | .build(); 241 | VerbalExpression testRegexStringAndNull = new VerbalExpression.Builder() 242 | .startOfLine() 243 | .multiple("abc", null) 244 | .build(); 245 | VerbalExpression testRegexMoreThan2Ints = new VerbalExpression.Builder() 246 | .startOfLine() 247 | .multiple("abc", 2, 4, 8) 248 | .build(); 249 | VerbalExpression[] testRegexesSameBehavior = { 250 | testRegexStringOnly, 251 | testRegexStringAndNull, 252 | testRegexMoreThan2Ints 253 | }; 254 | for (VerbalExpression testRegex : testRegexesSameBehavior) { 255 | assertThat("abc once", testRegex, 256 | matchesTo("abc")); 257 | assertThat("abc more than once", testRegex, 258 | matchesTo("abcabcabc")); 259 | assertThat("no abc", testRegex, 260 | not(matchesTo("xyz"))); 261 | } 262 | } 263 | 264 | @Test 265 | public void testMultipleFrom() { 266 | VerbalExpression testRegexFrom = new VerbalExpression.Builder() 267 | .startOfLine() 268 | .multiple("abc", 2) 269 | .build(); 270 | assertThat("no abc", testRegexFrom, 271 | not(matchesTo("xyz"))); 272 | assertThat("abc less than 2 times", testRegexFrom, 273 | not(matchesTo("abc"))); 274 | assertThat("abc exactly 2 times", testRegexFrom, 275 | matchesTo("abcabc")); 276 | assertThat("abc more than 2 times", testRegexFrom, 277 | matchesTo("abcabcabc")); 278 | } 279 | 280 | @Test 281 | public void testMultipleFromTo() { 282 | VerbalExpression testRegexFromTo = new VerbalExpression.Builder() 283 | .startOfLine() 284 | .multiple("abc", 2, 4) 285 | .build(); 286 | assertThat("no abc", testRegexFromTo, not(matchesTo("xyz"))); 287 | assertThat("abc less than 2 times", testRegexFromTo, 288 | not(matchesTo("abc"))); 289 | assertThat("abc exactly 2 times", testRegexFromTo, matchesTo("abcabc")); 
290 | assertThat("abc between 2 and 4 times", testRegexFromTo, 291 | matchesTo("abcabcabc")); 292 | assertThat("abc exactly 4 times", testRegexFromTo, 293 | matchesTo("abcabcabcabc")); 294 | assertThat("abc more than 4 times", testRegexFromTo, 295 | not(matchesExactly("abcabcabcabcabc"))); 296 | } 297 | 298 | @Test 299 | public void testWithAnyCase() { 300 | VerbalExpression testRegex = new VerbalExpression.Builder() 301 | .startOfLine() 302 | .then("a") 303 | .build(); 304 | 305 | assertThat("not case insensitive", testRegex, not(matchesTo("A"))); 306 | testRegex = new VerbalExpression.Builder() 307 | .startOfLine() 308 | .then("a") 309 | .withAnyCase() 310 | .build(); 311 | 312 | assertThat("case insensitive", testRegex, matchesTo("A")); 313 | assertThat("case insensitive", testRegex, matchesTo("a")); 314 | } 315 | 316 | @Test 317 | public void testWithAnyCaseTurnOnThenTurnOff() { 318 | VerbalExpression testRegex = regex() 319 | .withAnyCase() 320 | .startOfLine() 321 | .then("a") 322 | .withAnyCase(false) 323 | .build(); 324 | 325 | assertThat(testRegex, not(matchesTo("A"))); 326 | } 327 | 328 | @Test 329 | public void testWithAnyCaseIsFalse() { 330 | VerbalExpression testRegex = regex() 331 | .startOfLine() 332 | .then("a") 333 | .withAnyCase(false) 334 | .build(); 335 | 336 | assertThat(testRegex, not(matchesTo("A"))); 337 | } 338 | 339 | @Test 340 | public void testSearchOneLine() { 341 | VerbalExpression testRegex = regex() 342 | .startOfLine() 343 | .then("a") 344 | .br() 345 | .then("b") 346 | .endOfLine() 347 | .build(); 348 | 349 | assertThat("b is on the second line", testRegex, matchesTo("a\nb")); 350 | 351 | testRegex = new VerbalExpression.Builder() 352 | .startOfLine() 353 | .then("a") 354 | .br() 355 | .then("b") 356 | .endOfLine() 357 | .searchOneLine(true) 358 | .build(); 359 | 360 | assertThat("b is on the second line but we are only searching the first", testRegex, matchesTo("a\nb")); 361 | } 362 | 363 | @Test 364 | public void testGetText() { 
365 | String testString = "123 https://www.google.com 456"; 366 | VerbalExpression testRegex = new VerbalExpression.Builder().add("http") 367 | .maybe("s") 368 | .then("://") 369 | .then("www.") 370 | .anythingBut(" ") 371 | .add("com").build(); 372 | assertEquals(testRegex.getText(testString), "https://www.google.com"); 373 | 374 | } 375 | 376 | @Test 377 | public void testStartCapture() { 378 | String text = "aaabcd"; 379 | VerbalExpression regex = regex() 380 | .find("a").count(3) 381 | .capture().find("b").anything().build(); 382 | 383 | assertThat("regex don't match string", regex.getText(text), equalTo(text)); 384 | assertThat("can't get first captured group", regex.getText(text, 1), equalTo("bcd")); 385 | } 386 | 387 | @Test 388 | public void testStartNamedCapture() { 389 | String text = "test@example.com"; 390 | String captureName = "domain"; 391 | VerbalExpression regex = regex() 392 | .find("@") 393 | .capture(captureName).anything().build(); 394 | 395 | assertThat("can't get captured group named " + captureName, 396 | regex.getText(text, captureName), 397 | equalTo("example.com")); 398 | } 399 | 400 | @Test 401 | public void captIsSameAsCapture() { 402 | assertThat("Capt produce different than capture regex", regex().capt().build().toString(), 403 | equalTo(regex().capture().build().toString())); 404 | } 405 | 406 | @Test 407 | public void namedCaptIsSameAsNamedCapture() { 408 | String name = "test"; 409 | assertThat("Named-capt produce different than named-capture regex", 410 | regex().capt(name).build().toString(), 411 | equalTo(regex().capture(name).build().toString())); 412 | } 413 | 414 | @Test 415 | public void shouldReturnEmptyStringWhenNoGroupFound() { 416 | String text = "abc"; 417 | VerbalExpression regex = regex().find("d").capture().find("e").build(); 418 | 419 | assertThat("regex don't match string", regex.getText(text), equalTo("")); 420 | assertThat("first captured group not empty string", regex.getText(text, 1), equalTo("")); 421 | 
assertThat("second captured group not empty string", regex.getText(text, 2), equalTo("")); 422 | } 423 | 424 | @Test 425 | public void testCountWithRange() { 426 | String text4c = "abcccce"; 427 | String text2c = "abcce"; 428 | String text1c = "abce"; 429 | 430 | VerbalExpression regex = regex().find("c").count(2, 3).build(); 431 | 432 | assertThat("regex don't match string", regex.getText(text4c), equalTo("ccc")); 433 | assertThat("regex don't match string", regex.getText(text2c), equalTo("cc")); 434 | assertThat("regex don't match string", regex, not(matchesTo(text1c))); 435 | } 436 | 437 | @Test 438 | public void testEndCapture() { 439 | String text = "aaabcd"; 440 | VerbalExpression regex = regex() 441 | .find("a") 442 | .capture().find("b").anything().endCapture().then("cd").build(); 443 | 444 | assertThat(regex.getText(text), equalTo("abcd")); 445 | assertThat("can't get first captured group", regex.getText(text, 1), equalTo("b")); 446 | } 447 | 448 | @Test 449 | public void testEndNamedCapture() { 450 | String text = "aaabcd"; 451 | String captureName = "str"; 452 | VerbalExpression regex = regex() 453 | .find("a") 454 | .capture(captureName).find("b").anything().endCapture() 455 | .then("cd").build(); 456 | 457 | assertThat(regex.getText(text), equalTo("abcd")); 458 | assertThat("can't get captured group named " + captureName, 459 | regex.getText(text, captureName), equalTo("b")); 460 | } 461 | 462 | @Test 463 | public void testMultiplyCapture() { 464 | String text = "aaabcd"; 465 | VerbalExpression regex = regex() 466 | .find("a").count(1) 467 | .capture().find("b").endCapture().anything().capture().find("d").build(); 468 | 469 | assertThat("can't get first captured group", regex.getText(text, 1), equalTo("b")); 470 | assertThat("can't get second captured group", regex.getText(text, 2), equalTo("d")); 471 | } 472 | 473 | @Test 474 | public void testMultiplyNamedCapture() { 475 | String text = "aaabcd"; 476 | String captureName1 = "str1"; 477 | String 
captureName2 = "str2"; 478 | VerbalExpression regex = regex() 479 | .find("a").count(1) 480 | .capture(captureName1).find("b").endCapture() 481 | .anything().capture(captureName2).find("d").build(); 482 | 483 | assertThat("can't get captured group named " + captureName1, 484 | regex.getText(text, captureName1), equalTo("b")); 485 | assertThat("can't get captured group named " + captureName2, 486 | regex.getText(text, captureName2), equalTo("d")); 487 | } 488 | 489 | @Test 490 | public void testOrWithCapture() { 491 | VerbalExpression testRegex = regex() 492 | .capture() 493 | .find("abc") 494 | .or("def") 495 | .build(); 496 | assertThat("Starts with abc or def", testRegex, matchesTo("defzzz")); 497 | assertThat("Starts with abc or def", testRegex, matchesTo("abczzz")); 498 | assertThat("Doesn't start with abc or def", testRegex, not(matchesExactly("xyzabcefg"))); 499 | 500 | assertThat(testRegex.getText("xxxabcdefzzz", 1), equalTo("abc")); 501 | assertThat(testRegex.getText("xxxdefzzz", 1), equalTo("")); 502 | assertThat(testRegex.getText("xxxabcdefzzz", 1), equalTo("abc")); 503 | } 504 | 505 | @Test 506 | public void testOrWithNamedCapture() { 507 | String captureName = "test"; 508 | VerbalExpression testRegex = regex() 509 | .capture(captureName) 510 | .find("abc") 511 | .or("def") 512 | .build(); 513 | assertThat("Starts with abc or def", testRegex, matchesTo("defzzz")); 514 | assertThat("Starts with abc or def", testRegex, matchesTo("abczzz")); 515 | assertThat("Doesn't start with abc or def", 516 | testRegex, not(matchesExactly("xyzabcefg"))); 517 | 518 | assertThat(testRegex.getText("xxxabcdefzzz", captureName), 519 | equalTo("abc")); 520 | assertThat(testRegex.getText("xxxdefzzz", captureName), 521 | equalTo("")); 522 | assertThat(testRegex.getText("xxxabcdefzzz", captureName), 523 | equalTo("abc")); 524 | } 525 | 526 | @Test 527 | public void testOrWithClosedCapture() { 528 | VerbalExpression testRegex = regex() 529 | .capture() 530 | .find("abc") 531 | 
.endCapt() 532 | .or("def") 533 | .build(); 534 | assertThat("Starts with abc or def", testRegex, matchesTo("defzzz")); 535 | assertThat("Starts with abc or def", testRegex, matchesTo("abczzz")); 536 | assertThat("Doesn't start with abc or def", testRegex, not(matchesExactly("xyzabcefg"))); 537 | 538 | assertThat(testRegex.getText("xxxabcdefzzz", 1), equalTo("abc")); 539 | assertThat(testRegex.getText("xxxdefzzz", 1), equalTo("")); 540 | assertThat(testRegex.getText("xxxabcdefzzz", 1), equalTo("abc")); 541 | } 542 | 543 | @Test 544 | public void testOrWithClosedNamedCapture() { 545 | String captureName = "test"; 546 | VerbalExpression testRegex = regex() 547 | .capture(captureName) 548 | .find("abc") 549 | .endCapt() 550 | .or("def") 551 | .build(); 552 | assertThat("Starts with abc or def", testRegex, matchesTo("defzzz")); 553 | assertThat("Starts with abc or def", testRegex, matchesTo("abczzz")); 554 | assertThat("Doesn't start with abc or def", 555 | testRegex, not(matchesExactly("xyzabcefg"))); 556 | 557 | assertThat(testRegex.getText("xxxabcdefzzz", captureName), 558 | equalTo("abc")); 559 | assertThat(testRegex.getText("xxxdefzzz", captureName), 560 | equalTo("")); 561 | assertThat(testRegex.getText("xxxabcdefzzz", captureName), 562 | equalTo("abc")); 563 | } 564 | 565 | @Test 566 | public void addRegexBuilderWrapsItWithUnsavedGroup() throws Exception { 567 | VerbalExpression regex = regex() 568 | .add(regex().capt().find("string").count(2).endCapt().count(1).digit()).count(2).build(); 569 | 570 | assertThat("Added regex builder don't wrapped with unsaved group", 571 | regex.toString(), startsWith("(?:((?:string")); 572 | 573 | String example = "stringstring1"; 574 | String example2digit = "stringstring11"; 575 | 576 | assertThat(regex, matchesExactly(example + example)); 577 | assertThat(regex, not(matchesExactly(example2digit))); 578 | } 579 | 580 | @Test 581 | public void multiplyWith1NumProduceSameAsCountResult() throws Exception { 582 | VerbalExpression 
regex = regex().multiple("a", 1).build(); 583 | 584 | assertThat(regex, equalToRegex(regex().find("a").count(1))); 585 | } 586 | 587 | @Test 588 | public void multiplyWith2NumProduceSameAsCountRangeResult() throws Exception { 589 | VerbalExpression regex = regex().multiple("a", 1, 2).build(); 590 | 591 | assertThat(regex, equalToRegex(regex().find("a").count(1, 2))); 592 | } 593 | 594 | @Test 595 | public void atLeast1HaveSameEffectAsOneOrMore() throws Exception { 596 | VerbalExpression regex = regex().find("a").atLeast(1).build(); 597 | 598 | String matched = "aaaaaa"; 599 | String oneMatchedExactly = "a"; 600 | String oneMatched = "ab"; 601 | String empty = ""; 602 | 603 | assertThat(regex, matchesExactly(matched)); 604 | assertThat(regex, matchesExactly(oneMatchedExactly)); 605 | assertThat(regex, not(matchesExactly(oneMatched))); 606 | assertThat(regex, matchesTo(oneMatched)); 607 | assertThat(regex, not(matchesTo(empty))); 608 | } 609 | 610 | @Test 611 | public void oneOreMoreSameAsAtLeast1() throws Exception { 612 | VerbalExpression regexWithOneOrMore = regex().find("a").oneOrMore().build(); 613 | 614 | String matched = "aaaaaa"; 615 | String oneMatchedExactly = "a"; 616 | String oneMatched = "ab"; 617 | String empty = ""; 618 | 619 | assertThat(regexWithOneOrMore, matchesExactly(matched)); 620 | assertThat(regexWithOneOrMore, matchesExactly(oneMatchedExactly)); 621 | assertThat(regexWithOneOrMore, not(matchesExactly(oneMatched))); 622 | assertThat(regexWithOneOrMore, matchesTo(oneMatched)); 623 | assertThat(regexWithOneOrMore, not(matchesTo(empty))); 624 | } 625 | 626 | @Test 627 | public void atLeast0HaveSameEffectAsZeroOrMore() throws Exception { 628 | VerbalExpression regex = regex().find("a").atLeast(0).build(); 629 | 630 | String matched = "aaaaaa"; 631 | String oneMatchedExactly = "a"; 632 | String oneMatched = "ab"; 633 | String empty = ""; 634 | 635 | assertThat(regex, matchesExactly(matched)); 636 | assertThat(regex, 
matchesExactly(oneMatchedExactly)); 637 | assertThat(regex, not(matchesExactly(oneMatched))); 638 | assertThat(regex, matchesTo(empty)); 639 | assertThat(regex, matchesExactly(empty)); 640 | } 641 | 642 | @Test 643 | public void zeroOreMoreSameAsAtLeast0() throws Exception { 644 | VerbalExpression regexWithOneOrMore = regex().find("a").zeroOrMore().build(); 645 | 646 | String matched = "aaaaaa"; 647 | String oneMatchedExactly = "a"; 648 | String oneMatched = "ab"; 649 | String empty = ""; 650 | 651 | assertThat(regexWithOneOrMore, matchesExactly(matched)); 652 | assertThat(regexWithOneOrMore, matchesExactly(oneMatchedExactly)); 653 | assertThat(regexWithOneOrMore, not(matchesExactly(oneMatched))); 654 | assertThat(regexWithOneOrMore, matchesTo(oneMatched)); 655 | assertThat(regexWithOneOrMore, matchesTo(empty)); 656 | assertThat(regexWithOneOrMore, matchesExactly(empty)); 657 | } 658 | 659 | @Test 660 | public void testOneOf() { 661 | VerbalExpression testRegex = new VerbalExpression.Builder() 662 | .startOfLine() 663 | .oneOf("abc", "def") 664 | .build(); 665 | 666 | assertThat("Starts with abc or def", testRegex, matchesTo("defzzz")); 667 | assertThat("Starts with abc or def", testRegex, matchesTo("abczzz")); 668 | assertThat("Doesn't start with abc nor def", testRegex, not(matchesTo("xyzabc"))); 669 | } 670 | 671 | @Test 672 | public void testOneOfWithCapture() { 673 | VerbalExpression testRegex = regex() 674 | .capture() 675 | .oneOf("abc", "def") 676 | .build(); 677 | assertThat("Starts with abc or def", testRegex, matchesTo("defzzz")); 678 | assertThat("Starts with abc or def", testRegex, matchesTo("abczzz")); 679 | assertThat("Doesn't start with abc or def", testRegex, not(matchesExactly("xyzabcefg"))); 680 | 681 | assertThat(testRegex.getText("xxxabcdefzzz", 1), equalTo("abcdef")); 682 | assertThat(testRegex.getText("xxxdefzzz", 1), equalTo("def")); 683 | } 684 | 685 | @Test 686 | public void testOneOfWithNamedCapture() { 687 | String captureName = "test"; 
688 | VerbalExpression testRegex = regex() 689 | .capture(captureName) 690 | .oneOf("abc", "def") 691 | .build(); 692 | assertThat("Starts with abc or def", testRegex, matchesTo("defzzz")); 693 | assertThat("Starts with abc or def", testRegex, matchesTo("abczzz")); 694 | assertThat("Doesn't start with abc or def", 695 | testRegex, not(matchesExactly("xyzabcefg"))); 696 | 697 | assertThat(testRegex.getText("xxxabcdefzzz", captureName), 698 | equalTo("abcdef")); 699 | assertThat(testRegex.getText("xxxdefzzz", captureName), 700 | equalTo("def")); 701 | } 702 | 703 | @Test 704 | public void testOneOfWithClosedCapture() { 705 | VerbalExpression testRegex = regex() 706 | .capture() 707 | .oneOf("abc", "def") 708 | .endCapt() 709 | .build(); 710 | assertThat("Starts with abc or def", testRegex, matchesTo("defzzz")); 711 | assertThat("Starts with abc or def", testRegex, matchesTo("abczzz")); 712 | assertThat("Doesn't start with abc or def", testRegex, not(matchesExactly("xyzabcefg"))); 713 | 714 | assertThat(testRegex.getText("xxxabcdefzzz", 1), equalTo("abcdef")); 715 | assertThat(testRegex.getText("xxxdefzzz", 1), equalTo("def")); 716 | } 717 | 718 | @Test 719 | public void testOneOfWithClosedNamedCapture() { 720 | String captureName = "test"; 721 | VerbalExpression testRegex = regex() 722 | .capture(captureName) 723 | .oneOf("abc", "def") 724 | .endCapt() 725 | .build(); 726 | assertThat("Starts with abc or def", testRegex, matchesTo("defzzz")); 727 | assertThat("Starts with abc or def", testRegex, matchesTo("abczzz")); 728 | assertThat("Doesn't start with abc or def", 729 | testRegex, not(matchesExactly("xyzabcefg"))); 730 | 731 | assertThat(testRegex.getText("xxxabcdefzzz", captureName), 732 | equalTo("abcdef")); 733 | assertThat(testRegex.getText("xxxdefzzz", captureName), 734 | equalTo("def")); 735 | } 736 | 737 | @Test 738 | public void shouldAddMaybeWithOneOfFromAnotherBuilder() { 739 | VerbalExpression.Builder namePrefix = regex().oneOf("Mr.", "Ms."); 740 | 
VerbalExpression name = regex() 741 | .maybe(namePrefix) 742 | .space() 743 | .zeroOrMore() 744 | .word() 745 | .oneOrMore() 746 | .build(); 747 | 748 | assertThat("Is a name with prefix", name, matchesTo("Mr. Bond")); 749 | assertThat("Is a name without prefix", name, matchesTo("James")); 750 | 751 | } 752 | 753 | @Test 754 | public void testListOfTextGroups() { 755 | String text = "SampleHelloWorldString"; 756 | VerbalExpression regex = regex() 757 | .capt() 758 | .oneOf("Hello", "World") 759 | .endCapt() 760 | .maybe("String") 761 | .build(); 762 | 763 | List groups0 = regex.getTextGroups(text, 0); 764 | 765 | assertThat(groups0.get(0), equalTo("Hello")); 766 | assertThat(groups0.get(1), equalTo("WorldString")); 767 | 768 | List groups1 = regex.getTextGroups(text, 1); 769 | 770 | assertThat(groups1.get(0), equalTo("Hello")); 771 | assertThat(groups1.get(1), equalTo("World")); 772 | } 773 | 774 | @Test 775 | public void testWordBoundary() { 776 | VerbalExpression regex = regex() 777 | .capture() 778 | .wordBoundary().then("o").word().oneOrMore().wordBoundary() 779 | .endCapture() 780 | .build(); 781 | 782 | assertThat(regex.getText("apple orange grape", 1), is("orange")); 783 | assertThat(regex.test("appleorange grape"), is(false)); 784 | assertThat(regex.test("apple3orange grape"), is(false)); 785 | } 786 | } 787 | --------------------------------------------------------------------------------