├── .dockerignore ├── .gitignore ├── Dockerfile ├── Makefile ├── README.md ├── docker-compose.yml ├── gpg-params.template ├── pom.xml ├── settings.xml.template └── src ├── main └── java │ └── com │ └── github │ └── sgreben │ └── regex_builder │ ├── CaptureGroup.java │ ├── CaptureGroupIndex.java │ ├── CaptureGroupReplacementPart.java │ ├── CharClass.java │ ├── Expression.java │ ├── ExpressionWrapper.java │ ├── FluentRe.java │ ├── Matcher.java │ ├── Pattern.java │ ├── Re.java │ ├── Replacement.java │ ├── ReplacementPart.java │ ├── StringReplacementPart.java │ ├── charclass │ ├── AnyCharacter.java │ ├── BeginInput.java │ ├── Binary.java │ ├── CharClassBase.java │ ├── CharClassVisitor.java │ ├── Complement.java │ ├── Digit.java │ ├── EndInput.java │ ├── EndInputBeforeFinalTerminator.java │ ├── HorizontalWhitespace.java │ ├── Intersection.java │ ├── Java.java │ ├── Nary.java │ ├── NonDigit.java │ ├── NonHorizontalWhitespace.java │ ├── NonVerticalWhitespace.java │ ├── NonWhitespace.java │ ├── NonWordBoundary.java │ ├── NonWordCharacter.java │ ├── NoneOf.java │ ├── Nullary.java │ ├── OneOf.java │ ├── Posix.java │ ├── Range.java │ ├── RangeComplement.java │ ├── RawComplement.java │ ├── Unary.java │ ├── Union.java │ ├── VerticalWhitespace.java │ ├── Whitespace.java │ ├── WordBoundary.java │ └── WordCharacter.java │ ├── compiler │ ├── CaptureGroupVisitor.java │ └── Compiler.java │ ├── expression │ ├── Atomic.java │ ├── BackReference.java │ ├── BeginLine.java │ ├── Binary.java │ ├── CharClassExpression.java │ ├── Choice.java │ ├── EndLine.java │ ├── ExpressionBase.java │ ├── ExpressionVisitor.java │ ├── ExpressionVisitorBase.java │ ├── Literal.java │ ├── Nary.java │ ├── NegativeLookahead.java │ ├── NegativeLookbehind.java │ ├── Nullary.java │ ├── Optional.java │ ├── OptionalPossessive.java │ ├── OptionalReluctant.java │ ├── PositiveLookahead.java │ ├── PositiveLookbehind.java │ ├── Raw.java │ ├── Repeat.java │ ├── Repeat1.java │ ├── Repeat1Possessive.java │ ├── 
Repeat1Reluctant.java │ ├── RepeatAtLeast.java │ ├── RepeatAtLeastPossessive.java │ ├── RepeatAtLeastReluctant.java │ ├── RepeatPossessive.java │ ├── RepeatReluctant.java │ ├── Sequence.java │ └── Unary.java │ ├── hamcrest │ └── MatchesPattern.java │ └── tokens │ ├── BACK_REFERENCE.java │ ├── BRACES.java │ ├── CARET.java │ ├── CHAR_CLASS_INTERSECTION.java │ ├── CHAR_CLASS_NAMED.java │ ├── DASH.java │ ├── DOT.java │ ├── END_CHAR_CLASS.java │ ├── END_GROUP.java │ ├── LITERAL.java │ ├── PIPE.java │ ├── PLUS.java │ ├── QUESTION.java │ ├── RAW.java │ ├── STAR.java │ ├── START_CHAR_CLASS.java │ ├── START_GROUP.java │ ├── START_GROUP_ATOMIC_NON_CAPTURING.java │ ├── START_GROUP_NAMED.java │ ├── START_GROUP_NON_CAPTURING.java │ ├── START_NEGATIVE_LOOKAHEAD.java │ ├── START_NEGATIVE_LOOKBEHIND.java │ ├── START_POSITIVE_LOOKAHEAD.java │ ├── START_POSITIVE_LOOKBEHIND.java │ └── TOKEN.java └── test └── java └── com └── github └── sgreben └── regex_builder ├── CharClassTest.java ├── FluentReTest.java └── MatcherTest.java /.dockerignore: -------------------------------------------------------------------------------- 1 | * 2 | !/src/ 3 | !/pom.xml 4 | !/settings.xml 5 | !/gpg-params 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | /settings.xml 3 | /gpg-params 4 | /gpg-home 5 | /.project 6 | /.classpath 7 | /.settings 8 | /.vscode 9 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine:3.12 AS download-maven 2 | RUN apk add --no-cache curl 3 | ARG MAVEN_VERSION=3.6.3 4 | ARG SHA512=c35a1803a6e70a126e80b2b3ae33eed961f83ed74d18fcd16909b2d44d7dada3203f1ffe726c17ef8dcca2dcaa9fca676987befeadc9b9f759967a8cb77181c0 5 | ARG BASE_URL=https://apache.osuosl.org/maven/maven-3/${MAVEN_VERSION}/binaries 6 | RUN 
mkdir -p /app /app/ref \ 7 | && curl -fsSL -o /maven.tar.gz ${BASE_URL}/apache-maven-"${MAVEN_VERSION}"-bin.tar.gz \ 8 | && echo "${SHA512} /maven.tar.gz" | sha512sum -c - \ 9 | && tar -xzf /maven.tar.gz -C /app --strip-components=1 10 | 11 | FROM alpine:3.12 AS generate-gpg-key 12 | RUN apk add --no-cache gnupg1 13 | ENV GNUPGHOME=/key 14 | RUN mkdir -p "${GNUPGHOME}" 15 | COPY gpg-params /gpg-params 16 | RUN gpg --batch --gen-key /gpg-params 17 | 18 | FROM openjdk:9-jdk-slim AS build-jar 19 | RUN ln -s /etc/java-9-openjdk /usr/lib/jvm/java-9-openjdk-$(dpkg --print-architecture)/conf 20 | ENV MAVEN_HOME /usr/share/maven 21 | ENV MAVEN_CONFIG "${HOME}/.m2" 22 | COPY --from=download-maven /app "${MAVEN_HOME}" 23 | RUN ln -s "${MAVEN_HOME}"/bin/mvn /usr/bin/mvn 24 | # Build app 25 | ENV APP_SOURCE /app 26 | WORKDIR /app 27 | COPY pom.xml pom.xml 28 | RUN mvn validate dependency:go-offline 29 | COPY src src 30 | RUN mvn clean compile test 31 | RUN mvn -DskipTests=true package 32 | 33 | FROM build-jar AS build-signed-jar 34 | RUN apt-get update && apt-get install -o APT::Immediate-Configure=0 --no-install-recommends -y gnupg1 && rm -rf /var/lib/apt/lists/* 35 | ENV GNUPGHOME=/key 36 | COPY --from=generate-gpg-key /key "${GNUPGHOME}" 37 | COPY settings.xml /root/.m2/ 38 | RUN mvn -o -DskipTests=true verify 39 | 40 | FROM build-signed-jar AS deploy-snapshot 41 | RUN mvn -DskipTests=true deploy 42 | 43 | FROM build-signed-jar AS deploy-release 44 | RUN mvn -DskipTests=true deploy 45 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: jar signed-jar export-gpg-key deploy-release deploy-snapshot publish-gpg-key 2 | 3 | jar: 4 | mkdir -p target 5 | docker-compose build build-jar 6 | docker-compose run --rm build-jar 7 | deploy-release: publish-gpg-key 8 | docker-compose build deploy-release 9 | deploy-snapshot: publish-gpg-key 10 | 
docker-compose build deploy-snapshot 11 | signed-jar: export-gpg-key 12 | mkdir -p target 13 | docker-compose build build-signed-jar 14 | docker-compose run --rm build-signed-jar 15 | publish-gpg-key: export-gpg-key 16 | docker-compose build publish-gpg-key 17 | docker-compose run --rm publish-gpg-key 18 | export-gpg-key: gpg-home 19 | gpg-home: 20 | mkdir -p gpg-home 21 | docker-compose build export-gpg-key 22 | docker-compose run --rm export-gpg-key 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Java Regex Builder 2 | 3 | Write regexes as **plain Java code**. Unlike opaque regex strings, commenting your expressions and reusing regex fragments is straightforward. 4 | 5 | The **regex-builder** library is implemented as a light-weight wrapper around `java.util.regex`. It consists of three main components: the expression builder `Re`, its fluent API equivalent `FluentRe`, and the character class builder `CharClass`. The components are introduced in the examples below as well as in the API overview tables at the end of this document. 6 | 7 | There's a [discussion](https://www.reddit.com/r/java/comments/4tyk90/github_sgrebenregexbuilder_write_regular/) of this project over on the Java subreddit. 
8 | 9 | - [Maven dependency](#maven-dependency) 10 | - [Examples](#examples) 11 | - [Apache log](#apache-log) 12 | - [Apache log (fluent API)](#apache-log-fluent-api) 13 | - [Date (DD/MM/YYYY HH:MM:SS)](#date-ddmmyyyy-hhmmss) 14 | - [Hex color](#hex-color) 15 | - [Reusing expressions](#reusing-expressions) 16 | - [Reusable Apache log expression](#reusable-apache-log-expression) 17 | - [API](#api) 18 | - [Expression builder](#expression-builder) 19 | - [CharClass builder](#charclass-builder) 20 | 21 | ## Maven dependency 22 | 23 | ```xml 24 | 25 | com.github.sgreben 26 | regex-builder 27 | 1.2.1 28 | 29 | ``` 30 | 31 | ## Examples 32 | 33 | Imports: 34 | ```java 35 | import com.github.sgreben.regex_builder.CaptureGroup; 36 | import com.github.sgreben.regex_builder.Expression; 37 | import com.github.sgreben.regex_builder.Pattern; 38 | import static com.github.sgreben.regex_builder.CharClass.*; 39 | import static com.github.sgreben.regex_builder.Re.*; 40 | ``` 41 | 42 | ### Apache log 43 | 44 | - Regex string: `(\\S+) (\\S+) (\\S+) \\[([\\w:/]+\\s[+\\-]\\d{4})\\] \"(\\S+) (\\S+) (\\S+)\" (\\d{3}) (\\d+)` 45 | - Java code: 46 | ```java 47 | 48 | CaptureGroup ip, client, user, dateTime, method, request, protocol, responseCode, size; 49 | Expression token = repeat1(nonWhitespaceChar()); 50 | 51 | ip = capture(token); 52 | client = capture(token); 53 | user = capture(token); 54 | dateTime = capture(sequence( 55 | repeat1(union(wordChar(),':','/')), whitespaceChar(), oneOf("+\\-"), repeat(digit(), 4) 56 | )); 57 | method = capture(token); 58 | request = capture(token); 59 | protocol = capture(token); 60 | responseCode = capture(repeat(digit(), 3)); 61 | size = capture(number()); 62 | 63 | Pattern p = Pattern.compile(sequence( 64 | ip, ' ', client, ' ', user, " [", dateTime, "] \"", method, ' ', request, ' ', protocol, "\" ", responseCode, ' ', size 65 | )); 66 | ``` 67 | Note that capture groups are plain java objects - no need to mess around with group indices or string 
group names. You can use the expression like this: 68 | ```java 69 | String logLine = "127.0.0.1 - - [21/Jul/2014:9:55:27 -0800] \"GET /home.html HTTP/1.1\" 200 2048"; 70 | Matcher m = p.matcher(logLine); 71 | 72 | assertTrue(m.matches()); 73 | 74 | assertEquals("127.0.0.1", m.group(ip)); 75 | assertEquals("-", m.group(client)); 76 | assertEquals("-", m.group(user)); 77 | assertEquals("21/Jul/2014:9:55:27 -0800", m.group(dateTime)); 78 | assertEquals("GET", m.group(method)); 79 | assertEquals("/home.html", m.group(request)); 80 | assertEquals("HTTP/1.1", m.group(protocol)); 81 | assertEquals("200", m.group(responseCode)); 82 | assertEquals("2048", m.group(size)); 83 | ``` 84 | 85 | Or, if you'd like to rewrite the log to a simpler "ip - request - response code" format, you can simply do 86 | ```java 87 | String result = m.replaceFirst(replacement(ip, " - ", request, " - ", responseCode)); 88 | ``` 89 | 90 | ### Apache log (fluent API) 91 | 92 | The above example can also be expressed using the fluent API implemented in `FluentRe`. 
To use it, you have to import it as 93 | 94 | ```java 95 | import static com.github.sgreben.regex_builder.CharClass.*; 96 | import com.github.sgreben.regex_builder.FluentRe; 97 | ``` 98 | 99 | ```java 100 | CaptureGroup ip, client, user, dateTime, method, request, protocol, responseCode, size; 101 | FluentRe nonWhitespace = FluentRe.match(nonWhitespaceChar()).repeat1(); 102 | 103 | ip = nonWhitespace.capture(); 104 | client = nonWhitespace.capture(); 105 | user = nonWhitespace.capture(); 106 | dateTime = FluentRe 107 | .match(union(wordChar(), oneOf(":/"))).repeat1() 108 | .then(whitespaceChar()) 109 | .then(oneOf("+\\-")) 110 | .then(FluentRe.match(digit()).repeat(4)) 111 | .capture(); 112 | method = nonWhitespace.capture(); 113 | request = nonWhitespace.capture(); 114 | protocol = nonWhitespace.capture(); 115 | responseCode = FluentRe.match(digit()).repeat(3).capture(); 116 | size = FluentRe.match(digit()).repeat1().capture(); 117 | 118 | Pattern p = FluentRe.match(beginInput()) 119 | .then(ip).then(' ') 120 | .then(client).then(' ') 121 | .then(user).then(" [") 122 | .then(dateTime).then("] \"") 123 | .then(method).then(' ') 124 | .then(request).then(' ') 125 | .then(protocol).then("\" ") 126 | .then(responseCode).then(' ') 127 | .then(size) 128 | .then(endInput()) 129 | .compile(); 130 | ``` 131 | 132 | ### Date (DD/MM/YYYY HH:MM:SS) 133 | 134 | - Regex string: `(\d\d)/(\d\d)/(\d\d\d\d) (\d\d):(\d\d):(\d\d)` 135 | - Java code: 136 | ```java 137 | Expression twoDigits = repeat(digit(), 2); 138 | Expression fourDigits = repeat(digit(), 4); 139 | CaptureGroup day = capture(twoDigits); 140 | CaptureGroup month = capture(twoDigits); 141 | CaptureGroup year = capture(fourDigits); 142 | CaptureGroup hour = capture(twoDigits); 143 | CaptureGroup minute = capture(twoDigits); 144 | CaptureGroup second = capture(twoDigits); 145 | Expression dateExpression = sequence( 146 | day, '/', month, '/', year, ' ', // DD/MM/YYYY 147 | hour, ':', minute, ':', second // HH:MM:SS 148 | 
); 149 | ``` 150 | 151 | Use the expression like this: 152 | ```java 153 | Pattern p = Pattern.compile(dateExpression); 154 | Matcher m = p.matcher("01/05/2015 12:30:22"); 155 | m.find(); 156 | assertEquals("01", m.group(day)); 157 | assertEquals("05", m.group(month)); 158 | assertEquals("2015", m.group(year)); 159 | assertEquals("12", m.group(hour)); 160 | assertEquals("30", m.group(minute)); 161 | assertEquals("22", m.group(second)); 162 | ``` 163 | 164 | ### Hex color 165 | 166 | - Regex string: `#([a-fA-F0-9]){3}(([a-fA-F0-9]){3})?` 167 | - Java code: 168 | ```java 169 | Expression threeHexDigits = repeat(hexDigit(), 3); 170 | CaptureGroup hexValue = capture( 171 | threeHexDigits, // #FFF 172 | optional(threeHexDigits) // #FFFFFF 173 | ); 174 | Expression hexColor = sequence( 175 | '#', hexValue 176 | ); 177 | ``` 178 | 179 | Use the expression like this: 180 | ```java 181 | Pattern p = Pattern.compile(hexColor); 182 | Matcher m = p.matcher("#0FAFF3 and #1bf"); 183 | m.find(); 184 | assertEquals("0FAFF3", m.group(hexValue)); 185 | m.find(); 186 | assertEquals("1bf", m.group(hexValue)); 187 | ``` 188 | 189 | ## Reusing expressions 190 | 191 | To reuse an expression cleanly, it should be packaged as a class. To access the capture groups contained in the expression, 192 | each capture group should be exposed as a final field or method. 193 | 194 | To allow the resulting object to be used as an expression, `regex-builder` provides a utility class `ExpressionWrapper`, 195 | which exposes a method `setExpression(Expression expr)` and implements the `Expression` interface. 196 | 197 | ```java 198 | import com.github.sgreben.regex_builder.ExpressionWrapper; 199 | ``` 200 | 201 | To use the class, simply extend it and call `setExpression` in your constructor or initialization block. 202 | You can then pass it to any `regex-builder` method that expects an `Expression`. 
203 | 204 | ### Reusable Apache log expression 205 | Using `ExpressionWrapper`, we can package the Apache log 206 | example above as follows: 207 | ```java 208 | public class ApacheLog extends ExpressionWrapper { 209 | public final CaptureGroup ip, client, user, dateTime, method, request, protocol, responseCode, size; 210 | 211 | { 212 | Expression nonWhitespace = repeat1(CharClass.nonWhitespaceChar()); 213 | ip = capture(nonWhitespace); 214 | client = capture(nonWhitespace); 215 | user = capture(nonWhitespace); 216 | dateTime = capture(sequence( 217 | repeat1(union(wordChar(), ':', '/')), 218 | whitespaceChar(), 219 | oneOf("+\\-"), 220 | repeat(digit(), 4) 221 | )); 222 | method = capture(nonWhitespace); 223 | request = capture(nonWhitespace); 224 | protocol = capture(nonWhitespace); 225 | responseCode = capture(repeat(CharClass.digit(), 3)); 226 | size = capture(repeat1(CharClass.digit())); 227 | 228 | Expression expression = sequence( 229 | ip, ' ', client, ' ', user, " [", dateTime, "] \"", method, ' ', request, ' ', protocol, "\" ", responseCode, ' ', size 230 | ); 231 | setExpression(expression); 232 | } 233 | } 234 | ``` 235 | 236 | We can then use instances of the packaged expression like this: 237 | 238 | ```java 239 | public static boolean sameIP(String twoLogs) { 240 | ApacheLog log1 = new ApacheLog(); 241 | ApacheLog log2 = new ApacheLog(); 242 | Pattern p = Pattern.compile(sequence( 243 | log1, ' ', log2 244 | )); 245 | Matcher m = p.matcher(twoLogs); 246 | m.find(); 247 | return m.group(log1.ip).equals(m.group(log2.ip)); 248 | } 249 | ``` 250 | 251 | 252 | ## API 253 | 254 | ### Expression builder 255 | 256 | | Builder method | `java.util.regex` syntax | 257 | | --------------------------- | ------------------------ | 258 | | repeat(e, N) | e{N} | 259 | | repeat(e) | e* | 260 | | repeat(e).possessive() | e*+ | 261 | | repeatPossessive(e) | e*+ | 262 | | repeat1(e) | e+ | 263 | | repeat1(e).possessive() | e++ | 264 | | repeat1Possessive(e) | e++ | 
265 | | optional(e) | e? | 266 | | optional(e).possessive() | e?+ | 267 | | optionalPossessive(e) | e?+ | 268 | | capture(e) | (e) | 269 | | positiveLookahead(e) | (?=e) | 270 | | negativeLookahead(e) | (?!e) | 271 | | positiveLookbehind(e) | (?<=e) | 272 | | negativeLookbehind(e) | (? 3 | 4.0.0 4 | com.github.sgreben 5 | regex-builder 6 | jar 7 | 1.2.1 8 | ${project.groupId}:${project.artifactId} 9 | Construct regular expressions as pure Java code. 10 | https://github.com/sgreben/regex-builder/ 11 | 12 | 13 | Sergey Grebenshchikov 14 | sgreben@gmail.com 15 | https://github.com/sgreben 16 | 17 | 18 | 19 | scm:git:git://github.com/sgreben/regex-builder.git 20 | scm:git:ssh://github.com:sgreben/regex-builder.git 21 | http://github.com/sgreben/regex-builder/tree/master 22 | 23 | 24 | 25 | MIT License 26 | http://www.opensource.org/licenses/mit-license.php 27 | 28 | 29 | 30 | 31 | ossrh 32 | https://oss.sonatype.org/content/repositories/snapshots 33 | 34 | 35 | 36 | 37 | junit 38 | junit 39 | 4.13.1 40 | test 41 | 42 | 43 | org.hamcrest 44 | hamcrest 45 | 2.1 46 | 47 | 48 | 49 | 50 | 51 | org.apache.maven.plugins 52 | maven-compiler-plugin 53 | 3.1 54 | 55 | -Xlint:unchecked 56 | 1.8 57 | 1.8 58 | 59 | 60 | 61 | org.apache.maven.plugins 62 | maven-source-plugin 63 | 3.0.1 64 | 65 | 66 | attach-sources 67 | 68 | jar-no-fork 69 | 70 | 71 | 72 | 73 | 74 | org.apache.maven.plugins 75 | maven-javadoc-plugin 76 | 2.9.1 77 | 78 | 79 | attach-javadocs 80 | 81 | jar 82 | 83 | 84 | 85 | 86 | 87 | org.sonatype.plugins 88 | nexus-staging-maven-plugin 89 | 1.6.7 90 | true 91 | 92 | ossrh 93 | https://oss.sonatype.org/ 94 | true 95 | 96 | 97 | 98 | org.apache.maven.plugins 99 | maven-gpg-plugin 100 | 1.5 101 | 102 | 103 | sign-artifacts 104 | verify 105 | 106 | sign 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | -------------------------------------------------------------------------------- /settings.xml.template: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | ossrh 5 | TODO: USERNAME 6 | TODO: PASSWORD 7 | 8 | 9 | 10 | 11 | ossrh 12 | 13 | true 14 | 15 | 16 | gpg1 17 | TODO: PASSPHRASE 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/CaptureGroup.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder; 2 | 3 | import com.github.sgreben.regex_builder.expression.Unary; 4 | import com.github.sgreben.regex_builder.tokens.END_GROUP; 5 | import com.github.sgreben.regex_builder.tokens.START_GROUP; 6 | import com.github.sgreben.regex_builder.tokens.START_GROUP_NAMED; 7 | import com.github.sgreben.regex_builder.tokens.TOKEN; 8 | 9 | /** 10 | * A regex capture group "(...)" 11 | */ 12 | public class CaptureGroup extends Unary { 13 | /** Group name; null for an unnamed group. */ private final String name; 14 | 15 | /** Creates an unnamed capture group around the given expression. */ public CaptureGroup(Expression expression) { 16 | super(expression); 17 | name = null; 18 | } 19 | 20 | /** Creates a capture group around the given expression that carries the given name. */ public CaptureGroup(Expression expression, String name) { 21 | super(expression); 22 | this.name = name; 23 | } 24 | 25 | /** Appends the group-opening token (START_GROUP_NAMED when a name is set, START_GROUP otherwise), then compiles every child into output, then appends END_GROUP. */ @Override 26 | public void compile(CaptureGroupIndex index, java.util.List output) { 27 | if (name != null) { 28 | output.add(new START_GROUP_NAMED(name)); 29 | } else { 30 | output.add(new START_GROUP()); 31 | } 32 | /* index is only passed through to the children; this method does not read or write it */ for (Expression child : children()) { 33 | child.compile(index, output); 34 | } 35 | output.add(new END_GROUP()); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/CaptureGroupIndex.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | 6 | /** 7 | * A mapping between CaptureGroup objects and the underlying regex group 
indices. 8 | */ 9 | public class CaptureGroupIndex { 10 | private final Map groupIndex; 11 | 12 | /** Creates an empty index. */ public CaptureGroupIndex() { 13 | this.groupIndex = new HashMap(); 14 | } 15 | 16 | /** Returns the index stored for the group, or null if none has been stored. */ public Integer get(CaptureGroup group) { 17 | return groupIndex.get(group); 18 | } 19 | 20 | /** Stores the index for the group, replacing any earlier entry for it. */ public void put(CaptureGroup group, Integer index) { 21 | groupIndex.put(group, index); 22 | } 23 | } -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/CaptureGroupReplacementPart.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder; 2 | /** Replacement part that refers to a capture group through its index. */ 3 | class CaptureGroupReplacementPart implements ReplacementPart { 4 | private final CaptureGroup group; 5 | 6 | public CaptureGroupReplacementPart(CaptureGroup group) { 7 | this.group = group; 8 | } 9 | 10 | /** Returns "$" followed by the index that the given CaptureGroupIndex holds for the group. */ @Override 11 | public String toReplacementString(CaptureGroupIndex index) { 12 | /* NOTE(review): index.get(group) is a plain map lookup, so a group that was never put into the index yields the text "$null" here - confirm callers always register the group first. */ return "$" + index.get(group); 13 | } 14 | } -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/CharClass.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder; 2 | 3 | import com.github.sgreben.regex_builder.charclass.*; 4 | import com.github.sgreben.regex_builder.tokens.TOKEN; 5 | /** Abstract base of all character classes; also hosts the static factory methods used to build them. */ 6 | public abstract class CharClass { 7 | public static CharClass range(char from, char to) { 8 | return new Range(from, to); 9 | } 10 | 11 | public static CharClass range(char... ranges) { 12 | return new Range(ranges); 13 | } 14 | 15 | public static CharClass union(CharClass... cs) { 16 | return new Union(cs); 17 | } 18 | 19 | /** Like union(CharClass...), but every argument is first converted by convertStrings. */ public static CharClass union(Object... cs) { 20 | return union(convertStrings(cs)); 21 | } 22 | 23 | public static CharClass intersection(CharClass... cs) { 24 | return new Intersection(cs); 25 | } 26 | 27 | public static CharClass intersection(Object... 
cs) { 28 | /* same conversion step as union(Object...) */ return intersection(convertStrings(cs)); 29 | } 30 | 31 | /** Wraps the given class in a Complement node. */ public static CharClass complement(CharClass cs) { 32 | return new Complement(cs); 33 | } 34 | 35 | public static CharClass anyChar() { 36 | return new AnyCharacter(); 37 | } 38 | 39 | public static CharClass digit() { 40 | return new Digit(); 41 | } 42 | 43 | public static CharClass nonDigit() { 44 | return new NonDigit(); 45 | } 46 | 47 | /** Built from the characters a, f, A, F, 0, 9 via range(char...); presumably read as the from/to pairs a-f, A-F and 0-9 - confirm in Range. */ public static CharClass hexDigit() { 48 | return range('a', 'f', 'A', 'F', '0', '9'); 49 | } 50 | 51 | /** The complement of hexDigit(). */ public static CharClass nonHexDigit() { 52 | return complement(hexDigit()); 53 | } 54 | 55 | public static CharClass wordChar() { 56 | return new WordCharacter(); 57 | } 58 | 59 | public static CharClass nonWordChar() { 60 | return new NonWordCharacter(); 61 | } 62 | 63 | public static CharClass wordBoundary() { 64 | return new WordBoundary(); 65 | } 66 | 67 | public static CharClass nonWordBoundary() { 68 | return new NonWordBoundary(); 69 | } 70 | 71 | public static CharClass whitespaceChar() { 72 | return new Whitespace(); 73 | } 74 | 75 | public static CharClass nonWhitespaceChar() { 76 | return new NonWhitespace(); 77 | } 78 | 79 | public static CharClass verticalWhitespaceChar() { 80 | return new VerticalWhitespace(); 81 | } 82 | 83 | public static CharClass nonVerticalWhitespaceChar() { 84 | return new NonVerticalWhitespace(); 85 | } 86 | 87 | public static CharClass horizontalWhitespaceChar() { 88 | return new HorizontalWhitespace(); 89 | } 90 | 91 | public static CharClass nonHorizontalWhitespaceChar() { 92 | return new NonHorizontalWhitespace(); 93 | } 94 | 95 | public static CharClass beginInput() { 96 | return new BeginInput(); 97 | } 98 | 99 | public static CharClass endInput() { 100 | return new EndInput(); 101 | } 102 | 103 | public static CharClass endInputBeforeFinalTerminator() { 104 | return new EndInputBeforeFinalTerminator(); 105 | } 106 | 107 | /** Builds a OneOf node from the given string; presumably matches any single character contained in chars - confirm in OneOf. */ public static CharClass oneOf(String chars) { 108 | return new OneOf(chars); 109 | } 110 | 111 
| public static CharClass noneOf(String chars) { 112 | return new NoneOf(chars); 113 | } 114 | 115 | /** Converts a mixed argument list into character classes: a Character or a String becomes oneOf(...) of its text, a CharClass is used as is. @param os the arguments to convert @return an array of the same length as os, without null entries @throws IllegalArgumentException if an element is null or of any other type */ private static CharClass[] convertStrings(Object[] os) { 116 | CharClass[] charClasses = new CharClass[os.length]; 117 | for (int i = 0; i < os.length; ++i) { 118 | Object o = os[i]; 119 | if (o instanceof Character) { 120 | charClasses[i] = oneOf("" + (Character) o); 121 | } 122 | else if (o instanceof String) { 123 | charClasses[i] = oneOf((String) o); 124 | } 125 | else if (o instanceof CharClass) { 126 | charClasses[i] = (CharClass) o; 127 | } else { /* fail at the call site instead of leaving a null slot that only breaks later */ throw new IllegalArgumentException("Expected a Character, String or CharClass at index " + i + " but got: " + o); } 128 | } 129 | return charClasses; 130 | } 131 | 132 | public abstract java.lang.Iterable children(); 133 | 134 | public abstract void accept(CharClassVisitor visitor); 135 | 136 | public abstract CharClass complement(); 137 | 138 | public abstract void compile(java.util.List output); 139 | 140 | /** Non-instantiable holder of factories that build charclass.Posix nodes from the names Lower, Upper, ASCII, Alpha and so on. */ public static class Posix { 141 | private Posix() {} 142 | 143 | public static CharClass Lower() { 144 | return new com.github.sgreben.regex_builder.charclass.Posix("Lower"); 145 | } 146 | 147 | public static CharClass Upper() { 148 | return new com.github.sgreben.regex_builder.charclass.Posix("Upper"); 149 | } 150 | 151 | public static CharClass ASCII() { 152 | return new com.github.sgreben.regex_builder.charclass.Posix("ASCII"); 153 | } 154 | 155 | public static CharClass Alpha() { 156 | return new com.github.sgreben.regex_builder.charclass.Posix("Alpha"); 157 | } 158 | 159 | public static CharClass Digit() { 160 | return new com.github.sgreben.regex_builder.charclass.Posix("Digit"); 161 | } 162 | 163 | public static CharClass Alnum() { 164 | return new com.github.sgreben.regex_builder.charclass.Posix("Alnum"); 165 | } 166 | 167 | public static CharClass Punct() { 168 | return new com.github.sgreben.regex_builder.charclass.Posix("Punct"); 169 | } 170 | 171 | public static CharClass Graph() { 172 | return new com.github.sgreben.regex_builder.charclass.Posix("Graph"); 173 | } 174 | 175 | public static CharClass Print() { 176 | return new 
com.github.sgreben.regex_builder.charclass.Posix("Print"); 177 | } 178 | 179 | public static CharClass Blank() { 180 | return new com.github.sgreben.regex_builder.charclass.Posix("Blank"); 181 | } 182 | 183 | public static CharClass Cntrl() { 184 | return new com.github.sgreben.regex_builder.charclass.Posix("Cntrl"); 185 | } 186 | 187 | public static CharClass XDigit() { 188 | return new com.github.sgreben.regex_builder.charclass.Posix("XDigit"); 189 | } 190 | 191 | public static CharClass Space() { 192 | return new com.github.sgreben.regex_builder.charclass.Posix("Space"); 193 | } 194 | } 195 | 196 | /** Non-instantiable holder of factories that build charclass.Java nodes from the names javaLowerCase, javaUpperCase, javaWhitespace and javaMirrored. */ public static class Java { 197 | private Java() {} 198 | 199 | public static CharClass LowerCase() { 200 | return new com.github.sgreben.regex_builder.charclass.Java("javaLowerCase"); 201 | } 202 | 203 | public static CharClass UpperCase() { 204 | return new com.github.sgreben.regex_builder.charclass.Java("javaUpperCase"); 205 | } 206 | 207 | public static CharClass Whitespace() { 208 | return new com.github.sgreben.regex_builder.charclass.Java("javaWhitespace"); 209 | } 210 | 211 | public static CharClass Mirrored() { 212 | return new com.github.sgreben.regex_builder.charclass.Java("javaMirrored"); 213 | } 214 | } 215 | } 216 | -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/Expression.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder; 2 | 3 | import com.github.sgreben.regex_builder.expression.ExpressionVisitor; 4 | import com.github.sgreben.regex_builder.tokens.TOKEN; 5 | /** A regex expression node: it exposes its sub-expressions, accepts a visitor and can be compiled into a token list. */ 6 | public interface Expression { 7 | /** The direct sub-expressions of this expression. */ java.lang.Iterable children(); 8 | 9 | /** Visitor entry point. */ void accept(ExpressionVisitor visitor); 10 | 11 | /** Appends the tokens of this expression to output; the implementations visible in this project hand index on unchanged to their children. */ void compile(CaptureGroupIndex index, java.util.List output); 12 | 13 | /** Returns the possessive form of this expression (FluentRe describes it as adding a "possessive" modifier, if applicable). */ Expression possessive(); 14 | 15 | /** Returns the reluctant form of this expression (FluentRe describes it as adding a "reluctant" modifier, if applicable). */ Expression reluctant(); 16 | } 
-------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/ExpressionWrapper.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder; 2 | 3 | import com.github.sgreben.regex_builder.expression.ExpressionVisitor; 4 | import com.github.sgreben.regex_builder.tokens.TOKEN; 5 | 6 | import java.util.Collections; 7 | import java.util.List; 8 | /** Base class for packaging a reusable expression: a subclass builds its expression, registers it with setExpression, and can then be used wherever an Expression is expected. Every Expression method delegates to the registered expression, so all of them except children() throw NullPointerException when called before setExpression. */ 9 | public class ExpressionWrapper implements Expression { 10 | /** The wrapped expression; null until setExpression is called. */ private Expression expression; 11 | 12 | /** Creates a wrapper with no expression set. */ public ExpressionWrapper() { 13 | expression = null; 14 | } 15 | 16 | /** Returns a single-element collection holding the wrapped expression (the element is null before setExpression). */ public Iterable children() { 17 | return Collections.singleton(expression); 18 | } 19 | 20 | /** Forwards the visitor to the wrapped expression. */ public void accept(ExpressionVisitor visitor) { 21 | expression.accept(visitor); 22 | } 23 | 24 | /** Delegates compilation to the wrapped expression. */ public void compile(CaptureGroupIndex index, List output) { 25 | expression.compile(index, output); 26 | } 27 | 28 | /** Returns the possessive form of the wrapped expression. */ public Expression possessive() { 29 | return expression.possessive(); 30 | } 31 | 32 | /** Returns the reluctant form of the wrapped expression. */ public Expression reluctant() { 33 | return expression.reluctant(); 34 | } 35 | 36 | /** Sets the expression that all other methods delegate to. Protected rather than package-private so that subclasses declared in other packages can call it from a constructor or initializer block. @param expression the expression to wrap */ protected void setExpression(Expression expression) { 37 | this.expression = expression; 38 | } 39 | } -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/FluentRe.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder; 2 | 3 | import com.github.sgreben.regex_builder.expression.CharClassExpression; 4 | 5 | /** 6 | * Fluent regex builder API 7 | */ 8 | public class FluentRe { 9 | private Expression expression; 10 | 11 | private FluentRe(Expression expression) { 12 | this.expression = expression; 13 | } 14 | 15 | public static FluentRe match(Expression e) { 16 | return new FluentRe(e); 17 | } 18 | 19 | public static FluentRe match(String s) { 20 | return new FluentRe(Re.string(s)); 21 | } 22 | 23 | public 
static FluentRe match(CharClass c) { 24 | return new FluentRe(new CharClassExpression(c)); 25 | } 26 | 27 | public static FluentRe match(char c) { 28 | return match(Character.toString(c)); 29 | } 30 | 31 | public FluentRe or(FluentRe e) { 32 | return new FluentRe(Re.choice(expression, e.expression)); 33 | } 34 | 35 | public FluentRe repeat() { 36 | return new FluentRe(Re.repeat(expression)); 37 | } 38 | 39 | public FluentRe repeat1() { 40 | return new FluentRe(Re.repeat1(expression)); 41 | } 42 | 43 | public FluentRe repeat(int n) { 44 | return new FluentRe(Re.repeat(expression, n)); 45 | } 46 | 47 | public FluentRe repeatAtLeast(int n) { 48 | return new FluentRe(Re.repeatAtLeast(expression, n)); 49 | } 50 | 51 | public FluentRe repeat(int min, int max) { 52 | return new FluentRe(Re.repeat(expression, min, max)); 53 | } 54 | 55 | /** 56 | * Adds a "reluctant" modifier (if applicable) 57 | */ 58 | public FluentRe reluctant() { 59 | return new FluentRe(expression.reluctant()); 60 | } 61 | 62 | /** 63 | * Adds a "possessive" modifier (if applicable) 64 | */ 65 | public FluentRe possessive() { 66 | return new FluentRe(expression.possessive()); 67 | } 68 | 69 | public FluentRe or(char e) { 70 | return new FluentRe(Re.choice(expression, e)); 71 | } 72 | 73 | public FluentRe or(CharClass e) { 74 | return new FluentRe(Re.choice(expression, e)); 75 | } 76 | 77 | public FluentRe or(String e) { 78 | return new FluentRe(Re.choice(expression, e)); 79 | } 80 | 81 | public FluentRe or(Expression e) { 82 | return new FluentRe(Re.choice(expression, e)); 83 | } 84 | 85 | public CaptureGroup capture() { 86 | return Re.capture(expression); 87 | } 88 | 89 | public CaptureGroup captureNamed(String name) { 90 | return Re.captureNamed(name, expression); 91 | } 92 | 93 | public FluentRe separatedBy1(FluentRe e) { 94 | return new FluentRe(Re.separatedBy1(e.expression, expression)); 95 | } 96 | 97 | public FluentRe separatedBy1(Expression e) { 98 | return new FluentRe(Re.separatedBy1(e, 
expression)); 99 | } 100 | 101 | public FluentRe separatedBy1(String e) { 102 | return new FluentRe(Re.separatedBy1(e, expression)); 103 | } 104 | 105 | public FluentRe separatedBy1(char e) { 106 | return new FluentRe(Re.separatedBy1(e, expression)); 107 | } 108 | 109 | public FluentRe separatedBy1(CharClass e) { 110 | return new FluentRe(Re.separatedBy1(e, expression)); 111 | } 112 | 113 | public FluentRe separatedBy1Possessive(FluentRe e) { 114 | return new FluentRe(Re.separatedBy1Possessive(e.expression, expression)); 115 | } 116 | 117 | public FluentRe separatedBy1Possessive(Expression e) { 118 | return new FluentRe(Re.separatedBy1Possessive(e, expression)); 119 | } 120 | 121 | public FluentRe separatedBy1Possessive(String e) { 122 | return new FluentRe(Re.separatedBy1Possessive(e, expression)); 123 | } 124 | 125 | public FluentRe separatedBy1Possessive(char e) { 126 | return new FluentRe(Re.separatedBy1Possessive(e, expression)); 127 | } 128 | 129 | public FluentRe separatedBy1Possessive(CharClass e) { 130 | return new FluentRe(Re.separatedBy1Possessive(e, expression)); 131 | } 132 | 133 | public FluentRe separatedBy(FluentRe e) { 134 | return new FluentRe(Re.separatedBy(e.expression, expression)); 135 | } 136 | 137 | public FluentRe separatedBy(Expression e) { 138 | return new FluentRe(Re.separatedBy(e, expression)); 139 | } 140 | 141 | public FluentRe separatedBy(String e) { 142 | return new FluentRe(Re.separatedBy(e, expression)); 143 | } 144 | 145 | public FluentRe separatedBy(char e) { 146 | return new FluentRe(Re.separatedBy(e, expression)); 147 | } 148 | 149 | public FluentRe separatedBy(CharClass e) { 150 | return new FluentRe(Re.separatedBy(e, expression)); 151 | } 152 | 153 | public FluentRe separatedByPossessive(FluentRe e) { 154 | return new FluentRe(Re.separatedByPossessive(e.expression, expression)); 155 | } 156 | 157 | public FluentRe separatedByPossessive(Expression e) { 158 | return new FluentRe(Re.separatedByPossessive(e, expression)); 159 | 
} 160 | 161 | public FluentRe separatedByPossessive(String e) { 162 | return new FluentRe(Re.separatedByPossessive(e, expression)); 163 | } 164 | 165 | public FluentRe separatedByPossessive(char e) { 166 | return new FluentRe(Re.separatedByPossessive(e, expression)); 167 | } 168 | 169 | public FluentRe separatedByPossessive(CharClass e) { 170 | return new FluentRe(Re.separatedByPossessive(e, expression)); 171 | } 172 | 173 | public FluentRe then(FluentRe e) { 174 | return new FluentRe(Re.sequence(expression, e.expression)); 175 | } 176 | 177 | public FluentRe then(Expression e) { 178 | return new FluentRe(Re.sequence(expression, e)); 179 | } 180 | 181 | public FluentRe then(String s) { 182 | return new FluentRe(Re.sequence(expression, s)); 183 | } 184 | 185 | public FluentRe then(char c) { 186 | return new FluentRe(Re.sequence(expression, c)); 187 | } 188 | 189 | public FluentRe then(CharClass c) { 190 | return new FluentRe(Re.sequence(expression, c)); 191 | } 192 | 193 | public FluentRe optional() { 194 | return new FluentRe(Re.optional(expression)); 195 | } 196 | 197 | public Pattern compile() { 198 | return Pattern.compile(expression); 199 | } 200 | 201 | public Pattern compile(int flags) { 202 | return Pattern.compile(expression, flags); 203 | } 204 | } 205 | -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/Matcher.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder; 2 | 3 | import java.util.regex.MatchResult; 4 | 5 | public class Matcher { 6 | private final java.util.regex.Matcher matcher; 7 | private final CaptureGroupIndex groupIndex; 8 | 9 | public Matcher(java.util.regex.Matcher matcher, CaptureGroupIndex groupIndex) { 10 | this.matcher = matcher; 11 | this.groupIndex = groupIndex; 12 | } 13 | 14 | public boolean matches() { 15 | return matcher.matches(); 16 | } 17 | 18 | public boolean find() { 19 
| return matcher.find(); 20 | } 21 | 22 | public boolean find(int offset) { 23 | return matcher.find(offset); 24 | } 25 | 26 | public int start() { 27 | return matcher.start(); 28 | } 29 | 30 | public int start(CaptureGroup group) { 31 | return matcher.start(groupIndex.get(group)); 32 | } 33 | 34 | public int end() { 35 | return matcher.end(); 36 | } 37 | 38 | public int end(CaptureGroup group) { 39 | return matcher.end(groupIndex.get(group)); 40 | } 41 | 42 | public MatchResult toMatchResult() { 43 | return matcher.toMatchResult(); 44 | } 45 | 46 | public String group(CaptureGroup group) { 47 | return matcher.group(groupIndex.get(group)); 48 | } 49 | 50 | public String replaceAll(Replacement replacement) { 51 | String replacementString = replacement.toReplacementString(groupIndex); 52 | return matcher.replaceAll(replacementString); 53 | } 54 | 55 | public String replaceFirst(Replacement replacement) { 56 | String replacementString = replacement.toReplacementString(groupIndex); 57 | return matcher.replaceFirst(replacementString); 58 | } 59 | 60 | /** 61 | * Appends a replacement step to sb by delegating to the underlying matcher, using the 62 | * replacement string resolved against this matcher's capture-group index. 63 | * 64 | * @return this matcher; the underlying appendReplacement call returns the same underlying 65 | * matcher, so no new wrapper is needed 66 | */ 67 | public Matcher appendReplacement(StringBuffer sb, Replacement replacement) { 68 | matcher.appendReplacement(sb, replacement.toReplacementString(groupIndex)); 69 | return this; 70 | } 71 | 72 | public StringBuffer appendTail(StringBuffer sb) { 73 | return matcher.appendTail(sb); 74 | } 75 | 76 | public String group() { 77 | return matcher.group(); 78 | } 79 | 80 | public int groupCount() { 81 | return matcher.groupCount(); 82 | } 83 | } -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/Pattern.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder; 2 | 3 | import com.github.sgreben.regex_builder.compiler.Compiler; 4 | 5 | public class Pattern { 6 | private final java.util.regex.Pattern rawPattern; 7 |
private final CaptureGroupIndex groupIndex; 8 | 9 | public static Pattern compile(Expression expression) { 10 | return Compiler.compile(expression); 11 | } 12 | 13 | public static Pattern compile(Expression expression, int flags) { 14 | return Compiler.compile(expression, flags); 15 | } 16 | 17 | public static Pattern quote(String literal) { 18 | return Compiler.compile(Re.string(literal)); 19 | } 20 | 21 | public Pattern(java.util.regex.Pattern rawPattern, CaptureGroupIndex groupIndex) { 22 | this.rawPattern = rawPattern; 23 | this.groupIndex = groupIndex; 24 | } 25 | 26 | public Matcher matcher(CharSequence input) { 27 | java.util.regex.Matcher matcher = rawPattern.matcher(input); 28 | return new Matcher(matcher, groupIndex); 29 | } 30 | 31 | public static boolean matches(Expression regex, CharSequence input) { 32 | return compile(regex).matcher(input).matches(); 33 | } 34 | 35 | public String[] split(CharSequence input) { 36 | return rawPattern.split(input); 37 | } 38 | 39 | public String[] split(CharSequence input, int limit) { 40 | return rawPattern.split(input, limit); 41 | } 42 | 43 | public java.util.stream.Stream splitAsStream(CharSequence input) { 44 | return rawPattern.splitAsStream(input); 45 | } 46 | 47 | public String pattern() { 48 | return rawPattern.pattern(); 49 | } 50 | 51 | @Override 52 | public String toString() { 53 | return rawPattern.toString(); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/Re.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder; 2 | 3 | import com.github.sgreben.regex_builder.expression.Atomic; 4 | import com.github.sgreben.regex_builder.expression.BackReference; 5 | import com.github.sgreben.regex_builder.expression.BeginLine; 6 | import com.github.sgreben.regex_builder.expression.CharClassExpression; 7 | import 
com.github.sgreben.regex_builder.expression.Choice; 8 | import com.github.sgreben.regex_builder.expression.EndLine; 9 | import com.github.sgreben.regex_builder.expression.Literal; 10 | import com.github.sgreben.regex_builder.expression.NegativeLookahead; 11 | import com.github.sgreben.regex_builder.expression.NegativeLookbehind; 12 | import com.github.sgreben.regex_builder.expression.Optional; 13 | import com.github.sgreben.regex_builder.expression.OptionalPossessive; 14 | import com.github.sgreben.regex_builder.expression.OptionalReluctant; 15 | import com.github.sgreben.regex_builder.expression.PositiveLookahead; 16 | import com.github.sgreben.regex_builder.expression.PositiveLookbehind; 17 | import com.github.sgreben.regex_builder.expression.Repeat; 18 | import com.github.sgreben.regex_builder.expression.Repeat1; 19 | import com.github.sgreben.regex_builder.expression.Repeat1Possessive; 20 | import com.github.sgreben.regex_builder.expression.Repeat1Reluctant; 21 | import com.github.sgreben.regex_builder.expression.RepeatAtLeast; 22 | import com.github.sgreben.regex_builder.expression.RepeatAtLeastPossessive; 23 | import com.github.sgreben.regex_builder.expression.RepeatAtLeastReluctant; 24 | import com.github.sgreben.regex_builder.expression.RepeatPossessive; 25 | import com.github.sgreben.regex_builder.expression.RepeatReluctant; 26 | import com.github.sgreben.regex_builder.expression.Sequence; 27 | 28 | /** 29 | * Regular expression builder 30 | */ 31 | public class Re { 32 | private Re() { 33 | } 34 | 35 | /** 36 | * Match a string literal. 37 | */ 38 | public static Expression string(String s) { 39 | return new Literal(s); 40 | } 41 | 42 | /** 43 | * Match a character literal. 44 | */ 45 | public static Expression character(char c) { 46 | return string(Character.toString(c)); 47 | } 48 | 49 | /** 50 | * Match the beginning of a line. 
51 | */ 52 | public static Expression beginLine() { 53 | return new BeginLine(); 54 | } 55 | 56 | /** 57 | * Match the end of a line. 58 | */ 59 | public static Expression endLine() { 60 | return new EndLine(); 61 | } 62 | 63 | /** 64 | * Match a character from the given char class. 65 | */ 66 | public static Expression charClass(CharClass charClass) { 67 | return new CharClassExpression(charClass); 68 | } 69 | 70 | /** 71 | * Match any character. 72 | */ 73 | public static Expression anyCharacter() { 74 | return charClass(CharClass.anyChar()); 75 | } 76 | 77 | /** 78 | * Repeat the expression zero or more times. 79 | */ 80 | public static Repeat repeat(Expression e) { 81 | return new Repeat(e); 82 | } 83 | 84 | /** 85 | * Repeat the expression zero or more times. 86 | */ 87 | public static Repeat repeat(CharClass c) { 88 | return new Repeat(charClass(c)); 89 | } 90 | 91 | /** 92 | * Repeat the expression zero or more times. 93 | */ 94 | public static Repeat repeat(String e) { 95 | return repeat(string(e)); 96 | } 97 | 98 | /** 99 | * Repeat the expression zero or more times. 
100 | */ 101 | public static Repeat repeat(char e) { 102 | return repeat(character(e)); 103 | } 104 | 105 | /** 106 | * Repeat the expression zero or more times (possessive) 107 | */ 108 | public static RepeatPossessive repeatPossessive(Expression e) { 109 | return new RepeatPossessive(e); 110 | } 111 | 112 | /** 113 | * Repeat the expression zero or more times (possessive) 114 | */ 115 | public static RepeatPossessive repeatPossessive(CharClass c) { 116 | return new RepeatPossessive(charClass(c)); 117 | } 118 | 119 | /** 120 | * Repeat the expression zero or more times (possessive) 121 | */ 122 | public static RepeatPossessive repeatPossessive(String e) { 123 | return repeatPossessive(string(e)); 124 | } 125 | 126 | /** 127 | * Repeat the expression zero or more times (possessive) 128 | */ 129 | public static RepeatPossessive repeatPossessive(char e) { 130 | return repeatPossessive(character(e)); 131 | } 132 | 133 | /** 134 | * Repeat the expression zero or more times (reluctant) 135 | */ 136 | public static RepeatReluctant repeatReluctant(Expression e) { 137 | return new RepeatReluctant(e); 138 | } 139 | 140 | /** 141 | * Repeat the expression zero or more times (reluctant) 142 | */ 143 | public static RepeatReluctant repeatReluctant(CharClass c) { 144 | return new RepeatReluctant(charClass(c)); 145 | } 146 | 147 | /** 148 | * Repeat the expression zero or more times (reluctant) 149 | */ 150 | public static RepeatReluctant repeatReluctant(String e) { 151 | return repeatReluctant(string(e)); 152 | } 153 | 154 | /** 155 | * Repeat the expression zero or more times (reluctant) 156 | */ 157 | public static RepeatReluctant repeatReluctant(char e) { 158 | return repeatReluctant(character(e)); 159 | } 160 | 161 | /** 162 | * Repeat the expression the given number of times 163 | */ 164 | public static Repeat repeat(Expression e, int times) { 165 | return new Repeat(e, times); 166 | } 167 | 168 | /** 169 | * Repeat the expression the given number of times 170 | */ 171 | 
public static Repeat repeat(CharClass c, int times) { 172 | return repeat(charClass(c), times); 173 | } 174 | 175 | /** 176 | * Repeat the expression the given number of times 177 | */ 178 | public static Repeat repeat(String e, int times) { 179 | return repeat(string(e), times); 180 | } 181 | 182 | /** 183 | * Repeat the expression the given number of times 184 | */ 185 | public static Repeat repeat(char e, int times) { 186 | return repeat(character(e), times); 187 | } 188 | 189 | /** 190 | * Repeat the expression a number of times within the given range. 191 | */ 192 | public static Repeat repeat(Expression e, int timesMin, int timesMax) { 193 | return new Repeat(e, timesMin, timesMax); 194 | } 195 | 196 | /** 197 | * Repeat the expression a number of times within the given range. 198 | */ 199 | public static Repeat repeat(String e, int timesMin, int timesMax) { 200 | return repeat(string(e), timesMin, timesMax); 201 | } 202 | 203 | /** 204 | * Repeat the expression a number of times within the given range. 205 | */ 206 | public static Repeat repeat(CharClass c, int timesMin, int timesMax) { 207 | return repeat(charClass(c), timesMin, timesMax); 208 | } 209 | 210 | /** 211 | * Repeat the expression a number of times within the given range. 212 | */ 213 | public static Repeat repeat(char e, int timesMin, int timesMax) { 214 | return repeat(character(e), timesMin, timesMax); 215 | } 216 | 217 | /** 218 | * Repeat the expression a number of times (possessive). 219 | */ 220 | public static Expression repeatPossessive(Expression e, int times) { 221 | return new RepeatPossessive(e, times); 222 | } 223 | 224 | /** 225 | * Repeat the expression a number of times (possessive). 226 | */ 227 | public static Expression repeatPossessive(CharClass c, int times) { 228 | return repeatPossessive(charClass(c), times); 229 | } 230 | 231 | /** 232 | * Repeat the expression a number of times (possessive). 
233 | */ 234 | public static Expression repeatPossessive(String e, int times) { 235 | return repeatPossessive(string(e), times); 236 | } 237 | 238 | /** 239 | * Repeat the expression a number of times (possessive). 240 | */ 241 | public static Expression repeatPossessive(char e, int times) { 242 | return repeatPossessive(character(e), times); 243 | } 244 | 245 | /** 246 | * Repeat the expression a number of times within the given range (possessive). 247 | */ 248 | public static Expression repeatPossessive(Expression e, int timesMin, int timesMax) { 249 | return new RepeatPossessive(e, timesMin, timesMax); 250 | } 251 | 252 | /** 253 | * Repeat the expression a number of times within the given range (possessive). 254 | */ 255 | public static Expression repeatPossessive(CharClass c, int timesMin, int timesMax) { 256 | return repeatPossessive(charClass(c), timesMin, timesMax); 257 | } 258 | 259 | /** 260 | * Repeat the expression a number of times within the given range (possessive). 261 | */ 262 | public static Expression repeatPossessive(String e, int timesMin, int timesMax) { 263 | return repeatPossessive(string(e), timesMin, timesMax); 264 | } 265 | 266 | /** 267 | * Repeat the expression a number of times within the given range (possessive). 268 | */ 269 | public static Expression repeatPossessive(char e, int timesMin, int timesMax) { 270 | return repeatPossessive(character(e), timesMin, timesMax); 271 | } 272 | 273 | /** 274 | * Repeat the expression the given number of times (reluctant). 275 | */ 276 | public static Expression repeatReluctant(Expression e, int times) { 277 | return new RepeatReluctant(e, times); 278 | } 279 | 280 | /** 281 | * Repeat the expression the given number of times (reluctant). 282 | */ 283 | public static Expression repeatReluctant(String e, int times) { 284 | return repeatReluctant(string(e), times); 285 | } 286 | 287 | /** 288 | * Repeat the expression the given number of times (reluctant). 
289 | */ 290 | public static Expression repeatReluctant(CharClass c, int times) { 291 | return repeatReluctant(charClass(c), times); 292 | } 293 | 294 | /** 295 | * Repeat the expression the given number of times (reluctant). 296 | */ 297 | public static Expression repeatReluctant(char e, int times) { 298 | return repeatReluctant(character(e), times); 299 | } 300 | 301 | /** 302 | * Repeat the expression a number of times within the given range (reluctant). 303 | */ 304 | public static Expression repeatReluctant(Expression e, int timesMin, int timesMax) { 305 | return new RepeatReluctant(e, timesMin, timesMax); 306 | } 307 | 308 | /** 309 | * Repeat the expression a number of times within the given range (reluctant). 310 | */ 311 | public static Expression repeatReluctant(CharClass c, int timesMin, int timesMax) { 312 | return repeatReluctant(charClass(c), timesMin, timesMax); 313 | } 314 | 315 | /** 316 | * Repeat the expression a number of times within the given range (reluctant). 317 | */ 318 | public static Expression repeatReluctant(String e, int timesMin, int timesMax) { 319 | return repeatReluctant(string(e), timesMin, timesMax); 320 | } 321 | 322 | /** 323 | * Repeat the expression a number of times within the given range (reluctant). 324 | */ 325 | public static Expression repeatReluctant(char e, int timesMin, int timesMax) { 326 | return repeatReluctant(character(e), timesMin, timesMax); 327 | } 328 | 329 | /** 330 | * Repeat the expression one or more times. 331 | */ 332 | public static Repeat1 repeat1(Expression e) { 333 | return new Repeat1(e); 334 | } 335 | 336 | /** 337 | * Repeat the expression one or more times. 338 | */ 339 | public static Repeat1 repeat1(CharClass c) { 340 | return repeat1(charClass(c)); 341 | } 342 | 343 | /** 344 | * Repeat the expression one or more times. 345 | */ 346 | public static Repeat1 repeat1(String e) { 347 | return repeat1(string(e)); 348 | } 349 | 350 | /** 351 | * Repeat the expression one or more times. 
352 | */ 353 | public static Repeat1 repeat1(char e) { 354 | return repeat1(character(e)); 355 | } 356 | 357 | /** 358 | * Repeat the expression one or more times (possessive). 359 | */ 360 | public static Expression repeat1Possessive(Expression e) { 361 | return new Repeat1Possessive(e); 362 | } 363 | 364 | /** 365 | * Repeat the expression one or more times (possessive). 366 | */ 367 | public static Expression repeat1Possessive(String e) { 368 | return repeat1Possessive(string(e)); 369 | } 370 | 371 | /** 372 | * Repeat the expression one or more times (possessive). 373 | */ 374 | public static Expression repeat1Possessive(char e) { 375 | return repeat1Possessive(character(e)); 376 | } 377 | 378 | /** 379 | * Repeat the expression one or more times (possessive). 380 | */ 381 | public static Expression repeat1Possessive(CharClass c) { 382 | return repeat1Possessive(charClass(c)); 383 | } 384 | 385 | /** 386 | * Repeat the expression one or more times (reluctant). 387 | */ 388 | public static Expression repeat1Reluctant(Expression e) { 389 | return new Repeat1Reluctant(e); 390 | } 391 | 392 | /** 393 | * Repeat the expression one or more times (reluctant). 394 | */ 395 | public static Expression repeat1Reluctant(CharClass c) { 396 | return repeat1Reluctant(charClass(c)); 397 | } 398 | 399 | /** 400 | * Repeat the expression one or more times (reluctant). 401 | */ 402 | public static Expression repeat1Reluctant(String e) { 403 | return repeat1Reluctant(string(e)); 404 | } 405 | 406 | /** 407 | * Repeat the expression one or more times (reluctant). 408 | */ 409 | public static Expression repeat1Reluctant(char e) { 410 | return repeat1Reluctant(character(e)); 411 | } 412 | 413 | /** 414 | * Repeat the expression at least the given number of times. 415 | */ 416 | public static Expression repeatAtLeast(Expression expression, int n) { 417 | return new RepeatAtLeast(expression, n); 418 | } 419 | 420 | /** 421 | * Repeat the expression at least the given number of times. 
422 | */ 423 | public static Expression repeatAtLeast(String s, int n) { 424 | return repeatAtLeast(string(s), n); 425 | } 426 | 427 | /** 428 | * Repeat the expression at least the given number of times. 429 | */ 430 | public static Expression repeatAtLeast(char c, int n) { 431 | return repeatAtLeast(character(c), n); 432 | } 433 | 434 | /** 435 | * Repeat the expression at least the given number of times. 436 | */ 437 | public static Expression repeatAtLeast(CharClass c, int n) { 438 | return repeatAtLeast(charClass(c), n); 439 | } 440 | 441 | /** 442 | * Repeat the expression at least the given number of times (reluctant). 443 | */ 444 | public static Expression repeatAtLeastReluctant(Expression expression, int n) { 445 | return new RepeatAtLeastReluctant(expression, n); 446 | } 447 | 448 | /** 449 | * Repeat the expression at least the given number of times (reluctant). 450 | */ 451 | public static Expression repeatAtLeastReluctant(String s, int n) { 452 | return repeatAtLeastReluctant(string(s), n); 453 | } 454 | 455 | /** 456 | * Repeat the expression at least the given number of times (reluctant). 457 | */ 458 | public static Expression repeatAtLeastReluctant(char c, int n) { 459 | return repeatAtLeastReluctant(character(c), n); 460 | } 461 | 462 | /** 463 | * Repeat the expression at least the given number of times (reluctant). 464 | */ 465 | public static Expression repeatAtLeastReluctant(CharClass c, int n) { 466 | return repeatAtLeastReluctant(charClass(c), n); 467 | } 468 | 469 | /** 470 | * Repeat the expression at least the given number of times (possessive). 471 | */ 472 | public static Expression repeatAtLeastPossessive(Expression expression, int n) { 473 | return new RepeatAtLeastPossessive(expression, n); 474 | } 475 | 476 | /** 477 | * Repeat the expression at least the given number of times (possessive). 
478 | */ 479 | public static Expression repeatAtLeastPossessive(String s, int n) { 480 | return repeatAtLeastPossessive(string(s), n); 481 | } 482 | 483 | /** 484 | * Repeat the expression at least the given number of times (possessive). 485 | */ 486 | public static Expression repeatAtLeastPossessive(char c, int n) { 487 | return repeatAtLeastPossessive(character(c), n); 488 | } 489 | 490 | /** 491 | * Repeat the expression at least the given number of times (possessive). 492 | */ 493 | public static Expression repeatAtLeastPossessive(CharClass c, int n) { 494 | return repeatAtLeastPossessive(charClass(c), n); 495 | } 496 | 497 | /** 498 | * Match a sequence of expressions. 499 | */ 500 | public static Expression sequence(Expression... es) { 501 | return new Sequence(es); 502 | } 503 | 504 | /** 505 | * Match a sequence of expressions. 506 | */ 507 | public static Expression sequence(Object... os) { 508 | return sequence(convertStrings(os)); 509 | } 510 | 511 | /** 512 | * Match one of the given expressions. 513 | */ 514 | public static Expression choice(Expression... es) { 515 | return new Choice(es); 516 | } 517 | 518 | /** 519 | * Match one of the given expressions. 520 | */ 521 | public static Expression choice(Object... es) { 522 | return choice(convertStrings(es)); 523 | } 524 | 525 | /** 526 | * Match the given expression zero or one times. 527 | */ 528 | public static Optional optional(Expression e) { 529 | return new Optional(e); 530 | } 531 | 532 | /** 533 | * Match the given expression zero or one times. 534 | */ 535 | public static Optional optional(CharClass c) { 536 | return optional(charClass(c)); 537 | } 538 | 539 | /** 540 | * Match the given expression zero or one times. 541 | */ 542 | public static Optional optional(String e) { 543 | return optional(string(e)); 544 | } 545 | 546 | /** 547 | * Match the given expression zero or one times.
548 | */ 549 | public static Optional optional(char e) { 550 | return optional(character(e)); 551 | } 552 | 553 | /** 554 | * Match the given expression zero or one times (possessive). 555 | */ 556 | public static Expression optionalPossessive(Expression e) { 557 | return new OptionalPossessive(e); 558 | } 559 | 560 | /** 561 | * Match the given expression zero or one times (possessive). 562 | */ 563 | public static Expression optionalPossessive(CharClass c) { 564 | return optionalPossessive(charClass(c)); 565 | } 566 | 567 | /** 568 | * Match the given expression zero or one times (possessive). 569 | */ 570 | public static Expression optionalPossessive(String e) { 571 | return optionalPossessive(string(e)); 572 | } 573 | 574 | /** 575 | * Match the given expression zero or one times (possessive). 576 | */ 577 | public static Expression optionalPossessive(char e) { 578 | return optionalPossessive(character(e)); 579 | } 580 | 581 | /** 582 | * Match the given expression zero or one times (reluctant). 583 | */ 584 | public static Expression optionalReluctant(Expression e) { 585 | return new OptionalReluctant(e); 586 | } 587 | 588 | /** 589 | * Match the given expression zero or one times (reluctant). 590 | */ 591 | public static Expression optionalReluctant(String e) { 592 | return optionalReluctant(string(e)); 593 | } 594 | 595 | /** 596 | * Match the given expression zero or one times (reluctant). 597 | */ 598 | public static Expression optionalReluctant(CharClass c) { 599 | return optionalReluctant(charClass(c)); 600 | } 601 | 602 | /** 603 | * Match the given expression zero or one times (reluctant). 604 | */ 605 | public static Expression optionalReluctant(char e) { 606 | return optionalReluctant(character(e)); 607 | } 608 | 609 | /** 610 | * Match a (possibly empty) sequence of matches of [e], separated by matches of [separator].
611 | */ 612 | public static Expression separatedBy(Expression separator, Expression e) { 613 | return optional(separatedBy1(separator, e)); 614 | } 615 | 616 | /** 617 | * Match a (possibly empty) sequence of matches of [e], separated by matches of [separator]. 618 | */ 619 | public static Expression separatedBy(char separator, Expression e) { 620 | return optional(separatedBy1(character(separator), e)); 621 | } 622 | 623 | /** 624 | * Match a (possibly empty) sequence of matches of [e], separated by matches of [separator]. 625 | */ 626 | public static Expression separatedBy(String separator, Expression e) { 627 | return separatedBy(string(separator), e); 628 | } 629 | 630 | /** 631 | * Match a (possibly empty) sequence of matches of [e], separated by matches of [separator]. 632 | */ 633 | public static Expression separatedBy(Expression separator, String e) { 634 | return separatedBy(separator, string(e)); 635 | } 636 | 637 | /** 638 | * Match a (possibly empty) sequence of matches of [e], separated by matches of [separator]. 639 | */ 640 | public static Expression separatedBy(Expression separator, char e) { 641 | return separatedBy(separator, character(e)); 642 | } 643 | 644 | /** 645 | * Match a (possibly empty) sequence of matches of [e], separated by matches of [separator]. 646 | */ 647 | public static Expression separatedBy(String separator, String e) { 648 | return separatedBy(string(separator), string(e)); 649 | } 650 | 651 | /** 652 | * Match a (possibly empty) sequence of matches of [e], separated by matches of [separator]. 653 | */ 654 | public static Expression separatedBy(char separator, String e) { 655 | return separatedBy(character(separator), string(e)); 656 | } 657 | 658 | /** 659 | * Match a (possibly empty) sequence of matches of [e], separated by matches of [separator]. 
660 | */ 661 | public static Expression separatedBy(String separator, char e) { 662 | return separatedBy(string(separator), character(e)); 663 | } 664 | 665 | /** 666 | * Match a (possibly empty) sequence of matches of [e], separated by matches of [separator]. 667 | */ 668 | public static Expression separatedBy(char separator, char e) { 669 | return separatedBy(character(separator), character(e)); 670 | } 671 | 672 | /** 673 | * Match a (possibly empty) sequence of matches of [e], separated by matches of [separator]. 674 | */ 675 | public static Expression separatedBy(CharClass separator, Expression e) { 676 | return separatedBy(charClass(separator), e); 677 | } 678 | 679 | /** 680 | * Match a (possibly empty) sequence of matches of [e], separated by matches of [separator]. 681 | */ 682 | public static Expression separatedBy(CharClass separator, String e) { 683 | return separatedBy(charClass(separator), string(e)); 684 | } 685 | 686 | /** 687 | * Match a (possibly empty) sequence of matches of [e], separated by matches of [separator]. 688 | */ 689 | public static Expression separatedBy(CharClass separator, char e) { 690 | return separatedBy(charClass(separator), character(e)); 691 | } 692 | 693 | /** 694 | * Match a (possibly empty) sequence of matches of [e], separated by matches of [separator]. 695 | */ 696 | public static Expression separatedBy(CharClass separator, CharClass c) { 697 | return separatedBy(charClass(separator), charClass(c)); 698 | } 699 | 700 | /** 701 | * Match a (possibly empty) sequence of matches of [e], separated by matches of [separator]. 702 | */ 703 | public static Expression separatedBy(Expression separator, CharClass c) { 704 | return separatedBy(separator, charClass(c)); 705 | } 706 | 707 | /** 708 | * Match a (possibly empty) sequence of matches of [e], separated by matches of [separator]. 
709 | */ 710 | public static Expression separatedBy(String separator, CharClass c) { 711 | return separatedBy(separator, charClass(c)); 712 | } 713 | 714 | /** 715 | * Match a (possibly empty) sequence of matches of [e], separated by matches of [separator]. 716 | */ 717 | public static Expression separatedBy(char separator, CharClass c) { 718 | return separatedBy(separator, charClass(c)); 719 | } 720 | 721 | /** 722 | * Match a (possibly empty) sequence of matches of [e], separated by matches of [separator]. 723 | */ 724 | public static Expression separatedBy1(Expression separator, Expression e) { 725 | return sequence(e, repeat(sequence(separator, e))); 726 | } 727 | 728 | /** 729 | * Match a nonempty sequence of matches of [e], separated by matches of [separator]. 730 | */ 731 | public static Expression separatedBy1(String separator, Expression e) { 732 | return separatedBy1(string(separator), e); 733 | } 734 | 735 | /** 736 | * Match a nonempty sequence of matches of [e], separated by matches of [separator]. 737 | */ 738 | public static Expression separatedBy1(Expression separator, String e) { 739 | return separatedBy1(separator, string(e)); 740 | } 741 | 742 | /** 743 | * Match a nonempty sequence of matches of [e], separated by matches of [separator]. 744 | */ 745 | public static Expression separatedBy1(Expression separator, char e) { 746 | return separatedBy1(separator, character(e)); 747 | } 748 | 749 | /** 750 | * Match a nonempty sequence of matches of [e], separated by matches of [separator]. 751 | */ 752 | public static Expression separatedBy1(String separator, String e) { 753 | return separatedBy1(string(separator), string(e)); 754 | } 755 | 756 | /** 757 | * Match a nonempty sequence of matches of [e], separated by matches of [separator]. 
758 | */ 759 | public static Expression separatedBy1(char separator, String e) { 760 | return separatedBy1(character(separator), string(e)); 761 | } 762 | 763 | /** 764 | * Match a nonempty sequence of matches of [e], separated by matches of [separator]. 765 | */ 766 | public static Expression separatedBy1(char separator, Expression e) { 767 | return separatedBy1(character(separator), e); 768 | } 769 | 770 | /** 771 | * Match a nonempty sequence of matches of [e], separated by matches of [separator]. 772 | */ 773 | public static Expression separatedBy1(String separator, char e) { 774 | return separatedBy1(string(separator), character(e)); 775 | } 776 | 777 | /** 778 | * Match a nonempty sequence of matches of [e], separated by matches of [separator]. 779 | */ 780 | public static Expression separatedBy1(char separator, char e) { 781 | return separatedBy1(character(separator), character(e)); 782 | } 783 | 784 | /** 785 | * Match a nonempty sequence of matches of [e], separated by matches of [separator]. 786 | */ 787 | public static Expression separatedBy1(CharClass separator, Expression e) { 788 | return separatedBy1(charClass(separator), e); 789 | } 790 | 791 | /** 792 | * Match a nonempty sequence of matches of [e], separated by matches of [separator]. 793 | */ 794 | public static Expression separatedBy1(CharClass separator, String e) { 795 | return separatedBy1(charClass(separator), string(e)); 796 | } 797 | 798 | /** 799 | * Match a nonempty sequence of matches of [e], separated by matches of [separator]. 800 | */ 801 | public static Expression separatedBy1(CharClass separator, char e) { 802 | return separatedBy1(charClass(separator), e); 803 | } 804 | 805 | /** 806 | * Match a nonempty sequence of matches of [e], separated by matches of [separator]. 
807 | */ 808 | public static Expression separatedBy1(CharClass separator, CharClass e) { 809 | return separatedBy1(charClass(separator), charClass(e)); 810 | } 811 | 812 | /** 813 | * Match a nonempty sequence of matches of [e], separated by matches of [separator]. 814 | */ 815 | public static Expression separatedBy1(char separator, CharClass e) { 816 | return separatedBy1(character(separator), charClass(e)); 817 | } 818 | 819 | /** 820 | * Match a nonempty sequence of matches of [e], separated by matches of [separator]. 821 | */ 822 | public static Expression separatedBy1(String separator, CharClass e) { 823 | return separatedBy1(string(separator), charClass(e)); 824 | } 825 | 826 | /** 827 | * Match a nonempty sequence of matches of [e], separated by matches of [separator]. 828 | */ 829 | public static Expression separatedBy1(Expression separator, CharClass e) { 830 | return separatedBy1(separator, charClass(e)); 831 | } 832 | 833 | /** 834 | * Match a (possibly empty) sequence of matches of [e], separated by matches of [separator] 835 | * (possessive). 836 | */ 837 | public static Expression separatedByPossessive(Expression separator, Expression e) { 838 | return optionalPossessive(separatedBy1Possessive(separator, e)); 839 | } 840 | 841 | /** 842 | * Match a (possibly empty) sequence of matches of [e], separated by matches of [separator] 843 | * (possessive). 844 | */ 845 | public static Expression separatedByPossessive(char separator, Expression e) { 846 | return optionalPossessive(separatedBy1Possessive(character(separator), e)); 847 | } 848 | 849 | /** 850 | * Match a (possibly empty) sequence of matches of [e], separated by matches of [separator] 851 | * (possessive). 
852 | */ 853 | public static Expression separatedByPossessive(CharClass separator, Expression e) { 854 | return optionalPossessive(separatedBy1Possessive(charClass(separator), e)); 855 | } 856 | 857 | /** 858 | * Match a (possibly empty) sequence of matches of [e], separated by matches of [separator] 859 | * (possessive). 860 | */ 861 | public static Expression separatedByPossessive(String separator, Expression e) { 862 | return separatedBy(string(separator), e); 863 | } 864 | 865 | /** 866 | * Match a (possibly empty) sequence of matches of [e], separated by matches of [separator] 867 | * (possessive). 868 | */ 869 | public static Expression separatedByPossessive(Expression separator, String e) { 870 | return separatedBy(separator, string(e)); 871 | } 872 | 873 | /** 874 | * Match a (possibly empty) sequence of matches of [e], separated by matches of [separator] 875 | * (possessive). 876 | */ 877 | public static Expression separatedByPossessive(Expression separator, char e) { 878 | return separatedBy(separator, character(e)); 879 | } 880 | 881 | /** 882 | * Match a (possibly empty) sequence of matches of [e], separated by matches of [separator] 883 | * (possessive). 884 | */ 885 | public static Expression separatedByPossessive(String separator, String e) { 886 | return separatedBy(string(separator), string(e)); 887 | } 888 | 889 | /** 890 | * Match a (possibly empty) sequence of matches of [e], separated by matches of [separator] 891 | * (possessive). 892 | */ 893 | public static Expression separatedByPossessive(char separator, String e) { 894 | return separatedBy(character(separator), string(e)); 895 | } 896 | 897 | /** 898 | * Match a (possibly empty) sequence of matches of [e], separated by matches of [separator] 899 | * (possessive). 
900 | */ 901 | public static Expression separatedByPossessive(String separator, char e) { 902 | return separatedBy(string(separator), character(e)); 903 | } 904 | 905 | /** 906 | * Match a (possibly empty) sequence of matches of [e], separated by matches of [separator] 907 | * (possessive). 908 | */ 909 | public static Expression separatedByPossessive(char separator, char e) { 910 | return separatedBy(character(separator), character(e)); 911 | } 912 | 913 | /** 914 | * Match a (possibly empty) sequence of matches of [e], separated by matches of [separator] 915 | * (possessive). 916 | */ 917 | public static Expression separatedBy1Possessive(Expression separator, Expression e) { 918 | return sequence(e, repeatPossessive(sequence(separator, e))); 919 | } 920 | 921 | /** 922 | * Match a nonempty sequence of matches of [e], separated by matches of [separator] 923 | * (possessive). 924 | */ 925 | public static Expression separatedBy1Possessive(String separator, Expression e) { 926 | return separatedBy1Possessive(string(separator), e); 927 | } 928 | 929 | /** 930 | * Match a nonempty sequence of matches of [e], separated by matches of [separator] 931 | * (possessive). 932 | */ 933 | public static Expression separatedBy1Possessive(char separator, Expression e) { 934 | return separatedBy1Possessive(character(separator), e); 935 | } 936 | 937 | /** 938 | * Match a nonempty sequence of matches of [e], separated by matches of [separator] 939 | * (possessive). 940 | */ 941 | public static Expression separatedBy1Possessive(CharClass separator, Expression e) { 942 | return separatedBy1Possessive(charClass(separator), e); 943 | } 944 | 945 | /** 946 | * Match a nonempty sequence of matches of [e], separated by matches of [separator] 947 | * (possessive). 
948 | */ 949 | public static Expression separatedBy1Possessive(Expression separator, String e) { 950 | return separatedBy1Possessive(separator, string(e)); 951 | } 952 | 953 | /** 954 | * Match a nonempty sequence of matches of [e], separated by matches of [separator] 955 | * (possessive). 956 | */ 957 | public static Expression separatedBy1Possessive(Expression separator, char e) { 958 | return separatedBy1Possessive(separator, character(e)); 959 | } 960 | 961 | /** 962 | * Match a nonempty sequence of matches of [e], separated by matches of [separator] 963 | * (possessive). 964 | */ 965 | public static Expression separatedBy1Possessive(String separator, String e) { 966 | return separatedBy1Possessive(string(separator), string(e)); 967 | } 968 | 969 | /** 970 | * Match a nonempty sequence of matches of [e], separated by matches of [separator] 971 | * (possessive). 972 | */ 973 | public static Expression separatedBy1Possessive(char separator, String e) { 974 | return separatedBy1Possessive(character(separator), string(e)); 975 | } 976 | 977 | /** 978 | * Match a nonempty sequence of matches of [e], separated by matches of [separator] 979 | * (possessive). 980 | */ 981 | public static Expression separatedBy1Possessive(String separator, char e) { 982 | return separatedBy1Possessive(string(separator), character(e)); 983 | } 984 | 985 | /** 986 | * Match a nonempty sequence of matches of [e], separated by matches of [separator] 987 | * (possessive). 988 | */ 989 | public static Expression separatedBy1Possessive(char separator, char e) { 990 | return separatedBy1Possessive(character(separator), character(e)); 991 | } 992 | 993 | /** 994 | * Match a nonempty sequence of word characters. 995 | */ 996 | public static Expression word() { 997 | return repeat1(CharClass.wordChar()); 998 | } 999 | 1000 | /** 1001 | * Match a nonempty sequence of digits. 
1002 | */ 1003 | public static Expression number() { 1004 | return repeat1(CharClass.digit()); 1005 | } 1006 | 1007 | /** 1008 | * Match a sequence of whitespace characters 1009 | */ 1010 | public static Expression whitespace() { 1011 | return repeat(CharClass.whitespaceChar()); 1012 | } 1013 | 1014 | /** 1015 | * Match a nonempty sequence of whitespace characters 1016 | */ 1017 | public static Expression whitespace1() { 1018 | return repeat1(CharClass.whitespaceChar()); 1019 | } 1020 | 1021 | public static Expression atomic(Expression expression) { 1022 | return new Atomic(expression); 1023 | } 1024 | 1025 | public static Expression atomic(Object... os) { 1026 | return atomic(sequence(os)); 1027 | } 1028 | 1029 | public static Expression atomic(String e) { 1030 | return atomic(string(e)); 1031 | } 1032 | 1033 | public static Expression atomic(char e) { 1034 | return atomic(character(e)); 1035 | } 1036 | 1037 | /** 1038 | * Return a capture group for the given expression. 1039 | */ 1040 | public static CaptureGroup capture(Expression expression) { 1041 | return new CaptureGroup(expression); 1042 | } 1043 | 1044 | /** 1045 | * Return a named capture group for the given expression. 1046 | */ 1047 | public static CaptureGroup captureNamed(String name, Expression expression) { 1048 | return new CaptureGroup(expression, name); 1049 | } 1050 | 1051 | /** 1052 | * Create a capture group for the given sequence of expressions. 1053 | */ 1054 | public static CaptureGroup capture(Object... os) { 1055 | return capture(sequence(os)); 1056 | } 1057 | 1058 | /** 1059 | * Create a named capture group for the given sequence of expressions. 1060 | */ 1061 | public static CaptureGroup captureNamed(String name, Object... os) { 1062 | return captureNamed(name, sequence(os)); 1063 | } 1064 | 1065 | /** 1066 | * Create a capture group for the given expression. 
1067 | */ 1068 | public static CaptureGroup capture(String e) { 1069 | return capture(string(e)); 1070 | } 1071 | 1072 | /** 1073 | * Create a named capture group for the given expression. 1074 | */ 1075 | public static CaptureGroup captureNamed(String name, String e) { 1076 | return captureNamed(name, string(e)); 1077 | } 1078 | 1079 | /** 1080 | * Create a capture group for the given expression. 1081 | */ 1082 | public static CaptureGroup capture(CharClass e) { 1083 | return capture(charClass(e)); 1084 | } 1085 | 1086 | /** 1087 | * Create a named capture group for the given expression. 1088 | */ 1089 | public static CaptureGroup captureNamed(String name, CharClass e) { 1090 | return captureNamed(name, charClass(e)); 1091 | } 1092 | 1093 | /** 1094 | * Create a capture group for the given expression. 1095 | */ 1096 | public static CaptureGroup capture(char e) { 1097 | return capture(character(e)); 1098 | } 1099 | 1100 | /** 1101 | * Create a named capture group for the given expression. 1102 | */ 1103 | public static CaptureGroup captureNamed(String name, char e) { 1104 | return captureNamed(name, character(e)); 1105 | } 1106 | 1107 | public static Expression positiveLookbehind(Expression expression) { 1108 | return new PositiveLookbehind(expression); 1109 | } 1110 | 1111 | public static Expression positiveLookbehind(Object... 
os) { 1112 | return positiveLookbehind(sequence(os)); 1113 | } 1114 | 1115 | public static Expression positiveLookbehind(String e) { 1116 | return positiveLookbehind(string(e)); 1117 | } 1118 | 1119 | public static Expression positiveLookbehind(CharClass e) { 1120 | return positiveLookbehind(charClass(e)); 1121 | } 1122 | 1123 | public static Expression positiveLookbehind(char e) { 1124 | return positiveLookbehind(character(e)); 1125 | } 1126 | 1127 | public static Expression negativeLookbehind(Expression expression) { 1128 | return new NegativeLookbehind(expression); 1129 | } 1130 | 1131 | public static Expression negativeLookbehind(Object... os) { 1132 | return negativeLookbehind(sequence(os)); 1133 | } 1134 | 1135 | public static Expression negativeLookbehind(String e) { 1136 | return negativeLookbehind(string(e)); 1137 | } 1138 | 1139 | public static Expression negativeLookbehind(CharClass e) { 1140 | return negativeLookbehind(charClass(e)); 1141 | } 1142 | 1143 | public static Expression negativeLookbehind(char e) { 1144 | return negativeLookbehind(character(e)); 1145 | } 1146 | 1147 | public static Expression positiveLookahead(Expression expression) { 1148 | return new PositiveLookahead(expression); 1149 | } 1150 | 1151 | public static Expression positiveLookahead(Object... os) { 1152 | return positiveLookahead(sequence(os)); 1153 | } 1154 | 1155 | public static Expression positiveLookahead(String e) { 1156 | return positiveLookahead(string(e)); 1157 | } 1158 | 1159 | public static Expression positiveLookahead(CharClass e) { 1160 | return positiveLookahead(charClass(e)); 1161 | } 1162 | 1163 | public static Expression positiveLookahead(char e) { 1164 | return positiveLookahead(character(e)); 1165 | } 1166 | 1167 | public static Expression negativeLookahead(Expression expression) { 1168 | return new NegativeLookahead(expression); 1169 | } 1170 | 1171 | public static Expression negativeLookahead(Object... 
os) { 1172 | return negativeLookahead(sequence(os)); 1173 | } 1174 | 1175 | public static Expression negativeLookahead(String e) { 1176 | return negativeLookahead(string(e)); 1177 | } 1178 | 1179 | public static Expression negativeLookahead(CharClass e) { 1180 | return negativeLookahead(charClass(e)); 1181 | } 1182 | 1183 | public static Expression negativeLookahead(char e) { 1184 | return negativeLookahead(character(e)); 1185 | } 1186 | 1187 | /** 1188 | * Back-reference: re-matches the string matched by a capture group. 1189 | * 1190 | * @param group 1191 | * @return 1192 | */ 1193 | public static Expression backReference(CaptureGroup group) { 1194 | return new BackReference(group); 1195 | } 1196 | 1197 | /** 1198 | * A replacement expression consisting of a sequence of strings and capture groups. 1199 | */ 1200 | public static Replacement replacement(Object... os) { 1201 | Replacement replacement = new Replacement(); 1202 | for (Object o : os) { 1203 | if (o instanceof String) { 1204 | replacement.addPart(new StringReplacementPart((String) o)); 1205 | } else if (o instanceof Character) { 1206 | replacement.addPart(new StringReplacementPart("" + (Character) o)); 1207 | } else if (o instanceof CaptureGroup) { 1208 | replacement.addPart(new CaptureGroupReplacementPart((CaptureGroup) o)); 1209 | } else { 1210 | throw new IllegalArgumentException( 1211 | "A replacement must be a string, character, or a capture group."); 1212 | } 1213 | } 1214 | return replacement; 1215 | } 1216 | 1217 | /** 1218 | * Converts vararg actuals consisting of expressions, strings, characters and character classes 1219 | * into an array of expressions. 
1220 | */ 1221 | private static Expression[] convertStrings(Object[] os) { 1222 | Expression[] es = new Expression[os.length]; 1223 | for (int i = 0; i < os.length; ++i) { 1224 | if (os[i] instanceof Expression) { 1225 | es[i] = (Expression) os[i]; 1226 | } else if (os[i] instanceof String) { 1227 | es[i] = string((String) os[i]); 1228 | } else if (os[i] instanceof CharClass) { 1229 | es[i] = charClass((CharClass) os[i]); 1230 | } else if (os[i] instanceof Character) { 1231 | es[i] = character((Character) os[i]); 1232 | } else { 1233 | throw new IllegalArgumentException( 1234 | "An expression must be an Expression, string, character, or a character class."); 1235 | } 1236 | } 1237 | return es; 1238 | } 1239 | 1240 | } 1241 | -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/Replacement.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder; 2 | 3 | import java.util.LinkedList; 4 | 5 | public class Replacement { 6 | private final LinkedList parts; 7 | private String replacementString = null; 8 | 9 | public Replacement() { 10 | parts = new LinkedList<>(); 11 | } 12 | 13 | public void addPart(ReplacementPart part) { 14 | parts.add(part); 15 | } 16 | 17 | public String toReplacementString(CaptureGroupIndex index) { 18 | if (replacementString != null) { 19 | return replacementString; 20 | } 21 | StringBuilder sb = new StringBuilder(); 22 | for (ReplacementPart part : parts) { 23 | sb.append(part.toReplacementString(index)); 24 | } 25 | replacementString = sb.toString(); 26 | return replacementString; 27 | } 28 | } -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/ReplacementPart.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder; 2 | 3 | @FunctionalInterface 4 
| interface ReplacementPart { 5 | String toReplacementString(CaptureGroupIndex index); 6 | } -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/StringReplacementPart.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder; 2 | 3 | class StringReplacementPart implements ReplacementPart { 4 | private final String string; 5 | 6 | public StringReplacementPart(String string) { 7 | this.string = string; 8 | } 9 | 10 | @Override 11 | public String toReplacementString(CaptureGroupIndex index) { 12 | return string; 13 | } 14 | } -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/charclass/AnyCharacter.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.charclass; 2 | 3 | import com.github.sgreben.regex_builder.CharClass; 4 | import com.github.sgreben.regex_builder.tokens.DOT; 5 | import com.github.sgreben.regex_builder.tokens.TOKEN; 6 | 7 | public class AnyCharacter extends Nullary { 8 | @Override 9 | public CharClass complement() { 10 | return oneOf(""); 11 | } 12 | 13 | @Override 14 | public void compile(java.util.List output) { 15 | output.add(new DOT()); 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/charclass/BeginInput.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.charclass; 2 | 3 | import com.github.sgreben.regex_builder.CharClass; 4 | import com.github.sgreben.regex_builder.tokens.RAW; 5 | import com.github.sgreben.regex_builder.tokens.TOKEN; 6 | 7 | public class BeginInput extends Nullary { 8 | public BeginInput() { 9 | } 10 | 11 | @Override 12 | public CharClass complement() 
{ 13 | return new RawComplement(this); 14 | } 15 | 16 | @Override 17 | public void compile(java.util.List output) { 18 | output.add(new RAW("\\A")); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/charclass/Binary.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.charclass; 2 | 3 | import java.util.Collections; 4 | import java.util.LinkedList; 5 | import java.util.List; 6 | import com.github.sgreben.regex_builder.CharClass; 7 | 8 | abstract class Binary extends CharClassBase { 9 | private final List children; 10 | 11 | public Binary(CharClass leftChild, CharClass rightChild) { 12 | List children = new LinkedList(); 13 | children.add(leftChild); 14 | children.add(rightChild); 15 | this.children = Collections.unmodifiableList(children); 16 | } 17 | 18 | @Override 19 | public Iterable children() { 20 | return children; 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/charclass/CharClassBase.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.charclass; 2 | 3 | import com.github.sgreben.regex_builder.CharClass; 4 | 5 | public abstract class CharClassBase extends CharClass { 6 | @Override 7 | public void accept(CharClassVisitor visitor) { 8 | visitor.visitPre(this); 9 | for (CharClass child : children()) { 10 | child.accept(visitor); 11 | } 12 | visitor.visitPost(this); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/charclass/CharClassVisitor.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.charclass; 2 | import 
com.github.sgreben.regex_builder.CharClass;

/**
 * Visitor over a character class tree, with one hook before and one after a node's children.
 */
public interface CharClassVisitor {
    void visitPre(CharClass node);
    void visitPost(CharClass node);
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/charclass/Complement.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.charclass;

import com.github.sgreben.regex_builder.CharClass;
import com.github.sgreben.regex_builder.tokens.TOKEN;

/**
 * The complement of a child character class.
 */
public class Complement extends Unary {
    final CharClass child;

    public Complement(final CharClass child) {
        super(child);
        this.child = child;
    }

    /**
     * The complement of a complement is the wrapped child itself.
     */
    @Override
    public CharClass complement() {
        return child;
    }

    /**
     * Compile by asking the child for its own complement and compiling that.
     */
    @Override
    public void compile(final java.util.List<TOKEN> output) {
        child.complement().compile(output);
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/charclass/Digit.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.charclass;

import com.github.sgreben.regex_builder.CharClass;
import com.github.sgreben.regex_builder.tokens.RAW;
import com.github.sgreben.regex_builder.tokens.TOKEN;

/**
 * Compiles to the raw token {@code \d}; its complement is NonDigit.
 */
public class Digit extends Nullary {
    public Digit() {
    }

    @Override
    public CharClass complement() {
        return new NonDigit();
    }

    @Override
    public void compile(java.util.List<TOKEN> output) {
        output.add(new RAW("\\d"));
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/charclass/EndInput.java:
--------------------------------------------------------------------------------
package
com.github.sgreben.regex_builder.charclass;

import com.github.sgreben.regex_builder.CharClass;
import com.github.sgreben.regex_builder.tokens.RAW;
import com.github.sgreben.regex_builder.tokens.TOKEN;

/**
 * Compiles to the raw token {@code \z} (end of input in java.util.regex).
 */
public class EndInput extends Nullary {
    public EndInput() {
    }

    @Override
    public CharClass complement() {
        return new RawComplement(this);
    }

    @Override
    public void compile(java.util.List<TOKEN> output) {
        output.add(new RAW("\\z"));
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/charclass/EndInputBeforeFinalTerminator.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.charclass;

import com.github.sgreben.regex_builder.CharClass;
import com.github.sgreben.regex_builder.tokens.RAW;
import com.github.sgreben.regex_builder.tokens.TOKEN;

/**
 * Compiles to the raw token {@code \Z}: the end of the input but for the final line
 * terminator, if any.
 */
public class EndInputBeforeFinalTerminator extends Nullary {
    public EndInputBeforeFinalTerminator() {
    }

    @Override
    public CharClass complement() {
        return new RawComplement(this);
    }

    @Override
    public void compile(java.util.List<TOKEN> output) {
        // Upper-case \Z; lower-case \z is the plain end of input that EndInput emits.
        output.add(new RAW("\\Z"));
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/charclass/HorizontalWhitespace.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.charclass;

import com.github.sgreben.regex_builder.CharClass;
import com.github.sgreben.regex_builder.tokens.RAW;
import com.github.sgreben.regex_builder.tokens.TOKEN;

/**
 * Compiles to the raw token {@code \h}; its complement is NonHorizontalWhitespace.
 */
public class HorizontalWhitespace extends Nullary {
    public HorizontalWhitespace() {
    }

    @Override
    public CharClass complement() {
        return new NonHorizontalWhitespace();
    }

    @Override
    public void compile(java.util.List<TOKEN> output) {
        output.add(new RAW("\\h"));
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/charclass/Intersection.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.charclass;

import java.util.ArrayList;
import java.util.List;
import com.github.sgreben.regex_builder.CharClass;
import com.github.sgreben.regex_builder.tokens.CHAR_CLASS_INTERSECTION;
import com.github.sgreben.regex_builder.tokens.END_CHAR_CLASS;
import com.github.sgreben.regex_builder.tokens.START_CHAR_CLASS;
import com.github.sgreben.regex_builder.tokens.TOKEN;

/**
 * Intersection of any number of child character classes.
 */
public class Intersection extends Nary {
    public Intersection(CharClass... children) {
        super(children);
    }

    /**
     * De Morgan: the complement of an intersection is the union of the children's complements.
     */
    @Override
    public CharClass complement() {
        final List<CharClass> newChildren = new ArrayList<>();
        for (CharClass child : this.children()) {
            newChildren.add(child.complement());
        }
        return new Union(newChildren.toArray(new CharClass[newChildren.size()]));
    }

    /**
     * Emit one outer character class containing each child in its own nested character class,
     * with an intersection token between consecutive children.
     */
    @Override
    public void compile(java.util.List<TOKEN> output) {
        boolean first = true;
        output.add(new START_CHAR_CLASS());
        for (CharClass child : children()) {
            if (!first) {
                output.add(new CHAR_CLASS_INTERSECTION());
            }
            first = false;
            output.add(new START_CHAR_CLASS());
            child.compile(output);
            output.add(new END_CHAR_CLASS());
        }
        output.add(new END_CHAR_CLASS());
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/charclass/Java.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.charclass;

import java.util.List;
import com.github.sgreben.regex_builder.CharClass;
import com.github.sgreben.regex_builder.tokens.CHAR_CLASS_NAMED;
import com.github.sgreben.regex_builder.tokens.TOKEN;

/**
 * Character class that compiles to a CHAR_CLASS_NAMED token carrying the given name.
 *
 * Created by Sergey on 04.10.2016.
 */
public class Java extends Nullary {
    private final String name;

    public Java(String name) {
        this.name = name;
    }

    @Override
    public CharClass complement() {
        return new RawComplement(this);
    }

    @Override
    public void compile(List<TOKEN> output) {
        output.add(new CHAR_CLASS_NAMED(name));
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/charclass/Nary.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.charclass;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import com.github.sgreben.regex_builder.CharClass;

/**
 * Base for character classes with any number of children, exposed as an unmodifiable list.
 */
abstract class Nary extends CharClassBase {
    private final List<CharClass> children;

    public Nary(final CharClass... childrenArray) {
        // Arrays.asList is a view over the array; clone so that later writes to the caller's
        // array cannot change this node's children.
        this.children = Collections.unmodifiableList(Arrays.asList(childrenArray.clone()));
    }

    @Override
    public Iterable<CharClass> children() {
        return children;
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/charclass/NonDigit.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.charclass;

import com.github.sgreben.regex_builder.CharClass;
import com.github.sgreben.regex_builder.tokens.RAW;
import com.github.sgreben.regex_builder.tokens.TOKEN;

/**
 * Compiles to the raw token {@code \D}; its complement is Digit.
 */
public class NonDigit extends Nullary {
    public NonDigit() {
    }

    @Override
    public CharClass complement() {
        return new Digit();
    }

    @Override
    public void compile(java.util.List<TOKEN> output) {
        output.add(new RAW("\\D"));
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/charclass/NonHorizontalWhitespace.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.charclass;

import com.github.sgreben.regex_builder.CharClass;
import com.github.sgreben.regex_builder.tokens.RAW;
import com.github.sgreben.regex_builder.tokens.TOKEN;

/**
 * Compiles to the raw token {@code \H}; its complement is HorizontalWhitespace.
 */
public class NonHorizontalWhitespace extends Nullary {
    public NonHorizontalWhitespace() {
    }

    @Override
    public CharClass complement() {
        return new HorizontalWhitespace();
    }

    @Override
    public void compile(java.util.List<TOKEN> output) {
        output.add(new RAW("\\H"));
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/charclass/NonVerticalWhitespace.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.charclass;

import com.github.sgreben.regex_builder.CharClass;
import com.github.sgreben.regex_builder.tokens.RAW;
import com.github.sgreben.regex_builder.tokens.TOKEN;

/**
 * Compiles to the raw token {@code \V}; its complement is VerticalWhitespace.
 */
public class NonVerticalWhitespace extends Nullary {
    public NonVerticalWhitespace() {
    }

    @Override
    public CharClass complement() {
        return new VerticalWhitespace();
    }

    @Override
    public void compile(java.util.List<TOKEN> output) {
        output.add(new RAW("\\V"));
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/charclass/NonWhitespace.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.charclass;

import com.github.sgreben.regex_builder.CharClass;
import com.github.sgreben.regex_builder.tokens.RAW;
import com.github.sgreben.regex_builder.tokens.TOKEN;

/**
 * Compiles to the raw token {@code \S}; its complement is Whitespace.
 */
public class NonWhitespace extends Nullary {
    public NonWhitespace() {
    }

    @Override
    public CharClass complement() {
        return new Whitespace();
    }

    @Override
    public void compile(java.util.List<TOKEN> output) {
        output.add(new RAW("\\S"));
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/charclass/NonWordBoundary.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.charclass;

import com.github.sgreben.regex_builder.CharClass;
import com.github.sgreben.regex_builder.tokens.RAW;
import com.github.sgreben.regex_builder.tokens.TOKEN;

/**
 * Compiles to the raw token {@code \B}; its complement is WordBoundary.
 */
public class NonWordBoundary extends Nullary {
    public NonWordBoundary() {
    }

    @Override
    public CharClass complement() {
        return new WordBoundary();
    }

    @Override
    public void compile(java.util.List<TOKEN> output) {
        output.add(new RAW("\\B"));
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/charclass/NonWordCharacter.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.charclass;

import com.github.sgreben.regex_builder.CharClass;
import com.github.sgreben.regex_builder.tokens.RAW;
import com.github.sgreben.regex_builder.tokens.TOKEN;

/**
 * Compiles to the raw token {@code \W}; its complement is WordCharacter.
 */
public class NonWordCharacter extends Nullary {
    public NonWordCharacter() {
    }

    @Override
    public CharClass complement() {
        return new WordCharacter();
    }

    @Override
    public void compile(java.util.List<TOKEN> output) {
        output.add(new RAW("\\W"));
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/charclass/NoneOf.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.charclass;

import com.github.sgreben.regex_builder.CharClass;
import com.github.sgreben.regex_builder.tokens.CARET;
import com.github.sgreben.regex_builder.tokens.END_CHAR_CLASS;
import com.github.sgreben.regex_builder.tokens.RAW;
import com.github.sgreben.regex_builder.tokens.START_CHAR_CLASS;
import com.github.sgreben.regex_builder.tokens.TOKEN;

/**
 * Negated character class over the given characters; its complement is OneOf on the same
 * characters.
 */
public class NoneOf extends Nullary {
    private final String chars;

    public NoneOf(String chars) {
        this.chars = chars;
    }

    @Override
    public CharClass complement() {
        return new OneOf(chars);
    }

    /**
     * Emit start-of-class, a caret, the characters as one RAW token, and end-of-class.
     */
    @Override
    public void compile(java.util.List<TOKEN> output) {
        output.add(new START_CHAR_CLASS());
        output.add(new CARET());
        // NOTE(review): chars is passed through RAW, presumably without escaping; confirm that
        // callers escape ']', '^', '-' and '\' themselves.
        output.add(new RAW(chars));
        output.add(new END_CHAR_CLASS());
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/charclass/Nullary.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.charclass;

import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import com.github.sgreben.regex_builder.CharClass;

/**
 * Base for character classes without children.
 */
abstract class Nullary extends CharClassBase {
    // One shared, unmodifiable empty child list for all nullary nodes.
    private static final List<CharClass> empty =
            Collections.unmodifiableList(new LinkedList<CharClass>());

    public Nullary() {
    }

    @Override
    public Iterable<CharClass> children() {
        return empty;
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/charclass/OneOf.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.charclass;

import com.github.sgreben.regex_builder.CharClass;
import com.github.sgreben.regex_builder.tokens.END_CHAR_CLASS;
import com.github.sgreben.regex_builder.tokens.RAW;
import com.github.sgreben.regex_builder.tokens.START_CHAR_CLASS;
import com.github.sgreben.regex_builder.tokens.TOKEN;

/**
 * Character class over the given characters; its complement is NoneOf on the same characters.
 */
public class OneOf extends Nullary {
    private final String chars;

    public OneOf(String chars) {
        this.chars = chars;
    }

    @Override
    public CharClass complement() {
        return new NoneOf(chars);
    }

    /**
     * Emit start-of-class, the characters as one RAW token, and end-of-class.
     */
    @Override
    public void compile(java.util.List<TOKEN> output) {
        output.add(new START_CHAR_CLASS());
        // NOTE(review): chars is passed through RAW, presumably without escaping; confirm that
        // callers escape ']', '^', '-' and '\' themselves.
        output.add(new RAW(chars));
        output.add(new END_CHAR_CLASS());
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/charclass/Posix.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.charclass;

import com.github.sgreben.regex_builder.CharClass;
import com.github.sgreben.regex_builder.tokens.CHAR_CLASS_NAMED;
import com.github.sgreben.regex_builder.tokens.TOKEN;

import java.util.List;

/** Named character class, compiled to a single {@code CHAR_CLASS_NAMED} token. */
public class Posix extends Nullary {
    private final String name;

    /** @param name the class name handed to {@code CHAR_CLASS_NAMED} */
    public Posix(String name) {
        this.name = name;
    }

    /** @return a {@link RawComplement} wrapping this class */
    @Override
    public CharClass complement() {
        return new RawComplement(this);
    }

    /** Appends one {@code CHAR_CLASS_NAMED(name)} token to {@code output}. */
    @Override
    public void compile(List output) {
        output.add(new CHAR_CLASS_NAMED(name));
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/charclass/Range.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.charclass;

import com.github.sgreben.regex_builder.CharClass;
import com.github.sgreben.regex_builder.tokens.*;

/**
 * Character class made of one or more inclusive ranges, given as consecutive
 * (from, to) pairs: {@code new Range('a', 'z', '0', '9')}.
 */
public class Range extends Nullary {
    private final char[] range;

    /**
     * @param range range bounds as (from, to) pairs
     * @throws IllegalArgumentException if an odd number of bounds is given;
     *         previously this surfaced only later, as an
     *         ArrayIndexOutOfBoundsException in the middle of compile()
     */
    public Range(char... range) {
        if (range.length % 2 != 0) {
            throw new IllegalArgumentException(
                "Range bounds must come in (from, to) pairs, got " + range.length + " characters");
        }
        // Defensive copy: later changes to the caller's array must not alter this class.
        this.range = range.clone();
    }

    /** @return a {@link RangeComplement} over the same bounds */
    @Override
    public CharClass complement() {
        return new RangeComplement(range);
    }

    /**
     * Appends {@code START_CHAR_CLASS}, then {@code RAW(from) DASH RAW(to)} for
     * every pair, then {@code END_CHAR_CLASS}. Bounds are emitted unescaped.
     */
    @Override
    public void compile(java.util.List output) {
        output.add(new START_CHAR_CLASS());
        for (int i = 0; i < range.length; i += 2) {
            output.add(new RAW(String.valueOf(range[i])));
            output.add(new DASH());
            output.add(new RAW(String.valueOf(range[i + 1])));
        }
        output.add(new END_CHAR_CLASS());
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/charclass/RangeComplement.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.charclass;

import com.github.sgreben.regex_builder.CharClass;
import com.github.sgreben.regex_builder.tokens.CARET;
import com.github.sgreben.regex_builder.tokens.DASH;
import com.github.sgreben.regex_builder.tokens.END_CHAR_CLASS;
import com.github.sgreben.regex_builder.tokens.RAW;
import com.github.sgreben.regex_builder.tokens.START_CHAR_CLASS;
import com.github.sgreben.regex_builder.tokens.TOKEN;

/**
 * Negation of {@link Range}: matches any character outside the given
 * (from, to) pairs.
 */
public class RangeComplement extends Nullary {
    private final char[] range;

    /**
     * @param range range bounds as (from, to) pairs
     * @throws IllegalArgumentException if an odd number of bounds is given;
     *         previously this surfaced only later, as an
     *         ArrayIndexOutOfBoundsException in the middle of compile()
     */
    public RangeComplement(char... range) {
        if (range.length % 2 != 0) {
            throw new IllegalArgumentException(
                "Range bounds must come in (from, to) pairs, got " + range.length + " characters");
        }
        // Defensive copy: later changes to the caller's array must not alter this class.
        this.range = range.clone();
    }

    /** @return a {@link Range} over the same bounds */
    @Override
    public CharClass complement() {
        return new Range(range);
    }

    /**
     * Appends {@code START_CHAR_CLASS}, {@code CARET}, then
     * {@code RAW(from) DASH RAW(to)} for every pair, then {@code END_CHAR_CLASS}.
     * Bounds are emitted unescaped.
     */
    @Override
    public void compile(java.util.List output) {
        output.add(new START_CHAR_CLASS());
        output.add(new CARET());
        for (int i = 0; i < range.length; i += 2) {
            output.add(new RAW(String.valueOf(range[i])));
            output.add(new DASH());
            output.add(new RAW(String.valueOf(range[i + 1])));
        }
        output.add(new END_CHAR_CLASS());
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/charclass/RawComplement.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.charclass;

import com.github.sgreben.regex_builder.CharClass;
import com.github.sgreben.regex_builder.tokens.CARET;
import com.github.sgreben.regex_builder.tokens.END_CHAR_CLASS;
import com.github.sgreben.regex_builder.tokens.START_CHAR_CLASS;
import com.github.sgreben.regex_builder.tokens.TOKEN;

/**
 * Generic negation: wraps the compiled form of another character class
 * between {@code START_CHAR_CLASS CARET} and {@code END_CHAR_CLASS}.
 */
class RawComplement extends Unary {
    final CharClass child;

    /** @param child the class being negated; also registered as the only child */
    public RawComplement(final CharClass child) {
        super(child);
        this.child = child;
    }

    /** @return the wrapped class itself (negating twice gives the original) */
    @Override
    public CharClass complement() {
        return child;
    }

    /**
     * Appends {@code START_CHAR_CLASS}, {@code CARET}, the wrapped class's own
     * tokens, and {@code END_CHAR_CLASS}.
     */
    @Override
    public void compile(final java.util.List output) {
        output.add(new START_CHAR_CLASS());
        output.add(new CARET());
        // The only child is the wrapped class, so compile it directly instead of
        // iterating children() with a loop variable that shadowed the field.
        child.compile(output);
        output.add(new END_CHAR_CLASS());
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/charclass/Unary.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.charclass;

import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import com.github.sgreben.regex_builder.CharClass;

/** Base for character classes that wrap exactly one child character class. */
public abstract class Unary extends CharClassBase {
    /** Immutable one-element list holding the wrapped child. */
    private final List<CharClass> children;

    /** @param child the single wrapped character class */
    public Unary(CharClass child) {
        this.children = Collections.singletonList(child);
    }

    /** @return an unmodifiable iterable containing only the wrapped child */
    @Override
    public Iterable<CharClass> children() {
        return children;
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/charclass/Union.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.charclass;

import java.util.ArrayList;
import java.util.List;
import com.github.sgreben.regex_builder.CharClass;
import com.github.sgreben.regex_builder.tokens.END_CHAR_CLASS;
import com.github.sgreben.regex_builder.tokens.START_CHAR_CLASS;
import com.github.sgreben.regex_builder.tokens.TOKEN;

/** Union of several character classes: matches what any child matches. */
public class Union extends Nary {
    /** @param children the classes to unite */
    public Union(CharClass... children) {
        super(children);
    }

    /**
     * Builds the complement with De Morgan's law: the complement of a union is
     * the intersection of the children's complements.
     *
     * @return an {@link Intersection} of each child's {@code complement()}
     */
    @Override
    public CharClass complement() {
        // Typed list so toArray yields CharClass[] rather than Object[].
        final List<CharClass> complements = new ArrayList<>();
        for (CharClass child : this.children()) {
            complements.add(child.complement());
        }
        return new Intersection(complements.toArray(new CharClass[0]));
    }

    /**
     * Appends {@code START_CHAR_CLASS}, every child's tokens in order, and
     * {@code END_CHAR_CLASS}.
     */
    @Override
    public void compile(java.util.List output) {
        output.add(new START_CHAR_CLASS());
        for (CharClass child : children()) {
            child.compile(output);
        }
        output.add(new END_CHAR_CLASS());
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/charclass/VerticalWhitespace.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.charclass;

import com.github.sgreben.regex_builder.CharClass;
import com.github.sgreben.regex_builder.tokens.RAW;
import com.github.sgreben.regex_builder.tokens.TOKEN;

/**
 * Character class that compiles to the raw regex fragment {@code \v}
 * (a vertical whitespace character in {@code java.util.regex} syntax).
 */
public class VerticalWhitespace extends Nullary {
    /** @return the opposite class, a new {@link NonVerticalWhitespace} */
    @Override
    public CharClass complement() {
        return new NonVerticalWhitespace();
    }

    /** Appends one {@code RAW} token holding {@code \v} to {@code output}. */
    @Override
    public void compile(java.util.List output) {
        output.add(new RAW("\\v"));
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/charclass/Whitespace.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.charclass;

import com.github.sgreben.regex_builder.CharClass;
import com.github.sgreben.regex_builder.tokens.RAW;
import com.github.sgreben.regex_builder.tokens.TOKEN;

/**
 * Character class that compiles to the raw regex fragment {@code \s}
 * (a whitespace character in {@code java.util.regex} syntax).
 */
public class Whitespace extends Nullary {
    /** @return the opposite class, a new {@link NonWhitespace} */
    @Override
    public CharClass complement() {
        return new NonWhitespace();
    }

    /** Appends one {@code RAW} token holding {@code \s} to {@code output}. */
    @Override
    public void compile(java.util.List output) {
        output.add(new RAW("\\s"));
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/charclass/WordBoundary.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.charclass;

import com.github.sgreben.regex_builder.CharClass;
import com.github.sgreben.regex_builder.tokens.RAW;
import com.github.sgreben.regex_builder.tokens.TOKEN;

/**
 * Compiles to the raw regex fragment {@code \b}
 * (a word boundary in {@code java.util.regex} syntax).
 */
public class WordBoundary extends Nullary {
    /** @return the opposite matcher, a new {@link NonWordBoundary} */
    @Override
    public CharClass complement() {
        return new NonWordBoundary();
    }

    /** Appends one {@code RAW} token holding {@code \b} to {@code output}. */
    @Override
    public void compile(java.util.List output) {
        output.add(new RAW("\\b"));
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/charclass/WordCharacter.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.charclass;

import com.github.sgreben.regex_builder.CharClass;
import com.github.sgreben.regex_builder.tokens.RAW;
import com.github.sgreben.regex_builder.tokens.TOKEN;

/**
 * Character class that compiles to the raw regex fragment {@code \w}
 * (a word character in {@code java.util.regex} syntax).
 */
public class WordCharacter extends Nullary {
    /** @return the opposite class, a new {@link NonWordCharacter} */
    @Override
    public CharClass complement() {
        return new NonWordCharacter();
    }

    /** Appends one {@code RAW} token holding {@code \w} to {@code output}. */
    @Override
    public void compile(java.util.List output) {
        output.add(new RAW("\\w"));
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/compiler/CaptureGroupVisitor.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.compiler;
import com.github.sgreben.regex_builder.CaptureGroup;
import com.github.sgreben.regex_builder.CaptureGroupIndex;
import com.github.sgreben.regex_builder.expression.ExpressionVisitor;
import com.github.sgreben.regex_builder.Expression;

/**
 * Expression visitor that numbers capture groups in pre-order, starting at 1,
 * and records the numbering in a {@link CaptureGroupIndex}.
 */
class CaptureGroupVisitor implements ExpressionVisitor {
    private final CaptureGroupIndex groupIndex;
    /** Index that the next encountered capture group will receive. */
    private int nextGroupIndex;

    public CaptureGroupVisitor() {
        this.groupIndex = new CaptureGroupIndex();
        this.nextGroupIndex = 1;
    }

    /** @return the index built so far (the live instance, not a copy) */
    public CaptureGroupIndex get() {
        return groupIndex;
    }

    /**
     * Assigns the next index to {@code node} when it is a capture group.
     * Only nodes whose class is exactly {@link CaptureGroup} are numbered.
     * NOTE(review): subclasses of CaptureGroup are deliberately left out here
     * to preserve existing behavior - confirm whether any exist.
     */
    @Override
    public void visitPre(Expression node) {
        if (node.getClass() == CaptureGroup.class) {
            groupIndex.put((CaptureGroup) node, nextGroupIndex);
            nextGroupIndex += 1;
        }
    }

    /** Does nothing; numbering happens on the way down. */
    @Override
    public void visitPost(Expression node) {}
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/compiler/Compiler.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.compiler;

import java.util.LinkedList;
import com.github.sgreben.regex_builder.CaptureGroup;
import com.github.sgreben.regex_builder.CaptureGroupIndex;
import com.github.sgreben.regex_builder.Expression;
import com.github.sgreben.regex_builder.Pattern;
import com.github.sgreben.regex_builder.tokens.TOKEN;

/** Turns an {@link Expression} tree into a compiled {@link Pattern}. */
public class Compiler {
    /**
     * Compiles {@code expression} with no regex flags.
     *
     * @param expression root of the expression tree
     * @return the compiled pattern
     */
    public static Pattern compile(Expression expression) {
        return compile(expression, 0);
    }

    /**
     * Compiles {@code expression} with the given flags.
     *
     * The expression is wrapped in an extra capture group, so that group is the
     * first one the visitor numbers (index 1). The tree is then flattened to
     * tokens, the tokens are concatenated into a regex string, and that string
     * is handed to {@code java.util.regex.Pattern.compile}.
     *
     * @param expression root of the expression tree
     * @param flags flags passed unchanged to {@code java.util.regex.Pattern.compile}
     * @return the compiled pattern together with its capture-group index
     */
    public static Pattern compile(Expression expression, final int flags) {
        CaptureGroup entireMatch = new CaptureGroup(expression);

        CaptureGroupVisitor visitor = new CaptureGroupVisitor();
        entireMatch.accept(visitor);
        CaptureGroupIndex index = visitor.get();

        // Typed list: the loop below reads the elements back as TOKEN.
        LinkedList<TOKEN> tokens = new LinkedList<>();
        entireMatch.compile(index, tokens);

        StringBuilder sb = new StringBuilder();
        for (TOKEN op : tokens) {
            sb.append(op.regexString());
        }
        String regexString = sb.toString();
        java.util.regex.Pattern rawPattern = java.util.regex.Pattern.compile(regexString, flags);
        return new Pattern(rawPattern, index);
    }

}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/expression/Atomic.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.expression;

import com.github.sgreben.regex_builder.CaptureGroupIndex;
import com.github.sgreben.regex_builder.Expression;
import com.github.sgreben.regex_builder.tokens.END_GROUP;
import com.github.sgreben.regex_builder.tokens.START_GROUP_ATOMIC_NON_CAPTURING;
import com.github.sgreben.regex_builder.tokens.TOKEN;

/** Wraps its child in an atomic non-capturing group. */
public class Atomic extends Unary {
    /** @param child the expression placed inside the group */
    public Atomic(Expression child) {
        super(child);
    }

    /**
     * Appends {@code START_GROUP_ATOMIC_NON_CAPTURING}, the tokens of every
     * child, and {@code END_GROUP}.
     */
    @Override
    public void compile(CaptureGroupIndex index, java.util.List output) {
        output.add(new START_GROUP_ATOMIC_NON_CAPTURING());
        for (Expression child : children()) {
            child.compile(index, output);
        }
        output.add(new END_GROUP());
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/expression/BackReference.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.expression;

import com.github.sgreben.regex_builder.CaptureGroup;
import com.github.sgreben.regex_builder.CaptureGroupIndex;
import com.github.sgreben.regex_builder.tokens.TOKEN;
import com.github.sgreben.regex_builder.tokens.BACK_REFERENCE;

/** Back-reference to a previously defined capture group. */
public class BackReference extends Nullary {
    private final CaptureGroup group;

    /** @param group the capture group being referred to */
    public BackReference(CaptureGroup group) {
        this.group = group;
    }

    /**
     * Looks the group up in {@code index} and appends a {@code BACK_REFERENCE}
     * token built from the result.
     */
    @Override
    public void compile(CaptureGroupIndex index, java.util.List output) {
        output.add(new BACK_REFERENCE(index.get(group)));
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/expression/BeginLine.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.expression;

import com.github.sgreben.regex_builder.CaptureGroupIndex;
import com.github.sgreben.regex_builder.tokens.RAW;
import com.github.sgreben.regex_builder.tokens.TOKEN;

/** Beginning-of-line anchor, compiled to the raw fragment {@code ^}. */
public class BeginLine extends Nullary {
    /** Appends one {@code RAW("^")} token; {@code index} is not used. */
    @Override
    public void compile(CaptureGroupIndex index, java.util.List output) {
        output.add(new RAW("^"));
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/expression/Binary.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.expression;

import com.github.sgreben.regex_builder.Expression;

import java.util.Collections;
import java.util.LinkedList;
import java.util.List;

/** Base for expressions with exactly two children, kept in left-right order. */
abstract class Binary extends ExpressionBase {
    /** Unmodifiable two-element list: left child first, right child second. */
    private final List<Expression> children;

    /**
     * @param leftChild first child
     * @param rightChild second child
     */
    public Binary(Expression leftChild, Expression rightChild) {
        List<Expression> pair = new LinkedList<>();
        pair.add(leftChild);
        pair.add(rightChild);
        this.children = Collections.unmodifiableList(pair);
    }

    /** @return the two children, left first, as an unmodifiable iterable */
    @Override
    public Iterable<Expression> children() {
        return children;
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/expression/CharClassExpression.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.expression;

import com.github.sgreben.regex_builder.CaptureGroupIndex;
import com.github.sgreben.regex_builder.CharClass;
import com.github.sgreben.regex_builder.tokens.TOKEN;

/** Adapter that lets a {@link CharClass} be used where an expression is expected. */
public class CharClassExpression extends Nullary {
    private final CharClass charClass;

    /** @param charClass the character class to wrap */
    public CharClassExpression(CharClass charClass) {
        this.charClass = charClass;
    }

    /** Delegates to the wrapped class's {@code compile}; {@code index} is not used. */
    @Override
    public void compile(CaptureGroupIndex index, java.util.List output) {
        charClass.compile(output);
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/expression/Choice.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.expression;

import com.github.sgreben.regex_builder.CaptureGroupIndex;
import com.github.sgreben.regex_builder.Expression;
import com.github.sgreben.regex_builder.tokens.END_GROUP;
import com.github.sgreben.regex_builder.tokens.PIPE;
import com.github.sgreben.regex_builder.tokens.START_GROUP_NON_CAPTURING;
import com.github.sgreben.regex_builder.tokens.TOKEN;

/** Alternation between several expressions, wrapped in a non-capturing group. */
public class Choice extends Nary {
    /** @param children the alternatives, in order */
    public Choice(Expression... children) {
        super(children);
    }

    /**
     * Appends {@code START_GROUP_NON_CAPTURING}, the alternatives separated by
     * {@code PIPE} tokens, and {@code END_GROUP}.
     */
    @Override
    public void compile(CaptureGroupIndex index, java.util.List output) {
        output.add(new START_GROUP_NON_CAPTURING());
        boolean needsSeparator = false;
        for (Expression alternative : children()) {
            // A PIPE goes between alternatives, never before the first one.
            if (needsSeparator) {
                output.add(new PIPE());
            }
            alternative.compile(index, output);
            needsSeparator = true;
        }
        output.add(new END_GROUP());
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/expression/EndLine.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.expression;

import com.github.sgreben.regex_builder.CaptureGroupIndex;
import com.github.sgreben.regex_builder.tokens.RAW;
import com.github.sgreben.regex_builder.tokens.TOKEN;

/** End-of-line anchor, compiled to the raw fragment {@code $}. */
public class EndLine extends Nullary {
    /** Appends one {@code RAW("$")} token; {@code index} is not used. */
    @Override
    public void compile(CaptureGroupIndex index, java.util.List output) {
        output.add(new RAW("$"));
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/expression/ExpressionBase.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.expression;

import com.github.sgreben.regex_builder.Expression;

/**
 * Shared behavior for expressions: depth-first visiting, plus default
 * {@code possessive()} and {@code reluctant()} that leave the expression as is.
 */
abstract class ExpressionBase implements Expression {
    /**
     * Walks this subtree: {@code visitPre} on this node, then every child in
     * order, then {@code visitPost} on this node.
     */
    @Override
    public void accept(ExpressionVisitor visitor) {
        visitor.visitPre(this);
        for (Expression subExpression : children()) {
            subExpression.accept(visitor);
        }
        visitor.visitPost(this);
    }

    /** @return this expression unchanged (default for nodes without a quantifier mode) */
    @Override
    public Expression possessive() {
        return this;
    }

    /** @return this expression unchanged (default for nodes without a quantifier mode) */
    @Override
    public Expression reluctant() {
        return this;
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/expression/ExpressionVisitor.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.expression;

import com.github.sgreben.regex_builder.Expression;

/** Callbacks invoked while walking an expression tree. */
public interface ExpressionVisitor {
    /** Called on a node before any of its children are visited. */
    void visitPre(Expression node);

    /** Called on a node after all of its children have been visited. */
    void visitPost(Expression node);
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/expression/ExpressionVisitorBase.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.expression;

import com.github.sgreben.regex_builder.Expression;

/** Visitor with empty callbacks; subclasses override only what they need. */
public class ExpressionVisitorBase implements ExpressionVisitor {
    /** Does nothing. */
    @Override
    public void visitPre(Expression node) {
    }

    /** Does nothing. */
    @Override
    public void visitPost(Expression node) {
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/expression/Literal.java:
--------------------------------------------------------------------------------
package com.github.sgreben.regex_builder.expression;

import com.github.sgreben.regex_builder.CaptureGroupIndex;
import com.github.sgreben.regex_builder.tokens.TOKEN;
import com.github.sgreben.regex_builder.tokens.LITERAL;

/** Literal text, compiled to a single {@code LITERAL} token. */
public class Literal extends Nullary {
    private final String literal;

    /** @param literal the text this expression stands for */
    public Literal(String literal) {
        this.literal = literal;
    }

    /** @return the text passed to the constructor */
    public String getLiteral() {
        return literal;
    }

    /** Appends one {@code LITERAL(literal)} token; {@code index} is not used. */
    @Override
    public void compile(CaptureGroupIndex index, java.util.List output) {
        output.add(new LITERAL(literal));
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/github/sgreben/regex_builder/expression/Nary.java:
-------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.expression; 2 | 3 | import com.github.sgreben.regex_builder.Expression; 4 | 5 | import java.util.Arrays; 6 | import java.util.Collections; 7 | import java.util.List; 8 | 9 | abstract class Nary extends ExpressionBase { 10 | private final List children; 11 | 12 | public Nary(final Expression... childrenArray) { 13 | this.children = Collections.unmodifiableList( 14 | Arrays.asList(childrenArray) 15 | ); 16 | } 17 | 18 | @Override 19 | public Iterable children() { 20 | return children; 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/expression/NegativeLookahead.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.expression; 2 | 3 | import com.github.sgreben.regex_builder.CaptureGroupIndex; 4 | import com.github.sgreben.regex_builder.Expression; 5 | import com.github.sgreben.regex_builder.tokens.END_GROUP; 6 | import com.github.sgreben.regex_builder.tokens.START_NEGATIVE_LOOKAHEAD; 7 | import com.github.sgreben.regex_builder.tokens.TOKEN; 8 | 9 | public class NegativeLookahead extends Unary { 10 | public NegativeLookahead(Expression child) { 11 | super(child); 12 | } 13 | 14 | @Override 15 | public void compile(CaptureGroupIndex index, java.util.List output) { 16 | output.add(new START_NEGATIVE_LOOKAHEAD()); 17 | child().compile(index, output); 18 | output.add(new END_GROUP()); 19 | } 20 | } -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/expression/NegativeLookbehind.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.expression; 2 | 3 | import com.github.sgreben.regex_builder.CaptureGroupIndex; 4 | import 
com.github.sgreben.regex_builder.Expression; 5 | import com.github.sgreben.regex_builder.tokens.END_GROUP; 6 | import com.github.sgreben.regex_builder.tokens.TOKEN; 7 | import com.github.sgreben.regex_builder.tokens.START_NEGATIVE_LOOKBEHIND; 8 | 9 | public class NegativeLookbehind extends Unary { 10 | public NegativeLookbehind(Expression child) { 11 | super(child); 12 | } 13 | 14 | @Override 15 | public void compile(CaptureGroupIndex index, java.util.List output) { 16 | output.add(new START_NEGATIVE_LOOKBEHIND()); 17 | child().compile(index, output); 18 | output.add(new END_GROUP()); 19 | } 20 | } -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/expression/Nullary.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.expression; 2 | 3 | import com.github.sgreben.regex_builder.Expression; 4 | 5 | import java.util.Collections; 6 | import java.util.LinkedList; 7 | import java.util.List; 8 | 9 | abstract class Nullary extends ExpressionBase { 10 | private static final List empty = 11 | Collections.unmodifiableList(new LinkedList()); 12 | 13 | public Nullary() { 14 | } 15 | 16 | @Override 17 | public Iterable children() { 18 | return empty; 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/expression/Optional.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.expression; 2 | 3 | import com.github.sgreben.regex_builder.CaptureGroupIndex; 4 | import com.github.sgreben.regex_builder.Expression; 5 | import com.github.sgreben.regex_builder.tokens.END_GROUP; 6 | import com.github.sgreben.regex_builder.tokens.QUESTION; 7 | import com.github.sgreben.regex_builder.tokens.START_GROUP_NON_CAPTURING; 8 | import 
com.github.sgreben.regex_builder.tokens.TOKEN; 9 | 10 | public class Optional extends Unary { 11 | public Optional(Expression child) { 12 | super(child); 13 | } 14 | 15 | @Override 16 | public void compile(CaptureGroupIndex index, java.util.List output) { 17 | output.add(new START_GROUP_NON_CAPTURING()); 18 | for (Expression child : children()) { 19 | child.compile(index, output); 20 | } 21 | output.add(new END_GROUP()); 22 | output.add(new QUESTION()); 23 | } 24 | 25 | @Override 26 | public Expression possessive() { 27 | return new OptionalPossessive(child()); 28 | } 29 | 30 | @Override 31 | public Expression reluctant() { 32 | return new OptionalReluctant(child()); 33 | } 34 | } -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/expression/OptionalPossessive.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.expression; 2 | 3 | import com.github.sgreben.regex_builder.CaptureGroupIndex; 4 | import com.github.sgreben.regex_builder.Expression; 5 | import com.github.sgreben.regex_builder.tokens.PLUS; 6 | import com.github.sgreben.regex_builder.tokens.TOKEN; 7 | 8 | public class OptionalPossessive extends Optional { 9 | public OptionalPossessive(Expression child) { 10 | super(child); 11 | } 12 | 13 | @Override 14 | public void compile(CaptureGroupIndex index, java.util.List output) { 15 | super.compile(index, output); 16 | output.add(new PLUS()); 17 | } 18 | } -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/expression/OptionalReluctant.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.expression; 2 | 3 | import com.github.sgreben.regex_builder.CaptureGroupIndex; 4 | import com.github.sgreben.regex_builder.Expression; 5 | import 
com.github.sgreben.regex_builder.tokens.PLUS; 6 | import com.github.sgreben.regex_builder.tokens.TOKEN; 7 | 8 | public class OptionalReluctant extends Optional { 9 | public OptionalReluctant(Expression child) { 10 | super(child); 11 | } 12 | 13 | @Override 14 | public void compile(CaptureGroupIndex index, java.util.List output) { 15 | super.compile(index, output); 16 | output.add(new PLUS()); 17 | } 18 | } -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/expression/PositiveLookahead.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.expression; 2 | 3 | import com.github.sgreben.regex_builder.CaptureGroupIndex; 4 | import com.github.sgreben.regex_builder.Expression; 5 | import com.github.sgreben.regex_builder.tokens.END_GROUP; 6 | import com.github.sgreben.regex_builder.tokens.START_POSITIVE_LOOKAHEAD; 7 | import com.github.sgreben.regex_builder.tokens.TOKEN; 8 | 9 | public class PositiveLookahead extends Unary { 10 | public PositiveLookahead(Expression child) { 11 | super(child); 12 | } 13 | 14 | @Override 15 | public void compile(CaptureGroupIndex index, java.util.List output) { 16 | output.add(new START_POSITIVE_LOOKAHEAD()); 17 | child().compile(index, output); 18 | output.add(new END_GROUP()); 19 | } 20 | } -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/expression/PositiveLookbehind.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.expression; 2 | 3 | import com.github.sgreben.regex_builder.CaptureGroupIndex; 4 | import com.github.sgreben.regex_builder.Expression; 5 | import com.github.sgreben.regex_builder.tokens.END_GROUP; 6 | import com.github.sgreben.regex_builder.tokens.START_POSITIVE_LOOKBEHIND; 7 | import 
com.github.sgreben.regex_builder.tokens.TOKEN; 8 | 9 | public class PositiveLookbehind extends Unary { 10 | public PositiveLookbehind(Expression child) { 11 | super(child); 12 | } 13 | 14 | @Override 15 | public void compile(CaptureGroupIndex index, java.util.List output) { 16 | output.add(new START_POSITIVE_LOOKBEHIND()); 17 | child().compile(index, output); 18 | output.add(new END_GROUP()); 19 | } 20 | } -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/expression/Raw.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.expression; 2 | 3 | import com.github.sgreben.regex_builder.CaptureGroupIndex; 4 | import com.github.sgreben.regex_builder.tokens.TOKEN; 5 | import com.github.sgreben.regex_builder.tokens.RAW; 6 | 7 | /* Raw regex string (for regex syntax not yet modeled in the builder). 8 | * 9 | */ 10 | public class Raw extends Nullary { 11 | private final String rawClass; 12 | 13 | public Raw(String rawClass) { 14 | this.rawClass = rawClass; 15 | } 16 | 17 | @Override 18 | public void compile(CaptureGroupIndex index, java.util.List output) { 19 | output.add(new RAW(rawClass)); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/expression/Repeat.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.expression; 2 | 3 | import com.github.sgreben.regex_builder.CaptureGroupIndex; 4 | import com.github.sgreben.regex_builder.Expression; 5 | import com.github.sgreben.regex_builder.tokens.*; 6 | 7 | public class Repeat extends Unary { 8 | private final Integer lowerBound; 9 | private final Integer upperBound; 10 | 11 | public Repeat(Expression child) { 12 | super(child); 13 | this.lowerBound = null; 14 | this.upperBound = null; 15 | } 16 | 17 
| public Repeat(Expression child, Integer lowerBound, Integer upperBound) { 18 | super(child); 19 | this.lowerBound = lowerBound; 20 | this.upperBound = upperBound; 21 | } 22 | 23 | public Repeat(Expression child, Integer bound) { 24 | this(child, bound, bound); 25 | } 26 | 27 | @Override 28 | public void compile(CaptureGroupIndex index, java.util.List output) { 29 | output.add(new START_GROUP_NON_CAPTURING()); 30 | for (Expression child : children()) { 31 | child.compile(index, output); 32 | } 33 | output.add(new END_GROUP()); 34 | if (lowerBound != null && upperBound != null && !lowerBound.equals(upperBound)) { 35 | output.add(new BRACES(lowerBound, upperBound)); 36 | } else if (lowerBound != null) { 37 | output.add(new BRACES(lowerBound)); 38 | } else if (upperBound != null) { 39 | output.add(new BRACES(upperBound)); 40 | } else { 41 | output.add(new STAR()); 42 | } 43 | } 44 | 45 | @Override 46 | public Expression possessive() { 47 | return new RepeatPossessive(child(), lowerBound, upperBound); 48 | } 49 | 50 | @Override 51 | public Expression reluctant() { 52 | return new RepeatReluctant(child(), lowerBound, upperBound); 53 | } 54 | } -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/expression/Repeat1.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.expression; 2 | 3 | import com.github.sgreben.regex_builder.CaptureGroupIndex; 4 | import com.github.sgreben.regex_builder.Expression; 5 | import com.github.sgreben.regex_builder.tokens.END_GROUP; 6 | import com.github.sgreben.regex_builder.tokens.PLUS; 7 | import com.github.sgreben.regex_builder.tokens.START_GROUP_NON_CAPTURING; 8 | import com.github.sgreben.regex_builder.tokens.TOKEN; 9 | 10 | public class Repeat1 extends Unary { 11 | public Repeat1(Expression child) { 12 | super(child); 13 | } 14 | 15 | @Override 16 | public void 
compile(CaptureGroupIndex index, java.util.List output) { 17 | output.add(new START_GROUP_NON_CAPTURING()); 18 | for (Expression child : children()) { 19 | child.compile(index, output); 20 | } 21 | output.add(new END_GROUP()); 22 | output.add(new PLUS()); 23 | } 24 | 25 | @Override 26 | public Expression possessive() { 27 | return new Repeat1Possessive(child()); 28 | } 29 | 30 | @Override 31 | public Expression reluctant() { 32 | return new Repeat1Reluctant(child()); 33 | } 34 | } -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/expression/Repeat1Possessive.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.expression; 2 | 3 | import com.github.sgreben.regex_builder.CaptureGroupIndex; 4 | import com.github.sgreben.regex_builder.Expression; 5 | import com.github.sgreben.regex_builder.tokens.PLUS; 6 | import com.github.sgreben.regex_builder.tokens.TOKEN; 7 | 8 | public class Repeat1Possessive extends Repeat1 { 9 | public Repeat1Possessive(Expression child) { 10 | super(child); 11 | } 12 | 13 | @Override 14 | public void compile(CaptureGroupIndex index, java.util.List output) { 15 | super.compile(index, output); 16 | output.add(new PLUS()); 17 | } 18 | } -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/expression/Repeat1Reluctant.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.expression; 2 | 3 | import com.github.sgreben.regex_builder.CaptureGroupIndex; 4 | import com.github.sgreben.regex_builder.tokens.QUESTION; 5 | import com.github.sgreben.regex_builder.Expression; 6 | import com.github.sgreben.regex_builder.tokens.TOKEN; 7 | 8 | public class Repeat1Reluctant extends Repeat1 { 9 | public Repeat1Reluctant(Expression child) { 10 | super(child); 11 
| } 12 | 13 | @Override 14 | public void compile(CaptureGroupIndex index, java.util.List output) { 15 | super.compile(index, output); 16 | output.add(new QUESTION()); 17 | } 18 | } -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/expression/RepeatAtLeast.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.expression; 2 | 3 | import com.github.sgreben.regex_builder.CaptureGroupIndex; 4 | import com.github.sgreben.regex_builder.Expression; 5 | import com.github.sgreben.regex_builder.tokens.BRACES; 6 | import com.github.sgreben.regex_builder.tokens.END_GROUP; 7 | import com.github.sgreben.regex_builder.tokens.START_GROUP_NON_CAPTURING; 8 | import com.github.sgreben.regex_builder.tokens.TOKEN; 9 | 10 | public class RepeatAtLeast extends Unary { 11 | private final Integer lowerBound; 12 | 13 | public RepeatAtLeast(Expression child, Integer lowerBound) { 14 | super(child); 15 | this.lowerBound = lowerBound; 16 | } 17 | 18 | @Override 19 | public void compile(CaptureGroupIndex index, java.util.List output) { 20 | output.add(new START_GROUP_NON_CAPTURING()); 21 | for (Expression child : children()) { 22 | child.compile(index, output); 23 | } 24 | output.add(new END_GROUP()); 25 | output.add(new BRACES(lowerBound, null)); 26 | } 27 | 28 | @Override 29 | public Expression possessive() { 30 | return new RepeatAtLeastPossessive(child(), lowerBound); 31 | } 32 | 33 | @Override 34 | public Expression reluctant() { 35 | return new RepeatAtLeastReluctant(child(), lowerBound); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/expression/RepeatAtLeastPossessive.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.expression; 2 | 3 | import 
com.github.sgreben.regex_builder.CaptureGroupIndex; 4 | import com.github.sgreben.regex_builder.Expression; 5 | import com.github.sgreben.regex_builder.tokens.PLUS; 6 | import com.github.sgreben.regex_builder.tokens.TOKEN; 7 | 8 | public class RepeatAtLeastPossessive extends RepeatAtLeast { 9 | 10 | public RepeatAtLeastPossessive(Expression child, Integer lowerBound) { 11 | super(child, lowerBound); 12 | } 13 | 14 | @Override 15 | public void compile(CaptureGroupIndex index, java.util.List output) { 16 | super.compile(index, output); 17 | output.add(new PLUS()); 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/expression/RepeatAtLeastReluctant.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.expression; 2 | 3 | import com.github.sgreben.regex_builder.CaptureGroupIndex; 4 | import com.github.sgreben.regex_builder.Expression; 5 | import com.github.sgreben.regex_builder.tokens.QUESTION; 6 | import com.github.sgreben.regex_builder.tokens.TOKEN; 7 | 8 | public class RepeatAtLeastReluctant extends RepeatAtLeast { 9 | public RepeatAtLeastReluctant(Expression child, Integer lowerBound) { 10 | super(child, lowerBound); 11 | } 12 | 13 | @Override 14 | public void compile(CaptureGroupIndex index, java.util.List output) { 15 | super.compile(index, output); 16 | output.add(new QUESTION()); 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/expression/RepeatPossessive.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.expression; 2 | 3 | import com.github.sgreben.regex_builder.CaptureGroupIndex; 4 | import com.github.sgreben.regex_builder.Expression; 5 | import com.github.sgreben.regex_builder.tokens.PLUS; 6 | import 
com.github.sgreben.regex_builder.tokens.TOKEN; 7 | 8 | public class RepeatPossessive extends Repeat { 9 | public RepeatPossessive(Expression child, Integer lowerBound, Integer upperBound) { 10 | super(child, lowerBound, upperBound); 11 | } 12 | 13 | public RepeatPossessive(Expression child, Integer bound) { 14 | super(child, bound, bound); 15 | } 16 | 17 | public RepeatPossessive(Expression child) { 18 | super(child); 19 | } 20 | 21 | @Override 22 | public void compile(CaptureGroupIndex index, java.util.List output) { 23 | super.compile(index, output); 24 | output.add(new PLUS()); 25 | } 26 | } -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/expression/RepeatReluctant.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.expression; 2 | 3 | import com.github.sgreben.regex_builder.CaptureGroupIndex; 4 | import com.github.sgreben.regex_builder.Expression; 5 | import com.github.sgreben.regex_builder.tokens.QUESTION; 6 | import com.github.sgreben.regex_builder.tokens.TOKEN; 7 | 8 | public class RepeatReluctant extends Repeat { 9 | public RepeatReluctant(Expression child, Integer lowerBound, Integer upperBound) { 10 | super(child, lowerBound, upperBound); 11 | } 12 | 13 | public RepeatReluctant(Expression child, Integer bound) { 14 | this(child, bound, bound); 15 | } 16 | 17 | public RepeatReluctant(Expression child) { 18 | super(child); 19 | } 20 | 21 | @Override 22 | public void compile(CaptureGroupIndex index, java.util.List output) { 23 | super.compile(index, output); 24 | output.add(new QUESTION()); 25 | } 26 | } -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/expression/Sequence.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.expression; 2 | 3 | 
import com.github.sgreben.regex_builder.CaptureGroupIndex; 4 | import com.github.sgreben.regex_builder.Expression; 5 | import com.github.sgreben.regex_builder.tokens.TOKEN; 6 | 7 | public class Sequence extends Nary { 8 | public Sequence(Expression... children) { 9 | super(children); 10 | } 11 | 12 | @Override 13 | public void compile(CaptureGroupIndex index, java.util.List output) { 14 | for (Expression child : children()) { 15 | child.compile(index, output); 16 | } 17 | } 18 | } -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/expression/Unary.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.expression; 2 | 3 | import com.github.sgreben.regex_builder.Expression; 4 | 5 | import java.util.Collections; 6 | import java.util.LinkedList; 7 | import java.util.List; 8 | 9 | public abstract class Unary extends ExpressionBase { 10 | private final Expression child; 11 | private final List children; 12 | 13 | public Unary(Expression child) { 14 | this.child = child; 15 | List childrenList = new LinkedList<>(); 16 | childrenList.add(child); 17 | this.children = Collections.unmodifiableList(childrenList); 18 | } 19 | 20 | @Override 21 | public Iterable children() { 22 | return children; 23 | } 24 | 25 | public Expression child() { 26 | return child; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/hamcrest/MatchesPattern.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.hamcrest; 2 | 3 | import org.hamcrest.Description; 4 | import org.hamcrest.Matcher; 5 | import org.hamcrest.TypeSafeMatcher; 6 | 7 | import com.github.sgreben.regex_builder.Pattern; 8 | 9 | 10 | 11 | public class MatchesPattern extends TypeSafeMatcher { 12 | private final 
Pattern pattern; 13 | 14 | public MatchesPattern(Pattern pattern) { 15 | this.pattern = pattern; 16 | } 17 | 18 | @Override 19 | protected boolean matchesSafely(String item) { 20 | return pattern.matcher(item).matches(); 21 | } 22 | 23 | @Override 24 | public void describeTo(Description description) { 25 | description.appendText("a string matching the pattern '" + pattern + "'"); 26 | } 27 | 28 | /** 29 | * Creates a matcher of {@link java.lang.String} that matches when the examined string 30 | * exactly matches the given {@link com.github.sgreben.regex_builder.Pattern}. 31 | */ 32 | public static Matcher matchesPattern(Pattern pattern) { 33 | return new MatchesPattern(pattern); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/tokens/BACK_REFERENCE.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.tokens; 2 | 3 | public class BACK_REFERENCE implements TOKEN { 4 | private final int groupIndex; 5 | 6 | public BACK_REFERENCE(int groupIndex) { 7 | this.groupIndex = groupIndex; 8 | } 9 | 10 | public String regexString() { 11 | return "\\"+groupIndex; 12 | } 13 | } -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/tokens/BRACES.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.tokens; 2 | 3 | public class BRACES implements TOKEN { 4 | private final String bracesString; 5 | 6 | public BRACES(Integer... 
numbers) { 7 | if(numbers.length == 0) { 8 | bracesString = ""; 9 | } else if (numbers.length == 1) { 10 | bracesString = "{"+numbers[0]+"}"; 11 | } else if (numbers.length == 2 && numbers[1] == null) { 12 | bracesString = "{"+numbers[0]+",}"; 13 | } else { 14 | StringBuilder sb = new StringBuilder(); 15 | sb.append("{"); 16 | sb.append(numbers[0]); 17 | for(int i = 1; i < numbers.length; ++i) { 18 | sb.append(","); 19 | sb.append(numbers[i]); 20 | } 21 | sb.append("}"); 22 | bracesString = sb.toString(); 23 | } 24 | } 25 | 26 | public String regexString() { 27 | return bracesString; 28 | } 29 | } -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/tokens/CARET.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.tokens; 2 | 3 | public class CARET implements TOKEN { 4 | public String regexString() { 5 | return "^"; 6 | } 7 | } -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/tokens/CHAR_CLASS_INTERSECTION.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.tokens; 2 | 3 | public class CHAR_CLASS_INTERSECTION implements TOKEN { 4 | public String regexString() { 5 | return "&&"; 6 | } 7 | } -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/tokens/CHAR_CLASS_NAMED.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.tokens; 2 | 3 | public class CHAR_CLASS_NAMED implements TOKEN { 4 | private final String regexString; 5 | 6 | public CHAR_CLASS_NAMED(String name) { 7 | this.regexString = "\\p{"+name+"}"; 8 | } 9 | 10 | @Override 11 | public String regexString() { 12 | return regexString; 13 | } 14 | } 15 
| -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/tokens/DASH.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.tokens; 2 | 3 | public class DASH implements TOKEN { 4 | public String regexString() { 5 | return "-"; 6 | } 7 | } -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/tokens/DOT.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.tokens; 2 | 3 | public class DOT implements TOKEN { 4 | public String regexString() { 5 | return "."; 6 | } 7 | } -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/tokens/END_CHAR_CLASS.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.tokens; 2 | 3 | public class END_CHAR_CLASS implements TOKEN { 4 | public String regexString() { 5 | return "]"; 6 | } 7 | } -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/tokens/END_GROUP.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.tokens; 2 | 3 | public class END_GROUP implements TOKEN { 4 | public String regexString() { 5 | return ")"; 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/tokens/LITERAL.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.tokens; 2 | 3 | import java.util.regex.Pattern; 4 | 5 | public class LITERAL implements TOKEN { 6 | private final String literalRegex; 7 
| 8 | public LITERAL(String literal) { 9 | this.literalRegex = Pattern.quote(literal); 10 | } 11 | 12 | public String regexString() { 13 | return literalRegex; 14 | } 15 | } -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/tokens/PIPE.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.tokens; 2 | 3 | public class PIPE implements TOKEN { 4 | public String regexString() { 5 | return "|"; 6 | } 7 | } -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/tokens/PLUS.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.tokens; 2 | 3 | public class PLUS implements TOKEN { 4 | public String regexString() { 5 | return "+"; 6 | } 7 | } -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/tokens/QUESTION.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.tokens; 2 | 3 | public class QUESTION implements TOKEN { 4 | public String regexString() { 5 | return "?"; 6 | } 7 | } -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/tokens/RAW.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.tokens; 2 | 3 | public class RAW implements TOKEN { 4 | private final String rawRegex; 5 | 6 | public RAW(String rawRegex) { 7 | this.rawRegex = rawRegex; 8 | } 9 | 10 | public String regexString() { 11 | return rawRegex; 12 | } 13 | } -------------------------------------------------------------------------------- 
/src/main/java/com/github/sgreben/regex_builder/tokens/STAR.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.tokens; 2 | 3 | public class STAR implements TOKEN { 4 | public String regexString() { 5 | return "*"; 6 | } 7 | } -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/tokens/START_CHAR_CLASS.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.tokens; 2 | 3 | public class START_CHAR_CLASS implements TOKEN { 4 | public String regexString() { 5 | return "["; 6 | } 7 | } -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/tokens/START_GROUP.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.tokens; 2 | 3 | public class START_GROUP implements TOKEN { 4 | public String regexString() { 5 | return "("; 6 | } 7 | } -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/tokens/START_GROUP_ATOMIC_NON_CAPTURING.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.tokens; 2 | 3 | public class START_GROUP_ATOMIC_NON_CAPTURING implements TOKEN { 4 | public String regexString() { 5 | return "(?>"; 6 | } 7 | } -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/tokens/START_GROUP_NAMED.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.tokens; 2 | 3 | public class START_GROUP_NAMED implements TOKEN { 4 | private final String name; 5 | 6 | public START_GROUP_NAMED(String name) { 7 
| this.name = name; 8 | } 9 | 10 | public String regexString() { 11 | return "(?<" + name + ">"; 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/tokens/START_GROUP_NON_CAPTURING.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.tokens; 2 | 3 | public class START_GROUP_NON_CAPTURING implements TOKEN { 4 | public String regexString() { 5 | return "(?:"; 6 | } 7 | } -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/tokens/START_NEGATIVE_LOOKAHEAD.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.tokens; 2 | 3 | public class START_NEGATIVE_LOOKAHEAD implements TOKEN { 4 | public String regexString() { 5 | return "(?!"; 6 | } 7 | } -------------------------------------------------------------------------------- /src/main/java/com/github/sgreben/regex_builder/tokens/START_NEGATIVE_LOOKBEHIND.java: -------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder.tokens; 2 | 3 | public class START_NEGATIVE_LOOKBEHIND implements TOKEN { 4 | public String regexString() { 5 | return "(?\\Qfoo\\E)(\\Qbar\\E))", p.toString()); 90 | } 91 | 92 | @Test 93 | public void apacheLogLine() { 94 | String logLine = 95 | "127.0.0.1 - - [21/Jul/2014:9:55:27 -0800] \"GET /home.html HTTP/1.1\" 200 2048"; 96 | // "^(\\S+) (\\S+) (\\S+) \\[([\\w:/]+\\s[+\\-]\\d{4})\\] \"(\\S+) (\\S+) (\\S+)\" (\\d{3}) 97 | // (\\d+)"; 98 | 99 | CaptureGroup ip, client, user, dateTime, method, request, protocol, responseCode, size; 100 | FluentRe nonWhitespace = FluentRe.match(nonWhitespaceChar()).repeat1(); 101 | 102 | ip = nonWhitespace.captureNamed("ip"); 103 | client = nonWhitespace.capture(); 104 | user = 
nonWhitespace.capture(); 105 | dateTime = FluentRe.match(union(wordChar(), oneOf(":/"))).repeat1().then(whitespaceChar()) 106 | .then(oneOf("+\\-")).then(FluentRe.match(digit()).repeat(4)).capture(); 107 | method = nonWhitespace.capture(); 108 | request = nonWhitespace.capture(); 109 | protocol = nonWhitespace.capture(); 110 | responseCode = FluentRe.match(digit()).repeat(3).captureNamed("code"); 111 | size = FluentRe.match(digit()).repeat1().capture(); 112 | 113 | Pattern p = FluentRe.match(beginInput()).then(ip).then(' ').then(client).then(' ') 114 | .then(user).then(" [").then(dateTime).then("] \"").then(method).then(' ') 115 | .then(request).then(' ').then(protocol).then("\" ").then(responseCode).then(' ') 116 | .then(size).then(endInput()).compile(); 117 | 118 | assertEquals( 119 | "(\\A(?(?:\\S)+)\\Q \\E((?:\\S)+)\\Q \\E((?:\\S)+)\\Q [\\E((?:[\\w[:/]])+\\s[+\\-](?:\\d){4})\\Q] \"\\E((?:\\S)+)\\Q \\E((?:\\S)+)\\Q \\E((?:\\S)+)\\Q\" \\E(?(?:\\d){3})\\Q \\E((?:\\d)+)\\z)", 120 | p.toString()); 121 | 122 | Matcher m = p.matcher(logLine); 123 | assertTrue(m.matches()); 124 | assertEquals("127.0.0.1", m.group(ip)); 125 | assertEquals("-", m.group(client)); 126 | assertEquals("-", m.group(user)); 127 | assertEquals("21/Jul/2014:9:55:27 -0800", m.group(dateTime)); 128 | assertEquals("GET", m.group(method)); 129 | assertEquals("/home.html", m.group(request)); 130 | assertEquals("HTTP/1.1", m.group(protocol)); 131 | assertEquals("200", m.group(responseCode)); 132 | assertEquals("2048", m.group(size)); 133 | assertEquals("127.0.0.1 - /home.html - 200", 134 | m.replaceAll(replacement(ip, " - ", request, " - ", responseCode))); 135 | assertEquals("127.0.0.1 - /home.html - 200", 136 | m.replaceFirst(replacement(ip, " - ", request, " - ", responseCode))); 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /src/test/java/com/github/sgreben/regex_builder/MatcherTest.java: 
-------------------------------------------------------------------------------- 1 | package com.github.sgreben.regex_builder; 2 | 3 | import static com.github.sgreben.regex_builder.Re.capture; 4 | import static com.github.sgreben.regex_builder.Re.captureNamed; 5 | import static com.github.sgreben.regex_builder.Re.repeat; 6 | import static com.github.sgreben.regex_builder.Re.repeat1; 7 | import static com.github.sgreben.regex_builder.Re.replacement; 8 | import static com.github.sgreben.regex_builder.Re.sequence; 9 | import static java.util.regex.Pattern.CASE_INSENSITIVE; 10 | import static org.junit.Assert.assertEquals; 11 | import static org.junit.Assert.assertFalse; 12 | import static org.junit.Assert.assertTrue; 13 | import org.junit.Test; 14 | 15 | public class MatcherTest { 16 | @Test 17 | public void matchNumber_matchedIsTrue() { 18 | String s = "123"; 19 | Pattern p = Pattern.compile(Re.number()); 20 | Matcher m = p.matcher(s); 21 | assertTrue(m.matches()); 22 | } 23 | 24 | @Test 25 | public void matchNumber_matchedIsTrue_static() { 26 | String s = "123"; 27 | assertTrue(Pattern.matches(Re.number(), s)); 28 | } 29 | 30 | @Test 31 | public void matchNumber_matchedIsFalse_static() { 32 | String s = "abc"; 33 | assertFalse(Pattern.matches(Re.number(), s)); 34 | } 35 | 36 | @Test 37 | public void matchAnyNumberAny_matchedIsTrue() { 38 | String s = "abc 123 def"; 39 | Expression nonNumbers = Re.repeat(CharClass.nonDigit()); 40 | Pattern p = Pattern.compile(Re.sequence(nonNumbers, Re.number(), nonNumbers)); 41 | Matcher m = p.matcher(s); 42 | assertTrue(m.matches()); 43 | } 44 | 45 | @Test 46 | public void matchNumberCaptureNumber_returnsNumber() { 47 | String s = "123"; 48 | CaptureGroup number = Re.capture(Re.number()); 49 | Pattern p = Pattern.compile(number); 50 | Matcher m = p.matcher(s); 51 | m.matches(); 52 | assertEquals("123", m.group(number)); 53 | } 54 | 55 | @Test 56 | public void matchAnyNumberAnyCaptureNumber_returnsNumber() { 57 | String s = "abc 
123 def"; 58 | CaptureGroup number = Re.capture(Re.number()); 59 | Expression nonNumbers = Re.repeat(CharClass.nonDigit()); 60 | Pattern p = Pattern.compile(Re.sequence(nonNumbers, number, nonNumbers)); 61 | Matcher m = p.matcher(s); 62 | m.matches(); 63 | assertEquals("123", m.group(number)); 64 | } 65 | 66 | @Test 67 | public void matchNumbers_separatedBySpaces() { 68 | String s = "123 456 789"; 69 | CaptureGroup number = Re.capture(Re.number()); 70 | Pattern p = Pattern.compile(Re.sequence(number, Re.optional(Re.whitespace()))); 71 | Matcher m = p.matcher(s); 72 | m.find(); 73 | assertEquals("123", m.group(number)); 74 | m.find(); 75 | assertEquals("456", m.group(number)); 76 | m.find(); 77 | assertEquals("789", m.group(number)); 78 | } 79 | 80 | @Test 81 | public void matchNumbers_replaceByParenthesized() { 82 | String s = "123 456 789"; 83 | CaptureGroup number = Re.capture(Re.number()); 84 | Pattern p = Pattern.compile(Re.sequence(number, Re.optional(Re.whitespace()))); 85 | Matcher m = p.matcher(s); 86 | String result = m.replaceAll(Re.replacement("(number ", number, ")")); 87 | assertEquals("(number 123)(number 456)(number 789)", result); 88 | } 89 | 90 | @Test 91 | public void matchWords_replaceByDoubled() { 92 | String s = "abc def ghi"; 93 | CaptureGroup word = Re.capture(Re.word()); 94 | Pattern p = Pattern.compile(word); 95 | Matcher m = p.matcher(s); 96 | String result = m.replaceAll(Re.replacement(word, word)); 97 | assertEquals("abcabc defdef ghighi", result); 98 | } 99 | 100 | @Test 101 | public void matchChar_replaceByDoubled() { 102 | String s = "abc def ghi"; 103 | CaptureGroup b = Re.capture(Re.character('b')); 104 | Pattern p = Pattern.compile(b); 105 | Matcher m = p.matcher(s); 106 | String result = m.replaceAll(Re.replacement("<", b, b, ">")); 107 | assertEquals("ac def ghi", result); 108 | } 109 | 110 | @Test 111 | public void matchChar_literalSyntaxString_replaceByDoubled() { 112 | String s = "abc def ghi"; 113 | CaptureGroup b = 
Re.capture("b"); 114 | Pattern p = Pattern.compile(b); 115 | Matcher m = p.matcher(s); 116 | String result = m.replaceAll(Re.replacement("<", b, b, ">")); 117 | assertEquals("ac def ghi", result); 118 | } 119 | 120 | @Test 121 | public void matchWithBackReference_capturesCorrectly() { 122 | String s = "abc abc def"; 123 | CaptureGroup word = Re.capture(Re.word()); 124 | CaptureGroup sameWordTwice = 125 | Re.capture(Re.sequence(word, Re.whitespace1(), Re.backReference(word))); 126 | Pattern p = Pattern.compile(sameWordTwice); 127 | Matcher m = p.matcher(s); 128 | m.find(); 129 | assertEquals("abc abc", m.group(sameWordTwice)); 130 | assertEquals("abc", m.group(word)); 131 | } 132 | 133 | @Test 134 | public void matchChar_literalSyntaxChar_replaceByDoubled() { 135 | String s = "abc def ghi"; 136 | CaptureGroup b = Re.capture('b'); 137 | Pattern p = Pattern.compile(b); 138 | Matcher m = p.matcher(s); 139 | String result = m.replaceAll(Re.replacement('<', b, b, '>')); 140 | assertEquals("ac def ghi", result); 141 | } 142 | 143 | @Test 144 | public void matchCharTwoGroup_replaceByDoubled() { 145 | String s = "abc def ghi"; 146 | CaptureGroup a = Re.capture(Re.character('a')); 147 | CaptureGroup b = Re.capture(Re.character('b')); 148 | Pattern p = Pattern.compile(Re.sequence(a, b)); 149 | Matcher m = p.matcher(s); 150 | String result = m.replaceAll(Re.replacement("<", b, b, ">")); 151 | assertEquals("c def ghi", result); 152 | } 153 | 154 | @Test 155 | public void matchCharTwoGroup_replaceByDoubled_caseInsensitive() { 156 | String s = "abc ABC def ghi"; 157 | CaptureGroup a = Re.capture(Re.character('a')); 158 | CaptureGroup b = Re.capture(Re.character('b')); 159 | Pattern p = Pattern.compile(Re.sequence(a, b), CASE_INSENSITIVE); 160 | Matcher m = p.matcher(s); 161 | String result = m.replaceAll(Re.replacement("<", b, b, ">")); 162 | assertEquals("c C def ghi", result); 163 | } 164 | 165 | @Test 166 | public void 
matchCharTwoGroup_replaceByDoubled_caseInsensitive_namedGroup() { 167 | String s = "abc ABC def ghi"; 168 | CaptureGroup a = Re.captureNamed("a", Re.character('a')); 169 | CaptureGroup b = Re.capture(Re.character('b')); 170 | Pattern p = Pattern.compile(Re.sequence(a, b), CASE_INSENSITIVE); 171 | Matcher m = p.matcher(s); 172 | String result = m.replaceAll(Re.replacement("<", b, b, ">")); 173 | assertEquals("c C def ghi", result); 174 | } 175 | 176 | @Test 177 | public void matchCharTwoGroup_replaceByDoubled_caseInsensitive_namedGroups() { 178 | String s = "abc ABC def ghi"; 179 | CaptureGroup a = Re.captureNamed("groupA", Re.character('a')); 180 | CaptureGroup b = Re.captureNamed("groupB", Re.character('b')); 181 | Pattern p = Pattern.compile(Re.sequence(a, b), CASE_INSENSITIVE); 182 | Matcher m = p.matcher(s); 183 | String result = m.replaceAll(Re.replacement("<", b, b, ">")); 184 | assertEquals("c C def ghi", result); 185 | } 186 | 187 | @Test 188 | public void matchChars_literalSyntax_replaceByDoubled() { 189 | String s = "abc def ghi"; 190 | CaptureGroup a = Re.capture("a"); 191 | CaptureGroup b = Re.capture("b"); 192 | Pattern p = Pattern.compile(Re.sequence(a, b, "c")); 193 | Matcher m = p.matcher(s); 194 | String result = m.replaceAll(Re.replacement("<", b, b, ">")); 195 | assertEquals(" def ghi", result); 196 | } 197 | 198 | @Test 199 | public void nestedCapture_returnsBoth() { 200 | String s = "There are things. 
Things have properties."; 201 | CaptureGroup word = Re.capture(Re.word()); 202 | CaptureGroup sentence = 203 | Re.capture(Re.sequence(Re.separatedBy(Re.whitespace(), word), Re.character('.'))); 204 | Pattern p = Pattern.compile(Re.sequence(sentence, Re.optional(Re.whitespace()))); 205 | Matcher m = p.matcher(s); 206 | m.find(); 207 | assertEquals("There are things.", m.group(sentence)); 208 | m.find(); 209 | assertEquals("Things have properties.", m.group(sentence)); 210 | } 211 | 212 | @Test 213 | public void hexColorExampleFromReadme() { 214 | Expression hexDigit = Re.charClass(CharClass.union(CharClass.range('a', 'f'), 215 | CharClass.range('A', 'F'), CharClass.digit())); 216 | Expression threeHexDigits = Re.repeat(hexDigit, 3); 217 | CaptureGroup hexValue = Re.capture(threeHexDigits, // #FFF 218 | Re.optional(threeHexDigits) // #FFFFFF 219 | ); 220 | Expression hexColor = Re.sequence('#', hexValue); 221 | Pattern p = Pattern.compile(hexColor); 222 | Matcher m = p.matcher("#0FAFF3 and #1bf"); 223 | m.find(); 224 | assertEquals("0FAFF3", m.group(hexValue)); 225 | m.find(); 226 | assertEquals("1bf", m.group(hexValue)); 227 | } 228 | 229 | @Test 230 | public void hexColorExampleFromReadme_alternativeBuild() { 231 | Expression hexDigit = Re.charClass(CharClass.range('a', 'f', 'A', 'F', '0', '9')); 232 | Expression threeHexDigits = Re.repeat(hexDigit, 3); 233 | CaptureGroup hexValue = Re.capture(Re.sequence(threeHexDigits, // #FFF 234 | Re.optional(threeHexDigits) // #FFFFFF 235 | )); 236 | Expression hexColor = Re.sequence('#', hexValue); 237 | Pattern p = Pattern.compile(hexColor); 238 | Matcher m = p.matcher("#0FAFF3 and #1bf"); 239 | m.find(); 240 | assertEquals("0FAFF3", m.group(hexValue)); 241 | m.find(); 242 | assertEquals("1bf", m.group(hexValue)); 243 | } 244 | 245 | @Test 246 | public void hexColorExampleFromReadme_alternativeBuildUsingBuiltinHexdigit() { 247 | Expression threeHexDigits = Re.repeat(CharClass.hexDigit(), 3); 248 | CaptureGroup hexValue = 
Re.capture(Re.sequence(threeHexDigits, // #FFF 249 | Re.optional(threeHexDigits) // #FFFFFF 250 | )); 251 | Expression hexColor = Re.sequence('#', hexValue); 252 | Pattern p = Pattern.compile(hexColor); 253 | Matcher m = p.matcher("#0FAFF3 and #1bf"); 254 | m.find(); 255 | assertEquals("0FAFF3", m.group(hexValue)); 256 | m.find(); 257 | assertEquals("1bf", m.group(hexValue)); 258 | } 259 | 260 | @Test 261 | public void possessiveQualifierTest() { 262 | Expression xxy = 263 | Re.sequence( 264 | Re.repeatPossessive( 265 | Re.sequence(Re.repeatPossessive('x'), Re.repeatPossessive('x'))), 266 | 'y'); 267 | Pattern p = Pattern.compile(xxy); 268 | Matcher m = p.matcher("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"); 269 | assertEquals(false, m.matches()); 270 | } 271 | 272 | @Test 273 | public void possessiveQualifierTest_positive() { 274 | Expression xxy = Re.sequence( 275 | Re.repeat(Re.sequence(Re.repeat('x').possessive(), Re.repeat('x').possessive())) 276 | .possessive(), 277 | 'y'); 278 | Pattern p = Pattern.compile(xxy); 279 | Matcher m = p.matcher("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy"); 280 | assertEquals(true, m.matches()); 281 | } 282 | 283 | @Test 284 | public void charClassIntersectionTest() { 285 | Expression d = Re.charClass(CharClass.intersection(CharClass.oneOf("abcd"), 286 | CharClass.oneOf("defg"), CharClass.wordChar())); 287 | Pattern p = Pattern.compile(d); 288 | Matcher m1 = p.matcher("a"); 289 | Matcher m2 = p.matcher("d"); 290 | assertFalse(m1.matches()); 291 | assertTrue(m2.matches()); 292 | 293 | } 294 | 295 | @Test 296 | public void positiveLookaheadTest() { 297 | Expression abc = Re.sequence("abc", Re.positiveLookahead("def")); 298 | Pattern p = Pattern.compile(abc); 299 | assertFalse(p.matcher("123abc").find()); 300 | assertTrue(p.matcher("123abcdef").find()); 301 | assertFalse(p.matcher("123abc123def").find()); 302 | assertTrue(p.matcher("abcdef123").find()); 303 | } 304 | 305 | @Test 306 | public 
void positiveLookbehindTest() { 307 | Expression abc = Re.sequence(Re.positiveLookbehind("def"), "abc"); 308 | Pattern p = Pattern.compile(abc); 309 | assertFalse(p.matcher("def123abc").find()); 310 | assertTrue(p.matcher("123defabc").find()); 311 | assertFalse(p.matcher("def123abc123def").find()); 312 | assertTrue(p.matcher("defabc123").find()); 313 | } 314 | 315 | @Test 316 | public void negativeLookbehindTest() { 317 | Expression abc = Re.sequence(Re.negativeLookbehind("def"), "abc"); 318 | Pattern p = Pattern.compile(abc); 319 | assertTrue(p.matcher("def123abc").find()); 320 | assertFalse(p.matcher("123defabc").find()); 321 | assertTrue(p.matcher("def123abc123def").find()); 322 | assertFalse(p.matcher("defabc123").find()); 323 | } 324 | 325 | @Test 326 | public void negativeLookaheadTest() { 327 | Expression abc = Re.sequence("abc", Re.negativeLookahead("def")); 328 | Pattern p = Pattern.compile(abc); 329 | assertTrue(p.matcher("123abc").find()); 330 | assertFalse(p.matcher("123abcdef").find()); 331 | assertTrue(p.matcher("123abc123def").find()); 332 | assertFalse(p.matcher("abcdef123").find()); 333 | } 334 | 335 | @Test 336 | public void apacheLogLine() { 337 | String logLine = 338 | "127.0.0.1 - - [21/Jul/2014:9:55:27 -0800] \"GET /home.html HTTP/1.1\" 200 2048"; 339 | // "^(\\S+) (\\S+) (\\S+) \\[([\\w:/]+\\s[+\\-]\\d{4})\\] \"(\\S+) (\\S+) (\\S+)\" (\\d{3}) 340 | // (\\d+)"; 341 | 342 | CaptureGroup ip, client, user, dateTime, method, request, protocol, responseCode, size; 343 | Expression nonWhitespace = repeat1(CharClass.nonWhitespaceChar()); 344 | 345 | ip = captureNamed("ip", nonWhitespace); 346 | client = capture(nonWhitespace); 347 | user = capture(nonWhitespace); 348 | dateTime = capture(sequence(repeat1(CharClass.union(CharClass.wordChar(), ':', '/')), // 21/Jul/2014:9:55:27 349 | CharClass.whitespaceChar(), CharClass.oneOf("+\\-"), // - 350 | repeat(CharClass.digit(), 4) // 0800 351 | )); 352 | method = capture(nonWhitespace); 353 | request = 
capture(nonWhitespace); 354 | protocol = capture(nonWhitespace); 355 | responseCode = captureNamed("code", repeat(CharClass.digit(), 3)); 356 | size = capture(repeat1(CharClass.digit())); 357 | 358 | Pattern p = Pattern.compile(sequence(CharClass.beginInput(), ip, ' ', client, ' ', user, 359 | " [", dateTime, "] \"", method, ' ', request, ' ', protocol, "\" ", responseCode, 360 | ' ', size, CharClass.endInput())); 361 | 362 | Matcher m = p.matcher(logLine); 363 | assertTrue(m.matches()); 364 | assertEquals("127.0.0.1", m.group(ip)); 365 | assertEquals("-", m.group(client)); 366 | assertEquals("-", m.group(user)); 367 | assertEquals("21/Jul/2014:9:55:27 -0800", m.group(dateTime)); 368 | assertEquals("GET", m.group(method)); 369 | assertEquals("/home.html", m.group(request)); 370 | assertEquals("HTTP/1.1", m.group(protocol)); 371 | assertEquals("200", m.group(responseCode)); 372 | assertEquals("2048", m.group(size)); 373 | assertEquals("127.0.0.1 - /home.html - 200", 374 | m.replaceAll(replacement(ip, " - ", request, " - ", responseCode))); 375 | assertEquals("127.0.0.1 - /home.html - 200", 376 | m.replaceFirst(replacement(ip, " - ", request, " - ", responseCode))); 377 | } 378 | } 379 | --------------------------------------------------------------------------------