├── .gitignore ├── LICENSE ├── bin ├── os-name.sh └── os-arch.sh ├── src ├── main │ └── java │ │ └── com │ │ └── logentries │ │ └── re2 │ │ ├── Encoding.java │ │ ├── RegExprException.java │ │ ├── entity │ │ ├── CaptureGroup.java │ │ └── NamedGroup.java │ │ ├── LibraryLoader.java │ │ ├── Options.h │ │ ├── UTF8CharOffset.java │ │ ├── RE2String.java │ │ ├── RE2.h │ │ ├── EmbeddedLibraryTools.java │ │ ├── RE2Matcher.java │ │ ├── Options.java │ │ ├── RE2.java │ │ ├── op.h │ │ └── RE2.cpp └── test │ └── java │ └── com │ └── logentries │ └── re2_test │ ├── GenString.java │ ├── TestExceptions.java │ ├── Main.java │ ├── TestUtf8CharOffset.java │ ├── TestThreads.java │ ├── GenRegExpr.java │ ├── TestRandomExpr.java │ └── TestMatcherFind.java ├── Makefile ├── pom.xml └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | *.re2* 2 | *.iml 3 | obj/ 4 | re2* 5 | target/ 6 | src/main/resources 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This library can be distributed and used under the terms of The BSD 3-Clause License. Text of the license can be found here: http://opensource.org/licenses/BSD-3-Clause . 2 | -------------------------------------------------------------------------------- /bin/os-name.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | UNAME=$(uname -o) 4 | 5 | if [[ "$UNAME" == *Linux* ]]; then 6 | echo 'Linux' 7 | else 8 | echo 'unknown' 9 | fi 10 | -------------------------------------------------------------------------------- /src/main/java/com/logentries/re2/Encoding.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Java Bindings for the RE2 Library 3 | * 4 | * (c) 2012 Daniel Fiala 5 | * 6 | */ 7 | 8 | package com.logentries.re2; 9 | 10 | public enum Encoding { 11 | UTF8, Latin1; 12 | } 13 | -------------------------------------------------------------------------------- /bin/os-arch.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | UNAME=$(uname -i) 4 | 5 | # RHEL seems to use -m flag 6 | if [ "$UNAME" == 'unknown' ]; then 7 | UNAME=$(uname -m) 8 | fi 9 | 10 | if [ "$UNAME" == 'x86_64' ]; then 11 | echo 'amd64' 12 | elif [ "$UNAME" == 'amd64' ]; then 13 | echo 'amd64' 14 | else 15 | echo 'unknown' 16 | fi 17 | -------------------------------------------------------------------------------- /src/main/java/com/logentries/re2/RegExprException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Java Bindings for the RE2 Library 3 | * 4 | * (c) 2012 Daniel Fiala 5 | * 6 | */ 7 | 8 | package com.logentries.re2; 9 | 10 | public class RegExprException extends Exception { 11 | public RegExprException(final String msg) { 12 | super(msg); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /src/main/java/com/logentries/re2/entity/CaptureGroup.java: -------------------------------------------------------------------------------- 1 | package com.logentries.re2.entity; 2 | 3 | /** 4 | * Matching text and the location of that text. 5 | */ 6 | public class CaptureGroup { 7 | public final int start, end; 8 | public final String matchingText; 9 | 10 | public CaptureGroup(final String matchingText, final int start, final int end) { 11 | this.matchingText = matchingText; 12 | this.start = start; 13 | this.end = end; 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /src/main/java/com/logentries/re2/LibraryLoader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Java Bindings for the RE2 Library 3 | * 4 | * (c) 2012 Daniel Fiala 5 | * 6 | */ 7 | 8 | package com.logentries.re2; 9 | 10 | public class LibraryLoader { 11 | static { 12 | if (!EmbeddedLibraryTools.LOADED_RE2) { 13 | System.loadLibrary("re2"); 14 | } 15 | if (!EmbeddedLibraryTools.LOADED_RE2_JAVA) { 16 | System.loadLibrary("re2-java"); 17 | } 18 | } 19 | 20 | protected LibraryLoader() { } 21 | } 22 | -------------------------------------------------------------------------------- /src/main/java/com/logentries/re2/Options.h: -------------------------------------------------------------------------------- 1 | /* DO NOT EDIT THIS FILE - it is machine generated */ 2 | #include 3 | /* Header for class com_logentries_re2_Options */ 4 | 5 | #ifndef _Included_com_logentries_re2_Options 6 | #define _Included_com_logentries_re2_Options 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | /* 11 | * Class: com_logentries_re2_Options 12 | * Method: setDefaults 13 | * Signature: ()V 14 | */ 15 | JNIEXPORT void JNICALL Java_com_logentries_re2_Options_setDefaults 16 | (JNIEnv *, jobject); 17 | 18 | #ifdef __cplusplus 19 | } 20 | #endif 21 | #endif 22 | -------------------------------------------------------------------------------- /src/main/java/com/logentries/re2/entity/NamedGroup.java: -------------------------------------------------------------------------------- 1 | package com.logentries.re2.entity; 2 | 3 | /** 4 | * Name, matching text and the location of that text. 5 | */ 6 | public class NamedGroup { 7 | public final String name; 8 | public final CaptureGroup captureGroup; 9 | 10 | public NamedGroup(final String name, final CaptureGroup captureGroup) { 11 | this.name = name; 12 | this.captureGroup = captureGroup; 13 | } 14 | 15 | public NamedGroup(final String name, final String matchingText, final int start, final int end) { 16 | this(name, new CaptureGroup(matchingText, start, end)); 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /src/test/java/com/logentries/re2_test/GenString.java: -------------------------------------------------------------------------------- 1 | package com.logentries.re2_test; 2 | 3 | import java.util.Collection; 4 | import java.util.List; 5 | import java.util.ArrayList; 6 | import java.util.Random; 7 | 8 | public class GenString { 9 | private List mAlphabet; 10 | private int mMaxLen; 11 | private Random mRand = new Random(); 12 | 13 | public GenString(final List alphabet, final int maxLen) { 14 | mAlphabet = new ArrayList(alphabet); 15 | mMaxLen = maxLen; 16 | } 17 | 18 | public String next() { 19 | final int len = mRand.nextInt(100) == 0 ? mRand.nextInt(mMaxLen) : mRand.nextInt(mMaxLen - 1) + 1; 20 | final int asize = mAlphabet.size(); 21 | String ret = new String(); 22 | for (int i = 0; i < len; ++i) { 23 | ret += mAlphabet.get(mRand.nextInt(asize)); 24 | } 25 | return ret; 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/test/java/com/logentries/re2_test/TestExceptions.java: -------------------------------------------------------------------------------- 1 | package com.logentries.re2_test; 2 | 3 | import com.logentries.re2.RE2; 4 | import com.logentries.re2.Options; 5 | import com.logentries.re2.RegExprException; 6 | 7 | import org.junit.Test; 8 | import static org.junit.Assert.assertEquals; 9 | import static org.junit.Assert.assertTrue; 10 | import static org.junit.Assert.assertFalse; 11 | import static org.junit.Assert.assertNotNull; 12 | import static org.junit.Assert.fail; 13 | 14 | public class TestExceptions { 15 | @Test 16 | public void testCorrect() { 17 | try { 18 | assertNotNull(new RE2("Everything Works")); 19 | } catch (RegExprException e) { 20 | fail(); 21 | } 22 | } 23 | 24 | @Test(expected=RegExprException.class) 25 | public void testWrong() throws RegExprException { 26 | try { 27 | RE2 re2 = new RE2("(Nothing Works", new Options().setLogErrors(false)); 28 | System.err.println("re2 = " + re2); 29 | } catch (RegExprException e) { 30 | System.err.println("Exdeption thrown, msg: " + e.getMessage()); 31 | throw e; 32 | } catch (Throwable e) { 33 | e.printStackTrace(); 34 | } 35 | } 36 | } 37 | 38 | -------------------------------------------------------------------------------- /src/test/java/com/logentries/re2_test/Main.java: -------------------------------------------------------------------------------- 1 | package com.logentries.re2_test; 2 | 3 | import org.junit.runner.JUnitCore; 4 | import org.junit.runner.Result; 5 | import org.junit.runner.notification.Failure; 6 | 7 | public class Main { 8 | private static void testThreads() { 9 | Result result = JUnitCore.runClasses(TestThreads.class); 10 | for (Failure failure : result.getFailures()) { 11 | System.out.println(failure.toString()); 12 | } 13 | } 14 | 15 | private static void testRandomExpr() { 16 | Result result = JUnitCore.runClasses(TestRandomExpr.class); 17 | for (Failure failure : result.getFailures()) { 18 | System.out.println(failure.toString()); 19 | } 20 | } 21 | 22 | public static void main(String[] args) { 23 | testThreads(); 24 | testRandomExpr(); 25 | 26 | /* 27 | System.err.println("Generating random sequences"); 28 | for (int i = 0; i < 30; ++i) { 29 | System.err.println(new GenRegExpr(Arrays.asList("aaa", "b", "ccc"), 3, 12).next()); 30 | } 31 | System.err.println("Generating random strings"); 32 | final GenString gs = new GenString(Arrays.asList("aaa", "b", "ccc"), 12); 33 | for (int i = 0; i < 30; ++i) { 34 | System.err.println(gs.next()); 35 | } 36 | */ 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # Inspired by https://github.com/xerial/snappy-java/blob/develop/Makefile . 3 | # 4 | 5 | OBJ=obj 6 | MVN=mvn 7 | NATIVES-TARGET=src/main/resources/NATIVE/$(shell bin/os-arch.sh)/$(shell bin/os-name.sh) 8 | 9 | all: build 10 | build: $(OBJ)/libre2-java.so class 11 | 12 | .re2.download.stamp: 13 | # hg clone https://re2.googlecode.com/hg re2 14 | wget http://re2.googlecode.com/files/re2-20140304.tgz -O re2.tgz 15 | tar xvf re2.tgz 16 | touch .re2.download.stamp 17 | 18 | .re2.compile.stamp: .re2.download.stamp 19 | cd re2 && make 20 | touch .re2.compile.stamp 21 | 22 | $(OBJ)/RE2.o: .re2.download.stamp $(addprefix src/main/java/com/logentries/re2/, RE2.cpp RE2.h) 23 | mkdir -p $(OBJ) 24 | $(CXX) -O3 -g -fPIC -I$(JAVA_HOME)/include -I$(JAVA_HOME)/include/linux -Ire2 -c src/main/java/com/logentries/re2/RE2.cpp -o $(OBJ)/RE2.o 25 | 26 | $(OBJ)/libre2-java.so: $(OBJ)/RE2.o .re2.compile.stamp 27 | $(CXX) -shared -Wl,-soname,libre2-java.so -o $(OBJ)/libre2-java.so $(OBJ)/RE2.o -Lre2/obj/so -lre2 -lpthread 28 | strip $(OBJ)/* 29 | 30 | class: build-class 31 | 32 | build-class: target/libre2-java-1.0-SNAPSHOT.jar 33 | 34 | target/libre2-java-1.0-SNAPSHOT.jar: add-so 35 | $(MVN) package -Dmaven.test.skip=true 36 | 37 | add-so: .re2.compile.stamp $(OBJ)/libre2-java.so 38 | mkdir -p $(NATIVES-TARGET) 39 | cp $(OBJ)/libre2-java.so re2/obj/so/libre2.so $(NATIVES-TARGET) 40 | 41 | lib: add-so 42 | 43 | clean: 44 | rm -fr re2 45 | rm -f re2.tgz 46 | rm -fr obj 47 | rm -fr target 48 | rm -fr src/main/resources/NATIVE 49 | rm -f .*.stamp 50 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | com.logentries.re2 5 | libre2-java 6 | jar 7 | 1.2.5 8 | libre2-java 9 | http://maven.apache.org 10 | 11 | 12 | 13 | junit 14 | junit 15 | 4.11 16 | test 17 | 18 | 19 | 20 | 21 | 22 | 23 | org.apache.maven.plugins 24 | maven-compiler-plugin 25 | 3.2 26 | 27 | 1.8 28 | 1.8 29 | 30 | 31 | 32 | org.apache.maven.plugins 33 | maven-source-plugin 34 | 2.2.1 35 | 36 | 37 | attach-sources 38 | 39 | jar 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | org.apache.maven.plugins 48 | maven-surefire-plugin 49 | 2.17 50 | 51 | true 52 | 53 | 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /src/main/java/com/logentries/re2/UTF8CharOffset.java: -------------------------------------------------------------------------------- 1 | package com.logentries.re2; 2 | 3 | import java.nio.charset.StandardCharsets; 4 | 5 | public class UTF8CharOffset { 6 | 7 | static float AVG_BYTE_PER_CHAR = StandardCharsets.UTF_8.newEncoder().averageBytesPerChar(); 8 | 9 | private int[] byte2char; 10 | private int byteSize; 11 | private int[] char2byte; 12 | private int charSize; 13 | public UTF8CharOffset(CharSequence input) { 14 | this(input, (int)(input.length() * AVG_BYTE_PER_CHAR)); 15 | } 16 | public UTF8CharOffset(CharSequence input, int utf8Len) { 17 | char2byte = new int[input.length()]; 18 | charSize = input.length(); 19 | byte2char = new int[utf8Len]; 20 | byteSize = 0; 21 | int strPos = 0; 22 | for (int i=0; i byteSize) throw new IndexOutOfBoundsException(""+bytePos); 56 | if (bytePos == byteSize) return charSize; 57 | else return byte2char[bytePos]; 58 | } 59 | 60 | public int fromStringToByte(int charPos) { 61 | if (charPos < 0) throw new IndexOutOfBoundsException(""+charPos); 62 | if (charPos > charSize) throw new IndexOutOfBoundsException(""+charPos); 63 | if (charPos == charSize) return byteSize; 64 | else return char2byte[charPos]; 65 | } 66 | 67 | } 68 | -------------------------------------------------------------------------------- /src/main/java/com/logentries/re2/RE2String.java: -------------------------------------------------------------------------------- 1 | package com.logentries.re2; 2 | 3 | import java.nio.ByteBuffer; 4 | import java.nio.CharBuffer; 5 | import java.nio.charset.CharsetEncoder; 6 | import java.nio.charset.CodingErrorAction; 7 | import java.nio.charset.StandardCharsets; 8 | 9 | public class RE2String implements CharSequence, AutoCloseable { 10 | 11 | private static native long createStringBuffer(final byte[] input); 12 | private static native void releaseStringBuffer(final byte[] input, final long pointer); 13 | 14 | private CharSequence input; 15 | private byte[] utf8CString; 16 | private long utf8StringPointer = 0; 17 | private UTF8CharOffset utf8Offset; 18 | 19 | 20 | public RE2String(CharSequence input) { 21 | this.input = input; 22 | try { 23 | this.utf8CString = createUtf8CString(input); 24 | } catch (Exception e ){ 25 | throw new IllegalArgumentException("Unable to encode input using UTF-8", e); 26 | } 27 | this.utf8StringPointer = createStringBuffer(utf8CString); 28 | this.utf8Offset = new UTF8CharOffset(input); 29 | } 30 | 31 | public int bytePos(int charPosition) { 32 | check(); 33 | return utf8Offset.fromStringToByte(charPosition); 34 | } 35 | public int charPos(int bytePosition) { 36 | check(); 37 | return utf8Offset.fromByteToChar(bytePosition); 38 | } 39 | public boolean isClosed() { 40 | return utf8StringPointer == 0; 41 | } 42 | 43 | /** 44 | * @deprecated 45 | */ 46 | @Deprecated() 47 | long pointer() { 48 | return utf8StringPointer; 49 | } 50 | 51 | 52 | private void check() { 53 | if (utf8StringPointer == 0) 54 | throw new IllegalStateException("Buffer has been already closed!"); 55 | } 56 | 57 | private void free() { 58 | if (utf8StringPointer != 0) { 59 | releaseStringBuffer(utf8CString, utf8StringPointer); 60 | utf8StringPointer = 0; 61 | } 62 | } 63 | @Override 64 | public void close() { 65 | free(); 66 | } 67 | 68 | @Override 69 | protected void finalize() throws Throwable { 70 | free(); 71 | super.finalize(); 72 | } 73 | 74 | 75 | @Override 76 | public int length() { 77 | return input.length(); 78 | } 79 | 80 | @Override 81 | public char charAt(int index) { 82 | return input.charAt(index); 83 | } 84 | 85 | @Override 86 | public CharSequence subSequence(int start, int end) { 87 | return input.subSequence(start, end); 88 | } 89 | 90 | @Override 91 | public String toString() { 92 | return input.toString(); 93 | } 94 | 95 | private byte[] createUtf8CString(CharSequence s) throws Exception { 96 | CharsetEncoder encoder = StandardCharsets.UTF_8.newEncoder() 97 | .onMalformedInput(CodingErrorAction.REPLACE) 98 | .onUnmappableCharacter(CodingErrorAction.REPLACE); 99 | 100 | ByteBuffer bytes = encoder.encode(CharBuffer.wrap(s)); 101 | 102 | if (bytes.limit() == bytes.capacity()) { 103 | ByteBuffer newBuffer = ByteBuffer.allocate(bytes.limit()+1); 104 | System.arraycopy(bytes.array(), 0, newBuffer.array(), 0, bytes.limit()); 105 | bytes = newBuffer; 106 | } else 107 | bytes.limit(bytes.limit()+1); 108 | 109 | bytes.put(bytes.limit()-1, (byte) 0); 110 | return bytes.array(); 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /src/main/java/com/logentries/re2/RE2.h: -------------------------------------------------------------------------------- 1 | /* DO NOT EDIT THIS FILE - it is machine generated */ 2 | #include 3 | /* Header for class com_logentries_re2_RE2 */ 4 | 5 | #ifndef _Included_com_logentries_re2_RE2 6 | #define _Included_com_logentries_re2_RE2 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | /* 11 | * Class: com_logentries_re2_RE2 12 | * Method: compileImpl 13 | * Signature: (Ljava/lang/String;Lcom/logentries/re2/Options;)J 14 | */ 15 | JNIEXPORT jlong JNICALL Java_com_logentries_re2_RE2_compileImpl 16 | (JNIEnv *, jclass, jstring, jobject); 17 | 18 | /* 19 | * Class: com_logentries_re2_RE2 20 | * Method: releaseImpl 21 | * Signature: (J)V 22 | */ 23 | JNIEXPORT void JNICALL Java_com_logentries_re2_RE2_releaseImpl 24 | (JNIEnv *, jclass, jlong); 25 | 26 | /* 27 | * Class: com_logentries_re2_RE2 28 | * Method: fullMatchImpl 29 | * Signature: (Ljava/lang/String;J[Ljava/lang/Object;)Z 30 | */ 31 | JNIEXPORT jboolean JNICALL Java_com_logentries_re2_RE2_fullMatchImpl__Ljava_lang_String_2J_3Ljava_lang_Object_2 32 | (JNIEnv *, jclass, jstring, jlong, jobjectArray); 33 | 34 | /* 35 | * Class: com_logentries_re2_RE2 36 | * Method: partialMatchImpl 37 | * Signature: (Ljava/lang/String;J[Ljava/lang/Object;)Z 38 | */ 39 | JNIEXPORT jboolean JNICALL Java_com_logentries_re2_RE2_partialMatchImpl__Ljava_lang_String_2J_3Ljava_lang_Object_2 40 | (JNIEnv *, jclass, jstring, jlong, jobjectArray); 41 | 42 | /* 43 | * Class: com_logentries_re2_RE2 44 | * Method: fullMatchImpl 45 | * Signature: (Ljava/lang/String;Ljava/lang/String;[Ljava/lang/Object;)Z 46 | */ 47 | JNIEXPORT jboolean JNICALL Java_com_logentries_re2_RE2_fullMatchImpl__Ljava_lang_String_2Ljava_lang_String_2_3Ljava_lang_Object_2 48 | (JNIEnv *, jclass, jstring, jstring, jobjectArray); 49 | 50 | /* 51 | * Class: com_logentries_re2_RE2 52 | * Method: partialMatchImpl 53 | * Signature: (Ljava/lang/String;Ljava/lang/String;[Ljava/lang/Object;)Z 54 | */ 55 | JNIEXPORT jboolean JNICALL Java_com_logentries_re2_RE2_partialMatchImpl__Ljava_lang_String_2Ljava_lang_String_2_3Ljava_lang_Object_2 56 | (JNIEnv *, jclass, jstring, jstring, jobjectArray); 57 | 58 | /* 59 | * Class: com_logentries_re2_RE2 60 | * Method: captureGroupNamesImpl 61 | * Signature: (J[Ljava/lang/Object;)Ljava/util/List; 62 | */ 63 | JNIEXPORT jobject JNICALL Java_com_logentries_re2_RE2_getCaptureGroupNamesImpl 64 | (JNIEnv *, jclass, jlong, jobjectArray); 65 | 66 | /* 67 | * Class: com_logentries_re2_RE2 68 | * Method: numberOfCapturingGroupsImpl 69 | * Signature: (J)I 70 | */ 71 | JNIEXPORT jint JNICALL Java_com_logentries_re2_RE2_numberOfCapturingGroupsImpl 72 | (JNIEnv *, jclass, jlong); 73 | 74 | 75 | /* 76 | * Class: com_logentries_re2_RE2String 77 | * Method: createStringBuffer 78 | * Signature: ([B)J 79 | */ 80 | JNIEXPORT jlong JNICALL Java_com_logentries_re2_RE2String_createStringBuffer 81 | (JNIEnv *, jclass, jbyteArray); 82 | 83 | /* 84 | * Class: com_logentries_re2_RE2String 85 | * Method: releaseStringBuffer 86 | * Signature: ([BJ)V 87 | */ 88 | JNIEXPORT void JNICALL Java_com_logentries_re2_RE2String_releaseStringBuffer 89 | (JNIEnv *, jclass, jbyteArray, jlong); 90 | 91 | 92 | /* 93 | * Class: com_logentries_re2_RE2Matcher 94 | * Method: findImpl 95 | * Signature: (Ljava/lang/Object;JJIII)Z 96 | */ 97 | JNIEXPORT jboolean JNICALL Java_com_logentries_re2_RE2Matcher_findImpl 98 | (JNIEnv *, jclass, jobject, jlong, jlong, jint, jint, jint); 99 | 100 | #ifdef __cplusplus 101 | } 102 | #endif 103 | #endif 104 | -------------------------------------------------------------------------------- /src/main/java/com/logentries/re2/EmbeddedLibraryTools.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Java Bindings for the RE2 Library 3 | * 4 | * (c) 2012 Daniel Fiala 5 | * 6 | */ 7 | 8 | package com.logentries.re2; 9 | 10 | /* 11 | * Inspired by https://github.com/zeromq/jzmq/tree/master/src/org/zeromq . 12 | */ 13 | 14 | import java.io.BufferedOutputStream; 15 | import java.io.File; 16 | import java.io.FileOutputStream; 17 | import java.io.IOException; 18 | import java.io.InputStream; 19 | import java.io.OutputStream; 20 | import java.net.URL; 21 | import java.util.ArrayList; 22 | import java.util.Collection; 23 | import java.util.Enumeration; 24 | import java.util.List; 25 | import java.util.jar.JarEntry; 26 | import java.util.jar.JarFile; 27 | 28 | public class EmbeddedLibraryTools { 29 | public static final boolean VERBOSE = Boolean.parseBoolean(System.getProperty("verbose","false")); 30 | 31 | public static final boolean LOADED_RE2; 32 | public static final boolean LOADED_RE2_JAVA; 33 | 34 | static { 35 | LOADED_RE2 = loadEmbeddedLibrary("libre2"); 36 | LOADED_RE2_JAVA = LOADED_RE2 && loadEmbeddedLibrary("libre2-java"); 37 | } 38 | 39 | public static String getCurrentPlatformIdentifier() { 40 | String osName = System.getProperty("os.name"); 41 | if (osName.toLowerCase().indexOf("windows") > -1) { 42 | osName = "Windows"; 43 | } 44 | return System.getProperty("os.arch") + "/" + osName; 45 | } 46 | 47 | private static boolean loadEmbeddedLibrary(final String name) { 48 | // attempt to locate embedded native library within JAR at following location: 49 | // /NATIVE/${os.arch}/${os.name}/libre2{,-java}.[so|dylib|dll] 50 | String[] allowedExtensions = new String[]{"so", "dylib", "dll",}; 51 | StringBuilder url = new StringBuilder(); 52 | url.append("/NATIVE/"); 53 | url.append(getCurrentPlatformIdentifier()); 54 | url.append('/'); 55 | url.append(name); 56 | url.append('.'); 57 | URL nativeLibraryUrl = null; 58 | //System.out.println(url.toString()); 59 | // loop through extensions, stopping after finding first one 60 | for (String ext : allowedExtensions) { 61 | if (VERBOSE) { 62 | System.err.println("Looking for native library: " + url.toString() + ext); 63 | } 64 | nativeLibraryUrl = RE2.class.getResource(url.toString() + ext); 65 | if (nativeLibraryUrl != null) 66 | break; 67 | } 68 | // 69 | if (nativeLibraryUrl != null) { 70 | // native library found within JAR, extract and load 71 | try { 72 | final File libfile = File.createTempFile(name, ".lib"); 73 | libfile.deleteOnExit(); // just in case 74 | // 75 | final InputStream in = nativeLibraryUrl.openStream(); 76 | final OutputStream out = new BufferedOutputStream(new FileOutputStream(libfile)); 77 | // 78 | int len = 0; 79 | byte[] buffer = new byte[8192]; 80 | while ((len = in.read(buffer)) > -1) 81 | out.write(buffer, 0, len); 82 | out.close(); 83 | in.close(); 84 | System.load(libfile.getAbsolutePath()); 85 | //do not delete the lib file now, in certain environments this 86 | //may lead library loading to fail 87 | if (VERBOSE) System.out.println("Loaded "+nativeLibraryUrl.toString()); 88 | 89 | //libfile.delete(); 90 | return true; 91 | } catch (IOException x) { 92 | if (VERBOSE) x.printStackTrace(); 93 | // mission failed, do nothing 94 | } 95 | } // nativeLibraryUrl exists 96 | return false; 97 | } 98 | 99 | private EmbeddedLibraryTools() { 100 | } 101 | 102 | ; 103 | } 104 | -------------------------------------------------------------------------------- /src/test/java/com/logentries/re2_test/TestUtf8CharOffset.java: -------------------------------------------------------------------------------- 1 | package com.logentries.re2_test; 2 | 3 | import com.logentries.re2.UTF8CharOffset; 4 | import org.hamcrest.CoreMatchers; 5 | import org.junit.Assert; 6 | import org.junit.Test; 7 | import org.junit.runner.RunWith; 8 | import org.junit.runners.Parameterized; 9 | 10 | import java.nio.ByteBuffer; 11 | import java.nio.CharBuffer; 12 | import java.nio.charset.Charset; 13 | import java.nio.charset.StandardCharsets; 14 | import java.util.ArrayList; 15 | import java.util.Arrays; 16 | import java.util.Collection; 17 | import java.util.List; 18 | 19 | import static java.util.Arrays.asList; 20 | import static org.hamcrest.CoreMatchers.equalTo; 21 | import static org.junit.Assert.assertEquals; 22 | import static org.junit.Assert.assertThat; 23 | 24 | @RunWith(Parameterized.class) 25 | public class TestUtf8CharOffset { 26 | 27 | @Parameterized.Parameters(name = "{0}") 28 | public static Collection data() { 29 | // input char -> byte byte -> char 30 | 31 | return asList(new Object[][]{{ 32 | "abcd efg", asList(0,1,2,3,4,5,6,7), asList(0,1,2,3,4,5,6,7), 33 | },{ 34 | "abcd èfg", asList(0,1,2,3,4,5,7,8), asList(0,1,2,3,4,5,5,6,7) 35 | },{ 36 | "abcd €fg", asList(0,1,2,3,4,5,8,9), asList(0,1,2,3,4,5,5,5,6,7) 37 | },{ 38 | "abcd €€€", asList(0,1,2,3,4,5,8,11), asList(0,1,2,3,4,5,5,5,6,6,6,7,7,7) 39 | },{ 40 | "àbcd €fg", asList(0,2,3,4,5,6,9,10), asList(0,0,1,2,3,4,5,5,5,6,7) 41 | },{ 42 | "a\uD83D\uDC36cd efg", asList(0,1,1,5,6,7,8,9,10), asList(0,1,1,1,1,3,4,5,6,7,8) 43 | },{ 44 | // but why on earth we have to spend time to support chars!!!!! 45 | "\uD83D\uDC36\uD83D\uDC3Ecd efg", asList(0,0,4,4,8,9,10,11,12,13), asList(0,0,0,0,2,2,2,2,4,5,6,7,8,9) 46 | },{ 47 | "\uD83D\uDD0D sp", asList(0,0,4,5,6), asList(0,0,0,0,2,3,4) 48 | }}); 49 | } 50 | 51 | @Parameterized.Parameter(value = 0) 52 | public String input; 53 | @Parameterized.Parameter(value = 1) 54 | public List char2byte; 55 | @Parameterized.Parameter(value = 2) 56 | public List byte2char; 57 | 58 | @Test 59 | public void test() throws Exception { 60 | 61 | UTF8CharOffset offset = new UTF8CharOffset(input); 62 | byte[] utf8 = input.getBytes("UTF-8"); 63 | 64 | assertEquals("check test consistency: ", utf8.length, byte2char.size()); 65 | assertEquals("check test consistency: ", input.length(), char2byte.size()); 66 | 67 | List myChar2Byte = new ArrayList<>(); 68 | for (int i=0; i myByte2Char = new ArrayList<>(); 70 | for (int i=0; i { 9 | 10 | private static native boolean findImpl( 11 | final Object matcher, 12 | final long re2_pointer, 13 | final long str_pointer, 14 | final int fetch_groups, 15 | final int start, 16 | final int end 17 | ); 18 | 19 | static class Range { 20 | int start, end; 21 | static Range of(int start, int end) { 22 | Range r = new Range(); 23 | r.start = start; 24 | r.end = end; 25 | return r; 26 | } 27 | } 28 | 29 | public static void addGroup(RE2Matcher obj, int start, int end) { 30 | if (start >= 0 && end >= 0) { 31 | start = obj.utf8input.charPos(start); 32 | end = obj.utf8input.charPos(end); 33 | } 34 | obj.groups.add(Range.of(start, end)); 35 | } 36 | 37 | 38 | private ArrayList groups; 39 | protected RE2String utf8input; 40 | protected RE2String managedString; 41 | protected long re2Pointer = 0; 42 | protected RE2 regex; 43 | private boolean matched; 44 | private boolean fetchGroups; 45 | 46 | RE2Matcher(RE2String input, RE2 regex, long re2Pointer, boolean fetchGroups) { 47 | this.utf8input = input; 48 | this.matched = false; 49 | this.groups = new ArrayList<>(fetchGroups? regex.numberOfCapturingGroups() + 1 : 1); 50 | this.re2Pointer = re2Pointer; 51 | this.regex = regex; //to avoid that re2Pointer could be garbaged 52 | this.fetchGroups = fetchGroups; 53 | this.managedString = null; 54 | } 55 | 56 | 57 | RE2Matcher(CharSequence input, RE2 regex, long re2Pointer, boolean fetchGroups) { 58 | this(new RE2String(input), regex, re2Pointer, fetchGroups); 59 | this.managedString = utf8input; 60 | 61 | } 62 | public void close() { 63 | if (managedString != null) 64 | managedString.close(); 65 | } 66 | 67 | 68 | 69 | public boolean found() { 70 | return matched; 71 | } 72 | 73 | public boolean findNext() { 74 | if (!matched) return find(); 75 | else return find(end(0)); 76 | } 77 | 78 | public boolean find() { 79 | return find(0); 80 | } 81 | public boolean find(int start) { 82 | return find(start, utf8input.length()); 83 | } 84 | 85 | public boolean find(int start, int end) { 86 | groups.clear(); 87 | matched = false; 88 | 89 | if (utf8input.isClosed()) throw new IllegalStateException("String buffer has been already closed"); 90 | if (regex.isClosed()) throw new IllegalStateException("Regex has been already closed"); 91 | 92 | start = utf8input.bytePos(start); 93 | end = utf8input.bytePos(end); 94 | int ngroups = fetchGroups ? regex.numberOfCapturingGroups() + 1 : 1; 95 | @SuppressWarnings("deprecation") 96 | long stringPointer = utf8input.pointer(); 97 | return matched = findImpl(this, re2Pointer, stringPointer, ngroups, start, end); 98 | } 99 | 100 | private void checkGroup(int group) { 101 | if (!matched) throw new IllegalStateException("The pattern has not been matched!"); 102 | if (group >= groups.size()) throw new IllegalStateException("Group n. "+group+" is not in pattern!"); 103 | } 104 | 105 | @Override 106 | public int start() { 107 | return start(0); 108 | } 109 | 110 | @Override 111 | public int start(int group) { 112 | checkGroup(group); 113 | return groups.get(group).start; 114 | } 115 | 116 | @Override 117 | public int end() { 118 | return end(0); 119 | } 120 | 121 | @Override 122 | public int end(int group) { 123 | checkGroup(group); 124 | return groups.get(group).end; 125 | } 126 | 127 | @Override 128 | public String group() { 129 | return group(0); 130 | } 131 | 132 | @Override 133 | public String group(int group) { 134 | checkGroup(group); 135 | if (groups.get(group).start < 0) 136 | return null; 137 | else 138 | return utf8input.subSequence(groups.get(group).start, groups.get(group).end).toString(); 139 | } 140 | 141 | @Override 142 | public int groupCount() { 143 | checkGroup(0); 144 | return groups.size(); 145 | } 146 | 147 | @Override 148 | public Iterator iterator() { 149 | 150 | return new Iterator() { 151 | boolean moved = false; 152 | boolean hasnext = false; 153 | @Override 154 | public boolean hasNext() { 155 | if (!moved) { 156 | hasnext = findNext(); 157 | moved = true; 158 | } 159 | return hasnext; 160 | } 161 | @Override 162 | public MatchResult next() { 163 | if (hasNext()) { 164 | moved = false; 165 | return RE2Matcher.this; 166 | } else 167 | throw new NoSuchElementException(); 168 | } 169 | @Override 170 | public void remove() { 171 | throw new UnsupportedOperationException(); 172 | } 173 | }; 174 | } 175 | 176 | @Override 177 | public String toString() { 178 | StringBuffer buffer = new StringBuffer(); 179 | buffer.append(matched); 180 | for (int i=0; i 5 | * 6 | */ 7 | 8 | package com.logentries.re2; 9 | 10 | public final class Options extends LibraryLoader { 11 | private Encoding encoding; 12 | private boolean posixSyntax; 13 | private boolean longestMatch; 14 | private boolean logErrors; 15 | private long maxMem; 16 | private boolean literal; 17 | private boolean neverNl; 18 | private boolean neverCapture; 19 | private boolean caseSensitive; 20 | private boolean perlClasses; 21 | private boolean wordBoundary; 22 | private boolean oneLine; 23 | 24 | private native void setDefaults(); 25 | 26 | public Options() { 27 | setDefaults(); 28 | } 29 | 30 | public Options setEncoding(final Encoding encoding) { 31 | this.encoding = encoding; 32 | return this; 33 | } 34 | public Options setPosixSyntax(final boolean posixSyntax) { 35 | this.posixSyntax = posixSyntax; 36 | return this; 37 | } 38 | public Options setLongestMatch(final boolean longestMatch) { 39 | this.longestMatch = longestMatch; 40 | return this; 41 | } 42 | public Options setLogErrors(final boolean logErrors) { 43 | this.logErrors = logErrors; 44 | return this; 45 | } 46 | public Options setMaxMem(final long maxMem) { 47 | this.maxMem = maxMem; 48 | return this; 49 | } 50 | public Options setLiteral(final boolean literal) { 51 | this.literal = literal; 52 | return this; 53 | } 54 | public Options setNeverNl(final boolean neverNl) { 55 | this.neverNl = neverNl; 56 | return this; 57 | } 58 | public Options setNeverCapture(final boolean neverCapture) { 59 | this.neverCapture = neverCapture; 60 | return this; 61 | } 62 | public Options setCaseSensitive(final boolean caseSensitive) { 63 | this.caseSensitive = caseSensitive; 64 | return this; 65 | } 66 | public Options setCaseInsensitive(final boolean caseInsensitive) { 67 | this.caseSensitive = !caseInsensitive; 68 | return this; 69 | } 70 | public Options setPerlClasses(final boolean perlClasses) { 71 | this.perlClasses = perlClasses; 72 | return this; 73 | } 74 | public Options setWordBoundary(final boolean wordBoundary) { 75 | this.wordBoundary = wordBoundary; 76 | return this; 77 | } 78 | public Options setOneLine(final boolean oneLine) { 79 | this.oneLine = oneLine; 80 | return this; 81 | } 82 | 83 | /// FLAGS 84 | public static interface Flag { 85 | public void apply(Options opt); 86 | } 87 | 88 | public static final Flag POSIX_SINTAX = POSIX_SINTAX(true); 89 | public static Flag POSIX_SINTAX(final boolean v) { 90 | return new Flag() { 91 | @Override 92 | public void apply(Options opt) { 93 | opt.setPosixSyntax(v); 94 | } 95 | }; 96 | } 97 | public static final Flag LONGEST_MATCH = LONGEST_MATCH (true); 98 | public static Flag LONGEST_MATCH(final boolean v) { 99 | return new Flag() { 100 | @Override 101 | public void apply(Options opt) { 102 | opt.setLongestMatch(v); 103 | } 104 | }; 105 | } 106 | public static final Flag LOG_ERRORS = LOG_ERRORS (true); 107 | public static Flag LOG_ERRORS(final boolean v) { 108 | return new Flag() { 109 | @Override 110 | public void apply(Options opt) { 111 | opt.setLogErrors(v); 112 | } 113 | }; 114 | } 115 | public static final Flag LITERAL = LITERAL (true); 116 | public static Flag LITERAL(final boolean v) { 117 | return new Flag() { 118 | @Override 119 | public void apply(Options opt) { 120 | opt.setLiteral(v); 121 | } 122 | }; 123 | } 124 | public static final Flag NEVER_NL = NEVER_NL (true); 125 | public static Flag NEVER_NL(final boolean v) { 126 | return new Flag() { 127 | @Override 128 | public void apply(Options opt) { 129 | opt.setNeverNl(v); 130 | } 131 | }; 132 | } 133 | public static final Flag NEVER_CAPTURE = NEVER_CAPTURE (true); 134 | public static Flag NEVER_CAPTURE(final boolean v) { 135 | return new Flag() { 136 | @Override 137 | public void apply(Options opt) { 138 | opt.setNeverCapture(v); 139 | } 140 | }; 141 | } 142 | public static final Flag CASE_SENSITIVE = CASE_SENSITIVE (true); 143 | public static Flag CASE_SENSITIVE(final boolean v) { 144 | return new Flag() { 145 | @Override 146 | public void apply(Options opt) { 147 | opt.setCaseSensitive(v); 148 | } 149 | }; 150 | } 151 | public static final Flag CASE_INSENSITIVE = CASE_INSENSITIVE (true); 152 | public static Flag CASE_INSENSITIVE(final boolean v) { 153 | return new Flag() { 154 | @Override 155 | public void apply(Options opt) { 156 | opt.setCaseInsensitive(v); 157 | } 158 | }; 159 | } 160 | public static final Flag PERL_CLASSES = PERL_CLASSES (true); 161 | public static Flag PERL_CLASSES(final boolean v) { 162 | return new Flag() { 163 | @Override 164 | public void apply(Options opt) { 165 | opt.setPerlClasses(v); 166 | } 167 | }; 168 | } 169 | public static final Flag WORD_BOUNDARY = WORD_BOUNDARY (true); 170 | public static Flag WORD_BOUNDARY(final boolean v) { 171 | return new Flag() { 172 | @Override 173 | public void apply(Options opt) { 174 | opt.setWordBoundary(v); 175 | } 176 | }; 177 | } 178 | public static final Flag ONE_LINE = ONE_LINE (true); 179 | public static Flag ONE_LINE(final boolean v) { 180 | return new Flag() { 181 | @Override 182 | public void apply(Options opt) { 183 | opt.setOneLine(v); 184 | } 185 | }; 186 | } 187 | 188 | public static Flag MAX_MEMORY(final long m) { 189 | return new Flag() { 190 | @Override 191 | public void apply(Options opt) { 192 | opt.setMaxMem(m); 193 | } 194 | }; 195 | } 196 | 197 | public static final Flag UTF8_ENCODING = ENCODING(Encoding.UTF8); 198 | public static final Flag LATIN1_ENCODING = ENCODING(Encoding.Latin1); 199 | public static Flag ENCODING(final Encoding e) { 200 | return new Flag() { 201 | @Override 202 | public void apply(Options opt) { 203 | opt.setEncoding(e); 204 | } 205 | }; 206 | } 207 | 208 | 209 | } 210 | -------------------------------------------------------------------------------- /src/test/java/com/logentries/re2_test/GenRegExpr.java: -------------------------------------------------------------------------------- 1 | package com.logentries.re2_test; 2 | 3 | import java.util.List; 4 | import java.util.Arrays; 5 | import java.util.Collection; 6 | import java.util.ArrayList; 7 | import java.util.Stack; 8 | import java.util.Random; 9 | 10 | public class GenRegExpr { 11 | public abstract class Operator { 12 | public abstract int getArity(); 13 | public abstract String call(String ... args); 14 | } 15 | 16 | public abstract class NullaryOperator extends Operator { 17 | public final int getArity() { 18 | return 0; 19 | } 20 | public String call(String ... args) { 21 | assert args.length == 0; 22 | return callNullary(); 23 | } 24 | protected abstract String callNullary(); 25 | } 26 | 27 | public abstract class UnaryOperator extends Operator { 28 | public final int getArity() { 29 | return 1; 30 | } 31 | public String call(String ... args) { 32 | assert args.length == 1; 33 | return callUnary(args[0]); 34 | } 35 | protected abstract String callUnary(final String arg); 36 | } 37 | 38 | public abstract class BinaryOperator extends Operator { 39 | public final int getArity() { 40 | return 2; 41 | } 42 | public String call(String ... args) { 43 | assert args.length == 2; 44 | return callBinary(args[0], args[1]); 45 | } 46 | protected abstract String callBinary(final String leftArg, final String rightArg); 47 | } 48 | 49 | public final class ConstOperator extends NullaryOperator { 50 | private final String m_val; 51 | 52 | public ConstOperator(final String val) { 53 | m_val = val; 54 | } 55 | 56 | protected String callNullary() { 57 | return m_val; 58 | } 59 | 60 | public String toString() { 61 | return "" + '"' + m_val + '"'; 62 | } 63 | } 64 | 65 | public final class OperatorUnaryStar extends UnaryOperator { 66 | protected String callUnary(final String arg) { 67 | return arg + '*'; 68 | } 69 | 70 | public String toString() { 71 | return "*"; 72 | } 73 | } 74 | 75 | public final class OperatorUnaryPlus extends UnaryOperator { 76 | protected String callUnary(final String arg) { 77 | return arg + '+'; 78 | } 79 | 80 | public String toString() { 81 | return "+"; 82 | } 83 | } 84 | 85 | public final class OperatorUnaryQM extends UnaryOperator { 86 | protected String callUnary(final String arg) { 87 | return arg + '?'; 88 | } 89 | 90 | public String toString() { 91 | return "?"; 92 | } 93 | } 94 | 95 | public final class OperatorBinaryConcat extends BinaryOperator { 96 | protected String callBinary(final String leftArg, final String rightArg) { 97 | return leftArg + rightArg; 98 | } 99 | 100 | public String toString() { 101 | return "<>"; 102 | } 103 | } 104 | 105 | public final class OperatorBinaryPipe extends BinaryOperator { 106 | protected String callBinary(final String leftArg, final String rightArg) { 107 | return leftArg + '|' + rightArg; 108 | } 109 | 110 | public String toString() { 111 | return "|"; 112 | } 113 | } 114 | 115 | /* Member Variables */ 116 | 117 | Random mRand = new Random(); 118 | 119 | // Operators that are not nullary 120 | private List mOps = Arrays.asList(new OperatorUnaryStar(), 121 | new OperatorUnaryPlus(), 122 | new OperatorUnaryQM(), 123 | new OperatorBinaryConcat(), 124 | new OperatorBinaryPipe() 125 | ); 126 | 127 | // Nullary operators 128 | private List mNullary; // Generated from input 129 | 130 | private int mMaxAtoms; 131 | private int mMaxOps; 132 | 133 | /* Member Functions */ 134 | 135 | public GenRegExpr(final Collection consts, final int maxAtoms, final int maxOps) { 136 | mMaxAtoms = maxAtoms; 137 | mMaxOps = maxOps; 138 | mNullary = new ArrayList(consts.size()); 139 | for (String s: consts) { 140 | mNullary.add(new ConstOperator(s)); 141 | } 142 | } 143 | 144 | protected String group(final String s) { 145 | return "(?:" + s + ")"; 146 | } 147 | 148 | protected String runPostfix(Stack opsStack) { 149 | /* 150 | String ret = "Stack"; 151 | for (Operator item: stack) { 152 | ret += " " + item.toString(); 153 | } 154 | return ret; 155 | */ 156 | Stack valsStack = new Stack(); 157 | for (Operator item: opsStack) { 158 | switch (item.getArity()) { 159 | case 0: 160 | valsStack.push(item.call()); 161 | break; 162 | case 1: 163 | final String arg = valsStack.pop(); 164 | valsStack.push( group(item.call(arg)) ); 165 | break; 166 | case 2: 167 | final String rightArg = valsStack.pop(); 168 | final String leftArg = valsStack.pop(); 169 | valsStack.push( group(item.call(leftArg, rightArg)) ); 170 | break; 171 | default: 172 | assert false; 173 | break; 174 | } 175 | } 176 | assert valsStack.size() == 1; 177 | return valsStack.pop(); 178 | } 179 | 180 | protected String genPostfix(Stack stack, final int nstk, final int ops, final int atoms) { 181 | for (;;) { 182 | if (nstk + ops >= mMaxOps) { 183 | return null; 184 | } 185 | 186 | if (nstk == 1 && mRand.nextInt(2) == 0) { 187 | return runPostfix(stack); 188 | } 189 | 190 | if (atoms < mMaxAtoms && mRand.nextInt(2) == 0) { 191 | stack.push( mNullary.get(mRand.nextInt(mNullary.size())) ); 192 | final String ret = genPostfix(stack, nstk + 1, ops, atoms + 1); 193 | stack.pop(); 194 | if (ret != null) { 195 | return ret; 196 | } 197 | } 198 | 199 | if (ops < mMaxOps && mRand.nextInt(2) == 0) { 200 | final Operator op = mOps.get(mRand.nextInt(mOps.size())); 201 | if (op.getArity() <= nstk) { 202 | stack.push(op); 203 | final String ret = genPostfix(stack, nstk - op.getArity() + 1, ops + 1, atoms); 204 | stack.pop(); 205 | if (ret != null) { 206 | return ret; 207 | } 208 | } 209 | } 210 | } 211 | } 212 | 213 | public String next() { 214 | return genPostfix(new Stack(), 0, 0, 0); 215 | } 216 | } 217 | -------------------------------------------------------------------------------- /src/main/java/com/logentries/re2/RE2.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Java Bindings for the RE2 Library 3 | * 4 | * (c) 2012 Daniel Fiala 5 | * 6 | */ 7 | 8 | package com.logentries.re2; 9 | 10 | import com.logentries.re2.entity.CaptureGroup; 11 | import com.logentries.re2.entity.NamedGroup; 12 | 13 | import java.util.ArrayList; 14 | import java.util.List; 15 | import java.util.regex.MatchResult; 16 | 17 | public final class RE2 extends LibraryLoader implements AutoCloseable { 18 | private static native long compileImpl(final String pattern, final Options options) throws RegExprException; 19 | private static native void releaseImpl(final long pointer); 20 | private static native boolean fullMatchImpl(final String str, final long pointer, Object ... args); 21 | private static native boolean partialMatchImpl(final String str, final long pointer, Object ... args); 22 | private static native boolean fullMatchImpl(final String str, final String pattern, Object ... args); 23 | private static native boolean partialMatchImpl(final String str, final String pattern, Object ... args); 24 | private static native List getCaptureGroupNamesImpl(final long pointer, Object ... args); 25 | private static native int numberOfCapturingGroupsImpl(final long pointer); 26 | 27 | private long pointer; 28 | 29 | private void checkState() throws IllegalStateException { 30 | if (pointer == 0) { 31 | throw new IllegalStateException(); 32 | } 33 | } 34 | boolean isClosed() { 35 | return pointer == 0; 36 | } 37 | 38 | public RE2(final String pattern, final Options options) throws RegExprException { 39 | pointer = compileImpl(pattern, options); 40 | } 41 | public RE2(final String pattern, final Options.Flag... options) throws RegExprException { 42 | Options opt = new Options(); 43 | for (Options.Flag f : options) f.apply(opt); 44 | pointer = compileImpl(pattern, opt); 45 | } 46 | 47 | public static RE2 compile(final String pattern, final Options.Flag... options) { 48 | try { 49 | return new RE2(pattern, options); 50 | } catch (RegExprException ree) { 51 | throw new IllegalArgumentException(ree); 52 | } 53 | } 54 | 55 | public int numberOfCapturingGroups() { 56 | checkState(); 57 | return numberOfCapturingGroupsImpl(pointer); 58 | } 59 | 60 | public void dispoze() { 61 | if (pointer != 0) { 62 | releaseImpl(pointer); 63 | pointer = 0; 64 | } 65 | } 66 | 67 | public void close() { 68 | dispoze(); 69 | } 70 | 71 | protected void finalize() throws Throwable { 72 | dispoze(); 73 | super.finalize(); 74 | } 75 | 76 | static private int checkArg(final Object obj) throws IllegalArgumentException { 77 | if (obj instanceof int[]) { 78 | return ((int[])obj).length; 79 | } 80 | if (obj instanceof long[]) { 81 | return ((long[])obj).length; 82 | } 83 | if (obj instanceof float[]) { 84 | return ((float[])obj).length; 85 | } 86 | if (obj instanceof double[]) { 87 | return ((double[])obj).length; 88 | } 89 | if (obj instanceof String[]) { 90 | return ((String[])obj).length; 91 | } 92 | throw new IllegalArgumentException(); 93 | } 94 | 95 | static private void checkArgs(Object ... args) throws IllegalArgumentException { 96 | int length = 0; 97 | for (Object arg: args) { 98 | if ((length += checkArg(arg)) > 31) { 99 | throw new IllegalArgumentException("Only up to 31 arguments supported"); 100 | } 101 | } 102 | } 103 | 104 | public static boolean fullMatch(final String str, final String pattern, Object ... args) { 105 | checkArgs(args); 106 | return fullMatchImpl(str, pattern, args); 107 | } 108 | 109 | public static boolean partialMatch(final String str, final String pattern, Object ... args) { 110 | checkArgs(args); 111 | return partialMatchImpl(str, pattern, args); 112 | } 113 | 114 | public boolean fullMatch(final String str, Object ... args) throws IllegalStateException { 115 | checkState(); 116 | checkArgs(args); 117 | return fullMatchImpl(str, pointer, args); 118 | } 119 | 120 | public boolean partialMatch(final String str, Object ... args) throws IllegalStateException { 121 | checkState(); 122 | checkArgs(args); 123 | return partialMatchImpl(str, pointer, args); 124 | } 125 | 126 | /** 127 | * This method returns ordered names. 128 | * 129 | * @param args 130 | * @return List of names for the capture groups 131 | * @throws IllegalStateException 132 | */ 133 | public List getCaptureGroupNames(Object... args) throws IllegalStateException { 134 | checkState(); 135 | checkArgs(args); 136 | return getCaptureGroupNamesImpl(pointer, args); 137 | } 138 | 139 | public RE2Matcher matcher(final CharSequence str) { 140 | return matcher(str, true); 141 | } 142 | public RE2Matcher matcher(final CharSequence str, boolean fetchGroups) { 143 | checkState(); 144 | return new RE2Matcher(str, this, pointer, fetchGroups); 145 | } 146 | public RE2Matcher matcher(final RE2String str) { 147 | return matcher(str, true); 148 | } 149 | public RE2Matcher matcher(final RE2String str, boolean fetchGroups) { 150 | checkState(); 151 | return new RE2Matcher(str, this, pointer, fetchGroups); 152 | } 153 | 154 | /** 155 | * Gets the ordered capture groups for this event message and pattern. 156 | * @param str is an events message. 157 | * @return is a list of CaptureGroups. 158 | */ 159 | public List getCaptureGroups(final String str) { 160 | checkState(); 161 | List captureGroups = new ArrayList<>(); 162 | RE2Matcher re2match = this.matcher(str); 163 | 164 | try { 165 | for (MatchResult match : re2match) { 166 | for (int i = 1; i < match.groupCount(); i++) { 167 | if (match.start() > -1) { 168 | captureGroups.add(new CaptureGroup(match.group(i), match.start(i), match.end(i))); 169 | } 170 | } 171 | } 172 | } catch (IndexOutOfBoundsException e) { 173 | return captureGroups; 174 | } 175 | return captureGroups; 176 | } 177 | 178 | /** 179 | * Returns a list of named capture groups and their position information in the event message. 180 | * @param names is a list of names to match against. 181 | * @param str is an events message. 182 | * @return is a list of named capture groups. 183 | */ 184 | public List getNamedCaptureGroups(List names, final String str) { 185 | List namedGroups = new ArrayList<>(); 186 | List captureGroups = getCaptureGroups(str); 187 | int len = names.size(); 188 | 189 | if (len != captureGroups.size()) { 190 | // Matching text for a named group hasn't been found. 191 | return namedGroups; 192 | } 193 | 194 | for (int i = 0; i < len; i++) { 195 | if (captureGroups.get(i).start > -1) { 196 | namedGroups.add(new NamedGroup(names.get(i), captureGroups.get(i))); 197 | } 198 | } 199 | return namedGroups; 200 | } 201 | } 202 | -------------------------------------------------------------------------------- /src/test/java/com/logentries/re2_test/TestRandomExpr.java: -------------------------------------------------------------------------------- 1 | package com.logentries.re2_test; 2 | 3 | import com.logentries.re2.RE2; 4 | import com.logentries.re2.RegExprException; 5 | import org.junit.Test; 6 | 7 | import java.util.Arrays; 8 | import java.util.List; 9 | import java.util.regex.Pattern; 10 | 11 | import static org.junit.Assert.assertEquals; 12 | import static org.junit.Assert.fail; 13 | 14 | public class TestRandomExpr { 15 | private static class InterruptibleCharSequence implements CharSequence { 16 | CharSequence inner; 17 | 18 | public InterruptibleCharSequence(CharSequence inner) { 19 | super(); 20 | this.inner = inner; 21 | } 22 | 23 | public char charAt(int index) { 24 | if (Thread.interrupted()) { // clears flag if set 25 | throw new RuntimeException(new InterruptedException()); 26 | } 27 | return inner.charAt(index); 28 | } 29 | 30 | public int length() { 31 | return inner.length(); 32 | } 33 | 34 | public CharSequence subSequence(int start, int end) { 35 | return new InterruptibleCharSequence(inner.subSequence(start, end)); 36 | } 37 | 38 | @Override 39 | public String toString() { 40 | return inner.toString(); 41 | } 42 | } 43 | 44 | private List mAlphabet = Arrays.asList("aaa", "b", "ccc"); 45 | 46 | private GenRegExpr genRegExpr = new GenRegExpr(mAlphabet, 3, 12); 47 | private GenString genString = new GenString(mAlphabet, 15); 48 | 49 | private static Boolean applyMatches(final Pattern pattern, final String str) { 50 | class ApplyMatches implements Runnable { 51 | private volatile Boolean res = null; 52 | public Boolean getRes() { 53 | return res; 54 | } 55 | 56 | public void run() { 57 | res = pattern.matcher(new InterruptibleCharSequence(str)).matches(); 58 | } 59 | } 60 | ApplyMatches am = new ApplyMatches(); 61 | Thread thread = new Thread(am); 62 | thread.start(); 63 | try { 64 | thread.join(1500); 65 | } catch (InterruptedException ex) { 66 | } 67 | final Boolean res = am.getRes(); 68 | if (res == null) { 69 | thread.interrupt(); 70 | try { 71 | thread.join(); 72 | } catch (InterruptedException ex) { 73 | } 74 | } 75 | return res; 76 | } 77 | 78 | private static Boolean applyFind(final Pattern pattern, final String str) { 79 | class ApplyFind implements Runnable { 80 | private volatile Boolean res = null; 81 | public Boolean getRes() { 82 | return res; 83 | } 84 | 85 | public void run() { 86 | res = pattern.matcher(new InterruptibleCharSequence(str)).find(); 87 | } 88 | } 89 | ApplyFind af = new ApplyFind(); 90 | Thread thread = new Thread(af); 91 | thread.start(); 92 | try { 93 | thread.join(1500); 94 | } catch (InterruptedException ex) { 95 | } 96 | final Boolean res = af.getRes(); 97 | if (res == null) { 98 | thread.interrupt(); 99 | try { 100 | thread.join(); 101 | } catch (InterruptedException ex) { 102 | } 103 | } 104 | return res; 105 | } 106 | 107 | private void compareOneRandExpr(final int index) { 108 | final String regExprStr = genRegExpr.next(); 109 | System.err.println("Runnig i = " + index + "\t" + regExprStr); 110 | 111 | Pattern pattern = Pattern.compile(regExprStr); 112 | System.err.println("\t+Pattern.compile()"); 113 | RE2 re2 = null; 114 | try { 115 | re2 = new RE2(regExprStr); 116 | } catch (RegExprException e) { 117 | System.err.println("Cannot construct re: [" + regExprStr + "] : " + e.getMessage()); 118 | fail("Unexpected error in RE"); 119 | } 120 | System.err.println("\t+new RE2()"); 121 | 122 | for (int i = 0; i < 25; ++i) { 123 | final String str = genString.next(); 124 | System.err.println("\t" + str); 125 | final Boolean matches = applyMatches(pattern, str); 126 | if (matches == null) { 127 | System.err.println("Timeout of matches(.) for re=[" + regExprStr + "] and string=[" + str + "]"); 128 | } 129 | System.err.println("\t\tPattern.matches()"); 130 | final boolean re2_matches = re2.fullMatch(str); 131 | System.err.println("\t\tRE2.matches()"); 132 | final Boolean found = applyFind(pattern, str); 133 | if (found == null) { 134 | System.err.println("Timeout of find(.) for re=[" + regExprStr + "] and string=[" + str + "]"); 135 | } 136 | System.err.println("\t\tPattern.find()"); 137 | final boolean re2_found = re2.partialMatch(str); 138 | System.err.println("\t\tRE2.partialMatch()"); 139 | if ((matches != null && matches != re2_matches) || (found != null && found != re2_found)) { 140 | System.err.println("reg-expr:[" + regExprStr + "]; str:[" + str + "] " + matches + "\t" + re2_matches + "\t" + found + "\t" + re2_found); 141 | } 142 | if (matches != null) { 143 | assertEquals(matches, re2_matches); 144 | } 145 | if (found != null) { 146 | assertEquals(found, re2_found); 147 | } 148 | } 149 | re2.dispoze(); 150 | } 151 | 152 | public void testRandExpr() { 153 | for (int i = 0; i < 200; ++i) { 154 | compareOneRandExpr(i); 155 | } 156 | } 157 | 158 | @Test 159 | public void testRandRE2() { 160 | class Worker implements Runnable { 161 | public void run() { 162 | for (int i = 0; i < 2000; ++i) { 163 | // runOneRandRE2(i); 164 | compareOneRandExpr(i); 165 | } 166 | } 167 | } 168 | 169 | Thread[] ths = new Thread[8]; 170 | for (int i = 0; i < ths.length; ++i) { 171 | (ths[i] = new Thread(new Worker())).start(); 172 | } 173 | for (int i = 0; i < ths.length; ++i) { 174 | try { 175 | ths[i].join(); 176 | } catch (InterruptedException e) { 177 | } 178 | } 179 | } 180 | 181 | public void runOneRandRE2(final int index) { 182 | final String regExprStr = genRegExpr.next(); 183 | System.err.println("Runnig i = " + index + "\t" + regExprStr); 184 | 185 | 186 | RE2 re2 = null; 187 | try { 188 | new RE2(regExprStr); 189 | } catch (RegExprException e) { 190 | System.err.println("Cannot construct re: [" + regExprStr + "] : " + e.getMessage()); 191 | fail("Unexpected error in RE"); 192 | } 193 | System.err.println("\t+new RE2()"); 194 | 195 | for (int i = 0; i < 25; ++i) { 196 | final String str = genString.next(); 197 | System.err.println("\t" + str); 198 | final boolean re2_matches = re2.fullMatch(str); 199 | System.err.println("\t\tRE2.matches()"); 200 | final boolean re2_found = re2.partialMatch(str); 201 | System.err.println("\t\tRE2.partialMatch()"); 202 | System.err.println("reg-expr:[" + regExprStr + "]; str:[" + str + "] " + re2_matches + "\t" + re2_found); 203 | } 204 | re2.dispoze(); 205 | } 206 | } 207 | -------------------------------------------------------------------------------- /src/main/java/com/logentries/re2/op.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Java Bindings for the RE2 Library 3 | * 4 | * (c) 2012 Daniel Fiala 5 | * 6 | */ 7 | 8 | #ifndef COM_LOGENTRIES_RE2_OP_H 9 | # define COM_LOGENTRIES_RE2_OP_H 10 | 11 | union JavaRE2_Any { 12 | jint i_; 13 | jlong l_; 14 | jfloat f_; 15 | jdouble d_; 16 | char sbuf_[sizeof(re2::StringPiece)]; // union cannot contain member with constructor 17 | // FIXME: Memory alignment 18 | 19 | re2::StringPiece *get_s(void) { 20 | re2::StringPiece *s = reinterpret_cast(sbuf_); 21 | BOOST_VERIFY( reinterpret_cast(sbuf_) == reinterpret_cast(s) ); 22 | return s; 23 | } 24 | 25 | re2::StringPiece *construct_s(void) { 26 | re2::StringPiece *s = new(sbuf_) re2::StringPiece(); 27 | BOOST_VERIFY(reinterpret_cast(s) == reinterpret_cast(get_s())); 28 | return s; 29 | } 30 | 31 | void destroy_s(void) { 32 | re2::StringPiece *s = get_s(); 33 | s->~StringPiece(); 34 | } 35 | }; 36 | 37 | enum JavaRE2_AnyType { 38 | JavaRE2_INT, 39 | JavaRE2_LONG, 40 | JavaRE2_FLOAT, 41 | JavaRE2_DOUBLE, 42 | JavaRE2_STRING, 43 | }; 44 | 45 | class JavaRE2_Arg { 46 | private: 47 | RE2::Arg arg_; 48 | JavaRE2_Any any_; // arg_ contains pointer to any_ 49 | JavaRE2_AnyType type_; 50 | jarray j_array_; 51 | jsize j_index_; 52 | 53 | private: 54 | RE2::Arg init_arg(const JavaRE2_AnyType type) { 55 | switch (type) { 56 | case JavaRE2_INT: 57 | return RE2::Arg(&any_.i_); 58 | case JavaRE2_LONG: 59 | return RE2::Arg(&any_.l_); 60 | case JavaRE2_FLOAT: 61 | return RE2::Arg(&any_.f_); 62 | case JavaRE2_DOUBLE: 63 | return RE2::Arg(&any_.d_); 64 | case JavaRE2_STRING: 65 | any_.construct_s(); 66 | return RE2::Arg(any_.get_s()); 67 | default: 68 | BOOST_VERIFY(0); 69 | } 70 | } 71 | 72 | public: 73 | JavaRE2_Arg(JavaRE2_AnyType type, jarray j_array, const jsize j_index) 74 | : type_(type), 75 | arg_(init_arg(type)), // any_ is set here too 76 | j_array_(j_array), 77 | j_index_(j_index) 78 | { } 79 | 80 | void transfer(JNIEnv *env) { 81 | switch (type_) { 82 | case JavaRE2_INT: { 83 | jintArray j_int_arr = static_cast(j_array_); 84 | env->SetIntArrayRegion(j_int_arr, j_index_, 1, &any_.i_); 85 | } 86 | break; 87 | case JavaRE2_LONG: { 88 | jlongArray j_long_arr = static_cast(j_array_); 89 | env->SetLongArrayRegion(j_long_arr, j_index_, 1, &any_.l_); 90 | } 91 | break; 92 | case JavaRE2_FLOAT: { 93 | jfloatArray j_float_arr = static_cast(j_array_); 94 | env->SetFloatArrayRegion(j_float_arr, j_index_, 1, &any_.f_); 95 | } 96 | break; 97 | case JavaRE2_DOUBLE: { 98 | jdoubleArray j_double_arr = static_cast(j_array_); 99 | env->SetDoubleArrayRegion(j_double_arr, j_index_, 1, &any_.d_); 100 | } 101 | break; 102 | case JavaRE2_STRING: { 103 | re2::StringPiece *s = any_.get_s(); 104 | jobjectArray j_obj_arr = static_cast(j_array_); 105 | jstring j_str = env->NewStringUTF(s->as_string().c_str()); 106 | env->SetObjectArrayElement(j_obj_arr, j_index_, j_str); 107 | } 108 | break; 109 | default: 110 | BOOST_VERIFY(0); 111 | } 112 | } 113 | 114 | RE2::Arg *get_re2_arg(void) { 115 | return &arg_; 116 | } 117 | 118 | ~JavaRE2_Arg(void) { 119 | if (type_ == JavaRE2_STRING) { 120 | any_.destroy_s(); 121 | } 122 | } 123 | }; 124 | 125 | static bool is_int_arr(JNIEnv *env, jclass j_cls) { 126 | jclass j_arr_cls = env->FindClass("[I"); 127 | return env->IsAssignableFrom(j_cls, j_arr_cls); 128 | } 129 | 130 | static bool is_long_arr(JNIEnv *env, jclass j_cls) { 131 | jclass j_arr_cls = env->FindClass("[J"); 132 | return env->IsAssignableFrom(j_cls, j_arr_cls); 133 | } 134 | 135 | static bool is_float_arr(JNIEnv *env, jclass j_cls) { 136 | jclass j_arr_cls = env->FindClass("[F"); 137 | return env->IsAssignableFrom(j_cls, j_arr_cls); 138 | } 139 | 140 | static bool is_double_arr(JNIEnv *env, jclass j_cls) { 141 | jclass j_arr_cls = env->FindClass("[D"); 142 | return env->IsAssignableFrom(j_cls, j_arr_cls); 143 | } 144 | 145 | static bool is_string_arr(JNIEnv *env, jclass j_cls) { 146 | jclass j_arr_cls = env->FindClass("[Ljava/lang/String;"); 147 | BOOST_VERIFY(j_arr_cls); 148 | return env->IsAssignableFrom(j_cls, j_arr_cls); 149 | } 150 | 151 | static JavaRE2_AnyType get_type(JNIEnv *env, jobject j_object) { 152 | jclass j_cls = env->GetObjectClass(j_object); 153 | if (is_int_arr(env, j_cls)) { 154 | return JavaRE2_INT; 155 | } 156 | if (is_long_arr(env, j_cls)) { 157 | return JavaRE2_LONG; 158 | } 159 | if (is_float_arr(env, j_cls)) { 160 | return JavaRE2_FLOAT; 161 | } 162 | if (is_double_arr(env, j_cls)) { 163 | return JavaRE2_DOUBLE; 164 | } 165 | if (is_string_arr(env, j_cls)) { 166 | return JavaRE2_STRING; 167 | } 168 | BOOST_VERIFY(!"Unexpected parameter supplied"); // This should not occure, args are checked from Java 169 | } 170 | 171 | static jsize sum_lengths(JNIEnv *env, jobjectArray j_args) { 172 | jsize j_sum = 0; 173 | const jsize j_args_length = env->GetArrayLength(j_args); 174 | for (jsize j_i = 0; j_i < j_args_length; ++j_i) { 175 | jarray j_arr = static_cast(env->GetObjectArrayElement(j_args, j_i)); 176 | j_sum += env->GetArrayLength(j_arr); 177 | } 178 | return j_sum; 179 | } 180 | 181 | template 182 | static bool do_op(JNIEnv *env, const Op &op, jobjectArray j_args) { 183 | struct Buf { 184 | char _[sizeof(JavaRE2_Arg)]; // FIXME: Memory alignment 185 | 186 | JavaRE2_Arg *get_arg(void) { 187 | JavaRE2_Arg *arg = reinterpret_cast(_); 188 | BOOST_VERIFY(reinterpret_cast(arg) == reinterpret_cast(_)); 189 | return arg; 190 | } 191 | 192 | JavaRE2_Arg *construct_arg(JavaRE2_AnyType type, jarray j_array, const jsize j_index) { 193 | JavaRE2_Arg *arg = new(_) JavaRE2_Arg(type, j_array, j_index); 194 | BOOST_VERIFY(reinterpret_cast(arg) == reinterpret_cast(get_arg())); 195 | return arg; 196 | } 197 | 198 | void destroy_arg(void) { 199 | get_arg()->~JavaRE2_Arg(); 200 | } 201 | }; 202 | const jsize j_args_len = env->GetArrayLength(j_args); 203 | const jsize j_total_len = sum_lengths(env, j_args); 204 | if (j_total_len > 31) { 205 | BOOST_VERIFY(!"Megical constant from re2 source code exceeded"); // This should not occure, args are checked from Java 206 | } 207 | 208 | Buf buf_args[j_total_len]; 209 | RE2::Arg *args[j_total_len]; 210 | 211 | for (jsize j_i = 0, j_index = 0; j_i < j_args_len; ++j_i) { 212 | jarray j_arr = static_cast( env->GetObjectArrayElement(j_args, j_i) ); 213 | const jsize j_len = env->GetArrayLength(j_arr); 214 | const JavaRE2_AnyType type = get_type(env, j_arr); 215 | for (jsize j_j = 0; j_j < j_len; ++j_j) { 216 | args[j_index] = buf_args[j_index].construct_arg(type, j_arr, j_j)->get_re2_arg(); 217 | ++j_index; 218 | } 219 | } 220 | 221 | const int total_len = static_cast(j_total_len); 222 | BOOST_VERIFY(static_cast(total_len) == j_total_len); 223 | BOOST_VERIFY(total_len > 0 == j_total_len > 0); 224 | bool ret = op(args, j_total_len); 225 | 226 | for (jsize j_i = 0; j_i < j_total_len; ++j_i) { 227 | buf_args[j_i].get_arg()->transfer(env); 228 | buf_args[j_i].destroy_arg(); 229 | } 230 | return ret; 231 | } 232 | 233 | #endif 234 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | re2-java 2 | ======== 3 | 4 | re2 for Java 5 | 6 | Warning: Only 64bit Linux is supported for now. It should be easy to add support for other platforms. 7 | 8 | ## Licence ## 9 | 10 | Like [RE2 library](http://code.google.com/p/re2/) iteself, this library can be distributed and used under the terms of [The BSD 3-Clause License](http://opensource.org/licenses/BSD-3-Clause). 11 | 12 | 13 | ## Installation ## 14 | 15 | ### Requirements ### 16 | * Java 7 (JDK 1.7, never tested on Java 8). Set environment variable `JAVA_HOME` to point to the root directory of JDK. 17 | * Maven 3.x , http://maven.apache.org/ . 18 | - Check that mvn command can be run from your command line. 19 | * gcc 4.5.x or higher. 20 | * Boost C++ Library (http://www.boost.org/), version newer than Stonehenge should be enough. 21 | * wget 22 | 23 | ### Compilation ### 24 | 25 | Simply type: 26 | 27 | $ make 28 | 29 | It downloads latest stable revision of re2, builds re2 library in separate directory and builds another library with JNI bindigs as well. 30 | Finally, jar file that includes so libraries files is produced in the target folder. 31 | 32 | You can type: 33 | 34 | $ make clean 35 | 36 | to clean all files that come into existence during normal run of make. 37 | 38 | After seccessfull compilation you can run: 39 | 40 | $ mvn test 41 | 42 | But tests are very time and memory consuming and, at present, they print a lot of debug messages. Sorry if it is annoying, 43 | this binding is actually under development. 44 | 45 | ### Generating header files - example ### 46 | 47 | From root folder containing pom.xml, src/ etc. 48 | 49 | javah -jni -classpath "/home//repos/re2-java/src/main/java" -o src/main/java/com/logentries/re2/RE2.h com.logentries.re2.RE2 50 | 51 | ### Installation ### 52 | 53 | After running of `make`, directory `target` contains jar file with the library. You can include it to your `classpath`. 54 | Native library files (libre2.so and libre2-java.so) are part of the jar file as well. They are extracted after JVM 55 | startup, saved into temporary files and dynamically loaded into the address space of the JVM. 56 | 57 | ### Changelog ### 58 | 59 | #### v1.2 60 | 61 | - added `RE2.compile` static method, similar to `Pattern.compile`. The main difference with the `RE2` constructor 62 | is that `compile` method doesn't use checked exception and you can avoid `try/catch` block. 63 | 64 | - support for `RE2String` that can be reused with multiple patterns, in order to avoid multiple copies of the same string. 65 | 66 | - generalization of `RE2.matcher` that now accepts `CharSequence` rather than `String` 67 | 68 | #### v1.1 69 | 70 | - support for `RE2Matcher` 71 | 72 | 73 | ## Usage ## 74 | 75 | For usage of the library, please import `com.logentries.re2.RE2` and `com.logentries.re2.Options` . 76 | 77 | Basic usage of java-re2 is quite similar to the C++ library. 78 | 79 | Static functions `RE2.fullMatch(.)` and `RE2.partialMatch(.)` can be used. 80 | 81 | You can create precompiled RE in this way: 82 | 83 | RE2 re = new RE2("\\d+"); 84 | 85 | as the object allocates some memory that is not under the control of JVM, it should be freed explicitly. 86 | You can either use member function `dispoze()`, or member function `close()` . 87 | Class RE2 contains overloaded method `finalize()` that is automatically called before the object is destroyed by the Garbage Collector. 88 | This method ensures that the additional memory is freed and may be frees it on its own. 89 | But it is usually bad idea to rely on Java GC. :-) 90 | 91 | Any try to use the object after the call of `dispoze()` or `close()` will cause the thrown of `IllegalStateException` . 92 | 93 | Precompiled RE supports member functions `partialMatch(.)` or `fullMatch(.)`. 94 | 95 | re.fullMatch("2569"); 96 | re.partialMatch("xxx=2569"); 97 | 98 | `RE2` constructor is declared with checked exception that can be raised if the regex is malformed. This is quite annoying if 99 | the regex is a static variable instantiated at startup. You can then use static method `RE2.compile` that wraps checked exception 100 | to the unchecked `IllegalArgumentException`. 101 | 102 | public class MyClass { 103 | private static RE2 regex = RE2.compile("..."); 104 | } 105 | 106 | ### Matcher ### 107 | 108 | `RE2` object supports also a more javaesque interface, similar to `java.util.regex.Pattern` and `java.util.regex.Matcher`. 109 | 110 | RE2 re = new RE2("..(..)"); 111 | RE2Matcher matcher = re.matcher("my input string"); 112 | if (matcher.find()) { 113 | // get matching string(s), 114 | // see java.util.regex.Matcher javadoc or 115 | // com.logentries.re2.RE2Matcher code for additional details 116 | // eg. matcher.group() or matcher.start() and matcher.end() 117 | ... 118 | } 119 | 120 | You can also iterate over the input string searching for repeated pattern 121 | 122 | RE2 re = new RE2("bla?"); 123 | RE2Matcher matcher = re.matcher("my bla input string bl bla"); 124 | while (matcher.findNext()) { 125 | // 3 iterations, get positions using matcher.start() and matcher.end() 126 | } 127 | 128 | `R2Matcher` also implements `java.util.Iterable`. 129 | It can be used this way 130 | 131 | int c = 0; 132 | for (MatchResult mr : new RE2("t").matcher("input text")) { 133 | // play with matches using mr.start, mr.end, mr.group 134 | } 135 | assertEquals(3, c); 136 | 137 | This can be very useful when playing with this library in Scala: 138 | 139 | import scala.collection.JavaConversions._ 140 | import com.logentries.re2._ 141 | 142 | new RE2("abc?") matcher "abc and abc ab ab" map( _.group ) foreach println 143 | 144 | If you are not interested in fetching groups offset you can disable this feature, by using 145 | 146 | RE2Matcher m = new RE2("ab(c?)").matcher("abc and abc ab ab", false); 147 | assertEquals(1, m.GroupCount()); 148 | // now m contains information only for group 0 149 | // so m.start(), m.end() and m.group() 150 | // trying m.{start|end|group}(n : n > 0) always fails 151 | 152 | If your regex is very complex (most likely programmatically composed by concatenating different patterns) and the 153 | number of groups is huge, this can improve performance significantly (data structures to contain all possible matches 154 | are not allocated). 155 | 156 | **NOTE 1**: `RE2Matcher` object maintains a pointer to a char buffer that is used in C++ stack to manage the current string, in order to avoid a copy for each iteration. 157 | For this reason, `RE2Matcher` object implements AutoCloseable interface, to be used in `try-with-resource` statement. 158 | Close method is called in `finalize()`, so garbage collector will ensure (sooner or later) to free the memory. This is the same pattern that has been used for 159 | `RE2` object, but, usually, `RE2` regex are compiled and then used multiple times while `RE2Matcher` objects 160 | are used in stack and most likely you will want to delete it as soon as has been used. 161 | In this case, you can use the `try-with-resource` block to make sure you don't miss anything 162 | 163 | try (RE2Matcher matcher = re.matcher("my bla input string bl bla")) { 164 | matcher. .... 165 | } 166 | 167 | **NOTE 2**: `RE2Matcher` is not thread-safe, just like `java.util.regex.Matcher` 168 | 169 | ### Re-using strings ### 170 | 171 | Whenever a `RE2Matcher` is created, the content of the string is copied to make it accessible from C++ stack. If you have to 172 | check and search for several patterns on the same string, this could affect performances, because you are copying 173 | the same string multiple times. 174 | 175 | For this reason, from version v1.2, we have implemented a new object, `RE2String` that is a wrapper for a `CharSequence`. 176 | You can create an instance of this object in advance, and then create a `RE2Matcher` using your `RE2String`. This new object 177 | can be re-used multiple times to create matchers for different patterns. 178 | When `RE2Matcher` is created using a `RE2String`, it doesn't copy the string and when you close it (see above about the `AutoCloseable` interface) 179 | simply does nothing. Similarly, `RE2String` implements `AutoCloseable` interface and `finalize` method has been overridden to let the GC 180 | clean resources for you. 181 | 182 | 183 | RE2 regex1 = RE2.compile("\\b[\\d]{5}\\b"); 184 | RE2 regex2 = RE2.compile("\\b[a-zA-Z]{5}\\b"); 185 | 186 | String input = .... 187 | RE2String rstring = new RE2String(input); 188 | 189 | RE2Matcher m1 = regex1.matcher(rstring); 190 | RE2Matcher m2 = regex2.matcher(rstring); 191 | while(m1.find()) { 192 | int endFirst = m1.end(); 193 | if (m2.find(endFirst, endFirst + 10)) { 194 | ... 195 | } 196 | } 197 | 198 | // here m1.close() and m2.close() do nothing 199 | 200 | 201 | ### Submatch extraction ### 202 | 203 | Both static and member match functions support additional parameters in which submatches will be stored. 204 | Java does not support passing arguments by reference, so we use arrays to store submatches: 205 | 206 | int[] x = new int[1]; 207 | long[] y = new int[1]; 208 | RE2.fullMatch("121:2569856321142", "(\\d+):(\\d+)", x, y); 209 | // x[0] == 121, y[0] == 2569856321142 210 | 211 | Array of length bigger then 1 can be used. Then it is used to store as much consecutive submatches as is the length of the array: 212 | 213 | int[] x = new int[2]; 214 | String[] s = new String[1]; 215 | long[] y = new long[3]; 216 | new RE2 re = new RE2("(\\d+):(\\d+)-([a-zA-Z]+)-(\\d+):(\\d+):(\\d+)"); 217 | re.fullMatch("225:3-xxx-2:2555422298777:7", x, s, y); 218 | // x[0] == 225, x[1] == 3, s[0] == xxx, y[0] == 2, y[1] == 2555422298777, y[2] == 7 219 | 220 | So far, only int[], long[], float[], double[] and String[] are supported. Adding of other types should be quite easy. 221 | 222 | ### Little comment about the interface and passing by reference ### 223 | 224 | I know that a lot of Java programmers may complain that the interface based on passing of parameters by reference through the trick with arrays 225 | is quite bad practise, dirty trick and that it introduces something what is in fact not present in Java. 226 | 227 | But after I try it in a real code I decided that it is the best way to pass the values of submatches. 228 | ~~If you have any idea how to implement it in different way, please give me know.~~ *See Matcher interface above* 229 | 230 | ### Named capture group extraction ### 231 | 232 | Capture group entities have a sub-string and a reference to the beginning and end index that this string corresponds to 233 | in a matched event. Named capture group entities wrap this and include a name. 234 | 235 | getCaptureGroups(), and getCaptureGroupNames() are two methods that are called by getNamedCaptureGroups() to create a list 236 | of NamedGroup entities. The lists returned by these methods are in order, allowing getNamedCaptureGroups to associate them, 237 | if the length of the returned lists differ we can assume that we cannot maintain association and return an empty list. 238 | 239 | getCaptureGroupNames uses the native RE2 method, getCaptureGroups uses the contributor code to get RE2Matcher objects. 240 | 241 | ### Options ### 242 | 243 | Object `com.logentries.re2.Options` encapsulates possible configuration that is used during creation of the RE2 object. It is more or less equivalent to RE2::Options 244 | from C++ interface. It can be passed as a second argument to RE2 constructor. 245 | 246 | It uses several setter methods to set the configuration values: 247 | 248 | Options opt = new Options(); 249 | opt.setNeverNl(true); 250 | opt.setWordBoundary(false); 251 | 252 | or equivalently: 253 | 254 | Options opt = new Options().setNeverNl(true).setWordBoundary(false); 255 | 256 | `RE2` constructor is now overloaded to support for explicit flag list, to mimic C++ style: 257 | 258 | RE2 regex = new RE2("TGIF?", 259 | Options.CASE_INSENSITIVE, 260 | Options.ENCODING(Encoding.UTF8), 261 | Options.PERL_CLASSES(false) 262 | ); 263 | 264 | see `Options` static fields for further details. 265 | -------------------------------------------------------------------------------- /src/test/java/com/logentries/re2_test/TestMatcherFind.java: -------------------------------------------------------------------------------- 1 | package com.logentries.re2_test; 2 | 3 | import com.logentries.re2.*; 4 | import com.logentries.re2.entity.NamedGroup; 5 | import org.junit.Assert; 6 | import org.junit.Test; 7 | 8 | import java.util.List; 9 | import java.util.Random; 10 | import java.util.regex.MatchResult; 11 | 12 | import static org.junit.Assert.*; 13 | 14 | 15 | public class TestMatcherFind { 16 | final String oneNamedGroup = "(?Pcode)"; 17 | final String twoNamedGroups = "(?Ptest).*co(?Pde)"; 18 | final String nestedNamedGroups = "(?P(?Ptest).*co(?Pde))"; 19 | final String optionalNamedGroup = "(?Phello)?"; 20 | 21 | @Test 22 | public void testFindSimple() throws Exception { 23 | 24 | RE2 regex = new RE2("(www\\.)?dandelion\\.eu"); 25 | 26 | RE2Matcher matcher = regex.matcher("https://dandelion.eu/datatxt"); 27 | assertTrue(matcher.find()); 28 | assertEquals(8, matcher.start()); 29 | assertEquals(20, matcher.end()); 30 | assertEquals("dandelion.eu", matcher.group()); 31 | assertEquals(8, matcher.start(0)); 32 | assertEquals(20, matcher.end(0)); 33 | assertEquals("dandelion.eu", matcher.group(0)); 34 | assertEquals(-1, matcher.start(1)); 35 | assertEquals(-1, matcher.end(1)); 36 | assertNull(matcher.group(1)); 37 | 38 | } 39 | @Test 40 | public void testFindNoGroups() throws Exception { 41 | 42 | RE2 regex = new RE2("(www\\.)?dandelion\\.eu"); 43 | 44 | RE2Matcher matcher = regex.matcher("https://www.dandelion.eu/datatxt", false); 45 | assertTrue(matcher.find()); 46 | assertEquals("www.dandelion.eu", matcher.group()); 47 | assertEquals(1, matcher.groupCount()); 48 | } 49 | 50 | @Test 51 | public void testMatchGroups() throws Exception { 52 | RE2 regex = new RE2("(www\\.)?dandelion\\.(eu)"); 53 | RE2Matcher matcher = regex.matcher("€€ https://dandelion.eu/datatxt - www.dandelion.eu/datatxt"); 54 | assertTrue(matcher.findNext()); 55 | assertEquals("dandelion.eu", matcher.group()); 56 | assertNull(matcher.group(1)); 57 | assertEquals("eu", matcher.group(2)); 58 | 59 | assertTrue(matcher.findNext()); 60 | assertEquals("www.dandelion.eu", matcher.group()); 61 | assertEquals("www.",matcher.group(1)); 62 | assertEquals("eu", matcher.group(2)); 63 | 64 | assertFalse(matcher.findNext()); 65 | } 66 | 67 | @Test 68 | public void testGetCaptureGroupNames() throws Exception { 69 | assertEquals(1, new RE2(oneNamedGroup).getCaptureGroupNames().size()); 70 | assertEquals(2, new RE2(twoNamedGroups).getCaptureGroupNames().size()); 71 | assertEquals(3, new RE2(nestedNamedGroups).getCaptureGroupNames().size()); 72 | assertEquals(1, new RE2(optionalNamedGroup).getCaptureGroupNames().size()); 73 | 74 | for (int i = 0; i < 3; i++) { 75 | assertEquals("name"+(i+1), new RE2(nestedNamedGroups).getCaptureGroupNames().get(i)); 76 | } 77 | } 78 | 79 | @Test 80 | public void testSingleNamedCaptureGroupsTest() throws Exception { 81 | String event = "test code best log"; 82 | RE2 regex = new RE2(oneNamedGroup); 83 | 84 | List names = regex.getCaptureGroupNames(); 85 | List namedCaptureGroups = regex.getNamedCaptureGroups(names, event); 86 | 87 | assertEquals(1, namedCaptureGroups.size()); 88 | assertEquals("name1", namedCaptureGroups.get(0).name); 89 | assertEquals("code", namedCaptureGroups.get(0).captureGroup.matchingText); 90 | } 91 | 92 | @Test 93 | public void testMultipleNamedCaptureGroupsTest() throws Exception { 94 | String event = "test code best log"; 95 | RE2 regex = new RE2(twoNamedGroups); 96 | 97 | List names = regex.getCaptureGroupNames(); 98 | List namedCaptureGroups = regex.getNamedCaptureGroups(names, event); 99 | 100 | assertEquals(2, namedCaptureGroups.size()); 101 | assertEquals("name1", namedCaptureGroups.get(0).name); 102 | assertEquals("test", namedCaptureGroups.get(0).captureGroup.matchingText); 103 | assertEquals("name2", namedCaptureGroups.get(1).name); 104 | assertEquals("de", namedCaptureGroups.get(1).captureGroup.matchingText); 105 | } 106 | 107 | @Test 108 | public void testNestedNamedCaptureGroupsTest() throws Exception { 109 | String event = "test code best log"; 110 | RE2 regex = new RE2(nestedNamedGroups); 111 | 112 | List names = regex.getCaptureGroupNames(); 113 | List namedCaptureGroups = regex.getNamedCaptureGroups(names, event); 114 | 115 | assertEquals(3, namedCaptureGroups.size()); 116 | assertEquals("name1", namedCaptureGroups.get(0).name); 117 | assertEquals("test code", namedCaptureGroups.get(0).captureGroup.matchingText); 118 | assertEquals("name2", namedCaptureGroups.get(1).name); 119 | assertEquals("test", namedCaptureGroups.get(1).captureGroup.matchingText); 120 | assertEquals("name3", namedCaptureGroups.get(2).name); 121 | assertEquals("de", namedCaptureGroups.get(2).captureGroup.matchingText); 122 | } 123 | 124 | @Test 125 | public void testOptionalNamedCaptureGroupsTest() throws Exception { 126 | String event = "hello log"; 127 | RE2 regex = new RE2(optionalNamedGroup); 128 | 129 | List names = regex.getCaptureGroupNames(); 130 | List namedCaptureGroups = regex.getNamedCaptureGroups(names, event); 131 | 132 | assertEquals(1, namedCaptureGroups.size()); 133 | assertEquals("name1", namedCaptureGroups.get(0).name); 134 | assertEquals("hello", namedCaptureGroups.get(0).captureGroup.matchingText); 135 | 136 | String event2 = "test log"; 137 | RE2 regex2 = new RE2(optionalNamedGroup); 138 | 139 | List names2 = regex.getCaptureGroupNames(); 140 | List namedCaptureGroups2 = regex2.getNamedCaptureGroups(names2, event2); 141 | 142 | assertEquals(0, namedCaptureGroups2.size()); 143 | } 144 | 145 | @Test 146 | public void testFindNext() throws Exception { 147 | RE2 regex = new RE2("(www\\.)?dandelion\\.(eu)"); 148 | RE2Matcher matcher = regex.matcher("€€ https://dandelion.euwww.dandelion.eu"); 149 | assertTrue(matcher.findNext()); 150 | assertEquals("dandelion.eu", matcher.group()); 151 | assertNull(matcher.group(1)); 152 | assertEquals("eu", matcher.group(2)); 153 | 154 | assertTrue(matcher.findNext()); 155 | assertEquals("www.dandelion.eu", matcher.group()); 156 | assertEquals("www.",matcher.group(1)); 157 | assertEquals("eu", matcher.group(2)); 158 | 159 | assertFalse(matcher.findNext()); 160 | } 161 | 162 | @Test(expected = IllegalStateException.class) 163 | public void testFindGroupOverflow() throws Exception { 164 | 165 | RE2 regex = new RE2("(www\\.)?dandelion\\.eu"); 166 | 167 | RE2Matcher matcher = regex.matcher("https://dandelion.eu/datatxt"); 168 | assertTrue(matcher.find()); 169 | matcher.group(2); 170 | 171 | } 172 | 173 | @Test 174 | public void testFindStart() throws Exception { 175 | 176 | RE2 regex = new RE2("(www\\.)?dandelion\\.eu"); 177 | 178 | RE2Matcher matcher = regex.matcher("Datatxt: https://dandelion.eu/datatxt - the named entity extraction tool by Spaziodati"); 179 | assertTrue(matcher.find()); 180 | assertTrue(matcher.find(17)); 181 | assertFalse(matcher.find(18)); 182 | assertFalse(matcher.find(40)); 183 | } 184 | 185 | @Test 186 | public void testFindEnd() throws Exception { 187 | 188 | RE2 regex = new RE2("(www\\.)?dandelion\\.eu"); 189 | 190 | RE2Matcher matcher = regex.matcher("Datatxt: https://dandelion.eu/datatxt -"); 191 | assertTrue(matcher.find()); 192 | assertTrue(matcher.find(17,29)); 193 | assertFalse(matcher.find(18,29)); 194 | assertTrue(matcher.find(0, 39)); 195 | } 196 | 197 | @Test 198 | public void testOffsetSpecialChars() throws Exception { 199 | 200 | RE2 regex = new RE2("dandelion\\.eu"); 201 | 202 | String[] input = { 203 | "Dàtàtxt: https://dandelion.eu/datatxt - ", //offset 2 204 | "D€t€t€€: https://dandelion.eu/datatxt - ", //offset 3 205 | "€€€€€€€: https://dandelion.eu/datatxt €€€", //offset 3 206 | }; 207 | 208 | for (String i : input) { 209 | RE2Matcher matcher = regex.matcher(i); 210 | assertTrue(i, matcher.find()); 211 | assertEquals(i, "dandelion.eu", matcher.group()); 212 | assertTrue(i, matcher.find(17)); 213 | assertEquals(i, "dandelion.eu", matcher.group()); 214 | assertFalse(i, matcher.find(18)); 215 | } 216 | 217 | } 218 | @Test 219 | public void testSurrogateChars() throws Exception { 220 | 221 | RE2 regex = new RE2("(www\\.)?dandelion\\.eu"); 222 | 223 | String[] input = { 224 | "D\uD801\uDC28t\uD801\uDC28t\uD801\uDC28€: https://dandelion.eu/datatxt - ", //surrogate 225 | "D\uD83D\uDC3Et\uD83D\uDC3Et\uD83D\uDC3E€: https://dandelion.eu/datatxt - ", //surrogate 226 | }; 227 | 228 | for (String i : input) { 229 | RE2Matcher matcher = regex.matcher(i); 230 | assertTrue(i, matcher.find()); 231 | assertEquals(i, "dandelion.eu", matcher.group()); 232 | assertTrue(i, matcher.find(20)); 233 | assertEquals(i, "dandelion.eu", matcher.group()); 234 | assertFalse(i, matcher.find(21)); 235 | } 236 | 237 | } 238 | 239 | 240 | @Test 241 | public void testEmptyStrings() throws Exception { 242 | RE2 regex = new RE2("(www\\.)?dandelion\\.eu"); 243 | assertFalse(regex.matcher("").find()); 244 | assertFalse(regex.matcher("a").find()); 245 | assertFalse(regex.matcher("€").find()); 246 | } 247 | 248 | @Test() 249 | public void testIterator() throws Exception { 250 | int c = 0; 251 | for (MatchResult mr : new RE2("t").matcher("input text")) c++; 252 | assertEquals(3, c); 253 | } 254 | 255 | 256 | 257 | @Test(expected = IllegalStateException.class) 258 | public void testClosed() throws Exception { 259 | RE2Matcher m = new RE2("test").matcher("input text"); 260 | m.close(); 261 | m.find(); 262 | } 263 | 264 | @Test() 265 | public void testTryWith() throws Exception { 266 | RE2 r = new RE2("t"); 267 | try (RE2Matcher m = r.matcher("input text")) { 268 | assertTrue(m.findNext()); 269 | assertTrue(m.findNext()); 270 | assertTrue(m.findNext()); 271 | assertFalse(m.findNext()); 272 | } 273 | } 274 | 275 | 276 | @Test(expected = IllegalStateException.class) 277 | public void testReClosed() throws Exception { 278 | RE2 regex = new RE2("test"); 279 | RE2Matcher m = regex.matcher("input text"); 280 | regex.close(); 281 | m.find(); 282 | } 283 | 284 | @Test 285 | public void testOptionsList() throws Exception { 286 | RE2 regex = new RE2("TGIF?", 287 | Options.CASE_INSENSITIVE, 288 | Options.ENCODING(Encoding.UTF8), 289 | Options.PERL_CLASSES(false) 290 | ); 291 | } 292 | 293 | @Test 294 | public void testMoreGroups() throws Exception { 295 | String pattern = ""; 296 | char c = 'a'; 297 | for (int i=0; i<25; i++) { 298 | char cnext = (char)(c+i); 299 | if (i>0) pattern += "|("+cnext+")"; 300 | else pattern += "("+ cnext +")"; 301 | } 302 | 303 | RE2Matcher matcher = new RE2(pattern).matcher("a very beatiful string"); 304 | assertTrue(matcher.findNext()); //a 305 | assertTrue(matcher.findNext()); //v 306 | assertEquals("v", matcher.group()); 307 | assertEquals("v", matcher.group('v' - 'a' + 1)); 308 | } 309 | 310 | static String rnd(int len) { 311 | Random r = new Random(); 312 | String s = new String(); 313 | for (int i=0; i 5 | * 6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include "RE2.h" 16 | #include "op.h" 17 | #include "Options.h" 18 | 19 | using re2::StringPiece; 20 | using namespace std; 21 | 22 | template 23 | static Dst safe_cast(Src src) { 24 | Dst dst = static_cast(src); 25 | BOOST_VERIFY(static_cast(dst) == src); 26 | BOOST_VERIFY(dst > 0 == src > 0); 27 | return dst; 28 | } 29 | 30 | RE2::Options::Encoding get_re2_encoding(JNIEnv *env, jobject j_encoding) { 31 | jclass j_cls = env->FindClass("com/logentries/re2/Encoding"); 32 | 33 | jmethodID equals_id = env->GetMethodID(j_cls, "equals", "(Ljava/lang/Object;)Z"); 34 | 35 | const char *fields[] = {"UTF8", "Latin1", }; 36 | const RE2::Options::Encoding enc_fields[] = {RE2::Options::EncodingUTF8, RE2::Options::EncodingLatin1, }; 37 | for (int i = 0; i < sizeof(fields)/sizeof(*fields); ++i) { 38 | jfieldID fid = env->GetStaticFieldID(j_cls, fields[i], "Lcom/logentries/re2/Encoding;"); 39 | jobject item = env->GetStaticObjectField(j_cls, fid); 40 | if (env->CallBooleanMethod(item, equals_id, j_encoding)) { 41 | return enc_fields[i]; 42 | } 43 | } 44 | BOOST_VERIFY(0); 45 | } 46 | 47 | jobject get_j_encoding(JNIEnv *env, RE2::Options::Encoding enc) { 48 | const char *fields[] = {"UTF8", "Latin1", }; 49 | const RE2::Options::Encoding enc_fields[] = {RE2::Options::EncodingUTF8, RE2::Options::EncodingLatin1, }; 50 | for (int i = 0; i < sizeof(enc_fields)/sizeof(*enc_fields); ++i) { 51 | RE2::Options::Encoding enc_item = enc_fields[i]; 52 | if (enc_item == enc) { 53 | jclass j_cls = env->FindClass("com/logentries/re2/Encoding"); 54 | jfieldID fid = env->GetStaticFieldID(j_cls, fields[i], "Lcom/logentries/re2/Encoding;"); 55 | jobject item = env->GetStaticObjectField(j_cls, fid); 56 | return item; 57 | } 58 | } 59 | BOOST_VERIFY(0); 60 | } 61 | 62 | static jfieldID get_field_id_safe(JNIEnv *env, jclass j_cls, const char *name, const char *sig) { 63 | jfieldID fid = env->GetFieldID(j_cls, name, sig); 64 | BOOST_VERIFY(fid != NULL); 65 | return fid; 66 | } 67 | 68 | JNIEXPORT void JNICALL Java_com_logentries_re2_Options_setDefaults 69 | (JNIEnv *env, jobject j_this) { 70 | RE2::Options options; 71 | jclass j_cls = env->GetObjectClass(j_this); 72 | env->SetObjectField(j_this, get_field_id_safe(env, j_cls, "encoding", "Lcom/logentries/re2/Encoding;"), get_j_encoding(env, options.encoding())); 73 | env->SetBooleanField(j_this, get_field_id_safe(env, j_cls, "posixSyntax", "Z"), options.posix_syntax()); 74 | env->SetBooleanField(j_this, get_field_id_safe(env, j_cls, "longestMatch", "Z"), options.longest_match()); 75 | env->SetBooleanField(j_this, get_field_id_safe(env, j_cls, "logErrors", "Z"), options.log_errors()); 76 | env->SetLongField(j_this, get_field_id_safe(env, j_cls, "maxMem", "J"), safe_cast(options.max_mem())); 77 | env->SetBooleanField(j_this, get_field_id_safe(env, j_cls, "literal", "Z"), options.literal()); 78 | env->SetBooleanField(j_this, get_field_id_safe(env, j_cls, "neverNl", "Z"), options.never_nl()); 79 | env->SetBooleanField(j_this, get_field_id_safe(env, j_cls, "neverCapture", "Z"), options.never_capture()); 80 | env->SetBooleanField(j_this, get_field_id_safe(env, j_cls, "caseSensitive", "Z"), options.case_sensitive()); 81 | env->SetBooleanField(j_this, get_field_id_safe(env, j_cls, "perlClasses", "Z"), options.perl_classes()); 82 | env->SetBooleanField(j_this, get_field_id_safe(env, j_cls, "wordBoundary", "Z"), options.word_boundary()); 83 | } 84 | 85 | static void cpy_options(RE2::Options &options, JNIEnv *env, jobject j_options) { 86 | BOOST_VERIFY(j_options != 0); 87 | jclass j_options_cls = env->GetObjectClass(j_options); 88 | options.set_encoding(get_re2_encoding(env, env->GetObjectField(j_options, get_field_id_safe(env, j_options_cls, "encoding", "Lcom/logentries/re2/Encoding;")))); 89 | options.set_posix_syntax(env->GetBooleanField(j_options, get_field_id_safe(env, j_options_cls, "posixSyntax", "Z"))); 90 | options.set_longest_match(env->GetBooleanField(j_options, get_field_id_safe(env, j_options_cls, "longestMatch", "Z"))); 91 | options.set_log_errors(env->GetBooleanField(j_options, get_field_id_safe(env, j_options_cls, "logErrors", "Z"))); 92 | options.set_max_mem(safe_cast(env->GetLongField(j_options, get_field_id_safe(env, j_options_cls, "maxMem", "J")))); 93 | options.set_literal(env->GetBooleanField(j_options, get_field_id_safe(env, j_options_cls, "literal", "Z"))); 94 | options.set_never_nl(env->GetBooleanField(j_options, get_field_id_safe(env, j_options_cls, "neverNl", "Z"))); 95 | options.set_never_capture(env->GetBooleanField(j_options, get_field_id_safe(env, j_options_cls, "neverCapture", "Z"))); 96 | options.set_case_sensitive(env->GetBooleanField(j_options, get_field_id_safe(env, j_options_cls, "caseSensitive", "Z"))); 97 | options.set_perl_classes(env->GetBooleanField(j_options, get_field_id_safe(env, j_options_cls, "perlClasses", "Z"))); 98 | options.set_word_boundary(env->GetBooleanField(j_options, get_field_id_safe(env, j_options_cls, "wordBoundary", "Z"))); 99 | } 100 | 101 | class Options : public RE2::Options { 102 | public: 103 | Options(JNIEnv *env, jobject j_options) { 104 | if (j_options != 0) { 105 | cpy_options(*this, env, j_options); 106 | } 107 | } 108 | }; 109 | 110 | static bool is_empty_arr(JNIEnv *env, jarray j_arr) { 111 | return j_arr == 0 || env->GetArrayLength(j_arr) == 0; 112 | 113 | } 114 | 115 | static bool throw_RegExprException(JNIEnv *env, const char *msg) { 116 | const char *class_name = "com/logentries/re2/RegExprException" ; 117 | 118 | jclass j_cls = env->FindClass(class_name); 119 | if (j_cls == NULL) { 120 | BOOST_VERIFY(!"Cannot find exception class :-("); 121 | } 122 | 123 | return env->ThrowNew(j_cls, msg) == 0; 124 | } 125 | 126 | JNIEXPORT jlong JNICALL Java_com_logentries_re2_RE2_compileImpl 127 | (JNIEnv *env, jclass cls, jstring j_str, jobject j_options) { 128 | Options options(env, j_options); 129 | const char *str = env->GetStringUTFChars(j_str, 0); 130 | RE2 *pointer = new RE2(str, options); 131 | if (pointer->ok()) { 132 | env->ReleaseStringUTFChars(j_str, str); 133 | jlong j_pointer = reinterpret_cast(pointer); 134 | BOOST_VERIFY(reinterpret_cast(j_pointer) == pointer); 135 | return j_pointer; 136 | } else { 137 | throw_RegExprException(env, pointer->error().c_str()); 138 | delete pointer; 139 | return 0; 140 | } 141 | } 142 | 143 | JNIEXPORT void JNICALL Java_com_logentries_re2_RE2_releaseImpl 144 | (JNIEnv *env, jclass cls, jlong j_pointer) { 145 | RE2 *pointer = reinterpret_cast(j_pointer); 146 | //pool.destroy(pointer); 147 | delete pointer; 148 | } 149 | 150 | struct FullMatchCOp { 151 | const char *str_; 152 | const RE2 *pattern_; 153 | 154 | FullMatchCOp(const char *str, const RE2 *pattern) 155 | : str_(str), 156 | pattern_(pattern) 157 | { } 158 | 159 | bool operator()(const RE2::Arg* const args[], const int n) const { 160 | return RE2::FullMatchN(str_, *pattern_, args, n); 161 | } 162 | }; 163 | 164 | JNIEXPORT jboolean JNICALL Java_com_logentries_re2_RE2_fullMatchImpl__Ljava_lang_String_2J_3Ljava_lang_Object_2 165 | (JNIEnv *env, jclass cls, jstring j_str, jlong j_pointer, jobjectArray j_args) { 166 | const char *str = env->GetStringUTFChars(j_str, 0); 167 | RE2 *pointer = reinterpret_cast(j_pointer); 168 | const bool res = is_empty_arr(env, j_args) ? RE2::FullMatch(str, *pointer) : do_op(env, FullMatchCOp(str, pointer), j_args); 169 | env->ReleaseStringUTFChars(j_str, str); 170 | return static_cast(res); 171 | } 172 | 173 | struct PartialMatchCOp { 174 | const char *str_; 175 | const RE2 *pattern_; 176 | 177 | PartialMatchCOp(const char *str, const RE2 *pattern) 178 | : str_(str), 179 | pattern_(pattern) 180 | { } 181 | 182 | bool operator()(const RE2::Arg* const args[], const int n) const { 183 | return RE2::PartialMatchN(str_, *pattern_, args, n); 184 | } 185 | }; 186 | 187 | JNIEXPORT jboolean JNICALL Java_com_logentries_re2_RE2_partialMatchImpl__Ljava_lang_String_2J_3Ljava_lang_Object_2 188 | (JNIEnv *env, jclass cls, jstring j_str, jlong j_pointer, jobjectArray j_args) { 189 | const char *str = env->GetStringUTFChars(j_str, 0); 190 | RE2 *pointer = reinterpret_cast(j_pointer); 191 | const bool res = is_empty_arr(env, j_args) ? RE2::PartialMatch(str, *pointer) : do_op(env, PartialMatchCOp(str, pointer), j_args); 192 | env->ReleaseStringUTFChars(j_str, str); 193 | return static_cast(res); 194 | } 195 | 196 | struct FullMatchOp { 197 | const char *str_; 198 | const char *pattern_; 199 | 200 | FullMatchOp(const char *str, const char *pattern) 201 | : str_(str), 202 | pattern_(pattern) 203 | { } 204 | 205 | bool operator()(const RE2::Arg* const args[], const int n) const { 206 | return RE2::FullMatchN(str_, pattern_, args, n); 207 | } 208 | }; 209 | 210 | JNIEXPORT jboolean JNICALL Java_com_logentries_re2_RE2_fullMatchImpl__Ljava_lang_String_2Ljava_lang_String_2_3Ljava_lang_Object_2 211 | (JNIEnv *env, jclass cls, jstring j_str, jstring j_pattern, jobjectArray j_args) { 212 | const char *str = env->GetStringUTFChars(j_str, 0); 213 | const char *pattern = env->GetStringUTFChars(j_pattern, 0); 214 | const bool res = is_empty_arr(env, j_args) ? RE2::FullMatch(str, pattern) : do_op(env, FullMatchOp(str, pattern), j_args); 215 | env->ReleaseStringUTFChars(j_str, str); 216 | env->ReleaseStringUTFChars(j_pattern, pattern); 217 | return static_cast(res); 218 | } 219 | 220 | struct PartialMatchOp { 221 | const char *str_; 222 | const char *pattern_; 223 | 224 | PartialMatchOp(const char *str, const char *pattern) 225 | : str_(str), 226 | pattern_(pattern) 227 | { } 228 | 229 | bool operator()(const RE2::Arg* const args[], const int n) const { 230 | return RE2::PartialMatchN(str_, pattern_, args, n); 231 | } 232 | }; 233 | 234 | JNIEXPORT jboolean JNICALL Java_com_logentries_re2_RE2_partialMatchImpl__Ljava_lang_String_2Ljava_lang_String_2_3Ljava_lang_Object_2 235 | (JNIEnv *env, jclass cls, jstring j_str, jstring j_pattern, jobjectArray j_args) { 236 | const char *str = env->GetStringUTFChars(j_str, 0); 237 | const char *pattern = env->GetStringUTFChars(j_pattern, 0); 238 | const bool res = is_empty_arr(env, j_args) ? RE2::PartialMatch(str, pattern) : do_op(env, PartialMatchOp(str, pattern), j_args); 239 | env->ReleaseStringUTFChars(j_str, str); 240 | env->ReleaseStringUTFChars(j_pattern, pattern); 241 | return static_cast(res); 242 | } 243 | 244 | JNIEXPORT jobject JNICALL Java_com_logentries_re2_RE2_getCaptureGroupNamesImpl 245 | (JNIEnv *env, jclass cls, jlong j_pointer, jobjectArray j_args) { 246 | RE2 *pointer = reinterpret_cast(j_pointer); 247 | 248 | jclass j_array_list = env->FindClass("java/util/ArrayList"); 249 | if (j_array_list == NULL) return NULL; 250 | 251 | jmethodID arrayListCtor = env->GetMethodID(j_array_list, "", "()V"); 252 | jmethodID add = env->GetMethodID(j_array_list, "add", "(Ljava/lang/Object;)Z"); 253 | jobject java_array_list = env->NewObject(j_array_list, arrayListCtor); 254 | 255 | map groupNames = (pointer->CapturingGroupNames()); 256 | map::iterator it; 257 | 258 | for (it = groupNames.begin(); it != groupNames.end(); ++it) { 259 | jstring jvalue = env->NewStringUTF(it->second.c_str()); 260 | 261 | env->CallObjectMethod(java_array_list, add, jvalue); 262 | }; 263 | 264 | return java_array_list; 265 | } 266 | 267 | JNIEXPORT jint JNICALL Java_com_logentries_re2_RE2_numberOfCapturingGroupsImpl 268 | (JNIEnv *env, jclass cls, jlong re2_pointer) { 269 | 270 | RE2 *regex = reinterpret_cast(re2_pointer); 271 | return static_cast(regex->NumberOfCapturingGroups()); 272 | } 273 | 274 | JNIEXPORT jlong JNICALL Java_com_logentries_re2_RE2String_createStringBuffer 275 | (JNIEnv *env, jclass cls, jbyteArray input) { 276 | // const char *str = env->GetStringUTFChars(input, 0); 277 | char* str = (char*) env->GetByteArrayElements(input, 0); 278 | return reinterpret_cast(str); 279 | } 280 | 281 | 282 | JNIEXPORT void JNICALL Java_com_logentries_re2_RE2String_releaseStringBuffer 283 | (JNIEnv *env, jclass cls, jbyteArray input, jlong j_pointer) { 284 | char *pointer = reinterpret_cast(j_pointer); 285 | env->ReleaseByteArrayElements(input, (jbyte*)pointer, JNI_ABORT); 286 | } 287 | 288 | static const int stackSize = 16 + 1; // see 'kVecSize' in re2.cc 289 | 290 | JNIEXPORT jboolean JNICALL Java_com_logentries_re2_RE2Matcher_findImpl 291 | (JNIEnv *env, jclass cls, jobject matcher, jlong re2_pointer, jlong str_pointer, jint ngroups, jint start, jint end) { 292 | 293 | 294 | RE2 *regex = reinterpret_cast(re2_pointer); 295 | char *str = reinterpret_cast(str_pointer); 296 | 297 | StringPiece* groups; 298 | StringPiece stackgroups[stackSize]; 299 | StringPiece* heapgroups = NULL; 300 | 301 | if (ngroups <= stackSize) { 302 | groups = stackgroups; 303 | } else { 304 | groups = new StringPiece[ngroups]; 305 | heapgroups = groups; 306 | } 307 | 308 | StringPiece text(str); 309 | const bool res = regex->Match(text, start, end, RE2::UNANCHORED, groups, ngroups); 310 | if (res) { 311 | jclass matcher_class = env->FindClass("com/logentries/re2/RE2Matcher"); 312 | jmethodID addID = env->GetStaticMethodID(matcher_class, "addGroup", "(Lcom/logentries/re2/RE2Matcher;II)V"); 313 | for (int i=0; iCallStaticObjectMethod( 316 | matcher_class, 317 | addID, 318 | matcher, 319 | static_cast(groups[i].data() - str), 320 | static_cast(groups[i].data() - str + groups[i].size()) 321 | ); 322 | } else { 323 | env->CallStaticObjectMethod(matcher_class, addID, 324 | matcher, static_cast(-1), static_cast(-1)); 325 | } 326 | } 327 | } 328 | 329 | delete[] heapgroups; 330 | return static_cast(res); 331 | } --------------------------------------------------------------------------------