├── gradle.properties ├── .travis.yml ├── settings.gradle ├── gradle ├── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties └── sonatype.gradle ├── TODO.md ├── src ├── main │ ├── resources │ │ └── patterns │ │ │ ├── java │ │ │ ├── ruby │ │ │ ├── linux-syslog │ │ │ ├── firewalls │ │ │ ├── haproxy │ │ │ ├── patterns │ │ │ ├── nagios │ │ │ └── postfix │ └── java │ │ └── io │ │ └── krakens │ │ └── grok │ │ └── api │ │ ├── exception │ │ └── GrokException.java │ │ ├── GrokUtils.java │ │ ├── Discovery.java │ │ ├── Converter.java │ │ ├── Grok.java │ │ ├── Match.java │ │ └── GrokCompiler.java └── test │ └── java │ └── io │ └── krakens │ └── grok │ └── api │ ├── ResourceManager.java │ ├── MessagesTest.java │ ├── GrokDocumentationTest.java │ ├── ApacheTest.java │ ├── GrokListTest.java │ ├── ApacheDataTypeTest.java │ ├── BasicTest.java │ ├── CaptureTest.java │ └── GrokTest.java ├── LICENSE ├── .gitignore ├── README.md ├── gradlew.bat ├── gradlew └── extra └── checkstyle ├── checkstyle.xml └── intellij-style.xml /gradle.properties: -------------------------------------------------------------------------------- 1 | version=0.1.10-SNAPSHOT 2 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: java 2 | jdk: 3 | - oraclejdk8 4 | dist: trusty -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | rootProject.name = 'java-grok' 2 | version = "0.1.9-SNAPSHOT" 3 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thekrakken/java-grok/HEAD/gradle/wrapper/gradle-wrapper.jar 
/**
 * Names of the classpath resources shared by the Grok test suite.
 *
 * <p>The class only holds constants and cannot be instantiated.
 */
public final class ResourceManager {

  /** Pattern definitions the tests register on a {@code GrokCompiler}. */
  public static final String PATTERNS = "patterns/patterns";

  /** Sample linux messages log read by {@code MessagesTest}. */
  public static final String MESSAGES = "message/messages";

  /** Resource name {@code "nasa/"}; presumably the NASA access-log directory (confirm against callers). */
  public static final String NASA = "nasa/";

  /** Resource name {@code "ip"}; usage is not visible here (confirm against callers). */
  public static final String IP = "ip";

  private ResourceManager() {
    // Constant holder: creating instances would serve no purpose.
  }
}
14 | -------------------------------------------------------------------------------- /src/main/resources/patterns/firewalls: -------------------------------------------------------------------------------- 1 | # Forked from https://github.com/elasticsearch/logstash/tree/v1.4.0/patterns 2 | # NetScreen firewall logs 3 | NETSCREENSESSIONLOG %{SYSLOGTIMESTAMP:date} %{IPORHOST:device} %{IPORHOST}: NetScreen device_id=%{WORD:device_id}%{DATA}: start_time=%{QUOTEDSTRING:start_time} duration=%{INT:duration} policy_id=%{INT:policy_id} service=%{DATA:service} proto=%{INT:proto} src zone=%{WORD:src_zone} dst zone=%{WORD:dst_zone} action=%{WORD:action} sent=%{INT:sent} rcvd=%{INT:rcvd} src=%{IPORHOST:src_ip} dst=%{IPORHOST:dst_ip} src_port=%{INT:src_port} dst_port=%{INT:dst_port} src-xlated ip=%{IPORHOST:src_xlated_ip} port=%{INT:src_xlated_port} dst-xlated ip=%{IPORHOST:dst_xlated_ip} port=%{INT:dst_xlated_port} session_id=%{INT:session_id} reason=%{GREEDYDATA:reason} 4 | -------------------------------------------------------------------------------- /src/test/java/io/krakens/grok/api/MessagesTest.java: -------------------------------------------------------------------------------- 1 | package io.krakens.grok.api; 2 | 3 | import static org.junit.Assert.assertNotNull; 4 | 5 | import java.io.BufferedReader; 6 | import java.io.FileReader; 7 | import java.io.IOException; 8 | import java.util.Map; 9 | 10 | import io.krakens.grok.api.exception.GrokException; 11 | 12 | import com.google.common.io.Resources; 13 | import org.assertj.core.api.Assertions; 14 | import org.junit.Test; 15 | 16 | 17 | public class MessagesTest { 18 | 19 | @Test 20 | public void test001_linux_messages() throws GrokException, IOException { 21 | GrokCompiler compiler = GrokCompiler.newInstance(); 22 | compiler.register(Resources.getResource(ResourceManager.PATTERNS).openStream()); 23 | 24 | Grok grok = compiler.compile("%{MESSAGESLOG}"); 25 | 26 | BufferedReader br = new BufferedReader(new 
FileReader(Resources.getResource(ResourceManager.MESSAGES).getFile())); 27 | String line; 28 | System.out.println("Starting test with linux messages log -- may take a while"); 29 | while ((line = br.readLine()) != null) { 30 | Match gm = grok.match(line); 31 | Map map = gm.capture(); 32 | assertNotNull(map); 33 | Assertions.assertThat(map).doesNotContainKey("Error"); 34 | } 35 | br.close(); 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /src/main/java/io/krakens/grok/api/exception/GrokException.java: -------------------------------------------------------------------------------- 1 | package io.krakens.grok.api.exception; 2 | 3 | /** 4 | * Signals that an {@code Grok} exception of some sort has occurred. 5 | * This class is the general class of 6 | * exceptions produced by failed or interrupted Grok operations. 7 | * 8 | * @since 0.0.4 9 | */ 10 | public class GrokException extends RuntimeException { 11 | 12 | private static final long serialVersionUID = 1L; 13 | 14 | /** 15 | * Creates a new GrokException. 16 | */ 17 | public GrokException() { 18 | super(); 19 | } 20 | 21 | /** 22 | * Constructs a new GrokException. 23 | * 24 | * @param message the reason for the exception 25 | * @param cause the underlying Throwable that caused this exception to be thrown. 26 | */ 27 | public GrokException(String message, Throwable cause) { 28 | super(message, cause); 29 | } 30 | 31 | /** 32 | * Constructs a new GrokException. 33 | * 34 | * @param message the reason for the exception 35 | */ 36 | public GrokException(String message) { 37 | super(message); 38 | } 39 | 40 | /** 41 | * Constructs a new GrokException. 42 | * 43 | * @param cause the underlying Throwable that caused this exception to be thrown. 
/**
 * {@code GrokUtils} contains the regular expressions and helpers used to take Grok
 * references such as {@code %{FOO}} or {@code %{FOO:bar}} apart.
 *
 * @since 0.0.6
 */
public class GrokUtils {

  /**
   * Matches a single Grok reference and exposes its parts as named groups.
   * <ul>
   *   <li>{@code name}: everything before an optional {@code =definition}, e.g. {@code FOO:bar}</li>
   *   <li>{@code pattern}: the pattern identifier, e.g. {@code FOO}</li>
   *   <li>{@code subname}: the optional semantic after the first colon, e.g. {@code bar}</li>
   *   <li>{@code definition}: the optional inline definition after {@code =}</li>
   * </ul>
   *
   * <p>Note that the {@code A-z} range also covers the ASCII punctuation between {@code Z}
   * and {@code a}, underscore included, so identifiers such as {@code RUBY_LOGLEVEL}
   * match; narrowing it to {@code A-Za-z} would reject them.
   */
  public static final Pattern GROK_PATTERN = Pattern.compile(
      "%\\{"
          + "(?<name>"
          + "(?<pattern>[A-z0-9]+)"
          + "(?::(?<subname>[A-z0-9_:;,\\-\\/\\s\\.']+))?"
          + ")"
          + "(?:=(?<definition>"
          + "(?:"
          + "(?:[^{}]+|\\.+)+"
          + ")+"
          + ")"
          + ")?"
          + "\\}");

  /**
   * Finds the opening of a named capturing group, {@code (?<identifier>}, and captures the
   * identifier as group 1.
   */
  public static final Pattern NAMED_REGEX = Pattern
      .compile("\\(\\?<([a-zA-Z][a-zA-Z0-9]*)>");

  /**
   * Collects the names of the named capturing groups declared in a regular expression.
   *
   * @param regex the regular expression source to scan
   * @return the group names in order of first appearance; empty when there are none
   */
  public static Set<String> getNameGroups(String regex) {
    Set<String> namedGroups = new LinkedHashSet<>();
    Matcher matcher = NAMED_REGEX.matcher(regex);
    while (matcher.find()) {
      namedGroups.add(matcher.group(1));
    }
    return namedGroups;
  }

  /**
   * Reads the given named groups from a matcher that already holds a successful match.
   *
   * @param matcher a matcher positioned on a match
   * @param groupNames the group names to read
   * @return group name to captured text, in the iteration order of {@code groupNames};
   *     the value is {@code null} for a group that did not take part in the match
   * @throws IllegalStateException if the matcher holds no match
   * @throws IllegalArgumentException if a name is not a group of the matcher's pattern
   */
  public static Map<String, String> namedGroups(Matcher matcher, Set<String> groupNames) {
    Map<String, String> namedGroups = new LinkedHashMap<>();
    for (String groupName : groupNames) {
      String groupValue = matcher.group(groupName);
      namedGroups.put(groupName, groupValue);
    }
    return namedGroups;
  }
}
33 | url 'https://github.com/thekrakken/java-grok' 34 | 35 | scm { 36 | connection 'scm:git:git@github.com:thekrakken/java-grok.git' 37 | developerConnection 'scm:git:git@github.com:thekrakken/java-grok.git' 38 | url 'scm:git:git@github.com:thekrakken/java-grok.git' 39 | } 40 | 41 | licenses { 42 | license { 43 | name 'The Apache License, Version 2.0' 44 | url 'http://www.apache.org/licenses/LICENSE-2.0.txt' 45 | } 46 | } 47 | 48 | developers { 49 | developer { 50 | id 'anthony-corbacho' 51 | name 'Anthony Corbacho' 52 | email 'manfred@sonatype.com' 53 | } 54 | } 55 | } 56 | } 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/test/java/io/krakens/grok/api/GrokDocumentationTest.java: -------------------------------------------------------------------------------- 1 | package io.krakens.grok.api; 2 | 3 | import java.util.HashSet; 4 | import java.util.Map; 5 | 6 | import org.assertj.core.api.Assertions; 7 | import org.junit.Test; 8 | import static org.hamcrest.Matchers.containsInAnyOrder; 9 | import static org.hamcrest.Matchers.hasItem; 10 | import static org.junit.Assert.*; 11 | 12 | import org.assertj.core.util.Arrays; 13 | 14 | public class GrokDocumentationTest { 15 | 16 | @Test 17 | public void assureCodeInReadmeWorks() { 18 | /* Create a new grokCompiler instance */ 19 | GrokCompiler grokCompiler = GrokCompiler.newInstance(); 20 | grokCompiler.registerDefaultPatterns(); 21 | 22 | /* Grok pattern to compile, here httpd logs */ 23 | final Grok grok = grokCompiler.compile("%{COMBINEDAPACHELOG}"); 24 | 25 | /* Line of log to match */ 26 | String log = "112.169.19.192 - - [06/Mar/2013:01:36:30 +0900] \"GET / HTTP/1.1\" 200 44346 \"-\" " 27 | + "\"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.22 (KHTML, like Gecko) " 28 | + "Chrome/25.0.1364.152 Safari/537.22\""; 29 | 30 | Match gm = grok.match(log); 31 | 32 | /* Get the map with matches */ 33 | final Map capture = gm.capture(); 34 | 35 | 
Assertions.assertThat(capture).hasSize(22); 36 | final boolean debug = false; 37 | final Object[] keywordArray = new Object[] { "COMBINEDAPACHELOG", 38 | "COMMONAPACHELOG", "clientip", "ident", "auth", "timestamp", "MONTHDAY", 39 | "MONTH", "YEAR", "TIME", "HOUR", "MINUTE", "SECOND", "INT", "verb", 40 | "httpversion", "rawrequest", "request", "response", "bytes", "referrer", 41 | "agent" }; 42 | if (debug) 43 | capture.keySet().stream().forEach(System.err::println); 44 | assertTrue(new HashSet(Arrays.asList(keywordArray)) 45 | .containsAll(new HashSet(capture.keySet()))); 46 | 47 | Arrays.asList(keywordArray).stream() 48 | .forEach(o -> assertThat(capture.keySet(), hasItem((String) o))); 49 | assertThat(new HashSet(capture.keySet()), 50 | containsInAnyOrder(keywordArray)); 51 | assertTrue(new HashSet(capture.keySet()) 52 | .containsAll(new HashSet(Arrays.asList(keywordArray)))); 53 | 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | 3 | # Package Files # 4 | *.jar 5 | !gradle-wrapper.jar 6 | *.war 7 | *.ear 8 | 9 | # Logs and databases # 10 | ###################### 11 | *.log 12 | *.sql 13 | *.sqlite 14 | 15 | # OS generated files # 16 | ###################### 17 | .DS_Store 18 | .DS_Store? 19 | ._* 20 | .Spotlight-V100 21 | .Trashes 22 | Icon? 
23 | ehthumbs.db 24 | Thumbs.db 25 | 26 | 27 | # Gradle 28 | .gradle 29 | build/ 30 | bin/ 31 | 32 | target/ 33 | src/main/java/com/nflabs/Grok/App.java 34 | 35 | *~ 36 | \#*\# 37 | /.emacs.desktop 38 | /.emacs.desktop.lock 39 | .elc 40 | auto-save-list 41 | tramp 42 | .\#* 43 | 44 | # Org-mode 45 | .org-id-locations 46 | *_archive 47 | 48 | *.pydevproject 49 | .project 50 | .metadata 51 | bin/** 52 | tmp/** 53 | tmp/**/* 54 | *.tmp 55 | *.bak 56 | *.swp 57 | *~.nib 58 | local.properties 59 | .classpath 60 | .settings/ 61 | .loadpath 62 | 63 | # External tool builders 64 | .externalToolBuilders/ 65 | 66 | # Locally stored "Eclipse launch configurations" 67 | *.launch 68 | 69 | # CDT-specific 70 | .cproject 71 | 72 | # PDT-specific 73 | .buildpath 74 | /target 75 | 76 | ### JetBrains template 77 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm 78 | 79 | *.iml 80 | 81 | ## Directory-based project format: 82 | .idea/ 83 | # if you remove the above rule, at least ignore the following: 84 | 85 | # User-specific stuff: 86 | # .idea/workspace.xml 87 | # .idea/tasks.xml 88 | # .idea/dictionaries 89 | 90 | # Sensitive or high-churn files: 91 | # .idea/dataSources.ids 92 | # .idea/dataSources.xml 93 | # .idea/sqlDataSources.xml 94 | # .idea/dynamic.xml 95 | # .idea/uiDesigner.xml 96 | 97 | # Gradle: 98 | # .idea/gradle.xml 99 | # .idea/libraries 100 | 101 | # Mongo Explorer plugin: 102 | # .idea/mongoSettings.xml 103 | 104 | ## File-based project format: 105 | *.ipr 106 | *.iws 107 | 108 | ## Plugin-specific files: 109 | 110 | # IntelliJ 111 | out/ 112 | 113 | # mpeltonen/sbt-idea plugin 114 | .idea_modules/ 115 | 116 | # JIRA plugin 117 | atlassian-ide-plugin.xml 118 | 119 | # Crashlytics plugin (for Android Studio and IntelliJ) 120 | com_crashlytics_export_strings.xml 121 | crashlytics.properties 122 | crashlytics-build.properties 123 | -------------------------------------------------------------------------------- 
/src/test/java/io/krakens/grok/api/ApacheTest.java: -------------------------------------------------------------------------------- 1 | package io.krakens.grok.api; 2 | 3 | 4 | import java.io.BufferedReader; 5 | import java.io.File; 6 | import java.io.FileReader; 7 | import java.io.IOException; 8 | import java.util.Map; 9 | 10 | import io.krakens.grok.api.exception.GrokException; 11 | 12 | import com.google.common.io.Resources; 13 | import org.assertj.core.api.Assertions; 14 | import org.junit.Before; 15 | import org.junit.FixMethodOrder; 16 | import org.junit.Test; 17 | import org.junit.runners.MethodSorters; 18 | 19 | @FixMethodOrder(MethodSorters.NAME_ASCENDING) 20 | public class ApacheTest { 21 | 22 | public static final String LOG_FILE = "src/test/resources/access_log"; 23 | public static final String LOG_DIR_NASA = "src/test/resources/nasa/"; 24 | 25 | private GrokCompiler compiler; 26 | 27 | @Before 28 | public void setup() throws Exception { 29 | compiler = GrokCompiler.newInstance(); 30 | compiler.register(Resources.getResource(ResourceManager.PATTERNS).openStream()); 31 | } 32 | 33 | @Test 34 | public void test001_httpd_access() throws GrokException, IOException { 35 | Grok grok = compiler.compile("%{COMMONAPACHELOG}"); 36 | 37 | BufferedReader br = new BufferedReader(new FileReader(LOG_FILE)); 38 | String line; 39 | System.out.println("Starting test with httpd log"); 40 | while ((line = br.readLine()) != null) { 41 | Match gm = grok.match(line); 42 | final Map capture = gm.capture(); 43 | Assertions.assertThat(capture).doesNotContainKey("Error"); 44 | } 45 | br.close(); 46 | } 47 | 48 | @Test 49 | public void test002_nasa_httpd_access() throws GrokException, IOException { 50 | Grok grok = compiler.compile("%{COMMONAPACHELOG}"); 51 | System.out.println("Starting test with nasa log -- may take a while"); 52 | BufferedReader br; 53 | String line; 54 | File dir = new File(LOG_DIR_NASA); 55 | for (File child : dir.listFiles()) { 56 | br = new 
BufferedReader(new FileReader(LOG_DIR_NASA + child.getName())); 57 | while ((line = br.readLine()) != null) { 58 | Match gm = grok.match(line); 59 | final Map capture = gm.capture(); 60 | Assertions.assertThat(capture).doesNotContainKey("Error"); 61 | } 62 | br.close(); 63 | } 64 | } 65 | 66 | } 67 | -------------------------------------------------------------------------------- /src/test/java/io/krakens/grok/api/GrokListTest.java: -------------------------------------------------------------------------------- 1 | package io.krakens.grok.api; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | import static org.junit.Assert.assertNotNull; 5 | 6 | import java.util.ArrayList; 7 | import java.util.List; 8 | import java.util.Map; 9 | 10 | import io.krakens.grok.api.exception.GrokException; 11 | 12 | import com.google.common.io.Resources; 13 | import org.junit.Before; 14 | import org.junit.FixMethodOrder; 15 | import org.junit.Test; 16 | import org.junit.runners.MethodSorters; 17 | 18 | @FixMethodOrder(MethodSorters.NAME_ASCENDING) 19 | public class GrokListTest { 20 | 21 | GrokCompiler compiler; 22 | 23 | @Before 24 | public void setUp() throws Exception { 25 | compiler = GrokCompiler.newInstance(); 26 | compiler.register(Resources.getResource(ResourceManager.PATTERNS).openStream()); 27 | } 28 | 29 | @Test 30 | public void test_001() throws GrokException { 31 | List logs = new ArrayList<>(); 32 | 33 | logs.add("178.21.82.201"); 34 | logs.add("11.178.94.216"); 35 | logs.add("238.222.236.81"); 36 | logs.add("231.49.38.155"); 37 | logs.add("206.0.116.17"); 38 | logs.add("191.199.247.47"); 39 | logs.add("43.131.249.156"); 40 | logs.add("170.36.40.12"); 41 | logs.add("124.2.84.36"); 42 | 43 | Grok grok = compiler.compile("%{IP}"); 44 | ArrayList> capture = grok.capture(logs); 45 | assertNotNull(capture); 46 | int counter = 0; 47 | for (Map elem : capture) { 48 | assertNotNull(elem); 49 | assertEquals(elem, grok.capture(logs.get(counter))); 50 | counter++; 51 | } 52 | 
} 53 | 54 | @Test 55 | public void test_002() throws GrokException { 56 | List logs = new ArrayList<>(); 57 | 58 | logs.add("178.21.82.201"); 59 | logs.add("11.178.94.216"); 60 | logs.add(""); 61 | logs.add("231.49.38.155"); 62 | logs.add("206.0.116.17"); 63 | logs.add("191.199.247.47"); 64 | logs.add("43.131.249.156"); 65 | logs.add("170.36.40.12"); 66 | logs.add("124.2.84.36"); 67 | 68 | Grok grok = compiler.compile("%{IP}"); 69 | ArrayList> capture = grok.capture(logs); 70 | assertNotNull(capture); 71 | int counter = 0; 72 | for (Map elem : capture) { 73 | assertNotNull(elem); 74 | assertEquals(elem, grok.capture(logs.get(counter))); 75 | counter++; 76 | } 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Grok 2 | [![Build Status](https://secure.travis-ci.org/thekrakken/java-grok.png?branch=master)](https://travis-ci.org/thekrakken/java-grok) 3 | 4 | Java Grok is simple API that allows you to easily parse logs and other files (single line). With Java Grok, you can turn unstructured log and event data into structured data (JSON). 5 | 6 | ----------------------- 7 | 8 | ### What can I use Grok for? 
9 | * reporting errors and other patterns from logs and processes 10 | * parsing complex text output and converting it to json for external processing 11 | * applying 'write-once use-everywhere' to regular expressions 12 | * automatically providing patterns for unknown text inputs (logs you want patterns generated for future matching) 13 | 14 | ### Maven repository 15 | 16 | ```maven 17 | <dependency> 18 | <groupId>io.krakens</groupId> 19 | <artifactId>java-grok</artifactId> 20 | <version>0.1.9</version> 21 | </dependency> 22 | ``` 23 | 24 | Or with gradle 25 | 26 | ```gradle 27 | compile "io.krakens:java-grok:0.1.9" 28 | ``` 29 | 30 | Old release ([Link](https://mvnrepository.com/artifact/io.thekraken/grok)) 31 | 32 | ### Usage ([Grok java documentation](http://grok.nflabs.com/javadoc)) 33 | Example of how to use java-grok: 34 | 35 | ```java 36 | /* Create a new grokCompiler instance */ 37 | GrokCompiler grokCompiler = GrokCompiler.newInstance(); 38 | grokCompiler.registerDefaultPatterns(); 39 | 40 | /* Grok pattern to compile, here httpd logs */ 41 | final Grok grok = grokCompiler.compile("%{COMBINEDAPACHELOG}"); 42 | 43 | /* Line of log to match */ 44 | String log = "112.169.19.192 - - [06/Mar/2013:01:36:30 +0900] \"GET / HTTP/1.1\" 200 44346 \"-\" \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.22 (KHTML, like Gecko) Chrome/25.0.1364.152 Safari/537.22\""; 45 | 46 | Match gm = grok.match(log); 47 | 48 | /* Get the map with matches */ 49 | final Map<String, Object> capture = gm.capture(); 50 | ``` 51 | 52 | ### Build Java Grok 53 | 54 | Java Grok supports Gradle: `./gradlew assemble` 55 | 56 | ### Getting help 57 | [Mailing List](https://groups.google.com/forum/#!forum/java-grok) 58 | 59 | ### Thanks to 60 | * [@joschi](https://github.com/joschi) 61 | * [@keitaf](https://github.com/keitaf) 62 | * [@anthonycorbacho](https://github.com/anthonycorbacho) 63 | * [@nokk](https://github.com/nokk) 64 | * [@wouterdb](https://github.com/wouterdb) 65 | * [@Leemoonsoo](https://github.com/Leemoonsoo) 66 | 67 | **Any contributions are warmly welcome** 68 | 69 | Grok 
is inspired by the logstash inteceptor or filter available [here](http://logstash.net/docs/1.4.1/filters/grok) 70 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @if "%DEBUG%" == "" @echo off 2 | @rem ########################################################################## 3 | @rem 4 | @rem Gradle startup script for Windows 5 | @rem 6 | @rem ########################################################################## 7 | 8 | @rem Set local scope for the variables with windows NT shell 9 | if "%OS%"=="Windows_NT" setlocal 10 | 11 | set DIRNAME=%~dp0 12 | if "%DIRNAME%" == "" set DIRNAME=. 13 | set APP_BASE_NAME=%~n0 14 | set APP_HOME=%DIRNAME% 15 | 16 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 17 | set DEFAULT_JVM_OPTS= 18 | 19 | @rem Find java.exe 20 | if defined JAVA_HOME goto findJavaFromJavaHome 21 | 22 | set JAVA_EXE=java.exe 23 | %JAVA_EXE% -version >NUL 2>&1 24 | if "%ERRORLEVEL%" == "0" goto init 25 | 26 | echo. 27 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 28 | echo. 29 | echo Please set the JAVA_HOME variable in your environment to match the 30 | echo location of your Java installation. 31 | 32 | goto fail 33 | 34 | :findJavaFromJavaHome 35 | set JAVA_HOME=%JAVA_HOME:"=% 36 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 37 | 38 | if exist "%JAVA_EXE%" goto init 39 | 40 | echo. 41 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 42 | echo. 43 | echo Please set the JAVA_HOME variable in your environment to match the 44 | echo location of your Java installation. 45 | 46 | goto fail 47 | 48 | :init 49 | @rem Get command-line arguments, handling Windows variants 50 | 51 | if not "%OS%" == "Windows_NT" goto win9xME_args 52 | 53 | :win9xME_args 54 | @rem Slurp the command line arguments. 
55 | set CMD_LINE_ARGS= 56 | set _SKIP=2 57 | 58 | :win9xME_args_slurp 59 | if "x%~1" == "x" goto execute 60 | 61 | set CMD_LINE_ARGS=%* 62 | 63 | :execute 64 | @rem Setup the command line 65 | 66 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 67 | 68 | @rem Execute Gradle 69 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% 70 | 71 | :end 72 | @rem End local scope for the variables with windows NT shell 73 | if "%ERRORLEVEL%"=="0" goto mainEnd 74 | 75 | :fail 76 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 77 | rem the _cmd.exe /c_ return code! 78 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 79 | exit /b 1 80 | 81 | :mainEnd 82 | if "%OS%"=="Windows_NT" endlocal 83 | 84 | :omega 85 | -------------------------------------------------------------------------------- /src/main/resources/patterns/haproxy: -------------------------------------------------------------------------------- 1 | # Forked from https://github.com/elasticsearch/logstash/tree/v1.4.0/patterns 2 | ## These patterns were tested w/ haproxy-1.4.15 3 | 4 | ## Documentation of the haproxy log formats can be found at the following links: 5 | ## http://code.google.com/p/haproxy-docs/wiki/HTTPLogFormat 6 | ## http://code.google.com/p/haproxy-docs/wiki/TCPLogFormat 7 | 8 | HAPROXYTIME (?!<[0-9])%{HOUR:haproxy_hour}:%{MINUTE:haproxy_minute}(?::%{SECOND:haproxy_second})(?![0-9]) 9 | HAPROXYDATE %{MONTHDAY:haproxy_monthday}/%{MONTH:haproxy_month}/%{YEAR:haproxy_year}:%{HAPROXYTIME:haproxy_time}.%{INT:haproxy_milliseconds} 10 | 11 | # Override these default patterns to parse out what is captured in your haproxy.cfg 12 | HAPROXYCAPTUREDREQUESTHEADERS %{DATA:captured_request_headers} 13 | HAPROXYCAPTUREDRESPONSEHEADERS %{DATA:captured_response_headers} 14 | 15 | # Example: 16 | # These haproxy config lines will add data to the logs 
that are captured 17 | # by the patterns below. Place them in your custom patterns directory to 18 | # override the defaults. 19 | # 20 | # capture request header Host len 40 21 | # capture request header X-Forwarded-For len 50 22 | # capture request header Accept-Language len 50 23 | # capture request header Referer len 200 24 | # capture request header User-Agent len 200 25 | # 26 | # capture response header Content-Type len 30 27 | # capture response header Content-Encoding len 10 28 | # capture response header Cache-Control len 200 29 | # capture response header Last-Modified len 200 30 | # 31 | # HAPROXYCAPTUREDREQUESTHEADERS %{DATA:request_header_host}\|%{DATA:request_header_x_forwarded_for}\|%{DATA:request_header_accept_language}\|%{DATA:request_header_referer}\|%{DATA:request_header_user_agent} 32 | # HAPROXYCAPTUREDRESPONSEHEADERS %{DATA:response_header_content_type}\|%{DATA:response_header_content_encoding}\|%{DATA:response_header_cache_control}\|%{DATA:response_header_last_modified} 33 | 34 | # parse a haproxy 'httplog' line 35 | HAPROXYHTTP %{SYSLOGTIMESTAMP:syslog_timestamp} %{IPORHOST:syslog_server} %{SYSLOGPROG}: %{IP:client_ip}:%{INT:client_port} \[%{HAPROXYDATE:accept_date}\] %{NOTSPACE:frontend_name} %{NOTSPACE:backend_name}/%{NOTSPACE:server_name} %{INT:time_request}/%{INT:time_queue}/%{INT:time_backend_connect}/%{INT:time_backend_response}/%{NOTSPACE:time_duration} %{INT:http_status_code} %{NOTSPACE:bytes_read} %{DATA:captured_request_cookie} %{DATA:captured_response_cookie} %{NOTSPACE:termination_state} %{INT:actconn}/%{INT:feconn}/%{INT:beconn}/%{INT:srvconn}/%{NOTSPACE:retries} %{INT:srv_queue}/%{INT:backend_queue} (\{%{HAPROXYCAPTUREDREQUESTHEADERS}\})?( )?(\{%{HAPROXYCAPTUREDRESPONSEHEADERS}\})?( )?"%{WORD:http_verb} %{URIPATHPARAM:http_request}( HTTP/%{NUMBER:http_version}")? 
36 | 37 | # parse a haproxy 'tcplog' line 38 | HAPROXYTCP %{SYSLOGTIMESTAMP:syslog_timestamp} %{IPORHOST:syslog_server} %{SYSLOGPROG}: %{IP:client_ip}:%{INT:client_port} \[%{HAPROXYDATE:accept_date}\] %{NOTSPACE:frontend_name} %{NOTSPACE:backend_name}/%{NOTSPACE:server_name} %{INT:time_queue}/%{INT:time_backend_connect}/%{NOTSPACE:time_duration} %{NOTSPACE:bytes_read} %{NOTSPACE:termination_state} %{INT:actconn}/%{INT:feconn}/%{INT:beconn}/%{INT:srvconn}/%{NOTSPACE:retries} %{INT:srv_queue}/%{INT:backend_queue} 39 | -------------------------------------------------------------------------------- /src/test/java/io/krakens/grok/api/ApacheDataTypeTest.java: -------------------------------------------------------------------------------- 1 | package io.krakens.grok.api; 2 | 3 | 4 | import static org.junit.Assert.assertEquals; 5 | 6 | import java.time.Instant; 7 | import java.time.ZoneOffset; 8 | import java.time.ZonedDateTime; 9 | import java.util.Locale; 10 | import java.util.Map; 11 | 12 | import io.krakens.grok.api.exception.GrokException; 13 | 14 | import com.google.common.io.Resources; 15 | import org.assertj.core.api.Assertions; 16 | import org.junit.Before; 17 | import org.junit.FixMethodOrder; 18 | import org.junit.Test; 19 | import org.junit.runners.MethodSorters; 20 | 21 | @FixMethodOrder(MethodSorters.NAME_ASCENDING) 22 | public class ApacheDataTypeTest { 23 | 24 | static { 25 | Locale.setDefault(Locale.ROOT); 26 | } 27 | 28 | private final String line = 29 | "64.242.88.10 - - [07/Mar/2004:16:45:56 -0800] \"GET /twiki/bin/attach/Main/PostfixCommands HTTP/1.1\" 401 12846"; 30 | 31 | private GrokCompiler compiler; 32 | 33 | @Before 34 | public void setup() throws Exception { 35 | compiler = GrokCompiler.newInstance(); 36 | compiler.register(Resources.getResource(ResourceManager.PATTERNS).openStream()); 37 | } 38 | 39 | @Test 40 | public void test002_httpd_access_semi() throws GrokException { 41 | Grok grok = compiler.compile( 42 | "%{IPORHOST:clientip} 
%{USER:ident;boolean} %{USER:auth} " 43 | + "\\[%{HTTPDATE:timestamp;date;dd/MMM/yyyy:HH:mm:ss Z}\\] \"(?:%{WORD:verb;string} %{NOTSPACE:request}" 44 | + "(?: HTTP/%{NUMBER:httpversion;float})?|%{DATA:rawrequest})\" %{NUMBER:response;int} " 45 | + "(?:%{NUMBER:bytes;long}|-)"); 46 | 47 | System.out.println(line); 48 | Match gm = grok.match(line); 49 | Map map = gm.capture(); 50 | 51 | Assertions.assertThat(map).doesNotContainKey("Error"); 52 | Instant ts = ZonedDateTime.of(2004, 3, 7, 16, 45, 56, 0, ZoneOffset.ofHours(-8)).toInstant(); 53 | assertEquals(map.get("timestamp"), ts); 54 | assertEquals(map.get("response"), 401); 55 | assertEquals(map.get("ident"), Boolean.FALSE); 56 | assertEquals(map.get("httpversion"), 1.1f); 57 | assertEquals(map.get("bytes"), 12846L); 58 | assertEquals("GET", map.get("verb")); 59 | 60 | } 61 | 62 | @Test 63 | public void test002_httpd_access_colon() throws GrokException { 64 | Grok grok = compiler.compile( 65 | "%{IPORHOST:clientip} %{USER:ident:boolean} %{USER:auth} " 66 | + "\\[%{HTTPDATE:timestamp:date:dd/MMM/yyyy:HH:mm:ss Z}\\] \"(?:%{WORD:verb:string} %{NOTSPACE:request}" 67 | + "(?: HTTP/%{NUMBER:httpversion:float})?|%{DATA:rawrequest})\" %{NUMBER:response:int} " 68 | + "(?:%{NUMBER:bytes:long}|-)"); 69 | 70 | Match gm = grok.match(line); 71 | Map map = gm.capture(); 72 | 73 | Assertions.assertThat(map).doesNotContainKey("Error"); 74 | 75 | Instant ts = ZonedDateTime.of(2004, 3, 7, 16, 45, 56, 0, ZoneOffset.ofHours(-8)).toInstant(); 76 | assertEquals(map.get("timestamp"), ts); 77 | assertEquals(map.get("response"), 401); 78 | assertEquals(map.get("ident"), Boolean.FALSE); 79 | assertEquals(map.get("httpversion"), 1.1f); 80 | assertEquals(map.get("bytes"), 12846L); 81 | assertEquals("GET", map.get("verb")); 82 | 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /src/test/java/io/krakens/grok/api/BasicTest.java: 
-------------------------------------------------------------------------------- 1 | package io.krakens.grok.api; 2 | 3 | import static junit.framework.TestCase.assertTrue; 4 | import static org.junit.Assert.assertEquals; 5 | 6 | import java.io.BufferedWriter; 7 | import java.io.File; 8 | import java.io.FileInputStream; 9 | import java.io.FileOutputStream; 10 | import java.io.FileWriter; 11 | import java.io.IOException; 12 | import java.io.OutputStreamWriter; 13 | import java.io.Reader; 14 | import java.io.StringReader; 15 | import java.nio.charset.StandardCharsets; 16 | import java.util.ArrayList; 17 | import java.util.List; 18 | import java.util.regex.PatternSyntaxException; 19 | 20 | import io.krakens.grok.api.exception.GrokException; 21 | 22 | import com.google.common.io.Resources; 23 | import org.junit.Before; 24 | import org.junit.FixMethodOrder; 25 | import org.junit.Rule; 26 | import org.junit.Test; 27 | import org.junit.rules.TemporaryFolder; 28 | import org.junit.runners.MethodSorters; 29 | 30 | @FixMethodOrder(MethodSorters.NAME_ASCENDING) 31 | public class BasicTest { 32 | 33 | @Rule 34 | public TemporaryFolder tempFolder = new TemporaryFolder(); 35 | 36 | private GrokCompiler compiler; 37 | 38 | @Before 39 | public void setup() throws Exception { 40 | compiler = GrokCompiler.newInstance(); 41 | compiler.register(Resources.getResource(ResourceManager.PATTERNS).openStream()); 42 | } 43 | 44 | @Test 45 | public void test001_compileFailOnInvalidExpression() throws GrokException { 46 | List badRegxp = new ArrayList<>(); 47 | badRegxp.add("["); 48 | badRegxp.add("[foo"); 49 | badRegxp.add("?"); 50 | badRegxp.add("foo????"); 51 | badRegxp.add("(?-"); 52 | 53 | boolean thrown = false; 54 | 55 | /** This should always throw */ 56 | for (String regx : badRegxp) { 57 | try { 58 | compiler.compile(regx); 59 | } catch (PatternSyntaxException e) { 60 | thrown = true; 61 | } 62 | assertTrue(thrown); 63 | thrown = false; 64 | } 65 | } 66 | 67 | @Test 68 | public void 
test002_compileSuccessValidExpression() throws GrokException { 69 | List regxp = new ArrayList<>(); 70 | regxp.add("[hello]"); 71 | regxp.add("(test)"); 72 | regxp.add("(?:hello)"); 73 | regxp.add("(?=testing)"); 74 | 75 | for (String regx : regxp) { 76 | compiler.compile(regx); 77 | } 78 | } 79 | 80 | @Test 81 | public void test003_samePattern() throws GrokException { 82 | String pattern = "Hello World"; 83 | Grok grok = compiler.compile(pattern); 84 | assertEquals(pattern, grok.getOriginalGrokPattern()); 85 | } 86 | 87 | @Test 88 | public void test004_sameExpantedPatern() throws GrokException { 89 | compiler.register("test", "hello world"); 90 | Grok grok = compiler.compile("%{test}"); 91 | assertEquals("(?hello world)", grok.getNamedRegex()); 92 | } 93 | 94 | @Test 95 | public void test005_testLoadPatternFromFile() throws IOException, GrokException { 96 | File temp = tempFolder.newFile("grok-tmp-pattern"); 97 | try (BufferedWriter bw = new BufferedWriter(new FileWriter(temp))) { 98 | bw.write("TEST \\d+"); 99 | } 100 | 101 | GrokCompiler compiler = GrokCompiler.newInstance(); 102 | compiler.register(new FileInputStream(temp)); 103 | Grok grok = compiler.compile("%{TEST}"); 104 | assertEquals("(?\\d+)", grok.getNamedRegex()); 105 | } 106 | 107 | @Test 108 | public void test006_testLoadPatternFromFileIso_8859_1() throws IOException, GrokException { 109 | File temp = tempFolder.newFile("grok-tmp-pattern"); 110 | try (FileOutputStream fis = new FileOutputStream(temp); 111 | BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fis, StandardCharsets.ISO_8859_1))) { 112 | bw.write("TEST §"); 113 | } 114 | 115 | GrokCompiler compiler = GrokCompiler.newInstance(); 116 | compiler.register(new FileInputStream(temp), StandardCharsets.ISO_8859_1); 117 | Grok grok = compiler.compile("%{TEST}"); 118 | assertEquals("(?§)", grok.getNamedRegex()); 119 | } 120 | 121 | @Test 122 | public void test007_testLoadPatternFromReader() throws IOException, GrokException { 123 | 
Reader reader = new StringReader("TEST €"); 124 | GrokCompiler compiler = GrokCompiler.newInstance(); 125 | compiler.register(reader); 126 | Grok grok = compiler.compile("%{TEST}"); 127 | assertEquals("(?€)", grok.getNamedRegex()); 128 | } 129 | 130 | } 131 | -------------------------------------------------------------------------------- /src/main/java/io/krakens/grok/api/Discovery.java: -------------------------------------------------------------------------------- 1 | package io.krakens.grok.api; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Collections; 5 | import java.util.Comparator; 6 | import java.util.LinkedHashMap; 7 | import java.util.List; 8 | import java.util.Map; 9 | import java.util.Map.Entry; 10 | import java.util.TreeMap; 11 | import java.util.regex.Matcher; 12 | import java.util.regex.Pattern; 13 | 14 | import org.apache.commons.lang3.StringUtils; 15 | 16 | 17 | /** 18 | * {@code Discovery} tries to find the best pattern for the given string. 19 | * 20 | * @since 0.0.2 21 | */ 22 | public class Discovery { 23 | 24 | private Grok grok; 25 | 26 | /** 27 | * Create a new {@code Discovery} object. 28 | * 29 | * @param grok instance of grok 30 | */ 31 | public Discovery(Grok grok) { 32 | this.grok = grok; 33 | } 34 | 35 | /** 36 | * Sort by regex complexity, most complex pattern first. 37 | * The comparator yields negative/zero/positive results, as the {@link Comparator} contract requires. 38 | * @param groks Map of the pattern name and grok instance 39 | * @return the map sorted by grok pattern complexity 40 | */ 41 | private Map sort(Map groks) { 42 | 43 | List groky = new ArrayList(groks.values()); 44 | Map grokMap = new LinkedHashMap(); 45 | Collections.sort(groky, new Comparator() { 46 | public int compare(Grok g1, Grok g2) { 47 | return Integer.compare(this.complexity(g2.getNamedRegex()), this.complexity(g1.getNamedRegex()));
48 | } 49 | 50 | private int complexity(String expandedPattern) { 51 | int score = 0; 52 | score += expandedPattern.split("\\Q" + "|" + "\\E", -1).length - 1; 53 | score += expandedPattern.length(); 54 | return score; 55 | } 56 | }); 57 | 58 | for (Grok grok : groky) { 59 | grokMap.put(grok.getSaved_pattern(), grok); 60 | } 61 | return grokMap; 62 | 63 | } 64 | 65 | /** 66 | * Determine the complexity of the pattern. 67 | * The score is the number of {@code |} alternations plus the pattern length. 68 | * @param expandedPattern regex string 69 | * @return the complexity of the regex 70 | */ 71 | private int complexity(String expandedPattern) { 72 | int score = 0; 73 | 74 | score += expandedPattern.split("\\Q" + "|" + "\\E", -1).length - 1; 75 | score += expandedPattern.length(); 76 | 77 | return score; 78 | } 79 | 80 | /** 81 | * Find a pattern from a log. 82 | * 83 | * @param text which is the representation of your single log line 84 | * @return Grok pattern %{Foo}... 85 | */ 86 | public String discover(String text) { 87 | if (text == null) { 88 | return ""; 89 | } 90 | 91 | Map groks = new TreeMap(); 92 | Map grokPatterns = grok.getPatterns(); 93 | // Boolean done = false; 94 | String texte = text; 95 | GrokCompiler compiler = GrokCompiler.newInstance(); 96 | compiler.register(grokPatterns); 97 | 98 | // Compile the pattern 99 | for (Entry stringStringEntry : grokPatterns.entrySet()) { 100 | @SuppressWarnings("rawtypes") 101 | Entry pairs = (Entry) stringStringEntry; 102 | String key = pairs.getKey().toString(); 103 | 104 | try { 105 | Grok grok = compiler.compile("%{" + key + "}"); 106 | grok.setSaved_pattern(key); 107 | groks.put(key, grok); 108 | } catch (Exception e) { 109 | // Add logger 110 | } 111 | 112 | } 113 | 114 | // Sort patterns by complexity 115 | Map patterns = this.sort(groks); 116 | 117 | // while (!done){ 118 | // done = true; 119 | for (Entry pairs : patterns.entrySet()) { 120 | String key = pairs.getKey(); 121 | Grok value = pairs.getValue(); 122 | 123 | // We want to search with more complex pattern 124 | // We
avoid word, small number, space.... 125 | if (this.complexity(value.getNamedRegex()) < 20) { 126 | continue; 127 | } 128 | 129 | Match match = value.match(text); 130 | if (match.isNull()) { 131 | continue; 132 | } 133 | // get the part of the matched text 134 | String part = getPart(match, text); 135 | 136 | // we skip boundary word 137 | Pattern pattern = Pattern.compile(".\\b."); 138 | Matcher ma = pattern.matcher(part); 139 | if (!ma.find()) { 140 | continue; 141 | } 142 | 143 | // We skip the part that already includes %{Foo} 144 | Pattern pattern2 = Pattern.compile("%\\{[^}]+\\}"); 145 | Matcher ma2 = pattern2.matcher(part); 146 | 147 | if (ma2.find()) { 148 | continue; 149 | } 150 | texte = StringUtils.replace(texte, part, "%{" + key + "}"); 151 | } 152 | // } 153 | 154 | return texte; 155 | } 156 | 157 | /** 158 | * Get the substring of the text that was matched. 159 | * 160 | * @param matcher Grok Match 161 | * @param text text 162 | * @return string 163 | */ 164 | private String getPart(Match matcher, String text) { 165 | 166 | if (matcher == null || text == null) { 167 | return ""; 168 | } 169 | 170 | return text.substring(matcher.getStart(), matcher.getEnd()); 171 | } 172 | } 173 | -------------------------------------------------------------------------------- /src/main/java/io/krakens/grok/api/Converter.java: -------------------------------------------------------------------------------- 1 | package io.krakens.grok.api; 2 | 3 | import java.time.Instant; 4 | import java.time.LocalDate; 5 | import java.time.LocalDateTime; 6 | import java.time.OffsetDateTime; 7 | import java.time.ZoneId; 8 | import java.time.ZoneOffset; 9 | import java.time.ZonedDateTime; 10 | import java.time.format.DateTimeFormatter; 11 | import java.time.temporal.TemporalAccessor; 12 | import java.util.AbstractMap; 13 | import java.util.Arrays; 14 | import java.util.Collection; 15 | import java.util.List; 16 | import java.util.Map; 17 | import java.util.function.Function; 18 | import
java.util.regex.Pattern; 19 | import java.util.stream.Collectors; 20 | 21 | /** 22 | * Convert String argument to the right type. 23 | * 24 | */ 25 | public class Converter { 26 | 27 | public enum Type { 28 | BYTE(Byte::valueOf), 29 | BOOLEAN(Boolean::valueOf), 30 | SHORT(Short::valueOf), 31 | INT(Integer::valueOf, "integer"), 32 | LONG(Long::valueOf), 33 | FLOAT(Float::valueOf), 34 | DOUBLE(Double::valueOf), 35 | DATETIME(new DateConverter(), "date"), 36 | STRING(v -> v, "text"); 37 | 38 | public final IConverter converter; 39 | public final List aliases; 40 | 41 | Type(IConverter converter, String... aliases) { 42 | this.converter = converter; 43 | this.aliases = Arrays.asList(aliases); 44 | } 45 | } 46 | 47 | private static final Pattern SPLITTER = Pattern.compile("[:;]"); 48 | /** Types keyed by lower-cased enum name; Locale.ROOT keeps the keys stable under any default locale. */ 49 | private static final Map TYPES = 50 | Arrays.stream(Type.values()) 51 | .collect(Collectors.toMap(t -> t.name().toLowerCase(java.util.Locale.ROOT), t -> t)); 52 | /** Types keyed by their declared aliases. */ 53 | private static final Map TYPE_ALIASES = 54 | Arrays.stream(Type.values()) 55 | .flatMap(type -> type.aliases.stream().map(alias -> new AbstractMap.SimpleEntry<>(alias, type))) 56 | .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); 57 | /** Resolve a type name or alias case-insensitively; throws IllegalArgumentException when unknown. */ 58 | private static Type getType(String key) { 59 | key = key.toLowerCase(java.util.Locale.ROOT); 60 | Type type = TYPES.getOrDefault(key, TYPE_ALIASES.get(key)); 61 | if (type == null) { 62 | throw new IllegalArgumentException("Invalid data type :" + key); 63 | } 64 | return type; 65 | } 66 | 67 | public static Map> 68 | getConverters(Collection groupNames, Object...
params) { 69 | return groupNames.stream() 70 | .filter(Converter::containsDelimiter) 71 | .collect(Collectors.toMap(Function.identity(), key -> { 72 | String[] list = splitGrokPattern(key); 73 | IConverter converter = getType(list[1]).converter; 74 | if (list.length == 3) { 75 | converter = converter.newConverter(list[2], params); 76 | } 77 | return converter; 78 | })); 79 | } 80 | 81 | public static Map getGroupTypes(Collection groupNames) { 82 | return groupNames.stream() 83 | .filter(Converter::containsDelimiter) 84 | .map(Converter::splitGrokPattern) 85 | .collect(Collectors.toMap( 86 | l -> l[0], 87 | l -> getType(l[1]) 88 | )); 89 | } 90 | 91 | public static String extractKey(String key) { 92 | return splitGrokPattern(key)[0]; 93 | } 94 | 95 | private static boolean containsDelimiter(String string) { 96 | return string.indexOf(':') >= 0 || string.indexOf(';') >= 0; 97 | } 98 | 99 | private static String[] splitGrokPattern(String string) { 100 | return SPLITTER.split(string, 3); 101 | } 102 | 103 | interface IConverter { 104 | 105 | T convert(String value); 106 | 107 | default IConverter newConverter(String param, Object... 
params) { 108 | return this; 109 | } 110 | } 111 | 112 | 113 | static class DateConverter implements IConverter { 114 | 115 | private final DateTimeFormatter formatter; 116 | private final ZoneId timeZone; 117 | 118 | public DateConverter() { 119 | this.formatter = DateTimeFormatter.ISO_DATE_TIME; 120 | this.timeZone = ZoneOffset.UTC; 121 | } 122 | 123 | private DateConverter(DateTimeFormatter formatter, ZoneId timeZone) { 124 | this.formatter = formatter; 125 | this.timeZone = timeZone; 126 | } 127 | 128 | @Override 129 | public Instant convert(String value) { 130 | TemporalAccessor dt = formatter 131 | .parseBest(value.trim(), ZonedDateTime::from, LocalDateTime::from, OffsetDateTime::from, Instant::from, 132 | LocalDate::from); 133 | if (dt instanceof ZonedDateTime) { 134 | return ((ZonedDateTime) dt).toInstant(); 135 | } else if (dt instanceof LocalDateTime) { 136 | return ((LocalDateTime) dt).atZone(timeZone).toInstant(); 137 | } else if (dt instanceof OffsetDateTime) { 138 | return ((OffsetDateTime) dt).atZoneSameInstant(timeZone).toInstant(); 139 | } else if (dt instanceof Instant) { 140 | return ((Instant) dt); 141 | } else if (dt instanceof LocalDate) { 142 | return ((LocalDate) dt).atStartOfDay(timeZone).toInstant(); 143 | } else { 144 | return null; 145 | } 146 | } 147 | 148 | @Override 149 | public DateConverter newConverter(String param, Object... 
params) { 150 | if (!(params.length == 1 && params[0] instanceof ZoneId)) { 151 | throw new IllegalArgumentException("Invalid parameters"); 152 | } 153 | return new DateConverter(DateTimeFormatter.ofPattern(param), (ZoneId) params[0]); 154 | } 155 | } 156 | } 157 | -------------------------------------------------------------------------------- /src/test/java/io/krakens/grok/api/CaptureTest.java: -------------------------------------------------------------------------------- 1 | package io.krakens.grok.api; 2 | 3 | import static org.hamcrest.CoreMatchers.containsString; 4 | import static org.hamcrest.MatcherAssert.assertThat; 5 | import static org.junit.Assert.assertEquals; 6 | import static org.junit.Assert.assertNull; 7 | import static org.junit.Assert.assertTrue; 8 | import static org.junit.Assert.fail; 9 | 10 | import java.util.List; 11 | import java.util.Map; 12 | 13 | import io.krakens.grok.api.exception.GrokException; 14 | 15 | import com.google.common.io.Resources; 16 | import org.junit.Before; 17 | import org.junit.FixMethodOrder; 18 | import org.junit.Test; 19 | import org.junit.runners.MethodSorters; 20 | 21 | @FixMethodOrder(MethodSorters.NAME_ASCENDING) 22 | public class CaptureTest { 23 | 24 | GrokCompiler compiler; 25 | 26 | @Before 27 | public void setUp() throws Exception { 28 | compiler = GrokCompiler.newInstance(); 29 | compiler.register(Resources.getResource(ResourceManager.PATTERNS).openStream()); 30 | } 31 | 32 | @Test 33 | public void test001_captureMathod() { 34 | compiler.register("foo", ".*"); 35 | Grok grok = compiler.compile("%{foo}"); 36 | Match match = grok.match("Hello World"); 37 | assertEquals("(?.*)", grok.getNamedRegex()); 38 | assertEquals("Hello World", match.getSubject()); 39 | Map map = match.capture(); 40 | assertEquals(1, map.size()); 41 | assertEquals("Hello World", map.get("foo")); 42 | assertEquals("{foo=Hello World}", map.toString()); 43 | } 44 | 45 | @Test 46 | public void test002_captureMathodMulti() throws 
GrokException { 47 | compiler.register("foo", ".*"); 48 | compiler.register("bar", ".*"); 49 | Grok grok = compiler.compile("%{foo} %{bar}"); 50 | Match match = grok.match("Hello World"); 51 | assertEquals("(?.*) (?.*)", grok.getNamedRegex()); 52 | assertEquals("Hello World", match.getSubject()); 53 | Map map = match.capture(); 54 | assertEquals(2, map.size()); 55 | assertEquals("Hello", map.get("foo")); 56 | assertEquals("World", map.get("bar")); 57 | assertEquals("{foo=Hello, bar=World}", map.toString()); 58 | } 59 | 60 | @Test 61 | public void test003_captureMathodNasted() throws GrokException { 62 | compiler.register("foo", "\\w+ %{bar}"); 63 | compiler.register("bar", "\\w+"); 64 | Grok grok = compiler.compile("%{foo}"); 65 | Match match = grok.match("Hello World"); 66 | assertEquals("(?\\w+ (?\\w+))", grok.getNamedRegex()); 67 | assertEquals("Hello World", match.getSubject()); 68 | Map map = match.capture(); 69 | assertEquals(2, map.size()); 70 | assertEquals("Hello World", map.get("foo")); 71 | assertEquals("World", map.get("bar")); 72 | assertEquals("{foo=Hello World, bar=World}", map.toString()); 73 | } 74 | 75 | @Test 76 | public void test004_captureNastedRecustion() throws GrokException { 77 | compiler.register("foo", "%{foo}"); 78 | boolean thrown = false; 79 | /** Must raise `Deep recursion pattern` execption */ 80 | try { 81 | compiler.compile("%{foo}"); 82 | } catch (Exception e) { 83 | thrown = true; 84 | } 85 | assertTrue(thrown); 86 | } 87 | 88 | @Test 89 | public void test005_captureSubName() throws GrokException { 90 | String name = "foo"; 91 | String subname = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_abcdef"; 92 | compiler.register(name, "\\w+"); 93 | Grok grok = compiler.compile("%{" + name + ":" + subname + "}"); 94 | Match match = grok.match("Hello"); 95 | Map map = match.capture(); 96 | assertEquals(1, map.size()); 97 | assertEquals("Hello", map.get(subname).toString()); 98 | 
assertEquals("{abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_abcdef=Hello}", 99 | map.toString()); 100 | } 101 | 102 | @Test 103 | public void test006_captureOnlyNamed() throws GrokException { 104 | compiler.register("abcdef", "[a-zA-Z]+"); 105 | compiler.register("ghijk", "\\d+"); 106 | Grok grok = compiler.compile("%{abcdef:abcdef}%{ghijk}", true); 107 | Match match = grok.match("abcdef12345"); 108 | Map map = match.capture(); 109 | assertEquals(map.size(), 1); 110 | assertNull(map.get("ghijk")); 111 | assertEquals(map.get("abcdef"), "abcdef"); 112 | } 113 | 114 | @SuppressWarnings("unchecked") 115 | @Test 116 | public void test007_captureDuplicateName() throws GrokException { 117 | Grok grok = compiler.compile("%{INT:id} %{INT:id}"); 118 | Match match = grok.match("123 456"); 119 | Map map = match.capture(); 120 | assertEquals(map.size(), 1); 121 | assertEquals(((List) (map.get("id"))).size(), 2); 122 | assertEquals(((List) (map.get("id"))).get(0), "123"); 123 | assertEquals(((List) (map.get("id"))).get(1), "456"); 124 | } 125 | 126 | @Test 127 | public void test008_flattenDuplicateKeys() throws GrokException { 128 | Grok grok = compiler.compile("(?:foo %{INT:id} bar|bar %{INT:id} foo)"); 129 | Match match = grok.match("foo 123 bar"); 130 | Map map = match.captureFlattened(); 131 | assertEquals(map.size(), 1); 132 | assertEquals(map.get("id"), "123"); 133 | Match m2 = grok.match("bar 123 foo"); 134 | map = m2.captureFlattened(); 135 | assertEquals(map.size(), 1); 136 | assertEquals(map.get("id"), "123"); 137 | 138 | grok = compiler.compile("%{INT:id} %{INT:id}"); 139 | Match m3 = grok.match("123 456"); 140 | 141 | try { 142 | m3.captureFlattened(); 143 | fail("should report error due tu ambiguity"); 144 | } catch (RuntimeException e) { 145 | assertThat(e.getMessage(), 146 | containsString("has multiple non-null values, this is not allowed in flattened mode")); 147 | } 148 | } 149 | } 150 | 
-------------------------------------------------------------------------------- /src/main/java/io/krakens/grok/api/Grok.java: -------------------------------------------------------------------------------- 1 | package io.krakens.grok.api; 2 | 3 | import java.io.Serializable; 4 | import java.time.ZoneId; 5 | import java.util.ArrayList; 6 | import java.util.List; 7 | import java.util.Map; 8 | import java.util.Set; 9 | import java.util.regex.Matcher; 10 | import java.util.regex.Pattern; 11 | 12 | import io.krakens.grok.api.Converter.IConverter; 13 | 14 | import org.apache.commons.lang3.StringUtils; 15 | 16 | /** 17 | * {@code Grok} parse arbitrary text and structure it. 18 | *
19 | * {@code Grok} is simple API that allows you to easily parse logs 20 | * and other files (single line). With {@code Grok}, 21 | * you can turn unstructured log and event data into structured data. 22 | * 23 | * @since 0.0.1 24 | */ 25 | public class Grok implements Serializable { 26 | /** 27 | * Named regex of the originalGrokPattern. 28 | */ 29 | private final String namedRegex; 30 | /** 31 | * Map of the named regex of the originalGrokPattern 32 | * with id = namedregexid and value = namedregex. 33 | */ 34 | private final Map namedRegexCollection; 35 | /** 36 | * Original {@code Grok} pattern (expl: %{IP}). 37 | */ 38 | private final String originalGrokPattern; 39 | /** 40 | * Pattern of the namedRegex. 41 | */ 42 | private final Pattern compiledNamedRegex; 43 | 44 | /** 45 | * {@code Grok} patterns definition. 46 | */ 47 | private final Map grokPatternDefinition; 48 | 49 | public final Set namedGroups; 50 | 51 | public final Map groupTypes; 52 | 53 | public final Map> converters; 54 | 55 | /** 56 | * {@code Grok} discovery. 57 | */ 58 | private Discovery disco; 59 | 60 | /** only use in grok discovery. 
*/ 61 | private String savedPattern = ""; 62 | 63 | public Grok(String pattern, 64 | String namedRegex, 65 | Map namedRegexCollection, 66 | Map patternDefinitions, 67 | ZoneId defaultTimeZone) { 68 | this.originalGrokPattern = pattern; 69 | this.namedRegex = namedRegex; 70 | this.compiledNamedRegex = Pattern.compile(namedRegex); 71 | this.namedRegexCollection = namedRegexCollection; 72 | this.namedGroups = GrokUtils.getNameGroups(namedRegex); 73 | this.groupTypes = Converter.getGroupTypes(namedRegexCollection.values()); 74 | this.converters = Converter.getConverters(namedRegexCollection.values(), defaultTimeZone); 75 | this.grokPatternDefinition = patternDefinitions; 76 | } 77 | 78 | public String getSaved_pattern() { 79 | return savedPattern; 80 | } 81 | 82 | public void setSaved_pattern(String savedpattern) { 83 | this.savedPattern = savedpattern; 84 | } 85 | 86 | /** 87 | * Get the current map of {@code Grok} pattern. 88 | * 89 | * @return Patterns (name, regular expression) 90 | */ 91 | public Map getPatterns() { 92 | return grokPatternDefinition; 93 | } 94 | 95 | /** 96 | * Get the named regex from the {@code Grok} pattern.
97 | * @return named regex 98 | */ 99 | public String getNamedRegex() { 100 | return namedRegex; 101 | } 102 | 103 | /** 104 | * Original grok pattern used to compile to the named regex. 105 | * 106 | * @return String Original Grok pattern 107 | */ 108 | public String getOriginalGrokPattern() { 109 | return originalGrokPattern; 110 | } 111 | 112 | /** 113 | * Get the named regex from the given id. 114 | * 115 | * @param id : named regex id 116 | * @return String of the named regex 117 | */ 118 | public String getNamedRegexCollectionById(String id) { 119 | return namedRegexCollection.get(id); 120 | } 121 | 122 | /** 123 | * Get the full collection of the named regex. 124 | * 125 | * @return named RegexCollection 126 | */ 127 | public Map getNamedRegexCollection() { 128 | return namedRegexCollection; 129 | } 130 | 131 | /** 132 | * Match the given log with the named regex. 133 | * And return the json representation of the matched element 134 | * 135 | * @param log : log to match 136 | * @return map containing matches 137 | */ 138 | public Map capture(String log) { 139 | Match match = match(log); 140 | return match.capture(); 141 | } 142 | 143 | /** 144 | * Match the given list of log with the named regex 145 | * and return the list of json representation of the matched elements. 146 | * 147 | * @param logs : list of log 148 | * @return list of maps containing matches 149 | */ 150 | public ArrayList> capture(List logs) { 151 | final ArrayList> matched = new ArrayList<>(); 152 | for (String log : logs) { 153 | matched.add(capture(log)); 154 | } 155 | return matched; 156 | } 157 | 158 | /** 159 | * Match the given text with the named regex 160 | * {@code Grok} will extract data from the string and get an extence of {@link Match}. 
161 | * 162 | * @param text : Single line of log 163 | * @return Grok Match 164 | */ 165 | public Match match(CharSequence text) { 166 | if (compiledNamedRegex == null || text == null) { 167 | return Match.EMPTY; 168 | } 169 | 170 | Matcher matcher = compiledNamedRegex.matcher(text); 171 | if (matcher.find()) { 172 | return new Match( 173 | text, this, matcher, matcher.start(0), matcher.end(0) 174 | ); 175 | } 176 | 177 | return Match.EMPTY; 178 | } 179 | 180 | /** 181 | * {@code Grok} will try to find the best expression that will match your input. 182 | * {@link Discovery} 183 | * 184 | * @param input : Single line of log 185 | * @return the Grok pattern 186 | */ 187 | public String discover(String input) { 188 | 189 | if (disco == null) { 190 | disco = new Discovery(this); 191 | } 192 | return disco.discover(input); 193 | } 194 | } 195 | -------------------------------------------------------------------------------- /gradlew: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | ############################################################################## 4 | ## 5 | ## Gradle start up script for UN*X 6 | ## 7 | ############################################################################## 8 | 9 | # Attempt to set APP_HOME 10 | # Resolve links: $0 may be a link 11 | PRG="$0" 12 | # Need this for relative symlinks. 13 | while [ -h "$PRG" ] ; do 14 | ls=`ls -ld "$PRG"` 15 | link=`expr "$ls" : '.*-> \(.*\)$'` 16 | if expr "$link" : '/.*' > /dev/null; then 17 | PRG="$link" 18 | else 19 | PRG=`dirname "$PRG"`"/$link" 20 | fi 21 | done 22 | SAVED="`pwd`" 23 | cd "`dirname \"$PRG\"`/" >/dev/null 24 | APP_HOME="`pwd -P`" 25 | cd "$SAVED" >/dev/null 26 | 27 | APP_NAME="Gradle" 28 | APP_BASE_NAME=`basename "$0"` 29 | 30 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 
31 | DEFAULT_JVM_OPTS="" 32 | 33 | # Use the maximum available, or set MAX_FD != -1 to use that value. 34 | MAX_FD="maximum" 35 | 36 | warn () { 37 | echo "$*" 38 | } 39 | 40 | die () { 41 | echo 42 | echo "$*" 43 | echo 44 | exit 1 45 | } 46 | 47 | # OS specific support (must be 'true' or 'false'). 48 | cygwin=false 49 | msys=false 50 | darwin=false 51 | nonstop=false 52 | case "`uname`" in 53 | CYGWIN* ) 54 | cygwin=true 55 | ;; 56 | Darwin* ) 57 | darwin=true 58 | ;; 59 | MINGW* ) 60 | msys=true 61 | ;; 62 | NONSTOP* ) 63 | nonstop=true 64 | ;; 65 | esac 66 | 67 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 68 | 69 | # Determine the Java command to use to start the JVM. 70 | if [ -n "$JAVA_HOME" ] ; then 71 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 72 | # IBM's JDK on AIX uses strange locations for the executables 73 | JAVACMD="$JAVA_HOME/jre/sh/java" 74 | else 75 | JAVACMD="$JAVA_HOME/bin/java" 76 | fi 77 | if [ ! -x "$JAVACMD" ] ; then 78 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 79 | 80 | Please set the JAVA_HOME variable in your environment to match the 81 | location of your Java installation." 82 | fi 83 | else 84 | JAVACMD="java" 85 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 86 | 87 | Please set the JAVA_HOME variable in your environment to match the 88 | location of your Java installation." 89 | fi 90 | 91 | # Increase the maximum file descriptors if we can. 92 | if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then 93 | MAX_FD_LIMIT=`ulimit -H -n` 94 | if [ $? -eq 0 ] ; then 95 | if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then 96 | MAX_FD="$MAX_FD_LIMIT" 97 | fi 98 | ulimit -n $MAX_FD 99 | if [ $? 
-ne 0 ] ; then 100 | warn "Could not set maximum file descriptor limit: $MAX_FD" 101 | fi 102 | else 103 | warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" 104 | fi 105 | fi 106 | 107 | # For Darwin, add options to specify how the application appears in the dock 108 | if $darwin; then 109 | GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" 110 | fi 111 | 112 | # For Cygwin, switch paths to Windows format before running java 113 | if $cygwin ; then 114 | APP_HOME=`cygpath --path --mixed "$APP_HOME"` 115 | CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` 116 | JAVACMD=`cygpath --unix "$JAVACMD"` 117 | 118 | # We build the pattern for arguments to be converted via cygpath 119 | ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` 120 | SEP="" 121 | for dir in $ROOTDIRSRAW ; do 122 | ROOTDIRS="$ROOTDIRS$SEP$dir" 123 | SEP="|" 124 | done 125 | OURCYGPATTERN="(^($ROOTDIRS))" 126 | # Add a user-defined pattern to the cygpath arguments 127 | if [ "$GRADLE_CYGPATTERN" != "" ] ; then 128 | OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" 129 | fi 130 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 131 | i=0 132 | for arg in "$@" ; do 133 | CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` 134 | CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option 135 | 136 | if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition 137 | eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` 138 | else 139 | eval `echo args$i`="\"$arg\"" 140 | fi 141 | i=$((i+1)) 142 | done 143 | case $i in 144 | (0) set -- ;; 145 | (1) set -- "$args0" ;; 146 | (2) set -- "$args0" "$args1" ;; 147 | (3) set -- "$args0" "$args1" "$args2" ;; 148 | (4) set -- "$args0" "$args1" "$args2" "$args3" ;; 149 | (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; 150 | (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; 151 | (7) set -- "$args0" "$args1" "$args2" "$args3" 
"$args4" "$args5" "$args6" ;; 152 | (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; 153 | (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; 154 | esac 155 | fi 156 | 157 | # Escape application args 158 | save () { 159 | for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done 160 | echo " " 161 | } 162 | APP_ARGS=$(save "$@") 163 | 164 | # Collect all arguments for the java command, following the shell quoting and substitution rules 165 | eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" 166 | 167 | # by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong 168 | if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then 169 | cd "$(dirname "$0")" 170 | fi 171 | 172 | exec "$JAVACMD" "$@" 173 | -------------------------------------------------------------------------------- /src/main/resources/patterns/patterns: -------------------------------------------------------------------------------- 1 | # Forked from https://github.com/elasticsearch/logstash/tree/v1.4.0/patterns 2 | 3 | USERNAME [a-zA-Z0-9._-]+ 4 | USER %{USERNAME:UNWANTED} 5 | INT (?:[+-]?(?:[0-9]+)) 6 | BASE10NUM (?[+-]?(?:(?:[0-9]+(?:\.[0-9]+)?)|(?:\.[0-9]+))) 7 | NUMBER (?:%{BASE10NUM:UNWANTED}) 8 | BASE16NUM (?(?"(?>\\.|[^\\"]+)+"|""|(?>'(?>\\.|[^\\']+)+')|''|(?>`(?>\\.|[^\\`]+)+`)|``)) 20 | UUID [A-Fa-f0-9]{8}-(?:[A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12} 21 | 22 | # Networking 23 | MAC (?:%{CISCOMAC:UNWANTED}|%{WINDOWSMAC:UNWANTED}|%{COMMONMAC:UNWANTED}) 24 | CISCOMAC (?:(?:[A-Fa-f0-9]{4}\.){2}[A-Fa-f0-9]{4}) 25 | WINDOWSMAC (?:(?:[A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2}) 26 | COMMONMAC (?:(?:[A-Fa-f0-9]{2}:){5}[A-Fa-f0-9]{2}) 27 | IPV6 
((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)? 28 | IPV4 (?/(?>[\w_%!$@:.,~-]+|\\.)*)+ 38 | #UNIXPATH (?[A-Za-z]+:|\\)(?:\\[^\\?*]*)+ 41 | URIPROTO [A-Za-z]+(\+[A-Za-z+]+)? 42 | URIHOST %{IPORHOST}(?::%{POSINT:port})? 43 | # uripath comes loosely from RFC1738, but mostly from what Firefox 44 | # doesn't turn into %XX 45 | URIPATH (?:/[A-Za-z0-9$.+!*'(){},~:;=@#%_\-]*)+ 46 | #URIPARAM \?(?:[A-Za-z0-9]+(?:=(?:[^&]*))?(?:&(?:[A-Za-z0-9]+(?:=(?:[^&]*))?)?)*)? 47 | URIPARAM \?[A-Za-z0-9$.+!*'|(){},~@#%&/=:;_?\-\[\]]* 48 | URIPATHPARAM %{URIPATH}(?:%{URIPARAM})? 49 | URI %{URIPROTO}://(?:%{USER}(?::[^@]*)?@)?(?:%{URIHOST})?(?:%{URIPATHPARAM})? 
50 | 51 | # Months: January, Feb, 3, 03, 12, December 52 | MONTH \b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\b 53 | MONTHNUM (?:0?[1-9]|1[0-2]) 54 | MONTHNUM2 (?:0[1-9]|1[0-2]) 55 | MONTHDAY (?:(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9]) 56 | 57 | # Days: Monday, Tue, Thu, etc... 58 | DAY (?:Mon(?:day)?|Tue(?:sday)?|Wed(?:nesday)?|Thu(?:rsday)?|Fri(?:day)?|Sat(?:urday)?|Sun(?:day)?) 59 | 60 | # Years? 61 | YEAR (?>\d\d){1,2} 62 | # Time: HH:MM:SS 63 | #TIME \d{2}:\d{2}(?::\d{2}(?:\.\d+)?)? 64 | # I'm still on the fence about using grok to perform the time match, 65 | # since it's probably slower. 66 | # TIME %{POSINT<24}:%{POSINT<60}(?::%{POSINT<60}(?:\.%{POSINT})?)? 67 | HOUR (?:2[0123]|[01]?[0-9]) 68 | MINUTE (?:[0-5][0-9]) 69 | # '60' is a leap second in most time standards and thus is valid. 70 | SECOND (?:(?:[0-5]?[0-9]|60)(?:[:.,][0-9]+)?) 71 | TIME (?!<[0-9])%{HOUR}:%{MINUTE}(?::%{SECOND})(?![0-9]) 72 | # datestamp is YYYY/MM/DD-HH:MM:SS.UUUU (or something like it) 73 | DATE_US %{MONTHNUM}[/-]%{MONTHDAY}[/-]%{YEAR} 74 | DATE_EU %{MONTHDAY}[./-]%{MONTHNUM}[./-]%{YEAR} 75 | ISO8601_TIMEZONE (?:Z|[+-]%{HOUR}(?::?%{MINUTE})) 76 | ISO8601_SECOND (?:%{SECOND}|60) 77 | TIMESTAMP_ISO8601 %{YEAR}-%{MONTHNUM}-%{MONTHDAY}[T ]%{HOUR}:?%{MINUTE}(?::?%{SECOND})?%{ISO8601_TIMEZONE}? 78 | DATE %{DATE_US}|%{DATE_EU} 79 | DATESTAMP %{DATE}[- ]%{TIME} 80 | TZ (?:[PMCE][SD]T|UTC) 81 | DATESTAMP_RFC822 %{DAY} %{MONTH} %{MONTHDAY} %{YEAR} %{TIME} %{TZ} 82 | DATESTAMP_RFC2822 %{DAY}, %{MONTHDAY} %{MONTH} %{YEAR} %{TIME} %{ISO8601_TIMEZONE} 83 | DATESTAMP_OTHER %{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{TZ} %{YEAR} 84 | DATESTAMP_EVENTLOG %{YEAR}%{MONTHNUM2}%{MONTHDAY}%{HOUR}%{MINUTE}%{SECOND} 85 | 86 | # Syslog Dates: Month Day HH:MM:SS 87 | SYSLOGTIMESTAMP %{MONTH} +%{MONTHDAY} %{TIME} 88 | PROG (?:[\w._/%-]+) 89 | SYSLOGPROG %{PROG:program}(?:\[%{POSINT:pid}\])? 
90 | SYSLOGHOST %{IPORHOST} 91 | SYSLOGFACILITY <%{NONNEGINT:facility}.%{NONNEGINT:priority}> 92 | HTTPDATE %{MONTHDAY}/%{MONTH}/%{YEAR}:%{TIME} %{INT} 93 | 94 | # Shortcuts 95 | QS %{QUOTEDSTRING:UNWANTED} 96 | 97 | # Log formats 98 | SYSLOGBASE %{SYSLOGTIMESTAMP:timestamp} (?:%{SYSLOGFACILITY} )?%{SYSLOGHOST:logsource} %{SYSLOGPROG}: 99 | 100 | MESSAGESLOG %{SYSLOGBASE} %{DATA} 101 | 102 | COMMONAPACHELOG %{IPORHOST:clientip} %{USER:ident} %{USER:auth} \[%{HTTPDATE:timestamp}\] "(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest})" %{NUMBER:response} (?:%{NUMBER:bytes}|-) 103 | COMBINEDAPACHELOG %{COMMONAPACHELOG} %{QS:referrer} %{QS:agent} 104 | COMMONAPACHELOG_DATATYPED %{IPORHOST:clientip} %{USER:ident;boolean} %{USER:auth} \[%{HTTPDATE:timestamp;date;dd/MMM/yyyy:HH:mm:ss Z}\] "(?:%{WORD:verb;string} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion;float})?|%{DATA:rawrequest})" %{NUMBER:response;int} (?:%{NUMBER:bytes;long}|-) 105 | 106 | 107 | # Log Levels 108 | LOGLEVEL ([A|a]lert|ALERT|[T|t]race|TRACE|[D|d]ebug|DEBUG|[N|n]otice|NOTICE|[I|i]nfo|INFO|[W|w]arn?(?:ing)?|WARN?(?:ING)?|[E|e]rr?(?:or)?|ERR?(?:OR)?|[C|c]rit?(?:ical)?|CRIT?(?:ICAL)?|[F|f]atal|FATAL|[S|s]evere|SEVERE|EMERG(?:ENCY)?|[Ee]merg(?:ency)?) 109 | -------------------------------------------------------------------------------- /src/main/java/io/krakens/grok/api/Match.java: -------------------------------------------------------------------------------- 1 | package io.krakens.grok.api; 2 | 3 | 4 | import static java.lang.String.format; 5 | 6 | import java.util.ArrayList; 7 | import java.util.Collections; 8 | import java.util.LinkedHashMap; 9 | import java.util.List; 10 | import java.util.Map; 11 | import java.util.regex.Matcher; 12 | 13 | import io.krakens.grok.api.Converter.IConverter; 14 | import io.krakens.grok.api.exception.GrokException; 15 | 16 | /** 17 | * {@code Match} is a representation in {@code Grok} world of your log. 
18 | * 19 | * @since 0.0.1 20 | */ 21 | public class Match { 22 | private final CharSequence subject; 23 | private final Grok grok; 24 | private final Matcher match; 25 | private final int start; 26 | private final int end; 27 | private boolean keepEmptyCaptures = true; 28 | private Map capture = Collections.emptyMap(); 29 | 30 | /** 31 | * Create a new {@code Match} object. 32 | */ 33 | public Match(CharSequence subject, Grok grok, Matcher match, int start, int end) { 34 | this.subject = subject; 35 | this.grok = grok; 36 | this.match = match; 37 | this.start = start; 38 | this.end = end; 39 | } 40 | 41 | /** 42 | * Create Empty grok matcher. 43 | */ 44 | public static final Match EMPTY = new Match("", null, null, 0, 0); 45 | 46 | public Matcher getMatch() { 47 | return match; 48 | } 49 | 50 | public int getStart() { 51 | return start; 52 | } 53 | 54 | public int getEnd() { 55 | return end; 56 | } 57 | 58 | /** 59 | * Set whether empty (null) captures are kept; despite the parameter name, {@code true} keeps them. 60 | */ 61 | public void setKeepEmptyCaptures(boolean ignore) { 62 | // clear any cached captures 63 | if ( capture.size() > 0) { 64 | capture = new LinkedHashMap<>(); 65 | } 66 | this.keepEmptyCaptures = ignore; 67 | } 68 | 69 | public boolean isKeepEmptyCaptures() { 70 | return this.keepEmptyCaptures; 71 | } 72 | 73 | /** 74 | * Return the single line of log. 75 | * 76 | * @return the single line of log 77 | */ 78 | public CharSequence getSubject() { 79 | return subject; 80 | } 81 | 82 | /** 83 | * Match to the subject the regex and save the matched element into a map. 84 | * 85 | * Multiple values for the same key are stored as list. 86 | * 87 | */ 88 | public Map capture() { 89 | return capture(false); 90 | } 91 | 92 | /** 93 | * Match to the subject the regex and save the matched element into a map 94 | * 95 | * Multiple values to the same key are flattened to one value: the sole non-null value will be captured. 96 | * Should there be multiple non-null values a RuntimeException is being thrown. 
97 | * 98 | * This can be used in cases like: (foo (.*:message) bar|bar (.*:message) foo) where the regexp guarantees that only 99 | * one value will be captured. 100 | * 101 | * See also {@link #capture} which returns multiple values of the same key as list. 102 | * 103 | * @return the matched elements 104 | * @throws GrokException if a keys has multiple non-null values. 105 | */ 106 | public Map captureFlattened() throws GrokException { 107 | return capture(true); 108 | } 109 | 110 | /** 111 | * Private implementation of captureFlattened and capture. 112 | * @param flattened will it flatten values. 113 | * @return the matched elements. 114 | * @throws GrokException if a keys has multiple non-null values, but only if flattened is set to true. 115 | */ 116 | private Map capture(boolean flattened ) throws GrokException { 117 | if (match == null) { 118 | return Collections.emptyMap(); 119 | } 120 | 121 | if (!capture.isEmpty()) { 122 | return capture; 123 | } 124 | 125 | capture = new LinkedHashMap<>(); 126 | 127 | // _capture.put("LINE", this.line); 128 | // _capture.put("LENGTH", this.line.length() +""); 129 | 130 | Map mappedw = GrokUtils.namedGroups(this.match, this.grok.namedGroups); 131 | 132 | mappedw.forEach((key, valueString) -> { 133 | String id = this.grok.getNamedRegexCollectionById(key); 134 | if (id != null && !id.isEmpty()) { 135 | key = id; 136 | } 137 | 138 | if ("UNWANTED".equals(key)) { 139 | return; 140 | } 141 | 142 | Object value = valueString; 143 | if (valueString != null) { 144 | IConverter converter = grok.converters.get(key); 145 | 146 | if (converter != null) { 147 | key = Converter.extractKey(key); 148 | try { 149 | value = converter.convert(valueString); 150 | } catch (Exception e) { 151 | capture.put(key + "_grokfailure", e.toString()); 152 | } 153 | 154 | if (value instanceof String) { 155 | value = cleanString((String) value); 156 | } 157 | } else { 158 | value = cleanString(valueString); 159 | } 160 | } else if 
(!isKeepEmptyCaptures()) { 161 | return; 162 | } 163 | 164 | if (capture.containsKey(key)) { 165 | Object currentValue = capture.get(key); 166 | 167 | if (flattened) { 168 | if (currentValue == null && value != null) { 169 | capture.put(key, value); 170 | } 171 | if (currentValue != null && value != null) { 172 | throw new GrokException( 173 | format( 174 | "key '%s' has multiple non-null values, this is not allowed in flattened mode, values:'%s', '%s'", 175 | key, 176 | currentValue, 177 | value)); 178 | } 179 | } else { 180 | if (currentValue instanceof List) { 181 | @SuppressWarnings("unchecked") 182 | List cvl = (List) currentValue; 183 | cvl.add(value); 184 | } else { 185 | List list = new ArrayList(); 186 | list.add(currentValue); 187 | list.add(value); 188 | capture.put(key, list); 189 | } 190 | } 191 | } else { 192 | capture.put(key, value); 193 | } 194 | }); 195 | 196 | capture = Collections.unmodifiableMap(capture); 197 | 198 | return capture; 199 | } 200 | 201 | /** 202 | * remove from the string the quote and double quote. 203 | * 204 | * @param value string to pure: "my/text" 205 | * @return unquoted string: my/text 206 | */ 207 | private String cleanString(String value) { 208 | if (value == null || value.isEmpty()) { 209 | return value; 210 | } 211 | 212 | char firstChar = value.charAt(0); 213 | char lastChar = value.charAt(value.length() - 1); 214 | 215 | if (firstChar == lastChar 216 | && (firstChar == '"' || firstChar == '\'') 217 | ) { 218 | if (value.length() <= 2) { 219 | return ""; 220 | } else { 221 | int found = 0; 222 | for (int i = 1; i < value.length() - 1; i++ ) { 223 | if (value.charAt(i) == firstChar) { 224 | found++; 225 | } 226 | } 227 | if (found == 0) { 228 | return value.substring(1, value.length() - 1); 229 | } 230 | } 231 | } 232 | 233 | return value; 234 | } 235 | 236 | /** 237 | * Util fct. 
238 | * 239 | * @return boolean 240 | */ 241 | public Boolean isNull() { 242 | return this.match == null; 243 | } 244 | 245 | } 246 | -------------------------------------------------------------------------------- /src/main/java/io/krakens/grok/api/GrokCompiler.java: -------------------------------------------------------------------------------- 1 | package io.krakens.grok.api; 2 | 3 | import static java.lang.String.format; 4 | 5 | import java.io.BufferedReader; 6 | import java.io.IOException; 7 | import java.io.InputStream; 8 | import java.io.InputStreamReader; 9 | import java.io.Reader; 10 | import java.io.Serializable; 11 | import java.nio.charset.Charset; 12 | import java.nio.charset.StandardCharsets; 13 | import java.time.ZoneId; 14 | import java.time.ZoneOffset; 15 | import java.util.HashMap; 16 | import java.util.Map; 17 | import java.util.Objects; 18 | import java.util.Set; 19 | import java.util.regex.Matcher; 20 | import java.util.regex.Pattern; 21 | 22 | import io.krakens.grok.api.exception.GrokException; 23 | 24 | import org.apache.commons.lang3.StringUtils; 25 | 26 | public class GrokCompiler implements Serializable { 27 | 28 | // We don't want \n and commented line 29 | private static final Pattern patternLinePattern = Pattern.compile("^([A-z0-9_]+)\\s+(.*)$"); 30 | 31 | /** 32 | * {@code Grok} patterns definitions. 33 | */ 34 | private final Map grokPatternDefinitions = new HashMap<>(); 35 | 36 | private GrokCompiler() {} 37 | 38 | public static GrokCompiler newInstance() { 39 | return new GrokCompiler(); 40 | } 41 | 42 | public Map getPatternDefinitions() { 43 | return grokPatternDefinitions; 44 | } 45 | 46 | /** 47 | * Registers a new pattern definition. 
48 | * 49 | * @param name : Pattern Name 50 | * @param pattern : Regular expression Or {@code Grok} pattern 51 | * @throws GrokException runtime expt 52 | **/ 53 | public void register(String name, String pattern) { 54 | name = Objects.requireNonNull(name).trim(); 55 | pattern = Objects.requireNonNull(pattern).trim(); 56 | 57 | if (!name.isEmpty() && !pattern.isEmpty()) { 58 | grokPatternDefinitions.put(name, pattern); 59 | } 60 | } 61 | 62 | /** 63 | * Registers multiple pattern definitions. 64 | */ 65 | public void register(Map patternDefinitions) { 66 | Objects.requireNonNull(patternDefinitions); 67 | patternDefinitions.forEach(this::register); 68 | } 69 | 70 | public void registerDefaultPatterns() { 71 | registerPatternFromClasspath("/patterns/patterns"); 72 | } 73 | 74 | public void registerPatternFromClasspath(String path) throws GrokException { 75 | registerPatternFromClasspath(path, StandardCharsets.UTF_8); 76 | } 77 | 78 | public void registerPatternFromClasspath(String path, Charset charset) throws GrokException { 79 | final InputStream inputStream = this.getClass().getResourceAsStream(path); 80 | try (Reader reader = new InputStreamReader(inputStream, charset)) { 81 | register(reader); 82 | } catch (IOException e) { 83 | throw new GrokException(e.getMessage(), e); 84 | } 85 | } 86 | 87 | /** 88 | * Registers multiple pattern definitions from a given inputStream, and decoded as a UTF-8 source. 89 | */ 90 | public void register(InputStream input) throws IOException { 91 | register(input, StandardCharsets.UTF_8); 92 | } 93 | 94 | /** 95 | * Registers multiple pattern definitions from a given inputStream. 
96 | */ 97 | public void register(InputStream input, Charset charset) throws IOException { 98 | try ( 99 | BufferedReader in = new BufferedReader(new InputStreamReader(input, charset))) { 100 | in.lines() 101 | .map(patternLinePattern::matcher) 102 | .filter(Matcher::matches) 103 | .forEach(m -> register(m.group(1), m.group(2))); 104 | } 105 | } 106 | 107 | /** 108 | * Registers multiple pattern definitions from a given Reader. 109 | */ 110 | public void register(Reader input) throws IOException { 111 | new BufferedReader(input).lines() 112 | .map(patternLinePattern::matcher) 113 | .filter(Matcher::matches) 114 | .forEach(m -> register(m.group(1), m.group(2))); 115 | } 116 | 117 | /** 118 | * Compiles a given Grok pattern and returns a Grok object which can parse the pattern. 119 | */ 120 | public Grok compile(String pattern) throws IllegalArgumentException { 121 | return compile(pattern, false); 122 | } 123 | 124 | public Grok compile(final String pattern, boolean namedOnly) throws IllegalArgumentException { 125 | return compile(pattern, ZoneOffset.systemDefault(), namedOnly); 126 | } 127 | 128 | /** 129 | * Compiles a given Grok pattern and returns a Grok object which can parse the pattern. 130 | * 131 | * @param pattern : Grok pattern (ex: %{IP}) 132 | * @param defaultTimeZone : time zone used to parse a timestamp when it doesn't contain the time zone 133 | * @param namedOnly : Whether to capture named expressions only or not (i.e. %{IP:ip} but not %{IP}) 134 | * @return a compiled pattern 135 | * @throws IllegalArgumentException when pattern definition is invalid 136 | */ 137 | public Grok compile(final String pattern, ZoneId defaultTimeZone, boolean namedOnly) throws IllegalArgumentException { 138 | 139 | if (StringUtils.isBlank(pattern)) { 140 | throw new IllegalArgumentException("{pattern} should not be empty or null"); 141 | } 142 | 143 | String namedRegex = pattern; 144 | int index = 0; 145 | /** flag for infinite recursion. 
*/ 146 | int iterationLeft = 1000; 147 | Boolean continueIteration = true; 148 | Map patternDefinitions = new HashMap<>(grokPatternDefinitions); 149 | 150 | // output 151 | Map namedRegexCollection = new HashMap<>(); 152 | 153 | // Replace %{foo} with the regex (mostly group name regex) 154 | // and then compile the regex 155 | while (continueIteration) { 156 | continueIteration = false; 157 | if (iterationLeft <= 0) { 158 | throw new IllegalArgumentException("Deep recursion pattern compilation of " + pattern); 159 | } 160 | iterationLeft--; 161 | 162 | Set namedGroups = GrokUtils.getNameGroups(GrokUtils.GROK_PATTERN.pattern()); 163 | Matcher matcher = GrokUtils.GROK_PATTERN.matcher(namedRegex); 164 | // Match %{Foo:bar} -> pattern name and subname 165 | // Match %{Foo=regex} -> add new regex definition 166 | if (matcher.find()) { 167 | continueIteration = true; 168 | Map group = GrokUtils.namedGroups(matcher, namedGroups); 169 | if (group.get("definition") != null) { 170 | patternDefinitions.put(group.get("pattern"), group.get("definition")); 171 | group.put("name", group.get("name") + "=" + group.get("definition")); 172 | } 173 | int count = StringUtils.countMatches(namedRegex, "%{" + group.get("name") + "}"); 174 | for (int i = 0; i < count; i++) { 175 | String definitionOfPattern = patternDefinitions.get(group.get("pattern")); 176 | if (definitionOfPattern == null) { 177 | throw new IllegalArgumentException(format("No definition for key '%s' found, aborting", 178 | group.get("pattern"))); 179 | } 180 | String replacement = String.format("(?%s)", index, definitionOfPattern); 181 | if (namedOnly && group.get("subname") == null) { 182 | replacement = String.format("(?:%s)", definitionOfPattern); 183 | } 184 | namedRegexCollection.put("name" + index, 185 | (group.get("subname") != null ? 
group.get("subname") : group.get("name"))); 186 | namedRegex = 187 | StringUtils.replace(namedRegex, "%{" + group.get("name") + "}", replacement,1); 188 | // System.out.println(_expanded_pattern); 189 | index++; 190 | } 191 | } 192 | } 193 | 194 | if (namedRegex.isEmpty()) { 195 | throw new IllegalArgumentException("Pattern not found"); 196 | } 197 | 198 | return new Grok( 199 | pattern, 200 | namedRegex, 201 | namedRegexCollection, 202 | patternDefinitions, 203 | defaultTimeZone 204 | ); 205 | } 206 | } 207 | -------------------------------------------------------------------------------- /src/main/resources/patterns/nagios: -------------------------------------------------------------------------------- 1 | # Forked from https://github.com/elasticsearch/logstash/tree/v1.4.0/patterns 2 | ################################################################################## 3 | ################################################################################## 4 | # Chop Nagios log files to smithereens! 5 | # 6 | # A set of GROK filters to process logfiles generated by Nagios. 7 | # While it does not, this set intends to cover all possible Nagios logs. 
8 | # 9 | # Some more work needs to be done to cover all External Commands: 10 | # http://old.nagios.org/developerinfo/externalcommands/commandlist.php 11 | # 12 | # If you need some support on these rules please contact: 13 | # Jelle Smet http://smetj.net 14 | # 15 | ################################################################################# 16 | ################################################################################# 17 | 18 | NAGIOSTIME \[%{NUMBER:nagios_epoch}\] 19 | 20 | ############################################### 21 | ######## Begin nagios log types 22 | ############################################### 23 | NAGIOS_TYPE_CURRENT_SERVICE_STATE CURRENT SERVICE STATE 24 | NAGIOS_TYPE_CURRENT_HOST_STATE CURRENT HOST STATE 25 | 26 | NAGIOS_TYPE_SERVICE_NOTIFICATION SERVICE NOTIFICATION 27 | NAGIOS_TYPE_HOST_NOTIFICATION HOST NOTIFICATION 28 | 29 | NAGIOS_TYPE_SERVICE_ALERT SERVICE ALERT 30 | NAGIOS_TYPE_HOST_ALERT HOST ALERT 31 | 32 | NAGIOS_TYPE_SERVICE_FLAPPING_ALERT SERVICE FLAPPING ALERT 33 | NAGIOS_TYPE_HOST_FLAPPING_ALERT HOST FLAPPING ALERT 34 | 35 | NAGIOS_TYPE_SERVICE_DOWNTIME_ALERT SERVICE DOWNTIME ALERT 36 | NAGIOS_TYPE_HOST_DOWNTIME_ALERT HOST DOWNTIME ALERT 37 | 38 | NAGIOS_TYPE_PASSIVE_SERVICE_CHECK PASSIVE SERVICE CHECK 39 | NAGIOS_TYPE_PASSIVE_HOST_CHECK PASSIVE HOST CHECK 40 | 41 | NAGIOS_TYPE_SERVICE_EVENT_HANDLER SERVICE EVENT HANDLER 42 | NAGIOS_TYPE_HOST_EVENT_HANDLER HOST EVENT HANDLER 43 | 44 | NAGIOS_TYPE_EXTERNAL_COMMAND EXTERNAL COMMAND 45 | NAGIOS_TYPE_TIMEPERIOD_TRANSITION TIMEPERIOD TRANSITION 46 | ############################################### 47 | ######## End nagios log types 48 | ############################################### 49 | 50 | ############################################### 51 | ######## Begin external check types 52 | ############################################### 53 | NAGIOS_EC_DISABLE_SVC_CHECK DISABLE_SVC_CHECK 54 | NAGIOS_EC_ENABLE_SVC_CHECK ENABLE_SVC_CHECK 55 | NAGIOS_EC_DISABLE_HOST_CHECK 
DISABLE_HOST_CHECK 56 | NAGIOS_EC_ENABLE_HOST_CHECK ENABLE_HOST_CHECK 57 | NAGIOS_EC_PROCESS_SERVICE_CHECK_RESULT PROCESS_SERVICE_CHECK_RESULT 58 | NAGIOS_EC_PROCESS_HOST_CHECK_RESULT PROCESS_HOST_CHECK_RESULT 59 | NAGIOS_EC_SCHEDULE_SERVICE_DOWNTIME SCHEDULE_SERVICE_DOWNTIME 60 | NAGIOS_EC_SCHEDULE_HOST_DOWNTIME SCHEDULE_HOST_DOWNTIME 61 | ############################################### 62 | ######## End external check types 63 | ############################################### 64 | NAGIOS_WARNING Warning:%{SPACE}%{GREEDYDATA:nagios_message} 65 | 66 | NAGIOS_CURRENT_SERVICE_STATE %{NAGIOS_TYPE_CURRENT_SERVICE_STATE:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_service};%{DATA:nagios_state};%{DATA:nagios_statetype};%{DATA:nagios_statecode};%{GREEDYDATA:nagios_message} 67 | NAGIOS_CURRENT_HOST_STATE %{NAGIOS_TYPE_CURRENT_HOST_STATE:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_state};%{DATA:nagios_statetype};%{DATA:nagios_statecode};%{GREEDYDATA:nagios_message} 68 | 69 | NAGIOS_SERVICE_NOTIFICATION %{NAGIOS_TYPE_SERVICE_NOTIFICATION:nagios_type}: %{DATA:nagios_notifyname};%{DATA:nagios_hostname};%{DATA:nagios_service};%{DATA:nagios_state};%{DATA:nagios_contact};%{GREEDYDATA:nagios_message} 70 | NAGIOS_HOST_NOTIFICATION %{NAGIOS_TYPE_HOST_NOTIFICATION}: %{DATA:nagios_notifyname};%{DATA:nagios_hostname};%{DATA:nagios_state};%{DATA:nagios_contact};%{GREEDYDATA:nagios_message} 71 | 72 | NAGIOS_SERVICE_ALERT %{NAGIOS_TYPE_SERVICE_ALERT:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_service};%{DATA:nagios_state};%{DATA:nagios_statelevel};%{NUMBER:nagios_attempt};%{GREEDYDATA:nagios_message} 73 | NAGIOS_HOST_ALERT %{NAGIOS_TYPE_HOST_ALERT:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_state};%{DATA:nagios_statelevel};%{NUMBER:nagios_attempt};%{GREEDYDATA:nagios_message} 74 | 75 | NAGIOS_SERVICE_FLAPPING_ALERT %{NAGIOS_TYPE_SERVICE_FLAPPING_ALERT:nagios_type}: 
%{DATA:nagios_hostname};%{DATA:nagios_service};%{DATA:nagios_state};%{GREEDYDATA:nagios_message} 76 | NAGIOS_HOST_FLAPPING_ALERT %{NAGIOS_TYPE_HOST_FLAPPING_ALERT:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_state};%{GREEDYDATA:nagios_message} 77 | 78 | NAGIOS_SERVICE_DOWNTIME_ALERT %{NAGIOS_TYPE_SERVICE_DOWNTIME_ALERT:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_service};%{DATA:nagios_state};%{GREEDYDATA:nagios_comment} 79 | NAGIOS_HOST_DOWNTIME_ALERT %{NAGIOS_TYPE_HOST_DOWNTIME_ALERT:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_state};%{GREEDYDATA:nagios_comment} 80 | 81 | NAGIOS_PASSIVE_SERVICE_CHECK %{NAGIOS_TYPE_PASSIVE_SERVICE_CHECK:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_service};%{DATA:nagios_state};%{GREEDYDATA:nagios_comment} 82 | NAGIOS_PASSIVE_HOST_CHECK %{NAGIOS_TYPE_PASSIVE_HOST_CHECK:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_state};%{GREEDYDATA:nagios_comment} 83 | 84 | NAGIOS_SERVICE_EVENT_HANDLER %{NAGIOS_TYPE_SERVICE_EVENT_HANDLER:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_service};%{DATA:nagios_state};%{DATA:nagios_statelevel};%{DATA:nagios_event_handler_name} 85 | NAGIOS_HOST_EVENT_HANDLER %{NAGIOS_TYPE_HOST_EVENT_HANDLER:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_state};%{DATA:nagios_statelevel};%{DATA:nagios_event_handler_name} 86 | 87 | NAGIOS_TIMEPERIOD_TRANSITION %{NAGIOS_TYPE_TIMEPERIOD_TRANSITION:nagios_type}: %{DATA:nagios_service};%{DATA:nagios_unknown1};%{DATA:nagios_unknown2}; 88 | 89 | #################### 90 | #### External checks 91 | #################### 92 | 93 | #Disable host & service check 94 | NAGIOS_EC_LINE_DISABLE_SVC_CHECK %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_DISABLE_SVC_CHECK:nagios_command};%{DATA:nagios_hostname};%{DATA:nagios_service} 95 | NAGIOS_EC_LINE_DISABLE_HOST_CHECK %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_DISABLE_HOST_CHECK:nagios_command};%{DATA:nagios_hostname} 96 | 97 | #Enable host & service check 98 | 
NAGIOS_EC_LINE_ENABLE_SVC_CHECK %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_ENABLE_SVC_CHECK:nagios_command};%{DATA:nagios_hostname};%{DATA:nagios_service} 99 | NAGIOS_EC_LINE_ENABLE_HOST_CHECK %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_ENABLE_HOST_CHECK:nagios_command};%{DATA:nagios_hostname} 100 | 101 | #Process host & service check 102 | NAGIOS_EC_LINE_PROCESS_SERVICE_CHECK_RESULT %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_PROCESS_SERVICE_CHECK_RESULT:nagios_command};%{DATA:nagios_hostname};%{DATA:nagios_service};%{DATA:nagios_state};%{GREEDYDATA:nagios_check_result} 103 | NAGIOS_EC_LINE_PROCESS_HOST_CHECK_RESULT %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_PROCESS_HOST_CHECK_RESULT:nagios_command};%{DATA:nagios_hostname};%{DATA:nagios_state};%{GREEDYDATA:nagios_check_result} 104 | 105 | #Schedule host & service downtime 106 | NAGIOS_EC_LINE_SCHEDULE_HOST_DOWNTIME %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_SCHEDULE_HOST_DOWNTIME:nagios_command};%{DATA:nagios_hostname};%{NUMBER:nagios_start_time};%{NUMBER:nagios_end_time};%{NUMBER:nagios_fixed};%{NUMBER:nagios_trigger_id};%{NUMBER:nagios_duration};%{DATA:author};%{DATA:comment} 107 | 108 | #End matching line 109 | NAGIOSLOGLINE %{NAGIOSTIME} 
(?:%{NAGIOS_WARNING}|%{NAGIOS_CURRENT_SERVICE_STATE}|%{NAGIOS_CURRENT_HOST_STATE}|%{NAGIOS_SERVICE_NOTIFICATION}|%{NAGIOS_HOST_NOTIFICATION}|%{NAGIOS_SERVICE_ALERT}|%{NAGIOS_HOST_ALERT}|%{NAGIOS_SERVICE_FLAPPING_ALERT}|%{NAGIOS_HOST_FLAPPING_ALERT}|%{NAGIOS_SERVICE_DOWNTIME_ALERT}|%{NAGIOS_HOST_DOWNTIME_ALERT}|%{NAGIOS_PASSIVE_SERVICE_CHECK}|%{NAGIOS_PASSIVE_HOST_CHECK}|%{NAGIOS_SERVICE_EVENT_HANDLER}|%{NAGIOS_HOST_EVENT_HANDLER}|%{NAGIOS_TIMEPERIOD_TRANSITION}|%{NAGIOS_EC_LINE_DISABLE_SVC_CHECK}|%{NAGIOS_EC_LINE_ENABLE_SVC_CHECK}|%{NAGIOS_EC_LINE_DISABLE_HOST_CHECK}|%{NAGIOS_EC_LINE_ENABLE_HOST_CHECK}|%{NAGIOS_EC_LINE_PROCESS_HOST_CHECK_RESULT}|%{NAGIOS_EC_LINE_PROCESS_SERVICE_CHECK_RESULT}|%{NAGIOS_EC_LINE_SCHEDULE_HOST_DOWNTIME}) 110 | -------------------------------------------------------------------------------- /extra/checkstyle/checkstyle.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 70 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 95 | 96 | 97 | 99 | 100 | 101 | 102 | 104 | 105 | 106 | 107 | 109 | 110 | 111 | 112 | 114 | 115 | 116 | 117 | 118 | 119 | 121 | 122 | 123 | 124 | 126 | 127 | 128 | 129 | 131 | 132 | 133 | 134 | 136 | 137 | 138 | 139 | 141 | 143 | 145 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 168 | 169 | 170 | 171 | 172 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 
-------------------------------------------------------------------------------- /src/main/resources/patterns/postfix: -------------------------------------------------------------------------------- 1 | # common postfix patterns 2 | POSTFIX_QUEUEID ([0-9A-F]{6,}|[0-9a-zA-Z]{15,}) 3 | POSTFIX_CLIENT_INFO %{HOSTNAME:postfix_client_hostname}?\[%{IP:postfix_client_ip}\](:%{INT:postfix_client_port})? 4 | POSTFIX_RELAY_INFO %{HOSTNAME:postfix_relay_hostname}?\[(%{IP:postfix_relay_ip}|%{DATA:postfix_relay_service})\](:%{INT:postfix_relay_port})?|%{WORD:postfix_relay_service} 5 | POSTFIX_SMTP_STAGE (CONNECT|HELO|EHLO|STARTTLS|AUTH|MAIL( FROM)?|RCPT( TO)?|(end of )?DATA|RSET|UNKNOWN|END-OF-MESSAGE|VRFY|\.) 6 | POSTFIX_ACTION (accept|defer|discard|filter|header-redirect|reject) 7 | POSTFIX_STATUS_CODE \d{3} 8 | POSTFIX_STATUS_CODE_ENHANCED \d\.\d\.\d 9 | POSTFIX_DNSBL_MESSAGE Service unavailable; .* \[%{GREEDYDATA:postfix_status_data}\] %{GREEDYDATA:postfix_status_message}; 10 | POSTFIX_PS_ACCESS_ACTION (DISCONNECT|BLACKLISTED|WHITELISTED|WHITELIST VETO|PASS NEW|PASS OLD) 11 | POSTFIX_PS_VIOLATION (BARE NEWLINE|COMMAND (TIME|COUNT|LENGTH) LIMIT|COMMAND PIPELINING|DNSBL|HANGUP|NON-SMTP COMMAND|PREGREET) 12 | POSTFIX_TIME_UNIT %{NUMBER}[smhd] 13 | POSTFIX_KEYVALUE_DATA [\w-]+=[^;]* 14 | POSTFIX_KEYVALUE %{POSTFIX_QUEUEID:postfix_queueid}: %{POSTFIX_KEYVALUE_DATA:postfix_keyvalue_data} 15 | POSTFIX_WARNING_LEVEL (warning|fatal|info) 16 | 17 | POSTFIX_TLSCONN (Anonymous|Trusted|Untrusted|Verified) TLS connection established (to %{POSTFIX_RELAY_INFO}|from %{POSTFIX_CLIENT_INFO}): %{DATA:postfix_tls_version} with cipher %{DATA:postfix_tls_cipher} \(%{DATA:postfix_tls_cipher_size} bits\) 18 | POSTFIX_DELAYS %{NUMBER:postfix_delay_before_qmgr}/%{NUMBER:postfix_delay_in_qmgr}/%{NUMBER:postfix_delay_conn_setup}/%{NUMBER:postfix_delay_transmission} 19 | POSTFIX_LOSTCONN (lost connection|timeout|SSL_accept error) 20 | POSTFIX_LOSTCONN_REASONS (receiving the initial server 
greeting|sending message body|sending end of data -- message may be sent more than once) 21 | POSTFIX_PROXY_MESSAGE (%{POSTFIX_STATUS_CODE:postfix_proxy_status_code} )?(%{POSTFIX_STATUS_CODE_ENHANCED:postfix_proxy_status_code_enhanced})?.* 22 | 23 | # helper patterns 24 | GREEDYDATA_NO_COLON [^:]* 25 | GREEDYDATA_NO_SEMICOLON [^;]* 26 | 27 | # warning patterns 28 | POSTFIX_WARNING_WITH_KV (%{POSTFIX_QUEUEID:postfix_queueid}: )?%{POSTFIX_WARNING_LEVEL:postfix_message_level}: %{GREEDYDATA:postfix_message}; %{POSTFIX_KEYVALUE_DATA:postfix_keyvalue_data} 29 | POSTFIX_WARNING_WITHOUT_KV (%{POSTFIX_QUEUEID:postfix_queueid}: )?%{POSTFIX_WARNING_LEVEL:postfix_message_level}: %{GREEDYDATA:postfix_message} 30 | POSTFIX_WARNING %{POSTFIX_WARNING_WITH_KV}|%{POSTFIX_WARNING_WITHOUT_KV} 31 | 32 | # smtpd patterns 33 | POSTFIX_SMTPD_CONNECT connect from %{POSTFIX_CLIENT_INFO} 34 | POSTFIX_SMTPD_DISCONNECT disconnect from %{POSTFIX_CLIENT_INFO} 35 | POSTFIX_SMTPD_LOSTCONN %{POSTFIX_LOSTCONN:postfix_smtpd_lostconn_data}( after %{POSTFIX_SMTP_STAGE:postfix_smtp_stage}( \(%{INT} bytes\))?)? from %{POSTFIX_CLIENT_INFO}(: %{GREEDYDATA:postfix_smtpd_lostconn_reason})? 36 | POSTFIX_SMTPD_NOQUEUE NOQUEUE: %{POSTFIX_ACTION:postfix_action}: %{POSTFIX_SMTP_STAGE:postfix_smtp_stage} from %{POSTFIX_CLIENT_INFO}:( %{POSTFIX_STATUS_CODE:postfix_status_code} %{POSTFIX_STATUS_CODE_ENHANCED:postfix_status_code_enhanced})?( <%{DATA:postfix_status_data}>:)? 
(%{POSTFIX_DNSBL_MESSAGE}|%{GREEDYDATA:postfix_status_message};) %{POSTFIX_KEYVALUE_DATA:postfix_keyvalue_data} 37 | POSTFIX_SMTPD_PIPELINING improper command pipelining after %{POSTFIX_SMTP_STAGE:postfix_smtp_stage} from %{POSTFIX_CLIENT_INFO}: %{GREEDYDATA:postfix_improper_pipelining_data} 38 | POSTFIX_SMTPD_PROXY proxy-%{POSTFIX_ACTION:postfix_proxy_result}: (%{POSTFIX_SMTP_STAGE:postfix_proxy_smtp_stage}): %{POSTFIX_PROXY_MESSAGE:postfix_proxy_message}; %{POSTFIX_KEYVALUE_DATA:postfix_keyvalue_data} 39 | 40 | # cleanup patterns 41 | POSTFIX_CLEANUP_MILTER %{POSTFIX_QUEUEID:postfix_queueid}: milter-%{POSTFIX_ACTION:postfix_milter_result}: %{GREEDYDATA:postfix_milter_message}; %{GREEDYDATA_NO_COLON:postfix_keyvalue_data}(: %{GREEDYDATA:postfix_milter_data})? 42 | 43 | # qmgr patterns 44 | POSTFIX_QMGR_REMOVED %{POSTFIX_QUEUEID:postfix_queueid}: removed 45 | POSTFIX_QMGR_ACTIVE %{POSTFIX_QUEUEID:postfix_queueid}: %{POSTFIX_KEYVALUE_DATA:postfix_keyvalue_data} \(queue active\) 46 | POSTFIX_QMGR_EXPIRED %{POSTFIX_QUEUEID:postfix_queueid}: from=<%{DATA:postfix_from}>, status=%{WORD:postfix_status}, returned to sender 47 | 48 | # pipe patterns 49 | POSTFIX_PIPE_ANY %{POSTFIX_QUEUEID:postfix_queueid}: %{POSTFIX_KEYVALUE_DATA:postfix_keyvalue_data}, status=%{WORD:postfix_status} \(%{GREEDYDATA:postfix_pipe_response}\) 50 | 51 | # error patterns 52 | POSTFIX_ERROR_ANY %{POSTFIX_QUEUEID:postfix_queueid}: %{POSTFIX_KEYVALUE_DATA:postfix_keyvalue_data}, status=%{WORD:postfix_status} \(%{GREEDYDATA:postfix_error_response}\) 53 | 54 | # discard patterns 55 | POSTFIX_DISCARD_ANY %{POSTFIX_QUEUEID:postfix_queueid}: %{POSTFIX_KEYVALUE_DATA:postfix_keyvalue_data} status=%{WORD:postfix_status} %{GREEDYDATA} 56 | 57 | # postsuper patterns 58 | POSTFIX_POSTSUPER_ACTIONS (removed|requeued|placed on hold|released from hold) 59 | POSTFIX_POSTSUPER_ACTION %{POSTFIX_QUEUEID:postfix_queueid}: %{POSTFIX_POSTSUPER_ACTIONS:postfix_postsuper_action} 60 | POSTFIX_POSTSUPER_SUMMARY_ACTIONS 
(Deleted|Requeued|Placed on hold|Released from hold) 61 | POSTFIX_POSTSUPER_SUMMARY %{POSTFIX_POSTSUPER_SUMMARY_ACTIONS:postfix_postsuper_summary_action}: %{NUMBER:postfix_postsuper_summary_count} messages? 62 | 63 | # postscreen patterns 64 | POSTFIX_PS_CONNECT CONNECT from %{POSTFIX_CLIENT_INFO} to \[%{IP:postfix_server_ip}\]:%{INT:postfix_server_port} 65 | POSTFIX_PS_ACCESS %{POSTFIX_PS_ACCESS_ACTION:postfix_postscreen_access} %{POSTFIX_CLIENT_INFO} 66 | POSTFIX_PS_NOQUEUE %{POSTFIX_SMTPD_NOQUEUE} 67 | POSTFIX_PS_TOOBUSY NOQUEUE: reject: CONNECT from %{POSTFIX_CLIENT_INFO}: %{GREEDYDATA:postfix_postscreen_toobusy_data} 68 | POSTFIX_PS_DNSBL %{POSTFIX_PS_VIOLATION:postfix_postscreen_violation} rank %{INT:postfix_postscreen_dnsbl_rank} for %{POSTFIX_CLIENT_INFO} 69 | POSTFIX_PS_CACHE cache %{DATA} full cleanup: retained=%{NUMBER:postfix_postscreen_cache_retained} dropped=%{NUMBER:postfix_postscreen_cache_dropped} entries 70 | POSTFIX_PS_VIOLATIONS %{POSTFIX_PS_VIOLATION:postfix_postscreen_violation}( %{INT})?( after %{NUMBER:postfix_postscreen_violation_time})? from %{POSTFIX_CLIENT_INFO}(( after %{POSTFIX_SMTP_STAGE:postfix_smtp_stage})?(: %{GREEDYDATA:postfix_postscreen_data})?| in tests (after|before) SMTP handshake) 71 | 72 | # dnsblog patterns 73 | POSTFIX_DNSBLOG_LISTING addr %{IP:postfix_client_ip} listed by domain %{HOSTNAME:postfix_dnsbl_domain} as %{IP:postfix_dnsbl_result} 74 | 75 | # tlsproxy patterns 76 | POSTFIX_TLSPROXY_CONN (DIS)?CONNECT( from)? 
%{POSTFIX_CLIENT_INFO} 77 | 78 | # anvil patterns 79 | POSTFIX_ANVIL_CONN_RATE statistics: max connection rate %{NUMBER:postfix_anvil_conn_rate}/%{POSTFIX_TIME_UNIT:postfix_anvil_conn_period} for \(%{DATA:postfix_service}:%{IP:postfix_client_ip}\) at %{SYSLOGTIMESTAMP:postfix_anvil_timestamp} 80 | POSTFIX_ANVIL_CONN_CACHE statistics: max cache size %{NUMBER:postfix_anvil_cache_size} at %{SYSLOGTIMESTAMP:postfix_anvil_timestamp} 81 | POSTFIX_ANVIL_CONN_COUNT statistics: max connection count %{NUMBER:postfix_anvil_conn_count} for \(%{DATA:postfix_service}:%{IP:postfix_client_ip}\) at %{SYSLOGTIMESTAMP:postfix_anvil_timestamp} 82 | 83 | # smtp patterns 84 | POSTFIX_SMTP_DELIVERY %{POSTFIX_KEYVALUE} status=%{WORD:postfix_status}( \(%{GREEDYDATA:postfix_smtp_response}\))? 85 | POSTFIX_SMTP_CONNERR connect to %{POSTFIX_RELAY_INFO}: (Connection timed out|No route to host|Connection refused|Network is unreachable) 86 | POSTFIX_SMTP_LOSTCONN %{POSTFIX_QUEUEID:postfix_queueid}: %{POSTFIX_LOSTCONN:postfix_smtp_lostconn_data} with %{POSTFIX_RELAY_INFO}( while %{POSTFIX_LOSTCONN_REASONS:postfix_smtp_lostconn_reason})? 87 | POSTFIX_SMTP_TIMEOUT %{POSTFIX_QUEUEID:postfix_queueid}: conversation with %{POSTFIX_RELAY_INFO} timed out( while %{POSTFIX_LOSTCONN_REASONS:postfix_smtp_lostconn_reason})? 
88 | POSTFIX_SMTP_RELAYERR %{POSTFIX_QUEUEID:postfix_queueid}: host %{POSTFIX_RELAY_INFO} said: %{GREEDYDATA:postfix_smtp_response} \(in reply to %{POSTFIX_SMTP_STAGE:postfix_smtp_stage} command\) 89 | 90 | # master patterns 91 | POSTFIX_MASTER_START (daemon started|reload) -- version %{DATA:postfix_version}, configuration %{PATH:postfix_config_path} 92 | POSTFIX_MASTER_EXIT terminating on signal %{INT:postfix_termination_signal} 93 | 94 | # bounce patterns 95 | POSTFIX_BOUNCE_NOTIFICATION %{POSTFIX_QUEUEID:postfix_queueid}: sender (non-delivery|delivery status|delay) notification: %{POSTFIX_QUEUEID:postfix_bounce_queueid} 96 | 97 | # scache patterns 98 | POSTFIX_SCACHE_LOOKUPS statistics: (address|domain) lookup hits=%{INT:postfix_scache_hits} miss=%{INT:postfix_scache_miss} success=%{INT:postfix_scache_success}% 99 | POSTFIX_SCACHE_SIMULTANEOUS statistics: max simultaneous domains=%{INT:postfix_scache_domains} addresses=%{INT:postfix_scache_addresses} connection=%{INT:postfix_scache_connection} 100 | POSTFIX_SCACHE_TIMESTAMP statistics: start interval %{SYSLOGTIMESTAMP:postfix_scache_timestamp} 101 | 102 | # aggregate all patterns 103 | POSTFIX_SMTPD %{POSTFIX_SMTPD_CONNECT}|%{POSTFIX_SMTPD_DISCONNECT}|%{POSTFIX_SMTPD_LOSTCONN}|%{POSTFIX_SMTPD_NOQUEUE}|%{POSTFIX_SMTPD_PIPELINING}|%{POSTFIX_TLSCONN}|%{POSTFIX_WARNING}|%{POSTFIX_SMTPD_PROXY}|%{POSTFIX_KEYVALUE} 104 | POSTFIX_CLEANUP %{POSTFIX_CLEANUP_MILTER}|%{POSTFIX_WARNING}|%{POSTFIX_KEYVALUE} 105 | POSTFIX_QMGR %{POSTFIX_QMGR_REMOVED}|%{POSTFIX_QMGR_ACTIVE}|%{POSTFIX_QMGR_EXPIRED}|%{POSTFIX_WARNING} 106 | POSTFIX_PIPE %{POSTFIX_PIPE_ANY} 107 | POSTFIX_POSTSCREEN %{POSTFIX_PS_CONNECT}|%{POSTFIX_PS_ACCESS}|%{POSTFIX_PS_NOQUEUE}|%{POSTFIX_PS_TOOBUSY}|%{POSTFIX_PS_CACHE}|%{POSTFIX_PS_DNSBL}|%{POSTFIX_PS_VIOLATIONS}|%{POSTFIX_WARNING} 108 | POSTFIX_DNSBLOG %{POSTFIX_DNSBLOG_LISTING}|%{POSTFIX_WARNING} 109 | POSTFIX_ANVIL %{POSTFIX_ANVIL_CONN_RATE}|%{POSTFIX_ANVIL_CONN_CACHE}|%{POSTFIX_ANVIL_CONN_COUNT} 110 | 
POSTFIX_SMTP %{POSTFIX_SMTP_DELIVERY}|%{POSTFIX_SMTP_CONNERR}|%{POSTFIX_SMTP_LOSTCONN}|%{POSTFIX_SMTP_TIMEOUT}|%{POSTFIX_SMTP_RELAYERR}|%{POSTFIX_TLSCONN}|%{POSTFIX_WARNING} 111 | POSTFIX_DISCARD %{POSTFIX_DISCARD_ANY}|%{POSTFIX_WARNING} 112 | POSTFIX_LMTP %{POSTFIX_SMTP} 113 | POSTFIX_PICKUP %{POSTFIX_KEYVALUE} 114 | POSTFIX_TLSPROXY %{POSTFIX_TLSPROXY_CONN}|%{POSTFIX_WARNING} 115 | POSTFIX_MASTER %{POSTFIX_MASTER_START}|%{POSTFIX_MASTER_EXIT}|%{POSTFIX_WARNING} 116 | POSTFIX_BOUNCE %{POSTFIX_BOUNCE_NOTIFICATION} 117 | POSTFIX_SENDMAIL %{POSTFIX_WARNING} 118 | POSTFIX_POSTDROP %{POSTFIX_WARNING} 119 | POSTFIX_SCACHE %{POSTFIX_SCACHE_LOOKUPS}|%{POSTFIX_SCACHE_SIMULTANEOUS}|%{POSTFIX_SCACHE_TIMESTAMP} 120 | POSTFIX_TRIVIAL_REWRITE %{POSTFIX_WARNING} 121 | POSTFIX_TLSMGR %{POSTFIX_WARNING} 122 | POSTFIX_LOCAL %{POSTFIX_KEYVALUE} 123 | POSTFIX_VIRTUAL %{POSTFIX_SMTP_DELIVERY} 124 | POSTFIX_ERROR %{POSTFIX_ERROR_ANY} 125 | POSTFIX_POSTSUPER %{POSTFIX_POSTSUPER_ACTION}|%{POSTFIX_POSTSUPER_SUMMARY} 126 | -------------------------------------------------------------------------------- /extra/checkstyle/intellij-style.xml: -------------------------------------------------------------------------------- 1 | 2 | 14 | 20 | 32 | 602 | -------------------------------------------------------------------------------- /src/test/java/io/krakens/grok/api/GrokTest.java: -------------------------------------------------------------------------------- 1 | package io.krakens.grok.api; 2 | 3 | import static java.lang.String.format; 4 | import static org.hamcrest.CoreMatchers.containsString; 5 | import static org.junit.Assert.assertEquals; 6 | import static org.junit.Assert.assertNotNull; 7 | import static org.junit.Assert.assertNull; 8 | import static org.junit.Assert.assertThat; 9 | import static org.junit.Assert.assertTrue; 10 | import static org.junit.Assert.fail; 11 | 12 | import java.io.BufferedReader; 13 | import java.io.FileReader; 14 | import java.time.Instant; 15 | import 
java.time.ZoneId; 16 | import java.time.ZoneOffset; 17 | import java.time.ZonedDateTime; 18 | import java.time.format.DateTimeFormatter; 19 | import java.util.ArrayList; 20 | import java.util.Collections; 21 | import java.util.List; 22 | import java.util.Locale; 23 | import java.util.Map; 24 | import java.util.regex.Matcher; 25 | import java.util.regex.Pattern; 26 | 27 | import io.krakens.grok.api.exception.GrokException; 28 | 29 | import com.google.common.collect.ImmutableMap; 30 | import com.google.common.io.Resources; 31 | import org.assertj.core.api.Assertions; 32 | import org.junit.Before; 33 | import org.junit.FixMethodOrder; 34 | import org.junit.Test; 35 | import org.junit.runners.MethodSorters; 36 | 37 | 38 | @FixMethodOrder(MethodSorters.NAME_ASCENDING) 39 | public class GrokTest { 40 | 41 | static { 42 | Locale.setDefault(Locale.ROOT); 43 | } 44 | 45 | GrokCompiler compiler; 46 | 47 | @Before 48 | public void setUp() throws Exception { 49 | compiler = GrokCompiler.newInstance(); 50 | compiler.register(Resources.getResource(ResourceManager.PATTERNS).openStream()); 51 | } 52 | 53 | @Test 54 | public void test000_basic() { 55 | GrokCompiler compiler = GrokCompiler.newInstance(); 56 | boolean thrown = false; 57 | 58 | try { 59 | compiler.register(null, ""); 60 | } catch (NullPointerException e) { 61 | thrown = true; 62 | } 63 | assertTrue(thrown); 64 | } 65 | 66 | @Test(expected = Exception.class) 67 | public void test_throwExceptionIfPatternIsNull() { 68 | compiler.compile(null); 69 | } 70 | 71 | @Test(expected = Exception.class) 72 | public void test_throwExceptionIfPatternIsEmptyString() { 73 | compiler.compile(""); 74 | } 75 | 76 | @Test(expected = Exception.class) 77 | public void test_throwExceptionIfPatternContainsOnlyBlanks() { 78 | compiler.compile(" "); 79 | } 80 | 81 | @Test 82 | public void test001_static_metod_factory() { 83 | 84 | Grok staticGrok = compiler.compile("%{USERNAME}"); 85 | Match gm = staticGrok.match("root"); 86 | Map map = 
gm.capture(); 87 | assertEquals("{USERNAME=root}", map.toString()); 88 | 89 | gm = staticGrok.match("r00t"); 90 | map = gm.capture(); 91 | assertEquals("{USERNAME=r00t}", map.toString()); 92 | 93 | gm = staticGrok.match("guest"); 94 | map = gm.capture(); 95 | assertEquals("{USERNAME=guest}", map.toString()); 96 | 97 | gm = staticGrok.match("guest1234"); 98 | map = gm.capture(); 99 | assertEquals("{USERNAME=guest1234}", map.toString()); 100 | 101 | gm = staticGrok.match("john doe"); 102 | map = gm.capture(); 103 | assertEquals("{USERNAME=john}", map.toString()); 104 | } 105 | 106 | @Test 107 | public void test001_username2() { 108 | Grok grok = compiler.compile("%{USER}"); 109 | 110 | Match gm = grok.match("root"); 111 | Map map = gm.capture(); 112 | assertEquals("{USER=root}", map.toString()); 113 | 114 | gm = grok.match("r00t"); 115 | map = gm.capture(); 116 | assertEquals("{USER=r00t}", map.toString()); 117 | 118 | gm = grok.match("guest"); 119 | map = gm.capture(); 120 | assertEquals("{USER=guest}", map.toString()); 121 | 122 | gm = grok.match("guest1234"); 123 | map = gm.capture(); 124 | assertEquals("{USER=guest1234}", map.toString()); 125 | 126 | gm = grok.match("john doe"); 127 | map = gm.capture(); 128 | assertEquals("{USER=john}", map.toString()); 129 | } 130 | 131 | @Test 132 | public void test002_numbers() { 133 | Grok grok = compiler.compile("%{NUMBER}"); 134 | 135 | Match gm = grok.match("-42"); 136 | Map map = gm.capture(); 137 | assertEquals("{NUMBER=-42}", map.toString()); 138 | 139 | } 140 | 141 | @Test 142 | public void test003_word() { 143 | Grok grok = compiler.compile("%{WORD}"); 144 | 145 | Match gm = grok.match("a"); 146 | Map map = gm.capture(); 147 | assertEquals("{WORD=a}", map.toString()); 148 | 149 | gm = grok.match("abc"); 150 | map = gm.capture(); 151 | assertEquals("{WORD=abc}", map.toString()); 152 | 153 | } 154 | 155 | @Test 156 | public void test004_space() { 157 | Grok grok = compiler.compile("%{SPACE}"); 158 | 159 | Match gm = 
grok.match("abc dc"); 160 | Map map = gm.capture(); 161 | assertEquals("{SPACE=}", map.toString()); 162 | 163 | } 164 | 165 | @Test 166 | public void test004_number() { 167 | Grok grok = compiler.compile("%{NUMBER}"); 168 | 169 | Match gm = grok.match("Something costs $55.4!"); 170 | Map map = gm.capture(); 171 | assertEquals("{NUMBER=55.4}", map.toString()); 172 | 173 | } 174 | 175 | @Test 176 | public void test005_notSpace() { 177 | Grok grok = compiler.compile("%{NOTSPACE}"); 178 | 179 | Match gm = grok.match("abc dc"); 180 | Map map = gm.capture(); 181 | assertEquals("{NOTSPACE=abc}", map.toString()); 182 | 183 | } 184 | 185 | @Test 186 | public void test006_quotedString() { 187 | Grok grok = compiler.compile("%{QUOTEDSTRING:text}"); 188 | 189 | Match gm = grok.match("\"abc dc\""); 190 | Map map = gm.capture(); 191 | assertEquals("{text=abc dc}", map.toString()); 192 | } 193 | 194 | @Test 195 | public void test007_uuid() { 196 | Grok grok = compiler.compile("%{UUID}"); 197 | 198 | Match gm = grok.match("61243740-4786-11e3-86a7-0002a5d5c51b"); 199 | Map map = gm.capture(); 200 | assertEquals("{UUID=61243740-4786-11e3-86a7-0002a5d5c51b}", map.toString()); 201 | 202 | gm = grok.match("7F8C7CB0-4786-11E3-8F96-0800200C9A66"); 203 | map = gm.capture(); 204 | assertEquals("{UUID=7F8C7CB0-4786-11E3-8F96-0800200C9A66}", map.toString()); 205 | 206 | gm = grok.match("03A8413C-F604-4D21-8F4D-24B19D98B5A7"); 207 | map = gm.capture(); 208 | assertEquals("{UUID=03A8413C-F604-4D21-8F4D-24B19D98B5A7}", map.toString()); 209 | 210 | } 211 | 212 | @Test 213 | public void test008_mac() { 214 | Grok grok = compiler.compile("%{MAC}"); 215 | 216 | Match gm = grok.match("5E:FF:56:A2:AF:15"); 217 | Map map = gm.capture(); 218 | assertEquals("{MAC=5E:FF:56:A2:AF:15}", map.toString()); 219 | 220 | } 221 | 222 | @Test 223 | public void test009_ipOrPort() { 224 | Grok grok = compiler.compile("%{IPORHOST}"); 225 | 226 | Match gm = grok.match("www.google.fr"); 227 | Map map = gm.capture(); 
228 | assertEquals("{IPORHOST=www.google.fr}", map.toString()); 229 | 230 | gm = grok.match("www.google.com"); 231 | map = gm.capture(); 232 | assertEquals("{IPORHOST=www.google.com}", map.toString()); 233 | } 234 | 235 | @Test 236 | public void test010_hostPort() { 237 | Grok grok = compiler.compile("%{HOSTPORT}"); 238 | 239 | Match gm = grok.match("www.google.fr:80"); 240 | Map map = gm.capture(); 241 | assertEquals(ImmutableMap.of( 242 | "HOSTPORT", "www.google.fr:80", 243 | "IPORHOST", "www.google.fr", 244 | "PORT", "80"), map); 245 | } 246 | 247 | @Test 248 | public void test011_combineApache() { 249 | Grok grok = compiler.compile("%{COMBINEDAPACHELOG}"); 250 | 251 | Match gm = 252 | grok.match("112.169.19.192 - - [06/Mar/2013:01:36:30 +0900] \"GET / HTTP/1.1\" 200 44346 \"-\" " 253 | + "\"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.22 (KHTML, like Gecko) " 254 | + "Chrome/25.0.1364.152 Safari/537.22\""); 255 | Map map = gm.capture(); 256 | assertEquals( 257 | map.get("agent").toString(), 258 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.22 (KHTML, like Gecko) " 259 | + "Chrome/25.0.1364.152 Safari/537.22"); 260 | assertEquals(map.get("clientip").toString(), "112.169.19.192"); 261 | assertEquals(map.get("httpversion").toString(), "1.1"); 262 | assertEquals(map.get("timestamp").toString(), "06/Mar/2013:01:36:30 +0900"); 263 | assertEquals(map.get("TIME").toString(), "01:36:30"); 264 | 265 | gm = 266 | grok.match("112.169.19.192 - - [06/Mar/2013:01:36:30 +0900] \"GET " 267 | + "/wp-content/plugins/easy-table/themes/default/style.css?ver=1.0 HTTP/1.1\" " 268 | + "304 - \"http://www.nflabs.com/\" \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) " 269 | + "AppleWebKit/537.22 (KHTML, like Gecko) Chrome/25.0.1364.152 Safari/537.22\""); 270 | map = gm.capture(); 271 | assertEquals( 272 | map.get("agent").toString(), 273 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.22 (KHTML, like Gecko) " 274 | + 
"Chrome/25.0.1364.152 Safari/537.22"); 275 | assertEquals(map.get("clientip").toString(), "112.169.19.192"); 276 | assertEquals(map.get("httpversion").toString(), "1.1"); 277 | assertEquals(map.get("request").toString(), 278 | "/wp-content/plugins/easy-table/themes/default/style.css?ver=1.0"); 279 | assertEquals(map.get("TIME").toString(), "01:36:30"); 280 | } 281 | 282 | @Test 283 | public void test012_day() { 284 | 285 | Grok grok = compiler.compile("%{DAY}"); 286 | 287 | List days = new ArrayList<>(); 288 | days.add("Mon"); 289 | days.add("Monday"); 290 | days.add("Tue"); 291 | days.add("Tuesday"); 292 | days.add("Wed"); 293 | days.add("Wednesday"); 294 | days.add("Thu"); 295 | days.add("Thursday"); 296 | days.add("Fri"); 297 | days.add("Friday"); 298 | days.add("Sat"); 299 | days.add("Saturday"); 300 | days.add("Sun"); 301 | days.add("Sunday"); 302 | 303 | int counter = 0; 304 | for (String day : days) { 305 | Match match = grok.match(day); 306 | Map map = match.capture(); 307 | assertNotNull(map); 308 | assertEquals(map.get("DAY"), days.get(counter)); 309 | counter++; 310 | } 311 | } 312 | 313 | @Test 314 | public void test013_IpSet() throws Throwable { 315 | Grok grok = compiler.compile("%{IP}"); 316 | 317 | try (FileReader fr = new FileReader(Resources.getResource(ResourceManager.IP).getFile()); 318 | BufferedReader br = new BufferedReader(fr)) { 319 | String line; 320 | System.out.println("Starting test with ip"); 321 | while ((line = br.readLine()) != null) { 322 | Match gm = grok.match(line); 323 | final Map map = gm.capture(); 324 | Assertions.assertThat(map).doesNotContainKey("Error"); 325 | assertEquals(map.get("IP"), line); 326 | } 327 | } 328 | } 329 | 330 | @Test 331 | public void test014_month() { 332 | 333 | Grok grok = compiler.compile("%{MONTH}"); 334 | 335 | String[] months = {"Jan", "January", "Feb", "February", "Mar", "March", "Apr", "April", "May", "Jun", "June", 336 | "Jul", "July", "Aug", "August", "Sep", "September", "Oct", "October", 
"Nov", 337 | "November", "Dec", "December"}; 338 | int counter = 0; 339 | for (String month : months) { 340 | Match match = grok.match(month); 341 | Map map = match.capture(); 342 | assertNotNull(map); 343 | assertEquals(map.get("MONTH"), months[counter]); 344 | counter++; 345 | } 346 | } 347 | 348 | @Test 349 | public void test015_iso8601() throws GrokException { 350 | Grok grok = compiler.compile("%{TIMESTAMP_ISO8601}"); 351 | 352 | String[] times = { 353 | "2001-01-01T00:00:00", 354 | "1974-03-02T04:09:09", 355 | "2010-05-03T08:18:18+00:00", 356 | "2004-07-04T12:27:27-00:00", 357 | "2001-09-05T16:36:36+0000", 358 | "2001-11-06T20:45:45-0000", 359 | "2001-12-07T23:54:54Z", 360 | "2001-01-01T00:00:00.123456", 361 | "1974-03-02T04:09:09.123456", 362 | "2010-05-03T08:18:18.123456+00:00", 363 | "2004-07-04T12:27:27.123456-00:00", 364 | "2001-09-05T16:36:36.123456+0000", 365 | "2001-11-06T20:45:45.123456-0000", 366 | "2001-12-07T23:54:54.123456Z"}; 367 | 368 | int counter = 0; 369 | for (String time : times) { 370 | Match match = grok.match(time); 371 | Map map = match.capture(); 372 | assertNotNull(map); 373 | assertEquals(map.get("TIMESTAMP_ISO8601"), times[counter]); 374 | counter++; 375 | } 376 | } 377 | 378 | @Test 379 | public void test016_uri() throws GrokException { 380 | Grok grok = compiler.compile("%{URI}"); 381 | 382 | String[] uris = { 383 | "http://www.google.com", 384 | "telnet://helloworld", 385 | "http://www.example.com/", 386 | "http://www.example.com/test.html", 387 | "http://www.example.com/test.html?foo=bar", 388 | "http://www.example.com/test.html?foo=bar&fizzle=baz", 389 | "http://www.example.com:80/test.html?foo=bar&fizzle=baz", 390 | "https://www.example.com:443/test.html?foo=bar&fizzle=baz", 391 | "https://user@www.example.com:443/test.html?foo=bar&fizzle=baz", 392 | "https://user:pass@somehost/fetch.pl", 393 | "puppet:///", 394 | "http://www.foo.com", 395 | "http://www.foo.com/", 396 | "http://www.foo.com/?testing", 397 | 
"http://www.foo.com/?one=two", 398 | "http://www.foo.com/?one=two&foo=bar", 399 | "foo://somehost.com:12345", 400 | "foo://user@somehost.com:12345", 401 | "foo://user@somehost.com:12345/", 402 | "foo://user@somehost.com:12345/foo.bar/baz/fizz", 403 | "foo://user@somehost.com:12345/foo.bar/baz/fizz?test", 404 | "foo://user@somehost.com:12345/foo.bar/baz/fizz?test=1&sink&foo=4", 405 | "http://www.google.com/search?hl=en&source=hp&q=hello+world+%5E%40%23%24&btnG=Google+Search", 406 | "http://www.freebsd.org/cgi/url.cgi?ports/sysutils/grok/pkg-descr", 407 | "http://www.google.com/search?q=CAPTCHA+ssh&start=0&ie=utf-8&oe=utf-8&client=firefox-a" 408 | + "&rls=org.mozilla:en-US:official", 409 | "svn+ssh://somehost:12345/testing"}; 410 | 411 | int counter = 0; 412 | for (String uri : uris) { 413 | Match match = grok.match(uri); 414 | Map map = match.capture(); 415 | assertNotNull(map); 416 | assertEquals(map.get("URI"), uris[counter]); 417 | assertNotNull(map.get("URIPROTO")); 418 | counter++; 419 | } 420 | } 421 | 422 | @Test 423 | public void test017_nonMachingList() throws GrokException { 424 | Grok grok = compiler.compile("%{URI}"); 425 | 426 | String[] uris = { 427 | "http://www.google.com", 428 | "telnet://helloworld", 429 | "", 430 | "svn+ssh://somehost:12345/testing" 431 | }; 432 | 433 | int counter = 0; 434 | for (String uri : uris) { 435 | Match match = grok.match(uri); 436 | Map map = match.capture(); 437 | assertNotNull(map); 438 | if (counter == 2) { 439 | assertEquals(Collections.EMPTY_MAP, map); 440 | } 441 | counter++; 442 | } 443 | assertEquals(counter, 4); 444 | } 445 | 446 | @Test 447 | public void test018_namedOnlySimpleCase() throws GrokException { 448 | compiler.register("WORD", "foo|bar"); 449 | compiler.register("TEXT", "<< %{WORD}+ >>"); 450 | 451 | Grok grok = compiler.compile("%{TEXT:text}", true); 452 | 453 | String text = "<< barfoobarfoo >>"; 454 | Match match = grok.match(text); 455 | Map map = match.capture(); 456 | assertEquals("unable to 
parse: " + text, 457 | text, 458 | map.get("text")); 459 | } 460 | 461 | @Test 462 | public void test019_namedOnlyAllCases() throws GrokException { 463 | /* like previous test, but systematic all four possible options */ 464 | testPatternRepetitions(true, "(?:foo|bar)"); 465 | testPatternRepetitions(true, "foo|bar"); 466 | testPatternRepetitions(false, "(?:foo|bar)"); 467 | testPatternRepetitions(false, "foo|bar"); 468 | } 469 | 470 | private void testPatternRepetitions(boolean namedOnly, String pattern) throws GrokException { 471 | String description = format("[readonly:%s pattern:%s] ", namedOnly, pattern); 472 | 473 | compiler.register("WORD", pattern); 474 | compiler.register("TEXT", "<< %{WORD}+ >>"); 475 | 476 | Grok grok = compiler.compile("%{TEXT:text}", namedOnly); 477 | assertMatches(description, grok, "<< foo >>"); 478 | assertMatches(description, grok, "<< foobar >>"); 479 | assertMatches(description, grok, "<< foofoobarbar >>"); 480 | assertMatches(description, grok, "<< barfoobarfoo >>"); 481 | } 482 | 483 | private void assertMatches(String description, Grok grok, String text) { 484 | Match match = grok.match(text); 485 | Map map = match.capture(); 486 | assertEquals(format("%s: unable to parse '%s'", description, text), 487 | text, 488 | map.get("text")); 489 | } 490 | 491 | @Test 492 | public void test020_postfix_patterns() throws Throwable { 493 | GrokCompiler compiler = GrokCompiler.newInstance(); 494 | compiler.register(Resources.getResource("patterns/postfix").openStream()); 495 | compiler.register(Resources.getResource("patterns/patterns").openStream()); 496 | Grok grok = compiler.compile("%{POSTFIX_SMTPD}", false); 497 | 498 | assertTrue(grok.getPatterns().containsKey("POSTFIX_SMTPD")); 499 | } 500 | 501 | @Test 502 | public void test021_postfix_patterns_with_named_captures_only() throws Throwable { 503 | GrokCompiler compiler = GrokCompiler.newInstance(); 504 | compiler.register(Resources.getResource("patterns/postfix").openStream()); 505 | 
compiler.register(Resources.getResource("patterns/patterns").openStream()); 506 | Grok grok = compiler.compile("%{POSTFIX_SMTPD}", true); 507 | 508 | assertTrue(grok.getPatterns().containsKey("POSTFIX_SMTPD")); 509 | } 510 | 511 | @Test 512 | public void test022_named_captures_with_missing_definition() { 513 | ensureAbortsWithDefinitionMissing("FOO %{BAR}", "%{FOO}", true); 514 | } 515 | 516 | @Test 517 | public void test023_captures_with_missing_definition() { 518 | ensureAbortsWithDefinitionMissing("FOO %{BAR}", "%{FOO:name}", false); 519 | } 520 | 521 | @Test 522 | public void test024_captures_with_missing_definition() { 523 | ensureAbortsWithDefinitionMissing("FOO %{BAR}", "%{FOO}", false); 524 | } 525 | 526 | @Test 527 | public void test025_datetime_pattern_with_slashes() throws Throwable { 528 | final ZonedDateTime expectedDate = ZonedDateTime.of(2015, 7, 31, 0, 0, 0, 0, ZoneOffset.UTC); 529 | 530 | final Grok grok = compiler.compile("Foo %{DATA:result;date;yyyy/MM/dd} Bar", ZoneOffset.UTC, false); 531 | 532 | final Match gm = grok.match("Foo 2015/07/31 Bar"); 533 | 534 | assertEquals(1, gm.getMatch().groupCount()); 535 | assertEquals(expectedDate.toInstant(), gm.capture().get("result")); 536 | } 537 | 538 | @Test 539 | public void test026_datetime_pattern_with_with_dots() throws Throwable { 540 | final ZonedDateTime expectedDate = ZonedDateTime.of(2015, 7, 31, 0, 0, 0, 0, ZoneOffset.UTC); 541 | 542 | final Grok grok = compiler.compile("Foo %{DATA:result;date;yyyy.MM.dd} Bar", ZoneOffset.UTC, false); 543 | final Match gm = grok.match("Foo 2015.07.31 Bar"); 544 | 545 | assertEquals(1, gm.getMatch().groupCount()); 546 | assertEquals(expectedDate.toInstant(), gm.capture().get("result")); 547 | } 548 | 549 | @Test 550 | public void test027_datetime_pattern_with_with_hyphens() throws Throwable { 551 | final ZonedDateTime expectedDate = ZonedDateTime.of(2015, 7, 31, 0, 0, 0, 0, ZoneOffset.UTC); 552 | 553 | final Grok grok = compiler.compile("Foo 
%{DATA:result;date;yyyy-MM-dd} Bar", ZoneOffset.UTC, false); 554 | final Match gm = grok.match("Foo 2015-07-31 Bar"); 555 | 556 | assertEquals(1, gm.getMatch().groupCount()); 557 | assertEquals(expectedDate.toInstant(), gm.capture().get("result")); 558 | } 559 | 560 | @Test 561 | public void test028_keep_empty_captures() throws Throwable { 562 | final Grok grok = compiler.compile("%{POSINT:pos}|%{INT:int}"); 563 | Match gm = grok.match("-42"); 564 | gm.setKeepEmptyCaptures(false); 565 | Map captures = gm.capture(); 566 | assertEquals(1,captures.size()); 567 | assertEquals("-42", captures.get("int")); 568 | gm.setKeepEmptyCaptures(true); 569 | captures = gm.capture(); 570 | assertEquals(2,captures.size()); 571 | assertEquals("-42", captures.get("int")); 572 | assertNull(captures.get("pos")); 573 | assertTrue(captures.containsKey("pos")); 574 | } 575 | 576 | @Test 577 | public void test029_datetime_pattern_with_with_commas() throws Throwable { 578 | final ZonedDateTime expectedDate = ZonedDateTime.of(2015, 7, 31, 0, 0, 0, 0, ZoneOffset.UTC); 579 | 580 | final Grok grok = compiler.compile("Foo %{DATA:result;date;yyyy,MM,dd} Bar", ZoneOffset.UTC, false); 581 | final Match gm = grok.match("Foo 2015,07,31 Bar"); 582 | 583 | assertEquals(1, gm.getMatch().groupCount()); 584 | assertEquals(expectedDate.toInstant(), gm.capture().get("result")); 585 | } 586 | 587 | @Test 588 | public void testIssue64() throws Throwable { 589 | String pattern = "(?client id): (?.*)"; 590 | String input = "client id: \"name\" \"Mac OS X Mail\" \"version\" \"10.2 (3259)\" \"os\" \"Mac OS X\"" 591 | + "\"os-version\" \"10.12.3 (16D32)\" \"vendor\" \"Apple Inc.\""; 592 | 593 | // Validate the search is good 594 | Pattern javaPattern = Pattern.compile(pattern); 595 | Matcher javaMatcher = javaPattern.matcher(input); 596 | if (javaMatcher.matches()) { 597 | System.out.println(javaMatcher.group("clientid")); 598 | } 599 | 600 | GrokCompiler grokCompiler = GrokCompiler.newInstance(); 601 | 
grokCompiler.registerDefaultPatterns(); 602 | 603 | io.krakens.grok.api.Grok grok = grokCompiler.compile(pattern, true); 604 | 605 | Match gm = grok.match(input); 606 | Map captures = gm.capture(); 607 | assertEquals(captures.get("clientid"), gm.getMatch().group("clientid")); 608 | } 609 | 610 | @Test 611 | public void allowClassPathPatternFiles() throws Exception { 612 | GrokCompiler compiler = GrokCompiler.newInstance(); 613 | compiler.register(Resources.getResource("patterns/patterns").openStream()); 614 | compiler.compile("%{USERNAME}", false); 615 | } 616 | 617 | @Test(expected = IllegalArgumentException.class) 618 | public void createGrokWithDefaultPatterns() throws GrokException { 619 | GrokCompiler compiler = GrokCompiler.newInstance(); 620 | compiler.compile("%{USERNAME}", false); 621 | } 622 | 623 | private void ensureAbortsWithDefinitionMissing(String pattern, String compilePattern, boolean namedOnly) { 624 | try { 625 | compiler.compile(pattern); 626 | compiler.compile(compilePattern, namedOnly); 627 | fail("should abort due to missing definition"); 628 | } catch (Exception e) { 629 | assertThat(e.getMessage(), containsString("No definition for key")); 630 | } 631 | } 632 | 633 | @Test 634 | public void testGroupTypes() { 635 | Grok grok = compiler.compile( 636 | "%{HTTPDATE:timestamp;date;dd/MMM/yyyy:HH:mm:ss Z} %{USERNAME:username:text} " 637 | + "%{IPORHOST:host}:%{POSINT:port:integer}", 638 | true); 639 | assertEquals(Converter.Type.DATETIME, grok.groupTypes.get("timestamp")); 640 | assertEquals(Converter.Type.STRING, grok.groupTypes.get("username")); 641 | assertEquals(Converter.Type.INT, grok.groupTypes.get("port")); 642 | assertNull(grok.groupTypes.get("host")); 643 | 644 | Match match = grok.match("07/Mar/2004:16:45:56 -0800 test 64.242.88.10:8080"); 645 | Map result = match.capture(); 646 | assertEquals("test", result.get("username")); 647 | assertEquals("64.242.88.10", result.get("host")); 648 | assertEquals(8080, result.get("port")); 649 | 
assertTrue(result.get("timestamp") instanceof Instant); 650 | } 651 | 652 | @Test 653 | public void testTimeZone() { 654 | // no timezone. default to system default 655 | String date = "03/19/2018 14:11:00"; 656 | DateTimeFormatter dtf = DateTimeFormatter.ofPattern("MM/dd/yyyy HH:mm:ss"); 657 | Grok grok = compiler.compile("%{DATESTAMP:timestamp;date;MM/dd/yyyy HH:mm:ss}", true); 658 | Instant instant = (Instant) grok.match(date).capture().get("timestamp"); 659 | assertEquals(ZonedDateTime.parse(date, dtf.withZone(ZoneOffset.systemDefault())).toInstant(), instant); 660 | 661 | // set default timezone to PST 662 | ZoneId pst = ZoneId.of("PST", ZoneId.SHORT_IDS); 663 | grok = compiler.compile("%{DATESTAMP:timestamp;date;MM/dd/yyyy HH:mm:ss}", pst, true); 664 | instant = (Instant) grok.match(date).capture().get("timestamp"); 665 | assertEquals(ZonedDateTime.parse(date, dtf.withZone(pst)).toInstant(), instant); 666 | 667 | // when timestamp has timezone, use it instead of the default. 668 | String dateWithTimeZone = "07/Mar/2004:16:45:56 +0800"; 669 | dtf = DateTimeFormatter.ofPattern("dd/MMM/yyyy:HH:mm:ss Z"); 670 | grok = compiler.compile("%{HTTPDATE:timestamp;date;dd/MMM/yyyy:HH:mm:ss Z}", pst, true); 671 | instant = (Instant) grok.match(dateWithTimeZone).capture().get("timestamp"); 672 | assertEquals(ZonedDateTime.parse(dateWithTimeZone, dtf.withZone(ZoneOffset.ofHours(8))).toInstant(), instant); 673 | } 674 | 675 | @Test 676 | public void testEmptyLine() { 677 | GrokCompiler grokCompiler = GrokCompiler.newInstance(); 678 | grokCompiler.registerDefaultPatterns(); 679 | final Grok grok = grokCompiler.compile("%{GREEDYDATA}"); 680 | 681 | // empty line 682 | String line = " "; 683 | Match gm = grok.match(line); 684 | Map capture = gm.capture(); 685 | assertEquals(1, capture.size()); 686 | } 687 | } 688 | --------------------------------------------------------------------------------