├── .github └── workflows │ └── ci.yml ├── .gitignore ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE ├── README.md ├── benches ├── apache.rs ├── log.rs └── simple.rs ├── build.rs ├── examples └── simple.rs ├── patterns ├── README.md ├── aws.pattern ├── bacula.pattern ├── bind.pattern ├── bro.pattern ├── exim.pattern ├── firewalls.pattern ├── grok.pattern ├── haproxy.pattern ├── httpd.pattern ├── java.pattern ├── junos.pattern ├── linux-syslog.pattern ├── maven.pattern ├── mcollective.pattern ├── mongodb.pattern ├── nagios.pattern ├── postgresql.pattern ├── rails.pattern ├── redis.pattern ├── ruby.pattern └── squid.pattern └── src ├── fancy_regex.rs ├── lib.rs ├── onig.rs ├── pcre2.rs └── regex.rs /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Continuous Integration 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: 7 | - main 8 | 9 | jobs: 10 | test: 11 | name: Test Grok 12 | runs-on: ${{ matrix.os }} 13 | strategy: 14 | matrix: 15 | build: [linux, macos, windows] 16 | include: 17 | - build: linux 18 | os: ubuntu-latest 19 | rust: stable 20 | - build: macos 21 | os: macos-latest 22 | rust: stable 23 | - build: windows 24 | os: windows-latest 25 | rust: stable 26 | steps: 27 | - uses: actions/checkout@v2 28 | - uses: actions-rs/toolchain@v1 29 | with: 30 | toolchain: stable 31 | - uses: actions-rs/cargo@v1 32 | with: 33 | command: test 34 | - uses: actions-rs/cargo@v1 35 | with: 36 | command: test 37 | args: --no-default-features --features fancy-regex 38 | - uses: actions-rs/cargo@v1 39 | with: 40 | command: test 41 | args: --no-default-features --features pcre2 42 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | **/*.rs.bk 3 | Cargo.lock 4 | .vscode/** 5 | src/patterns.rs 6 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | All user-visible changes to this project will be documented in this file. 4 | This project adheres to [Semantic Versioning](http://semver.org/), as described 5 | for Rust libraries in [RFC #1105](https://github.com/rust-lang/rfcs/blob/master/text/1105-api-evolution.md) 6 | 7 | ## 2.0.1 - Unreleased 8 | 9 | * Updated `onig` to `6.4`. 10 | 11 | ## 2.0.0 - 2022-06-07 12 | 13 | * Minimum Rust version is `1.56`, Rust Edition switched to 2021. 14 | * (breaking) Renamed `Grok::with_patterns()` to `Grok::with_default_patterns()`. 15 | * (breaking) Renamed `Grok::insert_definition` to `Grok::add_pattern` to stop mixing "definition" with "pattern". 16 | * (breaking) `Matches::iter()` now returns only the matches and no longer the other patterns with an empty string as the value. 17 | * Added `IntoIter` for `&Matches` for more convenient match iteration (i.e. for loop). 18 | * Added `Pattern::capture_names` which returns the names the compiled pattern captures. 19 | * Updated `onig` to `6.3`. 20 | * `master` branch is now called `main`. 21 | 22 | ## 1.2.0 - 2021-03-21 23 | 24 | * Updated `onig` to `6.1`. 25 | * Allow inspecting the default built patterns. Thanks @magodo! 26 | * Use the non_exhaustive attribute on `Error` as suggested by clippy. 27 | 28 | ## 1.0.1 - 2019-10-31 29 | 30 | * Use `Regex::foreach_names` instead of `Regex::capture_names` to work on 32-bit platforms. Thanks @a-rodin!
31 | 32 | ## 1.1.0 - 2019-10-30 33 | 34 | * Updated `onig` to `5.0`. 35 | * Use `Regex::foreach_names` instead of `Regex::capture_names` to work on 32-bit platforms. Thanks @a-rodin! 36 | 37 | ## 1.0.0 - 2019-03-28 38 | 39 | * Updated `onig` to `4.3`. 40 | 41 | ## 0.5.0 - 2018-02-19 42 | 43 | * Updated `onig` to `3.1`. 44 | 45 | ## 0.4.1 - 2017-11-15 46 | 47 | * Fixed a bug where the named pattern given at compilation was also accessible from the iterator. 48 | 49 | ## 0.4.0 - 2017-11-15 50 | 51 | * Allow specifying named patterns when compiling, without inserting the definition beforehand. 52 | 53 | ## 0.3.0 - 2017-09-13 54 | 55 | * `regex` has been switched to `onig` so we have full compatibility with all the other grok patterns. 56 | * Added `Grok::with_patterns()` which loads all the default patterns. `Grok::default()` also uses that now. 57 | * `iter()` is available on `Matches` which yields a `(&str, &str)` kv pair of match/result. 58 | 59 | ## 0.2.0 - 2017-09-06 60 | 61 | * Instead of panicking, all methods that can fail now return a `Result`. 62 | * `Grok::new()` has been renamed to `Grok::empty()` (or `Grok::default()`). 63 | * `is_empty()` is available in the `Matches` API to check if there are matches at all. 64 | * `len()` is available in the `Matches` API to get the total number of matches. 65 | 66 | ## 0.1.0 - 2017-09-05 67 | 68 | * Initial Release. -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "grok" 3 | version = "2.1.0" 4 | authors = ["Matt Mastracci ", "Michael Nitschinger "] 5 | license = "Apache-2.0" 6 | readme = "README.md" 7 | repository = "https://github.com/mmastrac/grok" 8 | documentation = "https://docs.rs/grok2" 9 | homepage = "https://github.com/mmastrac/grok" 10 | description = """ 11 | A Rust implementation of the popular Java & Ruby grok library 12 | which allows easy text and log file processing with composable 13 | patterns. 14 | """ 15 | categories = ["text-processing"] 16 | build = "build.rs" 17 | edition = "2021" 18 | rust-version = "1.56" 19 | 20 | [features] 21 | default = ["onig"] 22 | 23 | onig = [] 24 | regex = ["dep:regex"] 25 | fancy-regex = ["dep:fancy-regex"] 26 | pcre2 = ["dep:pcre2"] 27 | 28 | [dependencies] 29 | # The default regex engine. Use default-features = false to disable it. 30 | onig = { version = "6.5", default-features = false } 31 | 32 | # The Rust regex library. Does not support backtracking, so many patterns are unusable. 33 | regex = { version = "1", optional = true, default-features = false, features = ["std", "unicode", "perf", "perf-dfa-full"] } 34 | 35 | # A more complete Rust regex library supporting backtracking. 36 | fancy-regex = { version = "0.14", optional = true, default-features = false, features = ["std", "unicode", "perf"] } 37 | 38 | # A PCRE2 binding. 39 | pcre2 = { version = "0.2.9", optional = true } 40 | 41 | [build-dependencies] 42 | glob = "0.3" 43 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document.
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | grok 2 | ==== 3 | 4 | The `grok` library allows you to quickly parse and match potentially unstructured data into a structured result. It is especially helpful when parsing logfiles of all kinds. This [Rust](http://rust-lang.org) version is mainly a port from the [java version](https://github.com/thekrakken/java-grok) which in turn drew inspiration from the original [ruby version](https://github.com/logstash-plugins/logstash-filter-grok). 5 | 6 | [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) 7 | [![Latest Version](https://img.shields.io/crates/v/grok.svg)](https://crates.io/crates/grok) 8 | [![Documentation](https://docs.rs/grok/badge.svg)](https://docs.rs/grok) 9 | ![Continuous Integration](https://github.com/mmastrac/grok/actions/workflows/ci.yml/badge.svg?branch=main) 10 | 11 | ## Usage 12 | Add this to your `Cargo.toml`: 13 | 14 | ```toml 15 | [dependencies] 16 | grok = "2.0" 17 | ``` 18 | 19 | Here is a simple example which stores a pattern, compiles it and then matches a line on it: 20 | 21 | ```rust 22 | use grok::Grok; 23 | 24 | fn main() { 25 | // Instantiate Grok 26 | let mut grok = Grok::default(); 27 | 28 | // Add a pattern which might be a regex or an alias 29 | grok.add_pattern("USERNAME", r"[a-zA-Z0-9._-]+"); 30 | 31 | // Compile the definitions into the pattern you want 32 | let pattern = grok 33 | .compile("%{USERNAME}", false) 34 | .expect("Error while compiling!"); 35 | 36 | // Match the compiled pattern against a string 37 | match pattern.match_against("root") { 38 | Some(m) => println!("Found username {:?}", m.get("USERNAME")), 39 | None => println!("No matches found!"), 40 | } 41 | } 42 | ``` 43 | 44 | Note that compiling the pattern is an expensive operation, so, much like with plain regexes, the `compile` 45 | operation should be performed once and the `match_against` method on the pattern then called repeatedly 46 | in a loop or iterator (a short sketch of this reuse is shown further below). The returned pattern is not bound to the lifetime of the original grok instance, so it can 47 | be passed around freely. For performance reasons the returned `Matches` is bound to the pattern lifetime, so keep 48 | them close together or clone/copy out the contained results as needed. 49 | 50 | ## Further Information 51 | 52 | This library supports multiple regex engines through feature flags. By default, 53 | it uses [onig](https://crates.io/crates/onig), which is a Rust binding for the 54 | powerful [Oniguruma](https://github.com/kkos/oniguruma) regex library.
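
The compile-once, match-many reuse described in the Usage section works the same with every engine listed below. The following is a minimal, illustrative sketch (not taken from the repository sources); it assumes only the API shown above plus the usual `%{PATTERN:alias}` grok alias syntax, and the input lines are made up:

```rust
use grok::Grok;

fn main() {
    // Illustrative sketch, not part of the repository sources.
    // Load the bundled default patterns (USERNAME, IPV4, LOGLEVEL, ...).
    let mut grok = Grok::with_default_patterns();

    // Compile once; this is the expensive step.
    let pattern = grok
        .compile("%{USERNAME:user}", false)
        .expect("Error while compiling!");

    // Reuse the compiled pattern for many inputs.
    let lines = ["root", "jane.doe", "$$$"];
    let mut users: Vec<String> = Vec::new();
    for line in lines {
        // The returned `Matches` borrows from the pattern, so copy the
        // values out if they need to outlive this iteration.
        if let Some(matches) = pattern.match_against(line) {
            if let Some(user) = matches.get("user") {
                users.push(user.to_string());
            }
        }
    }

    println!("extracted users: {:?}", users);
}
```
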
You can 55 | also use the standard Rust regex engine, `fancy-regex`, or `pcre2` by enabling the 56 | respective features. 57 | 58 | The default engine is `onig` for compatibility with previous 2.x releases: 59 | 60 | ```toml 61 | [dependencies] 62 | grok = { version = "2.0", features = ["onig"] } 63 | ``` 64 | 65 | The `pcre2` engine is a binding to the PCRE2 library; it supports backtracking and 66 | JIT compilation and is the fastest engine for most use cases: 67 | 68 | ```toml 69 | [dependencies] 70 | grok = { version = "2.0", default-features = false, features = ["pcre2"] } 71 | ``` 72 | 73 | The `fancy-regex` engine is a more complete Rust regex library supporting 74 | backtracking: 75 | 76 | ```toml 77 | [dependencies] 78 | grok = { version = "2.0", default-features = false, features = ["fancy-regex"] } 79 | ``` 80 | 81 | The `regex` engine is supported, but it does not support backtracking, so many 82 | patterns are unusable. This is not recommended for most use cases: 83 | 84 | ```toml 85 | [dependencies] 86 | grok = { version = "2.0", default-features = false, features = ["regex"] } 87 | ``` 88 | 89 | ## License 90 | `grok` is distributed under the terms of the Apache License (Version 2.0). 91 | See LICENSE for details. 92 | -------------------------------------------------------------------------------- /benches/apache.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | 3 | extern crate test; 4 | 5 | use grok::Grok; 6 | use test::Bencher; 7 | 8 | #[bench] 9 | fn bench_apache_log_match(b: &mut Bencher) { 10 | let msg = r#"220.181.108.96 - - [13/Jun/2015:21:14:28 +0000] "GET /blog/geekery/xvfb-firefox.html HTTP/1.1" 200 10975 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)""#; 11 | 12 | let mut grok = Grok::default(); 13 | let pattern = grok.compile(r#"%{IPORHOST:clientip} %{USER:ident} %{USER:auth} \[%{HTTPDATE:timestamp}\] "%{WORD:verb} %{DATA:request} HTTP/%{NUMBER:httpversion}" %{NUMBER:response} %{NUMBER:bytes} %{QS:referrer} %{QS:agent}"#, false) 14 | .expect("Error while compiling!"); 15 | 16 | b.iter(|| { 17 | if let Some(found) = pattern.match_against(msg) { 18 | test::black_box(&found); 19 | } 20 | }); 21 | } 22 | 23 | #[bench] 24 | fn bench_apache_log_no_match_start(b: &mut Bencher) { 25 | let msg = r#"tash-scale11x/css/fonts/Roboto-Regular.ttf HTTP/1.1" 200 41820 "http://semicomplete.com/presentations/logs"#; 26 | 27 | let mut grok = Grok::default(); 28 | let pattern = grok.compile(r#"%{IPORHOST:clientip} %{USER:ident} %{USER:auth} \[%{HTTPDATE:timestamp}\] "%{WORD:verb} %{DATA:request} HTTP/%{NUMBER:httpversion}" %{NUMBER:response} %{NUMBER:bytes} %{QS:referrer} %{QS:agent}"#, false) 29 | .expect("Error while compiling!"); 30 | 31 | b.iter(|| { 32 | if let Some(found) = pattern.match_against(msg) { 33 | test::black_box(&found); 34 | } 35 | }); 36 | } 37 | 38 | #[bench] 39 | fn bench_apache_log_no_match_middle(b: &mut Bencher) { 40 | let msg = r#"220.181.108.96 - - [13/Jun/2015:21:14:28 +0000] "111 /blog/geekery/xvfb-firefox.html HTTP/1.1" 200 10975 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)""#; 41 | 42 | let mut grok = Grok::default(); 43 | let pattern = grok.compile(r#"%{IPORHOST:clientip} %{USER:ident} %{USER:auth} \[%{HTTPDATE:timestamp}\] "%{WORD:verb} %{DATA:request} HTTP/%{NUMBER:httpversion}" %{NUMBER:response} %{NUMBER:bytes} %{QS:referrer} %{QS:agent}"#, false) 44 | .expect("Error while compiling!"); 45 | 46 | b.iter(|| { 47 | if
let Some(found) = pattern.match_against(msg) { 48 | test::black_box(&found); 49 | } 50 | }); 51 | } 52 | 53 | #[bench] 54 | fn bench_apache_log_no_match_end(b: &mut Bencher) { 55 | let msg = r#"220.181.108.96 - - [13/Jun/2015:21:14:28 +0000] "GET /blog/geekery/xvfb-firefox.html HTTP/1.1" 200 10975 "-" 1"#; 56 | 57 | let mut grok = Grok::default(); 58 | let pattern = grok.compile(r#"%{IPORHOST:clientip} %{USER:ident} %{USER:auth} \[%{HTTPDATE:timestamp}\] "%{WORD:verb} %{DATA:request} HTTP/%{NUMBER:httpversion}" %{NUMBER:response} %{NUMBER:bytes} %{QS:referrer} %{QS:agent}"#, false) 59 | .expect("Error while compiling!"); 60 | 61 | b.iter(|| { 62 | if let Some(found) = pattern.match_against(msg) { 63 | test::black_box(&found); 64 | } 65 | }); 66 | } 67 | 68 | #[bench] 69 | fn bench_apache_log_match_anchor(b: &mut Bencher) { 70 | let msg = r#"220.181.108.96 - - [13/Jun/2015:21:14:28 +0000] "GET /blog/geekery/xvfb-firefox.html HTTP/1.1" 200 10975 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)""#; 71 | 72 | let mut grok = Grok::default(); 73 | let pattern = grok.compile(r#"^%{IPORHOST:clientip} %{USER:ident} %{USER:auth} \[%{HTTPDATE:timestamp}\] "%{WORD:verb} %{DATA:request} HTTP/%{NUMBER:httpversion}" %{NUMBER:response} %{NUMBER:bytes} %{QS:referrer} %{QS:agent}$"#, false) 74 | .expect("Error while compiling!"); 75 | 76 | b.iter(|| { 77 | if let Some(found) = pattern.match_against(msg) { 78 | test::black_box(&found); 79 | } 80 | }); 81 | } 82 | 83 | #[bench] 84 | fn bench_apache_log_no_match_start_anchor(b: &mut Bencher) { 85 | let msg = r#"tash-scale11x/css/fonts/Roboto-Regular.ttf HTTP/1.1" 200 41820 "http://semicomplete.com/presentations/logs"#; 86 | 87 | let mut grok = Grok::default(); 88 | let pattern = grok.compile(r#"^%{IPORHOST:clientip} %{USER:ident} %{USER:auth} \[%{HTTPDATE:timestamp}\] "%{WORD:verb} %{DATA:request} HTTP/%{NUMBER:httpversion}" %{NUMBER:response} %{NUMBER:bytes} %{QS:referrer} %{QS:agent}$"#, false) 89 | .expect("Error while compiling!"); 90 | 91 | b.iter(|| { 92 | if let Some(found) = pattern.match_against(msg) { 93 | test::black_box(&found); 94 | } 95 | }); 96 | } 97 | 98 | #[bench] 99 | fn bench_apache_log_no_match_middle_anchor(b: &mut Bencher) { 100 | let msg = r#"220.181.108.96 - - [13/Jun/2015:21:14:28 +0000] "111 /blog/geekery/xvfb-firefox.html HTTP/1.1" 200 10975 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)""#; 101 | 102 | let mut grok = Grok::default(); 103 | let pattern = grok.compile(r#"^%{IPORHOST:clientip} %{USER:ident} %{USER:auth} \[%{HTTPDATE:timestamp}\] "%{WORD:verb} %{DATA:request} HTTP/%{NUMBER:httpversion}" %{NUMBER:response} %{NUMBER:bytes} %{QS:referrer} %{QS:agent}$"#, false) 104 | .expect("Error while compiling!"); 105 | 106 | b.iter(|| { 107 | if let Some(found) = pattern.match_against(msg) { 108 | test::black_box(&found); 109 | } 110 | }); 111 | } 112 | 113 | #[bench] 114 | fn bench_apache_log_no_match_end_anchor(b: &mut Bencher) { 115 | let msg = r#"220.181.108.96 - - [13/Jun/2015:21:14:28 +0000] "GET /blog/geekery/xvfb-firefox.html HTTP/1.1" 200 10975 "-" 1"#; 116 | 117 | let mut grok = Grok::default(); 118 | let pattern = grok.compile(r#"^%{IPORHOST:clientip} %{USER:ident} %{USER:auth} \[%{HTTPDATE:timestamp}\] "%{WORD:verb} %{DATA:request} HTTP/%{NUMBER:httpversion}" %{NUMBER:response} %{NUMBER:bytes} %{QS:referrer} %{QS:agent}$"#, false) 119 | .expect("Error while compiling!"); 120 | 121 | b.iter(|| { 122 | if let Some(found) = 
pattern.match_against(msg) { 123 | test::black_box(&found); 124 | } 125 | }); 126 | } 127 | -------------------------------------------------------------------------------- /benches/log.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | 3 | extern crate test; 4 | 5 | use grok::Grok; 6 | use test::Bencher; 7 | 8 | #[bench] 9 | fn bench_log_match(b: &mut Bencher) { 10 | let msg = "2016-09-19T18:19:00 [8.8.8.8:prd] DEBUG this is an example log message"; 11 | 12 | let mut grok = Grok::default(); 13 | let pattern = grok.compile(r"%{TIMESTAMP_ISO8601:timestamp} \[%{IPV4:ip}:%{WORD:environment}\] %{LOGLEVEL:log_level} %{GREEDYDATA:message}", false) 14 | .expect("Error while compiling!"); 15 | 16 | b.iter(|| { 17 | if let Some(found) = pattern.match_against(msg) { 18 | test::black_box(&found); 19 | } 20 | }); 21 | } 22 | 23 | #[bench] 24 | fn bench_log_no_match(b: &mut Bencher) { 25 | let msg = "2016-09-19T18:19:00 [8.8.8.8:prd] DEBUG this is an example log message"; 26 | 27 | let mut grok = Grok::default(); 28 | let pattern = grok.compile(r"%{TIMESTAMP_ISO8601:timestamp} \[%{IPV4:ip};%{WORD:environment}\] %{LOGLEVEL:log_level} %{GREEDYDATA:message}", false) 29 | .expect("Error while compiling!"); 30 | 31 | b.iter(|| { 32 | if let Some(found) = pattern.match_against(msg) { 33 | test::black_box(&found); 34 | } 35 | }); 36 | } 37 | 38 | #[bench] 39 | fn bench_log_match_with_anchors(b: &mut Bencher) { 40 | let msg = "2016-09-19T18:19:00 [8.8.8.8:prd] DEBUG this is an example log message"; 41 | 42 | let mut grok = Grok::default(); 43 | let pattern = grok.compile(r"^%{TIMESTAMP_ISO8601:timestamp} \[%{IPV4:ip}:%{WORD:environment}\] %{LOGLEVEL:log_level} %{GREEDYDATA:message}$", false) 44 | .expect("Error while compiling!"); 45 | 46 | b.iter(|| { 47 | if let Some(found) = pattern.match_against(msg) { 48 | test::black_box(&found); 49 | } 50 | }); 51 | } 52 | 53 | #[bench] 54 | fn bench_log_no_match_with_anchors(b: &mut Bencher) { 55 | let msg = "2016-09-19T18:19:00 [8.8.8.8;prd] DEBUG this is an example log message"; 56 | 57 | let mut grok = Grok::default(); 58 | let pattern = grok.compile(r"^%{TIMESTAMP_ISO8601:timestamp} \[%{IPV4:ip}:%{WORD:environment}\] %{LOGLEVEL:log_level} %{GREEDYDATA:message}$", false) 59 | .expect("Error while compiling!"); 60 | 61 | b.iter(|| { 62 | if let Some(found) = pattern.match_against(msg) { 63 | test::black_box(&found); 64 | } 65 | }); 66 | } 67 | -------------------------------------------------------------------------------- /benches/simple.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | 3 | extern crate test; 4 | 5 | use grok::Grok; 6 | use test::Bencher; 7 | 8 | #[bench] 9 | fn bench_simple_pattern_match(b: &mut Bencher) { 10 | let mut grok = Grok::empty(); 11 | grok.add_pattern("USERNAME", r"[a-zA-Z0-9._-]+"); 12 | let pattern = grok 13 | .compile("%{USERNAME}", false) 14 | .expect("Error while compiling!"); 15 | 16 | b.iter(|| { 17 | if let Some(found) = pattern.match_against("user") { 18 | test::black_box(&found); 19 | } 20 | }); 21 | } 22 | 23 | #[bench] 24 | fn bench_simple_pattern_no_match(b: &mut Bencher) { 25 | let mut grok = Grok::empty(); 26 | grok.add_pattern("USERNAME", r"[a-zA-Z0-9._-]+"); 27 | let pattern = grok 28 | .compile("%{USERNAME}", false) 29 | .expect("Error while compiling!"); 30 | 31 | b.iter(|| { 32 | if let Some(found) = pattern.match_against("$$$$") { 33 | test::black_box(&found); 34 | } 35 | }); 36 | } 37 | 38 | 
#[bench] 39 | fn bench_simple_pattern_match_with_anchor(b: &mut Bencher) { 40 | let mut grok = Grok::empty(); 41 | grok.add_pattern("USERNAME", r"[a-zA-Z0-9._-]+"); 42 | let pattern = grok 43 | .compile("^%{USERNAME}$", false) 44 | .expect("Error while compiling!"); 45 | 46 | b.iter(|| { 47 | if let Some(found) = pattern.match_against("user") { 48 | test::black_box(&found); 49 | } 50 | }); 51 | } 52 | 53 | #[bench] 54 | fn bench_simple_pattern_no_match_with_anchor(b: &mut Bencher) { 55 | let mut grok = Grok::empty(); 56 | grok.add_pattern("USERNAME", r"[a-zA-Z0-9._-]+"); 57 | let pattern = grok 58 | .compile("^%{USERNAME}$", false) 59 | .expect("Error while compiling!"); 60 | 61 | b.iter(|| { 62 | if let Some(found) = pattern.match_against("$$$$") { 63 | test::black_box(&found); 64 | } 65 | }); 66 | } 67 | -------------------------------------------------------------------------------- /build.rs: -------------------------------------------------------------------------------- 1 | extern crate glob; 2 | 3 | use glob::glob; 4 | use std::env; 5 | use std::fmt; 6 | use std::fs::File; 7 | use std::io::prelude::*; 8 | use std::io::BufReader; 9 | use std::path::Path; 10 | 11 | fn main() { 12 | let mut output = String::new(); 13 | 14 | fmt::write( 15 | &mut output, 16 | format_args!("static PATTERNS: &[(&str, &str)] = &[\n"), 17 | ) 18 | .unwrap(); 19 | 20 | for line in glob("patterns/*.pattern") 21 | .unwrap() // load filepaths 22 | // extract the filepath 23 | .map(|e| e.unwrap()) 24 | // open file for path 25 | .map(|path| File::open(path).unwrap()) 26 | // flatten to actual lines 27 | .flat_map(|f| BufReader::new(f).lines()) 28 | .map(|line| line.unwrap()) 29 | // filter comments 30 | .filter(|line| !line.starts_with('#')) 31 | // filter empty lines 32 | .filter(|line| !line.is_empty()) 33 | { 34 | let (key, value) = line.split_at(line.find(' ').unwrap()); 35 | fmt::write( 36 | &mut output, 37 | format_args!("\t(\"{}\", r#\"{}\"#),\n", key, &value[1..]), 38 | ) 39 | .unwrap(); 40 | } 41 | 42 | fmt::write(&mut output, format_args!("];\n")).unwrap(); 43 | 44 | let out_dir = env::var("OUT_DIR").unwrap(); 45 | let dest_path = Path::new(&out_dir).join("default_patterns.rs"); 46 | let mut file = File::create(&dest_path).unwrap(); 47 | file.write_all(output.as_bytes()).unwrap(); 48 | } 49 | -------------------------------------------------------------------------------- /examples/simple.rs: -------------------------------------------------------------------------------- 1 | use grok::Grok; 2 | 3 | fn main() { 4 | // Instantiate Grok 5 | let mut grok = Grok::default(); 6 | 7 | // Add a pattern which might be a regex or an alias 8 | grok.add_pattern("USERNAME", r"[a-zA-Z0-9._-]+"); 9 | 10 | // Compile the definitions into the pattern you want 11 | let pattern = grok 12 | .compile("%{USERNAME}", false) 13 | .expect("Error while compiling!"); 14 | 15 | // Match the compiled pattern against a string 16 | match pattern.match_against("root") { 17 | Some(m) => println!("Found username {:?}", m.get("USERNAME")), 18 | None => println!("No matches found!"), 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /patterns/README.md: -------------------------------------------------------------------------------- 1 | # Patterns 2 | Note that all the patterns have been originally copied from https://github.com/logstash-plugins/logstash-patterns-core/tree/master/patterns 3 | which is distributed under the Apache license as well. 
4 | 5 | See https://github.com/logstash-plugins/logstash-patterns-core/blob/master/LICENSE 6 | for more information. -------------------------------------------------------------------------------- /patterns/aws.pattern: -------------------------------------------------------------------------------- 1 | S3_REQUEST_LINE (?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest}) 2 | 3 | S3_ACCESS_LOG %{WORD:owner} %{NOTSPACE:bucket} \[%{HTTPDATE:timestamp}\] %{IP:clientip} %{NOTSPACE:requester} %{NOTSPACE:request_id} %{NOTSPACE:operation} %{NOTSPACE:key} (?:"%{S3_REQUEST_LINE}"|-) (?:%{INT:response:int}|-) (?:-|%{NOTSPACE:error_code}) (?:%{INT:bytes:int}|-) (?:%{INT:object_size:int}|-) (?:%{INT:request_time_ms:int}|-) (?:%{INT:turnaround_time_ms:int}|-) (?:%{QS:referrer}|-) (?:"?%{QS:agent}"?|-) (?:-|%{NOTSPACE:version_id}) 4 | 5 | ELB_URIPATHPARAM %{URIPATH:path}(?:%{URIPARAM:params})? 6 | 7 | ELB_URI %{URIPROTO:proto}://(?:%{USER}(?::[^@]*)?@)?(?:%{URIHOST:urihost})?(?:%{ELB_URIPATHPARAM})? 8 | 9 | ELB_REQUEST_LINE (?:%{WORD:verb} %{ELB_URI:request}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest}) 10 | 11 | ELB_ACCESS_LOG %{TIMESTAMP_ISO8601:timestamp} %{NOTSPACE:elb} %{IP:clientip}:%{INT:clientport:int} (?:(%{IP:backendip}:?:%{INT:backendport:int})|-) %{NUMBER:request_processing_time:float} %{NUMBER:backend_processing_time:float} %{NUMBER:response_processing_time:float} %{INT:response:int} %{INT:backend_response:int} %{INT:received_bytes:int} %{INT:bytes:int} "%{ELB_REQUEST_LINE}" 12 | 13 | CLOUDFRONT_ACCESS_LOG (?%{YEAR}-%{MONTHNUM}-%{MONTHDAY}\t%{TIME})\t%{WORD:x_edge_location}\t(?:%{NUMBER:sc_bytes:int}|-)\t%{IPORHOST:clientip}\t%{WORD:cs_method}\t%{HOSTNAME:cs_host}\t%{NOTSPACE:cs_uri_stem}\t%{NUMBER:sc_status:int}\t%{GREEDYDATA:referrer}\t%{GREEDYDATA:agent}\t%{GREEDYDATA:cs_uri_query}\t%{GREEDYDATA:cookies}\t%{WORD:x_edge_result_type}\t%{NOTSPACE:x_edge_request_id}\t%{HOSTNAME:x_host_header}\t%{URIPROTO:cs_protocol}\t%{INT:cs_bytes:int}\t%{GREEDYDATA:time_taken:float}\t%{GREEDYDATA:x_forwarded_for}\t%{GREEDYDATA:ssl_protocol}\t%{GREEDYDATA:ssl_cipher}\t%{GREEDYDATA:x_edge_response_result_type} 14 | 15 | -------------------------------------------------------------------------------- /patterns/bacula.pattern: -------------------------------------------------------------------------------- 1 | BACULA_TIMESTAMP %{MONTHDAY}-%{MONTH} %{HOUR}:%{MINUTE} 2 | BACULA_HOST [a-zA-Z0-9-]+ 3 | BACULA_VOLUME %{USER} 4 | BACULA_DEVICE %{USER} 5 | BACULA_DEVICEPATH %{UNIXPATH} 6 | BACULA_CAPACITY %{INT}{1,3}(,%{INT}{3})* 7 | BACULA_VERSION %{USER} 8 | BACULA_JOB %{USER} 9 | 10 | BACULA_LOG_MAX_CAPACITY User defined maximum volume capacity %{BACULA_CAPACITY} exceeded on device \"%{BACULA_DEVICE:device}\" \(%{BACULA_DEVICEPATH}\) 11 | BACULA_LOG_END_VOLUME End of medium on Volume \"%{BACULA_VOLUME:volume}\" Bytes=%{BACULA_CAPACITY} Blocks=%{BACULA_CAPACITY} at %{MONTHDAY}-%{MONTH}-%{YEAR} %{HOUR}:%{MINUTE}. 12 | BACULA_LOG_NEW_VOLUME Created new Volume \"%{BACULA_VOLUME:volume}\" in catalog. 13 | BACULA_LOG_NEW_LABEL Labeled new Volume \"%{BACULA_VOLUME:volume}\" on device \"%{BACULA_DEVICE:device}\" \(%{BACULA_DEVICEPATH}\). 14 | BACULA_LOG_WROTE_LABEL Wrote label to prelabeled Volume \"%{BACULA_VOLUME:volume}\" on device \"%{BACULA_DEVICE}\" \(%{BACULA_DEVICEPATH}\) 15 | BACULA_LOG_NEW_MOUNT New volume \"%{BACULA_VOLUME:volume}\" mounted on device \"%{BACULA_DEVICE:device}\" \(%{BACULA_DEVICEPATH}\) at %{MONTHDAY}-%{MONTH}-%{YEAR} %{HOUR}:%{MINUTE}. 
16 | BACULA_LOG_NOOPEN \s+Cannot open %{DATA}: ERR=%{GREEDYDATA:berror} 17 | BACULA_LOG_NOOPENDIR \s+Could not open directory %{DATA}: ERR=%{GREEDYDATA:berror} 18 | BACULA_LOG_NOSTAT \s+Could not stat %{DATA}: ERR=%{GREEDYDATA:berror} 19 | BACULA_LOG_NOJOBS There are no more Jobs associated with Volume \"%{BACULA_VOLUME:volume}\". Marking it purged. 20 | BACULA_LOG_ALL_RECORDS_PRUNED All records pruned from Volume \"%{BACULA_VOLUME:volume}\"; marking it \"Purged\" 21 | BACULA_LOG_BEGIN_PRUNE_JOBS Begin pruning Jobs older than %{INT} month %{INT} days . 22 | BACULA_LOG_BEGIN_PRUNE_FILES Begin pruning Files. 23 | BACULA_LOG_PRUNED_JOBS Pruned %{INT} Jobs* for client %{BACULA_HOST:client} from catalog. 24 | BACULA_LOG_PRUNED_FILES Pruned Files from %{INT} Jobs* for client %{BACULA_HOST:client} from catalog. 25 | BACULA_LOG_ENDPRUNE End auto prune. 26 | BACULA_LOG_STARTJOB Start Backup JobId %{INT}, Job=%{BACULA_JOB:job} 27 | BACULA_LOG_STARTRESTORE Start Restore Job %{BACULA_JOB:job} 28 | BACULA_LOG_USEDEVICE Using Device \"%{BACULA_DEVICE:device}\" 29 | BACULA_LOG_DIFF_FS \s+%{UNIXPATH} is a different filesystem. Will not descend from %{UNIXPATH} into it. 30 | BACULA_LOG_JOBEND Job write elapsed time = %{DATA:elapsed}, Transfer rate = %{NUMBER} (K|M|G)? Bytes/second 31 | BACULA_LOG_NOPRUNE_JOBS No Jobs found to prune. 32 | BACULA_LOG_NOPRUNE_FILES No Files found to prune. 33 | BACULA_LOG_VOLUME_PREVWRITTEN Volume \"%{BACULA_VOLUME:volume}\" previously written, moving to end of data. 34 | BACULA_LOG_READYAPPEND Ready to append to end of Volume \"%{BACULA_VOLUME:volume}\" size=%{INT} 35 | BACULA_LOG_CANCELLING Cancelling duplicate JobId=%{INT}. 36 | BACULA_LOG_MARKCANCEL JobId %{INT}, Job %{BACULA_JOB:job} marked to be canceled. 37 | BACULA_LOG_CLIENT_RBJ shell command: run ClientRunBeforeJob \"%{GREEDYDATA:runjob}\" 38 | BACULA_LOG_VSS (Generate )?VSS (Writer)? 39 | BACULA_LOG_MAXSTART Fatal error: Job canceled because max start delay time exceeded. 40 | BACULA_LOG_DUPLICATE Fatal error: JobId %{INT:duplicate} already running. Duplicate job not allowed. 41 | BACULA_LOG_NOJOBSTAT Fatal error: No Job status returned from FD. 42 | BACULA_LOG_FATAL_CONN Fatal error: bsock.c:133 Unable to connect to (Client: %{BACULA_HOST:client}|Storage daemon) on %{HOSTNAME}:%{POSINT}. ERR=(%{GREEDYDATA}) 43 | BACULA_LOG_NO_CONNECT Warning: bsock.c:127 Could not connect to (Client: %{BACULA_HOST:client}|Storage daemon) on %{HOSTNAME}:%{POSINT}. ERR=(%{GREEDYDATA}) 44 | BACULA_LOG_NO_AUTH Fatal error: Unable to authenticate with File daemon at %{HOSTNAME}. Possible causes: 45 | BACULA_LOG_NOSUIT No prior or suitable Full backup found in catalog. Doing FULL backup. 46 | BACULA_LOG_NOPRIOR No prior Full backup Job record found. 
47 | 48 | BACULA_LOG_JOB (Error: )?Bacula %{BACULA_HOST} %{BACULA_VERSION} \(%{BACULA_VERSION}\): 49 | 50 | BACULA_LOGLINE %{BACULA_TIMESTAMP:bts} %{BACULA_HOST:hostname} JobId %{INT:jobid}: (%{BACULA_LOG_MAX_CAPACITY}|%{BACULA_LOG_END_VOLUME}|%{BACULA_LOG_NEW_VOLUME}|%{BACULA_LOG_NEW_LABEL}|%{BACULA_LOG_WROTE_LABEL}|%{BACULA_LOG_NEW_MOUNT}|%{BACULA_LOG_NOOPEN}|%{BACULA_LOG_NOOPENDIR}|%{BACULA_LOG_NOSTAT}|%{BACULA_LOG_NOJOBS}|%{BACULA_LOG_ALL_RECORDS_PRUNED}|%{BACULA_LOG_BEGIN_PRUNE_JOBS}|%{BACULA_LOG_BEGIN_PRUNE_FILES}|%{BACULA_LOG_PRUNED_JOBS}|%{BACULA_LOG_PRUNED_FILES}|%{BACULA_LOG_ENDPRUNE}|%{BACULA_LOG_STARTJOB}|%{BACULA_LOG_STARTRESTORE}|%{BACULA_LOG_USEDEVICE}|%{BACULA_LOG_DIFF_FS}|%{BACULA_LOG_JOBEND}|%{BACULA_LOG_NOPRUNE_JOBS}|%{BACULA_LOG_NOPRUNE_FILES}|%{BACULA_LOG_VOLUME_PREVWRITTEN}|%{BACULA_LOG_READYAPPEND}|%{BACULA_LOG_CANCELLING}|%{BACULA_LOG_MARKCANCEL}|%{BACULA_LOG_CLIENT_RBJ}|%{BACULA_LOG_VSS}|%{BACULA_LOG_MAXSTART}|%{BACULA_LOG_DUPLICATE}|%{BACULA_LOG_NOJOBSTAT}|%{BACULA_LOG_FATAL_CONN}|%{BACULA_LOG_NO_CONNECT}|%{BACULA_LOG_NO_AUTH}|%{BACULA_LOG_NOSUIT}|%{BACULA_LOG_JOB}|%{BACULA_LOG_NOPRIOR}) 51 | -------------------------------------------------------------------------------- /patterns/bind.pattern: -------------------------------------------------------------------------------- 1 | BIND9_TIMESTAMP %{MONTHDAY}[-]%{MONTH}[-]%{YEAR} %{TIME} 2 | 3 | BIND9 %{BIND9_TIMESTAMP:timestamp} queries: %{LOGLEVEL:loglevel}: client %{IP:clientip}#%{POSINT:clientport} \(%{GREEDYDATA:query}\): query: %{GREEDYDATA:query} IN %{GREEDYDATA:querytype} \(%{IP:dns}\) 4 | -------------------------------------------------------------------------------- /patterns/bro.pattern: -------------------------------------------------------------------------------- 1 | # https://www.bro.org/sphinx/script-reference/log-files.html 2 | 3 | # http.log 4 | BRO_HTTP %{NUMBER:ts}\t%{NOTSPACE:uid}\t%{IP:orig_h}\t%{INT:orig_p}\t%{IP:resp_h}\t%{INT:resp_p}\t%{INT:trans_depth}\t%{GREEDYDATA:method}\t%{GREEDYDATA:domain}\t%{GREEDYDATA:uri}\t%{GREEDYDATA:referrer}\t%{GREEDYDATA:user_agent}\t%{NUMBER:request_body_len}\t%{NUMBER:response_body_len}\t%{GREEDYDATA:status_code}\t%{GREEDYDATA:status_msg}\t%{GREEDYDATA:info_code}\t%{GREEDYDATA:info_msg}\t%{GREEDYDATA:filename}\t%{GREEDYDATA:bro_tags}\t%{GREEDYDATA:username}\t%{GREEDYDATA:password}\t%{GREEDYDATA:proxied}\t%{GREEDYDATA:orig_fuids}\t%{GREEDYDATA:orig_mime_types}\t%{GREEDYDATA:resp_fuids}\t%{GREEDYDATA:resp_mime_types} 5 | 6 | # dns.log 7 | BRO_DNS %{NUMBER:ts}\t%{NOTSPACE:uid}\t%{IP:orig_h}\t%{INT:orig_p}\t%{IP:resp_h}\t%{INT:resp_p}\t%{WORD:proto}\t%{INT:trans_id}\t%{GREEDYDATA:query}\t%{GREEDYDATA:qclass}\t%{GREEDYDATA:qclass_name}\t%{GREEDYDATA:qtype}\t%{GREEDYDATA:qtype_name}\t%{GREEDYDATA:rcode}\t%{GREEDYDATA:rcode_name}\t%{GREEDYDATA:AA}\t%{GREEDYDATA:TC}\t%{GREEDYDATA:RD}\t%{GREEDYDATA:RA}\t%{GREEDYDATA:Z}\t%{GREEDYDATA:answers}\t%{GREEDYDATA:TTLs}\t%{GREEDYDATA:rejected} 8 | 9 | # conn.log 10 | BRO_CONN %{NUMBER:ts}\t%{NOTSPACE:uid}\t%{IP:orig_h}\t%{INT:orig_p}\t%{IP:resp_h}\t%{INT:resp_p}\t%{WORD:proto}\t%{GREEDYDATA:service}\t%{NUMBER:duration}\t%{NUMBER:orig_bytes}\t%{NUMBER:resp_bytes}\t%{GREEDYDATA:conn_state}\t%{GREEDYDATA:local_orig}\t%{GREEDYDATA:missed_bytes}\t%{GREEDYDATA:history}\t%{GREEDYDATA:orig_pkts}\t%{GREEDYDATA:orig_ip_bytes}\t%{GREEDYDATA:resp_pkts}\t%{GREEDYDATA:resp_ip_bytes}\t%{GREEDYDATA:tunnel_parents} 11 | 12 | # files.log 13 | BRO_FILES 
%{NUMBER:ts}\t%{NOTSPACE:fuid}\t%{IP:tx_hosts}\t%{IP:rx_hosts}\t%{NOTSPACE:conn_uids}\t%{GREEDYDATA:source}\t%{GREEDYDATA:depth}\t%{GREEDYDATA:analyzers}\t%{GREEDYDATA:mime_type}\t%{GREEDYDATA:filename}\t%{GREEDYDATA:duration}\t%{GREEDYDATA:local_orig}\t%{GREEDYDATA:is_orig}\t%{GREEDYDATA:seen_bytes}\t%{GREEDYDATA:total_bytes}\t%{GREEDYDATA:missing_bytes}\t%{GREEDYDATA:overflow_bytes}\t%{GREEDYDATA:timedout}\t%{GREEDYDATA:parent_fuid}\t%{GREEDYDATA:md5}\t%{GREEDYDATA:sha1}\t%{GREEDYDATA:sha256}\t%{GREEDYDATA:extracted} 14 | -------------------------------------------------------------------------------- /patterns/exim.pattern: -------------------------------------------------------------------------------- 1 | EXIM_MSGID [0-9A-Za-z]{6}-[0-9A-Za-z]{6}-[0-9A-Za-z]{2} 2 | EXIM_FLAGS (<=|[-=>*]>|[*]{2}|==) 3 | EXIM_DATE %{YEAR:exim_year}-%{MONTHNUM:exim_month}-%{MONTHDAY:exim_day} %{TIME:exim_time} 4 | EXIM_PID \[%{POSINT}\] 5 | EXIM_QT ((\d+y)?(\d+w)?(\d+d)?(\d+h)?(\d+m)?(\d+s)?) 6 | EXIM_EXCLUDE_TERMS (Message is frozen|(Start|End) queue run| Warning: | retry time not reached | no (IP address|host name) found for (IP address|host) | unexpected disconnection while reading SMTP command | no immediate delivery: |another process is handling this message) 7 | EXIM_REMOTE_HOST (H=(%{NOTSPACE:remote_hostname} )?(\(%{NOTSPACE:remote_heloname}\) )?\[%{IP:remote_host}\]) 8 | EXIM_INTERFACE (I=\[%{IP:exim_interface}\](:%{NUMBER:exim_interface_port})) 9 | EXIM_PROTOCOL (P=%{NOTSPACE:protocol}) 10 | EXIM_MSG_SIZE (S=%{NUMBER:exim_msg_size}) 11 | EXIM_HEADER_ID (id=%{NOTSPACE:exim_header_id}) 12 | EXIM_SUBJECT (T=%{QS:exim_subject}) 13 | 14 | -------------------------------------------------------------------------------- /patterns/firewalls.pattern: -------------------------------------------------------------------------------- 1 | # NetScreen firewall logs 2 | NETSCREENSESSIONLOG %{SYSLOGTIMESTAMP:date} %{IPORHOST:device} %{IPORHOST}: NetScreen device_id=%{WORD:device_id}%{DATA}: start_time=%{QUOTEDSTRING:start_time} duration=%{INT:duration} policy_id=%{INT:policy_id} service=%{DATA:service} proto=%{INT:proto} src zone=%{WORD:src_zone} dst zone=%{WORD:dst_zone} action=%{WORD:action} sent=%{INT:sent} rcvd=%{INT:rcvd} src=%{IPORHOST:src_ip} dst=%{IPORHOST:dst_ip} src_port=%{INT:src_port} dst_port=%{INT:dst_port} src-xlated ip=%{IPORHOST:src_xlated_ip} port=%{INT:src_xlated_port} dst-xlated ip=%{IPORHOST:dst_xlated_ip} port=%{INT:dst_xlated_port} session_id=%{INT:session_id} reason=%{GREEDYDATA:reason} 3 | 4 | #== Cisco ASA == 5 | CISCO_TAGGED_SYSLOG ^<%{POSINT:syslog_pri}>%{CISCOTIMESTAMP:timestamp}( %{SYSLOGHOST:sysloghost})? ?: %%{CISCOTAG:ciscotag}: 6 | CISCOTIMESTAMP %{MONTH} +%{MONTHDAY}(?: %{YEAR})? 
%{TIME} 7 | CISCOTAG [A-Z0-9]+-%{INT}-(?:[A-Z0-9_]+) 8 | # Common Particles 9 | CISCO_ACTION Built|Teardown|Deny|Denied|denied|requested|permitted|denied by ACL|discarded|est-allowed|Dropping|created|deleted 10 | CISCO_REASON Duplicate TCP SYN|Failed to locate egress interface|Invalid transport field|No matching connection|DNS Response|DNS Query|(?:%{WORD}\s*)* 11 | CISCO_DIRECTION Inbound|inbound|Outbound|outbound 12 | CISCO_INTERVAL first hit|%{INT}-second interval 13 | CISCO_XLATE_TYPE static|dynamic 14 | # ASA-1-104001 15 | CISCOFW104001 \((?:Primary|Secondary)\) Switching to ACTIVE - %{GREEDYDATA:switch_reason} 16 | # ASA-1-104002 17 | CISCOFW104002 \((?:Primary|Secondary)\) Switching to STANDBY - %{GREEDYDATA:switch_reason} 18 | # ASA-1-104003 19 | CISCOFW104003 \((?:Primary|Secondary)\) Switching to FAILED\. 20 | # ASA-1-104004 21 | CISCOFW104004 \((?:Primary|Secondary)\) Switching to OK\. 22 | # ASA-1-105003 23 | CISCOFW105003 \((?:Primary|Secondary)\) Monitoring on [Ii]nterface %{GREEDYDATA:interface_name} waiting 24 | # ASA-1-105004 25 | CISCOFW105004 \((?:Primary|Secondary)\) Monitoring on [Ii]nterface %{GREEDYDATA:interface_name} normal 26 | # ASA-1-105005 27 | CISCOFW105005 \((?:Primary|Secondary)\) Lost Failover communications with mate on [Ii]nterface %{GREEDYDATA:interface_name} 28 | # ASA-1-105008 29 | CISCOFW105008 \((?:Primary|Secondary)\) Testing [Ii]nterface %{GREEDYDATA:interface_name} 30 | # ASA-1-105009 31 | CISCOFW105009 \((?:Primary|Secondary)\) Testing on [Ii]nterface %{GREEDYDATA:interface_name} (?:Passed|Failed) 32 | # ASA-2-106001 33 | CISCOFW106001 %{CISCO_DIRECTION:direction} %{WORD:protocol} connection %{CISCO_ACTION:action} from %{IP:src_ip}/%{INT:src_port} to %{IP:dst_ip}/%{INT:dst_port} flags %{GREEDYDATA:tcp_flags} on interface %{GREEDYDATA:interface} 34 | # ASA-2-106006, ASA-2-106007, ASA-2-106010 35 | CISCOFW106006_106007_106010 %{CISCO_ACTION:action} %{CISCO_DIRECTION:direction} %{WORD:protocol} (?:from|src) %{IP:src_ip}/%{INT:src_port}(\(%{DATA:src_fwuser}\))? (?:to|dst) %{IP:dst_ip}/%{INT:dst_port}(\(%{DATA:dst_fwuser}\))? (?:on interface %{DATA:interface}|due to %{CISCO_REASON:reason}) 36 | # ASA-3-106014 37 | CISCOFW106014 %{CISCO_ACTION:action} %{CISCO_DIRECTION:direction} %{WORD:protocol} src %{DATA:src_interface}:%{IP:src_ip}(\(%{DATA:src_fwuser}\))? dst %{DATA:dst_interface}:%{IP:dst_ip}(\(%{DATA:dst_fwuser}\))? \(type %{INT:icmp_type}, code %{INT:icmp_code}\) 38 | # ASA-6-106015 39 | CISCOFW106015 %{CISCO_ACTION:action} %{WORD:protocol} \(%{DATA:policy_id}\) from %{IP:src_ip}/%{INT:src_port} to %{IP:dst_ip}/%{INT:dst_port} flags %{DATA:tcp_flags} on interface %{GREEDYDATA:interface} 40 | # ASA-1-106021 41 | CISCOFW106021 %{CISCO_ACTION:action} %{WORD:protocol} reverse path check from %{IP:src_ip} to %{IP:dst_ip} on interface %{GREEDYDATA:interface} 42 | # ASA-4-106023 43 | CISCOFW106023 %{CISCO_ACTION:action}( protocol)? %{WORD:protocol} src %{DATA:src_interface}:%{DATA:src_ip}(/%{INT:src_port})?(\(%{DATA:src_fwuser}\))? dst %{DATA:dst_interface}:%{DATA:dst_ip}(/%{INT:dst_port})?(\(%{DATA:dst_fwuser}\))?( \(type %{INT:icmp_type}, code %{INT:icmp_code}\))? by access-group "?%{DATA:policy_id}"? 
\[%{DATA:hashcode1}, %{DATA:hashcode2}\] 44 | # ASA-4-106100, ASA-4-106102, ASA-4-106103 45 | CISCOFW106100_2_3 access-list %{NOTSPACE:policy_id} %{CISCO_ACTION:action} %{WORD:protocol} for user '%{DATA:src_fwuser}' %{DATA:src_interface}/%{IP:src_ip}\(%{INT:src_port}\) -> %{DATA:dst_interface}/%{IP:dst_ip}\(%{INT:dst_port}\) hit-cnt %{INT:hit_count} %{CISCO_INTERVAL:interval} \[%{DATA:hashcode1}, %{DATA:hashcode2}\] 46 | # ASA-5-106100 47 | CISCOFW106100 access-list %{NOTSPACE:policy_id} %{CISCO_ACTION:action} %{WORD:protocol} %{DATA:src_interface}/%{IP:src_ip}\(%{INT:src_port}\)(\(%{DATA:src_fwuser}\))? -> %{DATA:dst_interface}/%{IP:dst_ip}\(%{INT:dst_port}\)(\(%{DATA:src_fwuser}\))? hit-cnt %{INT:hit_count} %{CISCO_INTERVAL:interval} \[%{DATA:hashcode1}, %{DATA:hashcode2}\] 48 | # ASA-5-304001 49 | CISCOFW304001 %{IP:src_ip}(\(%{DATA:src_fwuser}\))? Accessed URL %{IP:dst_ip}:%{GREEDYDATA:dst_url} 50 | # ASA-6-110002 51 | CISCOFW110002 %{CISCO_REASON:reason} for %{WORD:protocol} from %{DATA:src_interface}:%{IP:src_ip}/%{INT:src_port} to %{IP:dst_ip}/%{INT:dst_port} 52 | # ASA-6-302010 53 | CISCOFW302010 %{INT:connection_count} in use, %{INT:connection_count_max} most used 54 | # ASA-6-302013, ASA-6-302014, ASA-6-302015, ASA-6-302016 55 | CISCOFW302013_302014_302015_302016 %{CISCO_ACTION:action}(?: %{CISCO_DIRECTION:direction})? %{WORD:protocol} connection %{INT:connection_id} for %{DATA:src_interface}:%{IP:src_ip}/%{INT:src_port}( \(%{IP:src_mapped_ip}/%{INT:src_mapped_port}\))?(\(%{DATA:src_fwuser}\))? to %{DATA:dst_interface}:%{IP:dst_ip}/%{INT:dst_port}( \(%{IP:dst_mapped_ip}/%{INT:dst_mapped_port}\))?(\(%{DATA:dst_fwuser}\))?( duration %{TIME:duration} bytes %{INT:bytes})?(?: %{CISCO_REASON:reason})?( \(%{DATA:user}\))? 56 | # ASA-6-302020, ASA-6-302021 57 | CISCOFW302020_302021 %{CISCO_ACTION:action}(?: %{CISCO_DIRECTION:direction})? %{WORD:protocol} connection for faddr %{IP:dst_ip}/%{INT:icmp_seq_num}(?:\(%{DATA:fwuser}\))? gaddr %{IP:src_xlated_ip}/%{INT:icmp_code_xlated} laddr %{IP:src_ip}/%{INT:icmp_code}( \(%{DATA:user}\))? 58 | # ASA-6-305011 59 | CISCOFW305011 %{CISCO_ACTION:action} %{CISCO_XLATE_TYPE:xlate_type} %{WORD:protocol} translation from %{DATA:src_interface}:%{IP:src_ip}(/%{INT:src_port})?(\(%{DATA:src_fwuser}\))? to %{DATA:src_xlated_interface}:%{IP:src_xlated_ip}/%{DATA:src_xlated_port} 60 | # ASA-3-313001, ASA-3-313004, ASA-3-313008 61 | CISCOFW313001_313004_313008 %{CISCO_ACTION:action} %{WORD:protocol} type=%{INT:icmp_type}, code=%{INT:icmp_code} from %{IP:src_ip} on interface %{DATA:interface}( to %{IP:dst_ip})? 62 | # ASA-4-313005 63 | CISCOFW313005 %{CISCO_REASON:reason} for %{WORD:protocol} error message: %{WORD:err_protocol} src %{DATA:err_src_interface}:%{IP:err_src_ip}(\(%{DATA:err_src_fwuser}\))? dst %{DATA:err_dst_interface}:%{IP:err_dst_ip}(\(%{DATA:err_dst_fwuser}\))? \(type %{INT:err_icmp_type}, code %{INT:err_icmp_code}\) on %{DATA:interface} interface\. Original IP payload: %{WORD:protocol} src %{IP:orig_src_ip}/%{INT:orig_src_port}(\(%{DATA:orig_src_fwuser}\))? dst %{IP:orig_dst_ip}/%{INT:orig_dst_port}(\(%{DATA:orig_dst_fwuser}\))? 
64 | # ASA-5-321001 65 | CISCOFW321001 Resource '%{WORD:resource_name}' limit of %{POSINT:resource_limit} reached for system 66 | # ASA-4-402117 67 | CISCOFW402117 %{WORD:protocol}: Received a non-IPSec packet \(protocol= %{WORD:orig_protocol}\) from %{IP:src_ip} to %{IP:dst_ip} 68 | # ASA-4-402119 69 | CISCOFW402119 %{WORD:protocol}: Received an %{WORD:orig_protocol} packet \(SPI= %{DATA:spi}, sequence number= %{DATA:seq_num}\) from %{IP:src_ip} \(user= %{DATA:user}\) to %{IP:dst_ip} that failed anti-replay checking 70 | # ASA-4-419001 71 | CISCOFW419001 %{CISCO_ACTION:action} %{WORD:protocol} packet from %{DATA:src_interface}:%{IP:src_ip}/%{INT:src_port} to %{DATA:dst_interface}:%{IP:dst_ip}/%{INT:dst_port}, reason: %{GREEDYDATA:reason} 72 | # ASA-4-419002 73 | CISCOFW419002 %{CISCO_REASON:reason} from %{DATA:src_interface}:%{IP:src_ip}/%{INT:src_port} to %{DATA:dst_interface}:%{IP:dst_ip}/%{INT:dst_port} with different initial sequence number 74 | # ASA-4-500004 75 | CISCOFW500004 %{CISCO_REASON:reason} for protocol=%{WORD:protocol}, from %{IP:src_ip}/%{INT:src_port} to %{IP:dst_ip}/%{INT:dst_port} 76 | # ASA-6-602303, ASA-6-602304 77 | CISCOFW602303_602304 %{WORD:protocol}: An %{CISCO_DIRECTION:direction} %{GREEDYDATA:tunnel_type} SA \(SPI= %{DATA:spi}\) between %{IP:src_ip} and %{IP:dst_ip} \(user= %{DATA:user}\) has been %{CISCO_ACTION:action} 78 | # ASA-7-710001, ASA-7-710002, ASA-7-710003, ASA-7-710005, ASA-7-710006 79 | CISCOFW710001_710002_710003_710005_710006 %{WORD:protocol} (?:request|access) %{CISCO_ACTION:action} from %{IP:src_ip}/%{INT:src_port} to %{DATA:dst_interface}:%{IP:dst_ip}/%{INT:dst_port} 80 | # ASA-6-713172 81 | CISCOFW713172 Group = %{GREEDYDATA:group}, IP = %{IP:src_ip}, Automatic NAT Detection Status:\s+Remote end\s*%{DATA:is_remote_natted}\s*behind a NAT device\s+This\s+end\s*%{DATA:is_local_natted}\s*behind a NAT device 82 | # ASA-4-733100 83 | CISCOFW733100 \[\s*%{DATA:drop_type}\s*\] drop %{DATA:drop_rate_id} exceeded. Current burst rate is %{INT:drop_rate_current_burst} per second, max configured rate is %{INT:drop_rate_max_burst}; Current average rate is %{INT:drop_rate_current_avg} per second, max configured rate is %{INT:drop_rate_max_avg}; Cumulative total count is %{INT:drop_total_count} 84 | #== End Cisco ASA == 85 | 86 | IPTABLES_TCP_FLAGS (CWR |ECE |URG |ACK |PSH |RST |SYN |FIN )* 87 | IPTABLES_TCP_PART (?:SEQ=%{INT:[iptables][tcp][seq]:int}\s+)?(?:ACK=%{INT:[iptables][tcp][ack]:int}\s+)?WINDOW=%{INT:[iptables][tcp][window]:int}\s+RES=0x%{BASE16NUM:[iptables][tcp_reserved_bits]}\s+%{IPTABLES_TCP_FLAGS:[iptables][tcp][flags]} 88 | 89 | IPTABLES4_FRAG (?:(?<= )(?:CE|DF|MF))* 90 | IPTABLES4_PART SRC=%{IPV4:[source][ip]}\s+DST=%{IPV4:[destination][ip]}\s+LEN=(?:%{INT:[iptables][length]:int})?\s+TOS=(?:0|0x%{BASE16NUM:[iptables][tos]})?\s+PREC=(?:0x%{BASE16NUM:[iptables][precedence_bits]})?\s+TTL=(?:%{INT:[iptables][ttl]:int})?\s+ID=(?:%{INT:[iptables][id]})?\s+(?:%{IPTABLES4_FRAG:[iptables][fragment_flags]})?(?:\s+FRAG: %{INT:[iptables][fragment_offset]:int})? 91 | IPTABLES6_PART SRC=%{IPV6:[source][ip]}\s+DST=%{IPV6:[destination][ip]}\s+LEN=(?:%{INT:[iptables][length]:int})?\s+TC=(?:0|0x%{BASE16NUM:[iptables][tos]})?\s+HOPLIMIT=(?:%{INT:[iptables][ttl]:int})?\s+FLOWLBL=(?:%{INT:[iptables][flow_label]})? 
92 | 93 | IPTABLES IN=(?:%{NOTSPACE:[observer][ingress][interface][name]})?\s+OUT=(?:%{NOTSPACE:[observer][egress][interface][name]})?\s+(?:MAC=(?:%{COMMONMAC:[destination][mac]})?(?::%{COMMONMAC:[source][mac]})?(?::[A-Fa-f0-9]{2}:[A-Fa-f0-9]{2})?\s+)?(:?%{IPTABLES4_PART}|%{IPTABLES6_PART}).*?PROTO=(?:%{WORD:[network][transport]})?\s+SPT=(?:%{INT:[source][port]:int})?\s+DPT=(?:%{INT:[destination][port]:int})?\s+(?:%{IPTABLES_TCP_PART})? 94 | 95 | # Shorewall firewall logs 96 | SHOREWALL (?:%{SYSLOGTIMESTAMP:timestamp}) (?:%{WORD:[observer][hostname]}) .*Shorewall:(?:%{WORD:[shorewall][firewall][type]})?:(?:%{WORD:[shorewall][firewall][action]})?.*%{IPTABLES} 97 | #== End Shorewall 98 | #== SuSE Firewall 2 == 99 | SFW2_LOG_PREFIX SFW2\-INext\-%{NOTSPACE:[suse][firewall][action]} 100 | SFW2 ((?:%{SYSLOGTIMESTAMP:timestamp})|(?:%{TIMESTAMP_ISO8601:timestamp}))\s*%{HOSTNAME:[observer][hostname]}.*?%{SFW2_LOG_PREFIX:[suse][firewall][log_prefix]}\s*%{IPTABLES} 101 | #== End SuSE == 102 | -------------------------------------------------------------------------------- /patterns/grok.pattern: -------------------------------------------------------------------------------- 1 | USERNAME [a-zA-Z0-9._-]+ 2 | USER %{USERNAME} 3 | EMAILLOCALPART [a-zA-Z][a-zA-Z0-9_.+-=:]+ 4 | EMAILADDRESS %{EMAILLOCALPART}@%{HOSTNAME} 5 | INT (?:[+-]?(?:[0-9]+)) 6 | BASE10NUM (?<![0-9.+-])(?>[+-]?(?:(?:[0-9]+(?:\.[0-9]+)?)|(?:\.[0-9]+))) 7 | NUMBER (?:%{BASE10NUM}) 8 | BASE16NUM (?<![0-9A-Fa-f])(?:[+-]?(?:0x)?(?:[0-9A-Fa-f]+)) 9 | BASE16FLOAT \b(?<![0-9A-Fa-f.])(?:[+-]?(?:0x)?(?:(?:[0-9A-Fa-f]+(?:\.[0-9A-Fa-f]*)?)|(?:\.[0-9A-Fa-f]+)))\b 10 | 11 | POSINT \b(?:[1-9][0-9]*)\b 12 | NONNEGINT \b(?:[0-9]+)\b 13 | WORD \b\w+\b 14 | NOTSPACE \S+ 15 | SPACE \s* 16 | DATA .*? 17 | GREEDYDATA .* 18 | QUOTEDSTRING (?>(?<!\\)(?>"(?>\\.|[^\\"]+)+"|""|(?>'(?>\\.|[^\\']+)+')|''|(?>`(?>\\.|[^\\`]+)+`)|``)) 19 | UUID [A-Fa-f0-9]{8}-(?:[A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12} 20 | # URN, allowing use of RFC 2141 section 2.3 reserved characters 21 | URN urn:[0-9A-Za-z][0-9A-Za-z-]{0,31}:(?:%[0-9a-fA-F]{2}|[0-9A-Za-z()+,.:=@;$_!*'/?#-])+ 22 | 23 | # Networking 24 | MAC (?:%{CISCOMAC}|%{WINDOWSMAC}|%{COMMONMAC}) 25 | CISCOMAC (?:(?:[A-Fa-f0-9]{4}\.){2}[A-Fa-f0-9]{4}) 26 | WINDOWSMAC (?:(?:[A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2}) 27 | COMMONMAC (?:(?:[A-Fa-f0-9]{2}:){5}[A-Fa-f0-9]{2}) 28 | IPV6 ((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)? 29 | IPV4 (?<![0-9])(?:(?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5])[.](?:[0-1]?[0-9]{1,2}|2[0-4][0-9]|25[0-5]))(?![0-9]) 30 | IP (?:%{IPV6}|%{IPV4}) 31 | HOSTNAME \b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\.?|\b) 32 | IPORHOST (?:%{IP}|%{HOSTNAME}) 33 | HOSTPORT %{IPORHOST}:%{POSINT} 34 | 35 | # paths 36 | PATH (?:%{UNIXPATH}|%{WINPATH}) 37 | UNIXPATH (/([\w_%!$@:.,+~-]+|\\.)*)+ 38 | TTY (?:/dev/(pts|tty([pq])?)(\w+)?/?(?:[0-9]+)) 39 | WINPATH (?>[A-Za-z]+:|\\)(?:\\[^\\?*]*)+ 40 | URIPROTO [A-Za-z]([A-Za-z0-9+\-.]+)+ 41 | URIHOST %{IPORHOST}(?::%{POSINT:port})? 42 | # uripath comes loosely from RFC1738, but mostly from what Firefox 43 | # doesn't turn into %XX 44 | URIPATH (?:/[A-Za-z0-9$.+!*'(){},~:;=@#%&_\-]*)+ 45 | #URIPARAM \?(?:[A-Za-z0-9]+(?:=(?:[^&]*))?(?:&(?:[A-Za-z0-9]+(?:=(?:[^&]*))?)?)*)?
46 | URIPARAM \?[A-Za-z0-9$.+!*'|(){},~@#%&/=:;_?\-\[\]<>]* 47 | URIPATHPARAM %{URIPATH}(?:%{URIPARAM})? 48 | URI %{URIPROTO}://(?:%{USER}(?::[^@]*)?@)?(?:%{URIHOST})?(?:%{URIPATHPARAM})? 49 | 50 | # Months: January, Feb, 3, 03, 12, December 51 | MONTH \b(?:[Jj]an(?:uary|uar)?|[Ff]eb(?:ruary|ruar)?|[Mm](?:a|ä)?r(?:ch|z)?|[Aa]pr(?:il)?|[Mm]a(?:y|i)?|[Jj]un(?:e|i)?|[Jj]ul(?:y)?|[Aa]ug(?:ust)?|[Ss]ep(?:tember)?|[Oo](?:c|k)?t(?:ober)?|[Nn]ov(?:ember)?|[Dd]e(?:c|z)(?:ember)?)\b 52 | MONTHNUM (?:0?[1-9]|1[0-2]) 53 | MONTHNUM2 (?:0[1-9]|1[0-2]) 54 | MONTHDAY (?:(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9]) 55 | 56 | # Days: Monday, Tue, Thu, etc... 57 | DAY (?:Mon(?:day)?|Tue(?:sday)?|Wed(?:nesday)?|Thu(?:rsday)?|Fri(?:day)?|Sat(?:urday)?|Sun(?:day)?) 58 | 59 | # Years? 60 | YEAR (?>\d\d){1,2} 61 | HOUR (?:2[0123]|[01]?[0-9]) 62 | MINUTE (?:[0-5][0-9]) 63 | # '60' is a leap second in most time standards and thus is valid. 64 | SECOND (?:(?:[0-5]?[0-9]|60)(?:[:.,][0-9]+)?) 65 | TIME (?!<[0-9])%{HOUR}:%{MINUTE}(?::%{SECOND})(?![0-9]) 66 | # datestamp is YYYY/MM/DD-HH:MM:SS.UUUU (or something like it) 67 | DATE_US %{MONTHNUM}[/-]%{MONTHDAY}[/-]%{YEAR} 68 | DATE_EU %{MONTHDAY}[./-]%{MONTHNUM}[./-]%{YEAR} 69 | ISO8601_TIMEZONE (?:Z|[+-]%{HOUR}(?::?%{MINUTE})) 70 | ISO8601_SECOND (?:%{SECOND}|60) 71 | TIMESTAMP_ISO8601 %{YEAR}-%{MONTHNUM}-%{MONTHDAY}[T ]%{HOUR}:?%{MINUTE}(?::?%{SECOND})?%{ISO8601_TIMEZONE}? 72 | DATE %{DATE_US}|%{DATE_EU} 73 | DATESTAMP %{DATE}[- ]%{TIME} 74 | TZ (?:[APMCE][SD]T|UTC) 75 | DATESTAMP_RFC822 %{DAY} %{MONTH} %{MONTHDAY} %{YEAR} %{TIME} %{TZ} 76 | DATESTAMP_RFC2822 %{DAY}, %{MONTHDAY} %{MONTH} %{YEAR} %{TIME} %{ISO8601_TIMEZONE} 77 | DATESTAMP_OTHER %{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{TZ} %{YEAR} 78 | DATESTAMP_EVENTLOG %{YEAR}%{MONTHNUM2}%{MONTHDAY}%{HOUR}%{MINUTE}%{SECOND} 79 | 80 | # Syslog Dates: Month Day HH:MM:SS 81 | SYSLOGTIMESTAMP %{MONTH} +%{MONTHDAY} %{TIME} 82 | PROG [\x21-\x5a\x5c\x5e-\x7e]+ 83 | SYSLOGPROG %{PROG:program}(?:\[%{POSINT:pid}\])? 84 | SYSLOGHOST %{IPORHOST} 85 | SYSLOGFACILITY <%{NONNEGINT:facility}.%{NONNEGINT:priority}> 86 | HTTPDATE %{MONTHDAY}/%{MONTH}/%{YEAR}:%{TIME} %{INT} 87 | 88 | # Shortcuts 89 | QS %{QUOTEDSTRING} 90 | 91 | # Log formats 92 | SYSLOGBASE %{SYSLOGTIMESTAMP:timestamp} (?:%{SYSLOGFACILITY} )?%{SYSLOGHOST:logsource} %{SYSLOGPROG}: 93 | 94 | # Log Levels 95 | LOGLEVEL ([Aa]lert|ALERT|[Tt]race|TRACE|[Dd]ebug|DEBUG|[Nn]otice|NOTICE|[Ii]nfo|INFO|[Ww]arn?(?:ing)?|WARN?(?:ING)?|[Ee]rr?(?:or)?|ERR?(?:OR)?|[Cc]rit?(?:ical)?|CRIT?(?:ICAL)?|[Ff]atal|FATAL|[Ss]evere|SEVERE|EMERG(?:ENCY)?|[Ee]merg(?:ency)?) 
96 | -------------------------------------------------------------------------------- /patterns/haproxy.pattern: -------------------------------------------------------------------------------- 1 | ## These patterns were tested w/ haproxy-1.4.15 2 | 3 | ## Documentation of the haproxy log formats can be found at the following links: 4 | ## http://code.google.com/p/haproxy-docs/wiki/HTTPLogFormat 5 | ## http://code.google.com/p/haproxy-docs/wiki/TCPLogFormat 6 | 7 | HAPROXYTIME (?!<[0-9])%{HOUR:haproxy_hour}:%{MINUTE:haproxy_minute}(?::%{SECOND:haproxy_second})(?![0-9]) 8 | HAPROXYDATE %{MONTHDAY:haproxy_monthday}/%{MONTH:haproxy_month}/%{YEAR:haproxy_year}:%{HAPROXYTIME:haproxy_time}.%{INT:haproxy_milliseconds} 9 | 10 | # Override these default patterns to parse out what is captured in your haproxy.cfg 11 | HAPROXYCAPTUREDREQUESTHEADERS %{DATA:captured_request_headers} 12 | HAPROXYCAPTUREDRESPONSEHEADERS %{DATA:captured_response_headers} 13 | 14 | # Example: 15 | # These haproxy config lines will add data to the logs that are captured 16 | # by the patterns below. Place them in your custom patterns directory to 17 | # override the defaults. 18 | # 19 | # capture request header Host len 40 20 | # capture request header X-Forwarded-For len 50 21 | # capture request header Accept-Language len 50 22 | # capture request header Referer len 200 23 | # capture request header User-Agent len 200 24 | # 25 | # capture response header Content-Type len 30 26 | # capture response header Content-Encoding len 10 27 | # capture response header Cache-Control len 200 28 | # capture response header Last-Modified len 200 29 | # 30 | # HAPROXYCAPTUREDREQUESTHEADERS %{DATA:request_header_host}\|%{DATA:request_header_x_forwarded_for}\|%{DATA:request_header_accept_language}\|%{DATA:request_header_referer}\|%{DATA:request_header_user_agent} 31 | # HAPROXYCAPTUREDRESPONSEHEADERS %{DATA:response_header_content_type}\|%{DATA:response_header_content_encoding}\|%{DATA:response_header_cache_control}\|%{DATA:response_header_last_modified} 32 | 33 | # parse a haproxy 'httplog' line 34 | HAPROXYHTTPBASE %{IP:client_ip}:%{INT:client_port} \[%{HAPROXYDATE:accept_date}\] %{NOTSPACE:frontend_name} %{NOTSPACE:backend_name}/%{NOTSPACE:server_name} %{INT:time_request}/%{INT:time_queue}/%{INT:time_backend_connect}/%{INT:time_backend_response}/%{NOTSPACE:time_duration} %{INT:http_status_code} %{NOTSPACE:bytes_read} %{DATA:captured_request_cookie} %{DATA:captured_response_cookie} %{NOTSPACE:termination_state} %{INT:actconn}/%{INT:feconn}/%{INT:beconn}/%{INT:srvconn}/%{NOTSPACE:retries} %{INT:srv_queue}/%{INT:backend_queue} (\{%{HAPROXYCAPTUREDREQUESTHEADERS}\})?( )?(\{%{HAPROXYCAPTUREDRESPONSEHEADERS}\})?( )?"(|(%{WORD:http_verb} (%{URIPROTO:http_proto}://)?(?:%{USER:http_user}(?::[^@]*)?@)?(?:%{URIHOST:http_host})?(?:%{URIPATHPARAM:http_request})?( HTTP/%{NUMBER:http_version})?))?"? 
35 | 36 | HAPROXYHTTP (?:%{SYSLOGTIMESTAMP:syslog_timestamp}|%{TIMESTAMP_ISO8601:timestamp8601}) %{IPORHOST:syslog_server} %{SYSLOGPROG}: %{HAPROXYHTTPBASE} 37 | 38 | # parse a haproxy 'tcplog' line 39 | HAPROXYTCP (?:%{SYSLOGTIMESTAMP:syslog_timestamp}|%{TIMESTAMP_ISO8601:timestamp8601}) %{IPORHOST:syslog_server} %{SYSLOGPROG}: %{IP:client_ip}:%{INT:client_port} \[%{HAPROXYDATE:accept_date}\] %{NOTSPACE:frontend_name} %{NOTSPACE:backend_name}/%{NOTSPACE:server_name} %{INT:time_queue}/%{INT:time_backend_connect}/%{NOTSPACE:time_duration} %{NOTSPACE:bytes_read} %{NOTSPACE:termination_state} %{INT:actconn}/%{INT:feconn}/%{INT:beconn}/%{INT:srvconn}/%{NOTSPACE:retries} %{INT:srv_queue}/%{INT:backend_queue} 40 | -------------------------------------------------------------------------------- /patterns/httpd.pattern: -------------------------------------------------------------------------------- 1 | HTTPDUSER %{EMAILADDRESS}|%{USER} 2 | HTTPDERROR_DATE %{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{YEAR} 3 | 4 | # Log formats 5 | HTTPD_COMMONLOG %{IPORHOST:clientip} %{HTTPDUSER:ident} %{HTTPDUSER:auth} \[%{HTTPDATE:timestamp}\] "(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest})" %{NUMBER:response} (?:%{NUMBER:bytes}|-) 6 | HTTPD_COMBINEDLOG %{HTTPD_COMMONLOG} %{QS:referrer} %{QS:agent} 7 | 8 | # Error logs 9 | HTTPD20_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{LOGLEVEL:loglevel}\] (?:\[client %{IPORHOST:clientip}\] ){0,1}%{GREEDYDATA:message} 10 | HTTPD24_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{WORD:module}:%{LOGLEVEL:loglevel}\] \[pid %{POSINT:pid}(:tid %{NUMBER:tid})?\]( \(%{POSINT:proxy_errorcode}\)%{DATA:proxy_message}:)?( \[client %{IPORHOST:clientip}:%{POSINT:clientport}\])?( %{DATA:errorcode}:)? %{GREEDYDATA:message} 11 | HTTPD_ERRORLOG %{HTTPD20_ERRORLOG}|%{HTTPD24_ERRORLOG} 12 | 13 | # Deprecated 14 | COMMONAPACHELOG %{HTTPD_COMMONLOG} 15 | COMBINEDAPACHELOG %{HTTPD_COMBINEDLOG} 16 | -------------------------------------------------------------------------------- /patterns/java.pattern: -------------------------------------------------------------------------------- 1 | JAVACLASS (?:[a-zA-Z$_][a-zA-Z$_0-9]*\.)*[a-zA-Z$_][a-zA-Z$_0-9]* 2 | #Space is an allowed character to match special cases like 'Native Method' or 'Unknown Source' 3 | JAVAFILE (?:[A-Za-z0-9_. -]+) 4 | #Allow special <init>, <clinit> methods 5 | JAVAMETHOD (?:(<(?:cl)?init>)|[a-zA-Z$_][a-zA-Z$_0-9]*) 6 | #Line number is optional in special cases 'Native method' or 'Unknown source' 7 | JAVASTACKTRACEPART %{SPACE}at %{JAVACLASS:class}\.%{JAVAMETHOD:method}\(%{JAVAFILE:file}(?::%{NUMBER:line})?\) 8 | # Java Logs 9 | JAVATHREAD (?:[A-Z]{2}-Processor[\d]+) 10 | JAVACLASS (?:[a-zA-Z0-9-]+\.)+[A-Za-z0-9$]+ 11 | JAVAFILE (?:[A-Za-z0-9_.-]+) 12 | JAVALOGMESSAGE (.*) 13 | # MMM dd, yyyy HH:mm:ss eg: Jan 9, 2014 7:13:13 AM 14 | CATALINA_DATESTAMP %{MONTH} %{MONTHDAY}, 20%{YEAR} %{HOUR}:?%{MINUTE}(?::?%{SECOND}) (?:AM|PM) 15 | # yyyy-MM-dd HH:mm:ss,SSS ZZZ eg: 2014-01-09 17:32:25,527 -0800 16 | TOMCAT_DATESTAMP 20%{YEAR}-%{MONTHNUM}-%{MONTHDAY} %{HOUR}:?%{MINUTE}(?::?%{SECOND}) %{ISO8601_TIMEZONE} 17 | CATALINALOG %{CATALINA_DATESTAMP:timestamp} %{JAVACLASS:class} %{JAVALOGMESSAGE:logmessage} 18 | # 2014-01-09 20:03:28,269 -0800 | ERROR | com.example.service.ExampleService - something completely unexpected happened...
19 | TOMCATLOG %{TOMCAT_DATESTAMP:timestamp} \| %{LOGLEVEL:level} \| %{JAVACLASS:class} - %{JAVALOGMESSAGE:logmessage} 20 | -------------------------------------------------------------------------------- /patterns/junos.pattern: -------------------------------------------------------------------------------- 1 | # JUNOS 11.4 RT_FLOW patterns 2 | RT_FLOW_EVENT (RT_FLOW_SESSION_CREATE|RT_FLOW_SESSION_CLOSE|RT_FLOW_SESSION_DENY) 3 | 4 | RT_FLOW1 %{RT_FLOW_EVENT:event}: %{GREEDYDATA:close-reason}: %{IP:src-ip}/%{INT:src-port}->%{IP:dst-ip}/%{INT:dst-port} %{DATA:service} %{IP:nat-src-ip}/%{INT:nat-src-port}->%{IP:nat-dst-ip}/%{INT:nat-dst-port} %{DATA:src-nat-rule-name} %{DATA:dst-nat-rule-name} %{INT:protocol-id} %{DATA:policy-name} %{DATA:from-zone} %{DATA:to-zone} %{INT:session-id} \d+\(%{DATA:sent}\) \d+\(%{DATA:received}\) %{INT:elapsed-time} .* 5 | 6 | RT_FLOW2 %{RT_FLOW_EVENT:event}: session created %{IP:src-ip}/%{INT:src-port}->%{IP:dst-ip}/%{INT:dst-port} %{DATA:service} %{IP:nat-src-ip}/%{INT:nat-src-port}->%{IP:nat-dst-ip}/%{INT:nat-dst-port} %{DATA:src-nat-rule-name} %{DATA:dst-nat-rule-name} %{INT:protocol-id} %{DATA:policy-name} %{DATA:from-zone} %{DATA:to-zone} %{INT:session-id} .* 7 | 8 | RT_FLOW3 %{RT_FLOW_EVENT:event}: session denied %{IP:src-ip}/%{INT:src-port}->%{IP:dst-ip}/%{INT:dst-port} %{DATA:service} %{INT:protocol-id}\(\d\) %{DATA:policy-name} %{DATA:from-zone} %{DATA:to-zone} .* 9 | 10 | -------------------------------------------------------------------------------- /patterns/linux-syslog.pattern: -------------------------------------------------------------------------------- 1 | SYSLOG5424PRINTASCII [!-~]+ 2 | 3 | SYSLOGBASE2 (?:%{SYSLOGTIMESTAMP:timestamp}|%{TIMESTAMP_ISO8601:timestamp8601}) (?:%{SYSLOGFACILITY} )?%{SYSLOGHOST:logsource}+(?: %{SYSLOGPROG}:|) 4 | SYSLOGPAMSESSION %{SYSLOGBASE} (?=%{GREEDYDATA:message})%{WORD:pam_module}\(%{DATA:pam_caller}\): session %{WORD:pam_session_state} for user %{USERNAME:username}(?: by %{GREEDYDATA:pam_by})? 5 | 6 | CRON_ACTION [A-Z ]+ 7 | CRONLOG %{SYSLOGBASE} \(%{USER:user}\) %{CRON_ACTION:action} \(%{DATA:message}\) 8 | 9 | SYSLOGLINE %{SYSLOGBASE2} %{GREEDYDATA:message} 10 | 11 | # IETF 5424 syslog(8) format (see http://www.rfc-editor.org/info/rfc5424) 12 | SYSLOG5424PRI <%{NONNEGINT:syslog5424_pri}> 13 | SYSLOG5424SD \[%{DATA}\]+ 14 | SYSLOG5424BASE %{SYSLOG5424PRI}%{NONNEGINT:syslog5424_ver} +(?:%{TIMESTAMP_ISO8601:syslog5424_ts}|-) +(?:%{IPORHOST:syslog5424_host}|-) +(-|%{SYSLOG5424PRINTASCII:syslog5424_app}) +(-|%{SYSLOG5424PRINTASCII:syslog5424_proc}) +(-|%{SYSLOG5424PRINTASCII:syslog5424_msgid}) +(?:%{SYSLOG5424SD:syslog5424_sd}|-|) 15 | 16 | SYSLOG5424LINE %{SYSLOG5424BASE} +%{GREEDYDATA:syslog5424_msg} 17 | -------------------------------------------------------------------------------- /patterns/maven.pattern: -------------------------------------------------------------------------------- 1 | MAVEN_VERSION (?:(\d+)\.)?(?:(\d+)\.)?(\*|\d+)(?:[.-](RELEASE|SNAPSHOT))? 2 | -------------------------------------------------------------------------------- /patterns/mcollective.pattern: -------------------------------------------------------------------------------- 1 | # Remember, these can be multi-line events. 
2 | MCOLLECTIVE ., \[%{TIMESTAMP_ISO8601:timestamp} #%{POSINT:pid}\]%{SPACE}%{LOGLEVEL:event_level} 3 | 4 | MCOLLECTIVEAUDIT %{TIMESTAMP_ISO8601:timestamp}: -------------------------------------------------------------------------------- /patterns/mongodb.pattern: -------------------------------------------------------------------------------- 1 | MONGO_LOG %{SYSLOGTIMESTAMP:timestamp} \[%{WORD:component}\] %{GREEDYDATA:message} 2 | MONGO_QUERY \{ (?<={ ).*(?= } ntoreturn:) \} 3 | MONGO_SLOWQUERY %{WORD} %{MONGO_WORDDASH:database}\.%{MONGO_WORDDASH:collection} %{WORD}: %{MONGO_QUERY:query} %{WORD}:%{NONNEGINT:ntoreturn} %{WORD}:%{NONNEGINT:ntoskip} %{WORD}:%{NONNEGINT:nscanned}.*nreturned:%{NONNEGINT:nreturned}..+ (?<duration>[0-9]+)ms 4 | MONGO_WORDDASH \b[\w-]+\b 5 | MONGO3_SEVERITY \w 6 | MONGO3_COMPONENT %{WORD}|- 7 | MONGO3_LOG %{TIMESTAMP_ISO8601:timestamp} %{MONGO3_SEVERITY:severity} %{MONGO3_COMPONENT:component}%{SPACE}(?:\[%{DATA:context}\])? %{GREEDYDATA:message} 8 | -------------------------------------------------------------------------------- /patterns/nagios.pattern: -------------------------------------------------------------------------------- 1 | ################################################################################## 2 | ################################################################################## 3 | # Chop Nagios log files to smithereens! 4 | # 5 | # A set of GROK filters to process logfiles generated by Nagios. 6 | # While it does not do so yet, this set intends to cover all possible Nagios logs. 7 | # 8 | # Some more work needs to be done to cover all External Commands: 9 | # http://old.nagios.org/developerinfo/externalcommands/commandlist.php 10 | # 11 | # If you need some support on these rules please contact: 12 | # Jelle Smet http://smetj.net 13 | # 14 | ################################################################################# 15 | ################################################################################# 16 | 17 | NAGIOSTIME \[%{NUMBER:nagios_epoch}\] 18 | 19 | ############################################### 20 | ######## Begin nagios log types 21 | ############################################### 22 | NAGIOS_TYPE_CURRENT_SERVICE_STATE CURRENT SERVICE STATE 23 | NAGIOS_TYPE_CURRENT_HOST_STATE CURRENT HOST STATE 24 | 25 | NAGIOS_TYPE_SERVICE_NOTIFICATION SERVICE NOTIFICATION 26 | NAGIOS_TYPE_HOST_NOTIFICATION HOST NOTIFICATION 27 | 28 | NAGIOS_TYPE_SERVICE_ALERT SERVICE ALERT 29 | NAGIOS_TYPE_HOST_ALERT HOST ALERT 30 | 31 | NAGIOS_TYPE_SERVICE_FLAPPING_ALERT SERVICE FLAPPING ALERT 32 | NAGIOS_TYPE_HOST_FLAPPING_ALERT HOST FLAPPING ALERT 33 | 34 | NAGIOS_TYPE_SERVICE_DOWNTIME_ALERT SERVICE DOWNTIME ALERT 35 | NAGIOS_TYPE_HOST_DOWNTIME_ALERT HOST DOWNTIME ALERT 36 | 37 | NAGIOS_TYPE_PASSIVE_SERVICE_CHECK PASSIVE SERVICE CHECK 38 | NAGIOS_TYPE_PASSIVE_HOST_CHECK PASSIVE HOST CHECK 39 | 40 | NAGIOS_TYPE_SERVICE_EVENT_HANDLER SERVICE EVENT HANDLER 41 | NAGIOS_TYPE_HOST_EVENT_HANDLER HOST EVENT HANDLER 42 | 43 | NAGIOS_TYPE_EXTERNAL_COMMAND EXTERNAL COMMAND 44 | NAGIOS_TYPE_TIMEPERIOD_TRANSITION TIMEPERIOD TRANSITION 45 | ############################################### 46 | ######## End nagios log types 47 | ############################################### 48 | 49 | ############################################### 50 | ######## Begin external check types 51 | ############################################### 52 | NAGIOS_EC_DISABLE_SVC_CHECK DISABLE_SVC_CHECK 53 | NAGIOS_EC_ENABLE_SVC_CHECK ENABLE_SVC_CHECK 54 | NAGIOS_EC_DISABLE_HOST_CHECK DISABLE_HOST_CHECK 55 |
NAGIOS_EC_ENABLE_HOST_CHECK ENABLE_HOST_CHECK 56 | NAGIOS_EC_PROCESS_SERVICE_CHECK_RESULT PROCESS_SERVICE_CHECK_RESULT 57 | NAGIOS_EC_PROCESS_HOST_CHECK_RESULT PROCESS_HOST_CHECK_RESULT 58 | NAGIOS_EC_SCHEDULE_SERVICE_DOWNTIME SCHEDULE_SERVICE_DOWNTIME 59 | NAGIOS_EC_SCHEDULE_HOST_DOWNTIME SCHEDULE_HOST_DOWNTIME 60 | NAGIOS_EC_DISABLE_HOST_SVC_NOTIFICATIONS DISABLE_HOST_SVC_NOTIFICATIONS 61 | NAGIOS_EC_ENABLE_HOST_SVC_NOTIFICATIONS ENABLE_HOST_SVC_NOTIFICATIONS 62 | NAGIOS_EC_DISABLE_HOST_NOTIFICATIONS DISABLE_HOST_NOTIFICATIONS 63 | NAGIOS_EC_ENABLE_HOST_NOTIFICATIONS ENABLE_HOST_NOTIFICATIONS 64 | NAGIOS_EC_DISABLE_SVC_NOTIFICATIONS DISABLE_SVC_NOTIFICATIONS 65 | NAGIOS_EC_ENABLE_SVC_NOTIFICATIONS ENABLE_SVC_NOTIFICATIONS 66 | ############################################### 67 | ######## End external check types 68 | ############################################### 69 | NAGIOS_WARNING Warning:%{SPACE}%{GREEDYDATA:nagios_message} 70 | 71 | NAGIOS_CURRENT_SERVICE_STATE %{NAGIOS_TYPE_CURRENT_SERVICE_STATE:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_service};%{DATA:nagios_state};%{DATA:nagios_statetype};%{DATA:nagios_statecode};%{GREEDYDATA:nagios_message} 72 | NAGIOS_CURRENT_HOST_STATE %{NAGIOS_TYPE_CURRENT_HOST_STATE:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_state};%{DATA:nagios_statetype};%{DATA:nagios_statecode};%{GREEDYDATA:nagios_message} 73 | 74 | NAGIOS_SERVICE_NOTIFICATION %{NAGIOS_TYPE_SERVICE_NOTIFICATION:nagios_type}: %{DATA:nagios_notifyname};%{DATA:nagios_hostname};%{DATA:nagios_service};%{DATA:nagios_state};%{DATA:nagios_contact};%{GREEDYDATA:nagios_message} 75 | NAGIOS_HOST_NOTIFICATION %{NAGIOS_TYPE_HOST_NOTIFICATION:nagios_type}: %{DATA:nagios_notifyname};%{DATA:nagios_hostname};%{DATA:nagios_state};%{DATA:nagios_contact};%{GREEDYDATA:nagios_message} 76 | 77 | NAGIOS_SERVICE_ALERT %{NAGIOS_TYPE_SERVICE_ALERT:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_service};%{DATA:nagios_state};%{DATA:nagios_statelevel};%{NUMBER:nagios_attempt};%{GREEDYDATA:nagios_message} 78 | NAGIOS_HOST_ALERT %{NAGIOS_TYPE_HOST_ALERT:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_state};%{DATA:nagios_statelevel};%{NUMBER:nagios_attempt};%{GREEDYDATA:nagios_message} 79 | 80 | NAGIOS_SERVICE_FLAPPING_ALERT %{NAGIOS_TYPE_SERVICE_FLAPPING_ALERT:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_service};%{DATA:nagios_state};%{GREEDYDATA:nagios_message} 81 | NAGIOS_HOST_FLAPPING_ALERT %{NAGIOS_TYPE_HOST_FLAPPING_ALERT:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_state};%{GREEDYDATA:nagios_message} 82 | 83 | NAGIOS_SERVICE_DOWNTIME_ALERT %{NAGIOS_TYPE_SERVICE_DOWNTIME_ALERT:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_service};%{DATA:nagios_state};%{GREEDYDATA:nagios_comment} 84 | NAGIOS_HOST_DOWNTIME_ALERT %{NAGIOS_TYPE_HOST_DOWNTIME_ALERT:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_state};%{GREEDYDATA:nagios_comment} 85 | 86 | NAGIOS_PASSIVE_SERVICE_CHECK %{NAGIOS_TYPE_PASSIVE_SERVICE_CHECK:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_service};%{DATA:nagios_state};%{GREEDYDATA:nagios_comment} 87 | NAGIOS_PASSIVE_HOST_CHECK %{NAGIOS_TYPE_PASSIVE_HOST_CHECK:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_state};%{GREEDYDATA:nagios_comment} 88 | 89 | NAGIOS_SERVICE_EVENT_HANDLER %{NAGIOS_TYPE_SERVICE_EVENT_HANDLER:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_service};%{DATA:nagios_state};%{DATA:nagios_statelevel};%{DATA:nagios_event_handler_name} 90 | NAGIOS_HOST_EVENT_HANDLER %{NAGIOS_TYPE_HOST_EVENT_HANDLER:nagios_type}: 
%{DATA:nagios_hostname};%{DATA:nagios_state};%{DATA:nagios_statelevel};%{DATA:nagios_event_handler_name} 91 | 92 | NAGIOS_TIMEPERIOD_TRANSITION %{NAGIOS_TYPE_TIMEPERIOD_TRANSITION:nagios_type}: %{DATA:nagios_service};%{DATA:nagios_unknown1};%{DATA:nagios_unknown2} 93 | 94 | #################### 95 | #### External checks 96 | #################### 97 | 98 | #Disable host & service check 99 | NAGIOS_EC_LINE_DISABLE_SVC_CHECK %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_DISABLE_SVC_CHECK:nagios_command};%{DATA:nagios_hostname};%{DATA:nagios_service} 100 | NAGIOS_EC_LINE_DISABLE_HOST_CHECK %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_DISABLE_HOST_CHECK:nagios_command};%{DATA:nagios_hostname} 101 | 102 | #Enable host & service check 103 | NAGIOS_EC_LINE_ENABLE_SVC_CHECK %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_ENABLE_SVC_CHECK:nagios_command};%{DATA:nagios_hostname};%{DATA:nagios_service} 104 | NAGIOS_EC_LINE_ENABLE_HOST_CHECK %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_ENABLE_HOST_CHECK:nagios_command};%{DATA:nagios_hostname} 105 | 106 | #Process host & service check 107 | NAGIOS_EC_LINE_PROCESS_SERVICE_CHECK_RESULT %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_PROCESS_SERVICE_CHECK_RESULT:nagios_command};%{DATA:nagios_hostname};%{DATA:nagios_service};%{DATA:nagios_state};%{GREEDYDATA:nagios_check_result} 108 | NAGIOS_EC_LINE_PROCESS_HOST_CHECK_RESULT %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_PROCESS_HOST_CHECK_RESULT:nagios_command};%{DATA:nagios_hostname};%{DATA:nagios_state};%{GREEDYDATA:nagios_check_result} 109 | 110 | #Disable host & service notifications 111 | NAGIOS_EC_LINE_DISABLE_HOST_SVC_NOTIFICATIONS %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_DISABLE_HOST_SVC_NOTIFICATIONS:nagios_command};%{GREEDYDATA:nagios_hostname} 112 | NAGIOS_EC_LINE_DISABLE_HOST_NOTIFICATIONS %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_DISABLE_HOST_NOTIFICATIONS:nagios_command};%{GREEDYDATA:nagios_hostname} 113 | NAGIOS_EC_LINE_DISABLE_SVC_NOTIFICATIONS %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_DISABLE_SVC_NOTIFICATIONS:nagios_command};%{DATA:nagios_hostname};%{GREEDYDATA:nagios_service} 114 | 115 | #Enable host & service notifications 116 | NAGIOS_EC_LINE_ENABLE_HOST_SVC_NOTIFICATIONS %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_ENABLE_HOST_SVC_NOTIFICATIONS:nagios_command};%{GREEDYDATA:nagios_hostname} 117 | NAGIOS_EC_LINE_ENABLE_HOST_NOTIFICATIONS %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_ENABLE_HOST_NOTIFICATIONS:nagios_command};%{GREEDYDATA:nagios_hostname} 118 | NAGIOS_EC_LINE_ENABLE_SVC_NOTIFICATIONS %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_ENABLE_SVC_NOTIFICATIONS:nagios_command};%{DATA:nagios_hostname};%{GREEDYDATA:nagios_service} 119 | 120 | #Schedule host & service downtime 121 | NAGIOS_EC_LINE_SCHEDULE_HOST_DOWNTIME %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_SCHEDULE_HOST_DOWNTIME:nagios_command};%{DATA:nagios_hostname};%{NUMBER:nagios_start_time};%{NUMBER:nagios_end_time};%{NUMBER:nagios_fixed};%{NUMBER:nagios_trigger_id};%{NUMBER:nagios_duration};%{DATA:author};%{DATA:comment} 122 | 123 | #End matching line 124 | NAGIOSLOGLINE %{NAGIOSTIME} 
(?:%{NAGIOS_WARNING}|%{NAGIOS_CURRENT_SERVICE_STATE}|%{NAGIOS_CURRENT_HOST_STATE}|%{NAGIOS_SERVICE_NOTIFICATION}|%{NAGIOS_HOST_NOTIFICATION}|%{NAGIOS_SERVICE_ALERT}|%{NAGIOS_HOST_ALERT}|%{NAGIOS_SERVICE_FLAPPING_ALERT}|%{NAGIOS_HOST_FLAPPING_ALERT}|%{NAGIOS_SERVICE_DOWNTIME_ALERT}|%{NAGIOS_HOST_DOWNTIME_ALERT}|%{NAGIOS_PASSIVE_SERVICE_CHECK}|%{NAGIOS_PASSIVE_HOST_CHECK}|%{NAGIOS_SERVICE_EVENT_HANDLER}|%{NAGIOS_HOST_EVENT_HANDLER}|%{NAGIOS_TIMEPERIOD_TRANSITION}|%{NAGIOS_EC_LINE_DISABLE_SVC_CHECK}|%{NAGIOS_EC_LINE_ENABLE_SVC_CHECK}|%{NAGIOS_EC_LINE_DISABLE_HOST_CHECK}|%{NAGIOS_EC_LINE_ENABLE_HOST_CHECK}|%{NAGIOS_EC_LINE_PROCESS_HOST_CHECK_RESULT}|%{NAGIOS_EC_LINE_PROCESS_SERVICE_CHECK_RESULT}|%{NAGIOS_EC_LINE_SCHEDULE_HOST_DOWNTIME}|%{NAGIOS_EC_LINE_DISABLE_HOST_SVC_NOTIFICATIONS}|%{NAGIOS_EC_LINE_ENABLE_HOST_SVC_NOTIFICATIONS}|%{NAGIOS_EC_LINE_DISABLE_HOST_NOTIFICATIONS}|%{NAGIOS_EC_LINE_ENABLE_HOST_NOTIFICATIONS}|%{NAGIOS_EC_LINE_DISABLE_SVC_NOTIFICATIONS}|%{NAGIOS_EC_LINE_ENABLE_SVC_NOTIFICATIONS}) 125 | -------------------------------------------------------------------------------- /patterns/postgresql.pattern: -------------------------------------------------------------------------------- 1 | # Default postgresql pg_log format pattern 2 | POSTGRESQL %{DATESTAMP:timestamp} %{TZ} %{DATA:user_id} %{GREEDYDATA:connection_id} %{POSINT:pid} 3 | 4 | -------------------------------------------------------------------------------- /patterns/rails.pattern: -------------------------------------------------------------------------------- 1 | RUUID \h{32} 2 | # rails controller with action 3 | RCONTROLLER (?<controller>[^#]+)#(?<action>\w+) 4 | 5 | # this will often be the only line: 6 | RAILS3HEAD (?m)Started %{WORD:verb} "%{URIPATHPARAM:request}" for %{IPORHOST:clientip} at (?<timestamp>%{YEAR}-%{MONTHNUM}-%{MONTHDAY} %{HOUR}:%{MINUTE}:%{SECOND} %{ISO8601_TIMEZONE}) 7 | # for some strange reason, params are stripped of {} - not sure that's a good idea. 8 | RPROCESSING \W*Processing by %{RCONTROLLER} as (?<format>\S+)(?:\W*Parameters: {%{DATA:params}}\W*)? 9 | RAILS3FOOT Completed %{NUMBER:response}%{DATA} in %{NUMBER:totalms}ms %{RAILS3PROFILE}%{GREEDYDATA} 10 | RAILS3PROFILE (?:\(Views: %{NUMBER:viewms}ms \| ActiveRecord: %{NUMBER:activerecordms}ms|\(ActiveRecord: %{NUMBER:activerecordms}ms)? 11 | 12 | # putting it all together 13 | RAILS3 %{RAILS3HEAD}(?:%{RPROCESSING})?(?<context>(?:%{DATA}\n)*)(?:%{RAILS3FOOT})?
14 | -------------------------------------------------------------------------------- /patterns/redis.pattern: -------------------------------------------------------------------------------- 1 | REDISTIMESTAMP %{MONTHDAY} %{MONTH} %{TIME} 2 | REDISLOG \[%{POSINT:pid}\] %{REDISTIMESTAMP:timestamp} \* 3 | REDISMONLOG %{NUMBER:timestamp} \[%{INT:database} %{IP:client}:%{NUMBER:port}\] "%{WORD:command}"\s?%{GREEDYDATA:params} 4 | -------------------------------------------------------------------------------- /patterns/ruby.pattern: -------------------------------------------------------------------------------- 1 | RUBY_LOGLEVEL (?:DEBUG|FATAL|ERROR|WARN|INFO) 2 | RUBY_LOGGER [DFEWI], \[%{TIMESTAMP_ISO8601:timestamp} #%{POSINT:pid}\] *%{RUBY_LOGLEVEL:loglevel} -- +%{DATA:progname}: %{GREEDYDATA:message} 3 | -------------------------------------------------------------------------------- /patterns/squid.pattern: -------------------------------------------------------------------------------- 1 | # Pattern squid3 2 | # Documentation of squid3 logs formats can be found at the following link: 3 | # http://wiki.squid-cache.org/Features/LogFormat 4 | SQUID3 %{NUMBER:timestamp}\s+%{NUMBER:duration}\s%{IP:client_address}\s%{WORD:cache_result}/%{POSINT:status_code}\s%{NUMBER:bytes}\s%{WORD:request_method}\s%{NOTSPACE:url}\s(%{NOTSPACE:user}|-)\s%{WORD:hierarchy_code}/%{IPORHOST:server}\s%{NOTSPACE:content_type} 5 | -------------------------------------------------------------------------------- /src/fancy_regex.rs: -------------------------------------------------------------------------------- 1 | use crate::Error; 2 | use fancy_regex::{Captures, Regex}; 3 | use std::collections::{btree_map, BTreeMap, HashMap}; 4 | 5 | /// The `Pattern` represents a compiled regex, ready to be matched against arbitrary text. 6 | #[derive(Debug)] 7 | pub struct FancyRegexPattern { 8 | regex: Regex, 9 | names: BTreeMap<String, usize>, 10 | } 11 | 12 | impl FancyRegexPattern { 13 | /// Creates a new pattern from a raw regex string and an alias map to identify the 14 | /// fields properly. 15 | pub(crate) fn new(regex: &str, alias: &HashMap<String, String>) -> Result<Self, Error> { 16 | match Regex::new(regex) { 17 | Ok(r) => Ok({ 18 | let mut names = BTreeMap::new(); 19 | for (i, name) in r.capture_names().enumerate() { 20 | if let Some(name) = name { 21 | let name = match alias.iter().find(|&(_k, v)| *v == name) { 22 | Some(item) => item.0.clone(), 23 | None => String::from(name), 24 | }; 25 | names.insert(name, i); 26 | } 27 | } 28 | Self { regex: r, names } 29 | }), 30 | Err(e) => Err(Error::RegexCompilationFailed(format!( 31 | "Regex compilation failed: {e:?}:\n{regex}" 32 | ))), 33 | } 34 | } 35 | 36 | /// Matches this compiled `Pattern` against the text and returns the matches. 37 | pub fn match_against<'a>(&'a self, text: &'a str) -> Option<FancyRegexMatches<'a>> { 38 | self.regex.captures(text).ok().flatten().and_then(|caps| { 39 | Some(FancyRegexMatches { 40 | captures: caps, 41 | pattern: self, 42 | }) 43 | }) 44 | } 45 | 46 | /// Returns all names this `Pattern` captures. 47 | pub fn capture_names(&self) -> impl Iterator<Item = &str> { 48 | self.names.keys().map(|s| s.as_str()) 49 | } 50 | } 51 | 52 | /// The `Matches` represent matched results from a `Pattern` against a provided text. 53 | #[derive(Debug)] 54 | pub struct FancyRegexMatches<'a> { 55 | captures: Captures<'a>, 56 | pattern: &'a FancyRegexPattern, 57 | } 58 | 59 | impl<'a> FancyRegexMatches<'a> { 60 | /// Gets the value for the name (or) alias if found, `None` otherwise.
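/// 
/// A minimal illustrative lookup, assuming `pattern` was compiled from `%{USERNAME:user}` as in the crate's tests:
/// 
/// ```rs
/// let matches = pattern.match_against("root").expect("No matches found!");
/// assert_eq!(Some("root"), matches.get("user"));
/// assert_eq!(None, matches.get("unknown"));
/// ```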
61 | pub fn get(&self, name_or_alias: &str) -> Option<&str> { 62 | self.pattern 63 | .names 64 | .get(name_or_alias) 65 | .and_then(|&idx| self.captures.get(idx)) 66 | .map(|m| m.as_str()) 67 | } 68 | 69 | /// Returns the number of matches. 70 | pub fn len(&self) -> usize { 71 | self.pattern.names.len() 72 | } 73 | 74 | /// Returns true if there are no matches, false otherwise. 75 | pub fn is_empty(&self) -> bool { 76 | self.len() == 0 77 | } 78 | 79 | /// Returns a tuple of key/value with all the matches found. 80 | /// 81 | /// Note that if no match is found, the value is empty. 82 | pub fn iter(&'a self) -> FancyRegexMatchesIter<'a> { 83 | FancyRegexMatchesIter { 84 | captures: &self.captures, 85 | names: self.pattern.names.iter(), 86 | } 87 | } 88 | } 89 | 90 | impl<'a> IntoIterator for &'a FancyRegexMatches<'a> { 91 | type Item = (&'a str, &'a str); 92 | type IntoIter = FancyRegexMatchesIter<'a>; 93 | 94 | fn into_iter(self) -> Self::IntoIter { 95 | self.iter() 96 | } 97 | } 98 | 99 | /// An `Iterator` over all matches, accessible via `Matches`. 100 | pub struct FancyRegexMatchesIter<'a> { 101 | captures: &'a Captures<'a>, 102 | names: btree_map::Iter<'a, String, usize>, 103 | } 104 | 105 | impl<'a> Iterator for FancyRegexMatchesIter<'a> { 106 | type Item = (&'a str, &'a str); 107 | 108 | fn next(&mut self) -> Option<Self::Item> { 109 | for (k, &v) in self.names.by_ref() { 110 | if let Some(m) = self.captures.get(v) { 111 | return Some((k.as_str(), m.as_str())); 112 | } 113 | } 114 | None 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! The `grok` library allows you to quickly parse and match potentially unstructured data 2 | //! into a structured result. It is especially helpful when parsing logfiles of all kinds. This 3 | //! [Rust](http://rust-lang.org) version is mainly a port from the 4 | //! [java version](https://github.com/thekrakken/java-grok) 5 | //! which in turn drew inspiration from the original 6 | //! [ruby version](https://github.com/logstash-plugins/logstash-filter-grok). 7 | #![doc(html_root_url = "https://docs.rs/grok/2.0.0")] 8 | 9 | include!(concat!(env!("OUT_DIR"), "/default_patterns.rs")); 10 | 11 | use std::collections::{BTreeMap, HashMap}; 12 | use std::error::Error as StdError; 13 | use std::fmt; 14 | 15 | #[cfg(feature = "fancy-regex")] 16 | mod fancy_regex; 17 | #[cfg(feature = "onig")] 18 | mod onig; 19 | #[cfg(feature = "pcre2")] 20 | mod pcre2; 21 | #[cfg(feature = "regex")] 22 | mod regex; 23 | 24 | // Enable features in the following preferred order. If multiple features are 25 | // enabled, the first one in the list is used. 26 | 27 | // 0. pcre2 28 | // 1. fancy-regex 29 | // 2. onig 30 | // 3.
regex 31 | 32 | #[cfg(feature = "pcre2")] 33 | pub use pcre2::{ 34 | Pcre2Matches as Matches, Pcre2MatchesIter as MatchesIter, Pcre2Pattern as Pattern, 35 | }; 36 | 37 | #[cfg(all(not(feature = "pcre2"), feature = "fancy-regex"))] 38 | pub use fancy_regex::{ 39 | FancyRegexMatches as Matches, FancyRegexMatchesIter as MatchesIter, 40 | FancyRegexPattern as Pattern, 41 | }; 42 | 43 | #[cfg(all(not(feature = "pcre2"), not(feature = "fancy-regex"), feature = "onig"))] 44 | pub use onig::{OnigMatches as Matches, OnigMatchesIter as MatchesIter, OnigPattern as Pattern}; 45 | 46 | #[cfg(all( 47 | not(feature = "pcre2"), 48 | not(feature = "fancy-regex"), 49 | not(feature = "onig"), 50 | feature = "regex" 51 | ))] 52 | pub use regex::{ 53 | RegexMatches as Matches, RegexMatchesIter as MatchesIter, RegexPattern as Pattern, 54 | }; 55 | 56 | #[cfg(all( 57 | not(feature = "onig"), 58 | not(feature = "fancy-regex"), 59 | not(feature = "regex"), 60 | not(feature = "pcre2") 61 | ))] 62 | compile_error!("No regex engine selected. Please enable one of the following features: pcre2, fancy-regex, onig, regex"); 63 | 64 | const MAX_RECURSION: usize = 1024; 65 | 66 | const GROK_PATTERN: &str = r"%\{(?<name>(?<pattern>[A-z0-9]+)(?::(?<alias>[A-z0-9_:;\/\s\.]+))?)(?:=(?<definition>(?:(?:[^{}]+|\.+)+)+))?\}"; 67 | const NAME_INDEX: usize = 1; 68 | const PATTERN_INDEX: usize = 2; 69 | const ALIAS_INDEX: usize = 3; 70 | const DEFINITION_INDEX: usize = 4; 71 | 72 | /// Returns the default patterns, also used by the default constructor of `Grok`. 73 | pub fn patterns<'a>() -> &'a [(&'a str, &'a str)] { 74 | PATTERNS 75 | } 76 | 77 | /// The `Grok` struct is the main entry point into using this library. 78 | #[derive(Debug)] 79 | pub struct Grok { 80 | patterns: BTreeMap<String, String>, 81 | } 82 | 83 | impl Grok { 84 | /// Creates a new `Grok` instance with no patterns. 85 | pub fn empty() -> Self { 86 | Grok { 87 | patterns: BTreeMap::new(), 88 | } 89 | } 90 | 91 | /// Creates a new `Grok` instance and loads all the default patterns. 92 | pub fn with_default_patterns() -> Self { 93 | let mut grok = Grok::empty(); 94 | for &(key, value) in PATTERNS { 95 | grok.add_pattern(String::from(key), String::from(value)); 96 | } 97 | grok 98 | } 99 | 100 | /// Adds a custom pattern. 101 | pub fn add_pattern<S: Into<String>>(&mut self, name: S, pattern: S) { 102 | self.patterns.insert(name.into(), pattern.into()); 103 | } 104 | 105 | /// Compiles the given pattern, making it ready for matching.
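/// 
/// When `with_alias_only` is `true`, only sub-patterns that carry an explicit alias (e.g. `%{USERNAME:user}`) are exposed as named matches; anonymous sub-patterns still have to match, but they are wrapped in non-capturing groups and are not reported.
/// 
/// A minimal sketch of the usual flow, mirroring the crate's tests:
/// 
/// ```rs
/// let mut grok = Grok::with_default_patterns();
/// let pattern = grok.compile("%{USERNAME:user}", false).expect("Error while compiling!");
/// let matches = pattern.match_against("root").expect("No matches found!");
/// assert_eq!(Some("root"), matches.get("user"));
/// ```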
106 | pub fn compile(&mut self, pattern: &str, with_alias_only: bool) -> Result<Pattern, Error> { 107 | let mut named_regex = String::from(pattern); 108 | let mut alias: HashMap<String, String> = HashMap::new(); 109 | 110 | let mut index = 0; 111 | let mut iteration_left = MAX_RECURSION; 112 | let mut continue_iteration = true; 113 | 114 | let grok_regex = match ::onig::Regex::new(GROK_PATTERN) { 115 | Ok(r) => r, 116 | Err(_) => return Err(Error::RegexCompilationFailed(GROK_PATTERN.into())), 117 | }; 118 | 119 | while continue_iteration { 120 | continue_iteration = false; 121 | if iteration_left == 0 { 122 | return Err(Error::RecursionTooDeep); 123 | } 124 | iteration_left -= 1; 125 | 126 | if let Some(m) = grok_regex.captures(&named_regex.clone()) { 127 | continue_iteration = true; 128 | let raw_pattern = match m.at(PATTERN_INDEX) { 129 | Some(p) => p, 130 | None => { 131 | return Err(Error::GenericCompilationFailure( 132 | "Could not find pattern in matches".into(), 133 | )) 134 | } 135 | }; 136 | 137 | let mut name = match m.at(NAME_INDEX) { 138 | Some(n) => String::from(n), 139 | None => { 140 | return Err(Error::GenericCompilationFailure( 141 | "Could not find name in matches".into(), 142 | )) 143 | } 144 | }; 145 | 146 | if let Some(definition) = m.at(DEFINITION_INDEX) { 147 | self.add_pattern(raw_pattern, definition); 148 | name = format!("{}={}", name, definition); 149 | } 150 | 151 | // Since a pattern with a given name can show up more than once, we need to 152 | // loop through the number of matches found and apply the transformations 153 | // on each of them. 154 | for _ in 0..named_regex.matches(&format!("%{{{}}}", name)).count() { 155 | // Check if we have a definition for the raw pattern key and fail quickly 156 | // if not. 157 | let pattern_definition = match self.patterns.get(raw_pattern) { 158 | Some(d) => d, 159 | None => return Err(Error::DefinitionNotFound(raw_pattern.into())), 160 | }; 161 | 162 | // If `with_alias_only` is set and no alias was given, the 163 | // replacement is a plain non-capturing group, so the regex engine 164 | // does not report the match. Otherwise, the definition is turned 165 | // into a named capture group that the engine understands. 166 | 167 | let replacement = if with_alias_only && m.at(ALIAS_INDEX).is_none() { 168 | format!("(?:{})", pattern_definition) 169 | } else { 170 | // If an alias is specified by the user, use it for the 171 | // name conversion, otherwise just use 172 | // the name of the pattern definition directly. 173 | alias.insert( 174 | match m.at(ALIAS_INDEX) { 175 | Some(a) => a.into(), 176 | None => name.clone(), 177 | }, 178 | format!("name{}", index), 179 | ); 180 | 181 | format!("(?<name{}>{})", index, pattern_definition) 182 | }; 183 | 184 | // Finally, look for the original %{...} style pattern and 185 | // replace it with our replacement (only the first occurrence 186 | // since we are iterating one by one). 187 | named_regex = named_regex.replacen(&format!("%{{{}}}", name), &replacement, 1); 188 | 189 | index += 1; 190 | } 191 | } 192 | } 193 | 194 | if named_regex.is_empty() { 195 | Err(Error::CompiledPatternIsEmpty(pattern.into())) 196 | } else { 197 | Pattern::new(&named_regex, &alias) 198 | } 199 | } 200 | } 201 | 202 | /// The Default implementation for Grok which will load the default patterns. 203 | impl Default for Grok { 204 | fn default() -> Grok { 205 | Grok::with_default_patterns() 206 | } 207 | } 208 | 209 | /// Allows a `Grok` instance to be initialized from an iterator of patterns.
210 | /// 211 | /// Example: 212 | /// ```rs 213 | /// let patterns = [("USERNAME", r"[a-zA-Z0-9._-]+")]; 214 | /// let mut grok = Grok::from_iter(patterns.into_iter()); 215 | /// ``` 216 | impl<S: Into<String>> FromIterator<(S, S)> for Grok { 217 | fn from_iter<I: IntoIterator<Item = (S, S)>>(iter: I) -> Self { 218 | let mut grok = Grok::empty(); 219 | for (k, v) in iter { 220 | grok.add_pattern(k, v); 221 | } 222 | grok 223 | } 224 | } 225 | 226 | /// Allows a `Grok` instance to be constructed directly from an array of patterns. 227 | /// 228 | /// Example: 229 | /// ```rs 230 | /// let mut grok = Grok::from([("USERNAME", r"[a-zA-Z0-9._-]+")]); 231 | /// ``` 232 | impl<S: Into<String>, const N: usize> From<[(S, S); N]> for Grok { 233 | fn from(arr: [(S, S); N]) -> Self { 234 | Self::from_iter(arr) 235 | } 236 | } 237 | 238 | /// Errors that can occur when using this library. 239 | #[derive(Clone, Debug, Eq, PartialEq)] 240 | #[non_exhaustive] 241 | pub enum Error { 242 | /// The recursion while compiling has exhausted the limit. 243 | RecursionTooDeep, 244 | /// After compiling, the resulting compiled regex pattern is empty. 245 | CompiledPatternIsEmpty(String), 246 | /// A corresponding pattern definition could not be found for the given name. 247 | DefinitionNotFound(String), 248 | /// If the compilation for a specific regex in the underlying engine failed. 249 | RegexCompilationFailed(String), 250 | /// Something went wrong during the compilation phase. 251 | GenericCompilationFailure(String), 252 | } 253 | 254 | impl StdError for Error { 255 | fn description(&self) -> &str { 256 | match *self { 257 | Error::RecursionTooDeep => "compilation recursion reached the limit", 258 | Error::CompiledPatternIsEmpty(_) => "compiled pattern is empty", 259 | Error::DefinitionNotFound(_) => "pattern definition not found while compiling", 260 | Error::RegexCompilationFailed(_) => "regex compilation in the engine failed", 261 | Error::GenericCompilationFailure(_) => { 262 | "something happened during the compilation phase" 263 | } 264 | } 265 | } 266 | 267 | fn cause(&self) -> Option<&dyn StdError> { 268 | None 269 | } 270 | } 271 | 272 | impl fmt::Display for Error { 273 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 274 | match *self { 275 | Error::RecursionTooDeep => write!( 276 | f, 277 | "Recursion while compiling reached the limit of {}", 278 | MAX_RECURSION 279 | ), 280 | Error::CompiledPatternIsEmpty(ref p) => write!( 281 | f, 282 | "The given pattern \"{}\" ended up compiling into an empty regex", 283 | p 284 | ), 285 | Error::DefinitionNotFound(ref d) => write!( 286 | f, 287 | "The given pattern definition name \"{}\" could not be found in the definition map", 288 | d 289 | ), 290 | Error::RegexCompilationFailed(ref r) => write!( 291 | f, 292 | "The given regex \"{}\" failed compilation in the underlying engine", 293 | r 294 | ), 295 | Error::GenericCompilationFailure(ref d) => write!( 296 | f, 297 | "Something unexpected happened during the compilation phase: \"{}\"", 298 | d 299 | ), 300 | } 301 | } 302 | } 303 | 304 | #[cfg(test)] 305 | mod tests { 306 | 307 | use super::*; 308 | 309 | #[test] 310 | fn test_simple_anonymous_pattern() { 311 | let mut grok = Grok::empty(); 312 | grok.add_pattern("USERNAME", r"[a-zA-Z0-9._-]+"); 313 | let pattern = grok 314 | .compile("%{USERNAME}", false) 315 | .expect("Error while compiling!"); 316 | 317 | let matches = pattern.match_against("root").expect("No matches found!"); 318 | assert_eq!("root", matches.get("USERNAME").unwrap()); 319 | assert_eq!(1, matches.len()); 320 | let matches = pattern 321 |
.match_against("john doe") 322 | .expect("No matches found!"); 323 | assert_eq!("john", matches.get("USERNAME").unwrap()); 324 | assert_eq!(1, matches.len()); 325 | } 326 | 327 | #[test] 328 | fn test_from_iter() { 329 | let patterns = [("USERNAME", r"[a-zA-Z0-9._-]+")]; 330 | let mut grok = Grok::from_iter(patterns.into_iter()); 331 | let pattern = grok 332 | .compile("%{USERNAME}", false) 333 | .expect("Error while compiling!"); 334 | 335 | let matches = pattern.match_against("root").expect("No matches found!"); 336 | assert_eq!("root", matches.get("USERNAME").unwrap()); 337 | assert_eq!(1, matches.len()); 338 | let matches = pattern 339 | .match_against("john doe") 340 | .expect("No matches found!"); 341 | assert_eq!("john", matches.get("USERNAME").unwrap()); 342 | assert_eq!(1, matches.len()); 343 | } 344 | 345 | #[test] 346 | fn test_from() { 347 | let mut grok = Grok::from([("USERNAME", r"[a-zA-Z0-9._-]+")]); 348 | let pattern = grok 349 | .compile("%{USERNAME}", false) 350 | .expect("Error while compiling!"); 351 | 352 | let matches = pattern.match_against("root").expect("No matches found!"); 353 | assert_eq!("root", matches.get("USERNAME").unwrap()); 354 | assert_eq!(1, matches.len()); 355 | let matches = pattern 356 | .match_against("john doe") 357 | .expect("No matches found!"); 358 | assert_eq!("john", matches.get("USERNAME").unwrap()); 359 | assert_eq!(1, matches.len()); 360 | } 361 | 362 | #[test] 363 | fn test_simple_named_pattern() { 364 | let mut grok = Grok::empty(); 365 | grok.add_pattern("USERNAME", r"[a-zA-Z0-9._-]+"); 366 | let pattern = grok 367 | .compile("%{USERNAME:usr}", false) 368 | .expect("Error while compiling!"); 369 | 370 | let matches = pattern.match_against("root").expect("No matches found!"); 371 | assert_eq!("root", matches.get("usr").unwrap()); 372 | assert_eq!(1, matches.len()); 373 | let matches = pattern 374 | .match_against("john doe") 375 | .expect("No matches found!"); 376 | assert_eq!("john", matches.get("usr").unwrap()); 377 | assert_eq!(1, matches.len()); 378 | } 379 | 380 | #[test] 381 | fn test_alias_anonymous_pattern() { 382 | let mut grok = Grok::empty(); 383 | grok.add_pattern("USERNAME", r"[a-zA-Z0-9._-]+"); 384 | grok.add_pattern("USER", r"%{USERNAME}"); 385 | let pattern = grok 386 | .compile("%{USER}", false) 387 | .expect("Error while compiling!"); 388 | 389 | let matches = pattern.match_against("root").expect("No matches found!"); 390 | assert_eq!("root", matches.get("USER").unwrap()); 391 | let matches = pattern 392 | .match_against("john doe") 393 | .expect("No matches found!"); 394 | assert_eq!("john", matches.get("USER").unwrap()); 395 | } 396 | 397 | #[test] 398 | fn test_ailas_named_pattern() { 399 | let mut grok = Grok::empty(); 400 | grok.add_pattern("USERNAME", r"[a-zA-Z0-9._-]+"); 401 | grok.add_pattern("USER", r"%{USERNAME}"); 402 | let pattern = grok 403 | .compile("%{USER:usr}", false) 404 | .expect("Error while compiling!"); 405 | 406 | let matches = pattern.match_against("root").expect("No matches found!"); 407 | assert_eq!("root", matches.get("usr").unwrap()); 408 | let matches = pattern 409 | .match_against("john doe") 410 | .expect("No matches found!"); 411 | assert_eq!("john", matches.get("usr").unwrap()); 412 | } 413 | 414 | #[test] 415 | fn test_composite_or_pattern() { 416 | let mut grok = Grok::empty(); 417 | grok.add_pattern("MAC", r"(?:%{CISCOMAC}|%{WINDOWSMAC}|%{COMMONMAC})"); 418 | grok.add_pattern("CISCOMAC", r"(?:(?:[A-Fa-f0-9]{4}\.){2}[A-Fa-f0-9]{4})"); 419 | grok.add_pattern("WINDOWSMAC", 
r"(?:(?:[A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2})"); 420 | grok.add_pattern("COMMONMAC", r"(?:(?:[A-Fa-f0-9]{2}:){5}[A-Fa-f0-9]{2})"); 421 | let pattern = grok 422 | .compile("%{MAC}", false) 423 | .expect("Error while compiling!"); 424 | 425 | let matches = pattern 426 | .match_against("5E:FF:56:A2:AF:15") 427 | .expect("No matches found!"); 428 | assert_eq!("5E:FF:56:A2:AF:15", matches.get("MAC").unwrap()); 429 | assert_eq!(4, matches.len()); 430 | let matches = pattern 431 | .match_against("hello! 5E:FF:56:A2:AF:15 what?") 432 | .expect("No matches found!"); 433 | assert_eq!("5E:FF:56:A2:AF:15", matches.get("MAC").unwrap()); 434 | assert_eq!(true, pattern.match_against("5E:FF").is_none()); 435 | } 436 | 437 | #[test] 438 | fn test_multiple_patterns() { 439 | let mut grok = Grok::empty(); 440 | grok.add_pattern("YEAR", r"(\d\d){1,2}"); 441 | grok.add_pattern("MONTH", r"\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\b"); 442 | grok.add_pattern("DAY", r"(?:Mon(?:day)?|Tue(?:sday)?|Wed(?:nesday)?|Thu(?:rsday)?|Fri(?:day)?|Sat(?:urday)?|Sun(?:day)?)"); 443 | let pattern = grok 444 | .compile("%{DAY} %{MONTH} %{YEAR}", false) 445 | .expect("Error while compiling!"); 446 | assert_eq!( 447 | pattern.capture_names().collect::>(), 448 | vec!["DAY", "MONTH", "YEAR"] 449 | ); 450 | 451 | let matches = pattern 452 | .match_against("Monday March 2012") 453 | .expect("No matches found!"); 454 | assert_eq!(matches.len(), 3); 455 | assert_eq!("Monday", matches.get("DAY").unwrap()); 456 | assert_eq!("March", matches.get("MONTH").unwrap()); 457 | assert_eq!("2012", matches.get("YEAR").unwrap()); 458 | assert_eq!(None, matches.get("unknown")); 459 | } 460 | 461 | #[test] 462 | fn test_with_alias_only() { 463 | let mut grok = Grok::empty(); 464 | grok.add_pattern("MAC", r"(?:%{CISCOMAC}|%{WINDOWSMAC}|%{COMMONMAC})"); 465 | grok.add_pattern("CISCOMAC", r"(?:(?:[A-Fa-f0-9]{4}\.){2}[A-Fa-f0-9]{4})"); 466 | grok.add_pattern("WINDOWSMAC", r"(?:(?:[A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2})"); 467 | grok.add_pattern("COMMONMAC", r"(?:(?:[A-Fa-f0-9]{2}:){5}[A-Fa-f0-9]{2})"); 468 | let pattern = grok 469 | .compile("%{MAC:macaddr}", true) 470 | .expect("Error while compiling!"); 471 | 472 | let matches = pattern 473 | .match_against("5E:FF:56:A2:AF:15") 474 | .expect("No matches found!"); 475 | assert_eq!("5E:FF:56:A2:AF:15", matches.get("macaddr").unwrap()); 476 | assert_eq!(1, matches.len()); 477 | let matches = pattern 478 | .match_against("hello! 
5E:FF:56:A2:AF:15 what?") 479 | .expect("No matches found!"); 480 | assert_eq!("5E:FF:56:A2:AF:15", matches.get("macaddr").unwrap()); 481 | assert_eq!(true, pattern.match_against("5E:FF").is_none()); 482 | } 483 | 484 | #[test] 485 | fn test_match_iterator() { 486 | let mut grok = Grok::empty(); 487 | grok.add_pattern("YEAR", r"(\d\d){1,2}"); 488 | grok.add_pattern("MONTH", r"\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\b"); 489 | grok.add_pattern("DAY", r"(?:Mon(?:day)?|Tue(?:sday)?|Wed(?:nesday)?|Thu(?:rsday)?|Fri(?:day)?|Sat(?:urday)?|Sun(?:day)?)"); 490 | grok.add_pattern("USERNAME", r"[a-zA-Z0-9._-]+"); 491 | grok.add_pattern("SPACE", r"\s*"); 492 | 493 | let pattern = grok 494 | .compile( 495 | "%{DAY:day} %{MONTH:month} %{YEAR:year}%{SPACE}%{USERNAME:user}?", 496 | true, 497 | ) 498 | .expect("Error while compiling!"); 499 | let matches = pattern 500 | .match_against("Monday March 2012") 501 | .expect("No matches found!"); 502 | assert_eq!(matches.len(), 4); 503 | let mut found = 0; 504 | for (k, v) in matches.iter() { 505 | match k { 506 | "day" => assert_eq!("Monday", v), 507 | "month" => assert_eq!("March", v), 508 | "year" => assert_eq!("2012", v), 509 | e => panic!("{:?}", e), 510 | } 511 | found += 1; 512 | } 513 | assert_eq!(3, found); 514 | } 515 | 516 | #[test] 517 | fn test_matches_into_iter() { 518 | let mut grok = Grok::empty(); 519 | grok.add_pattern("YEAR", r"(\d\d){1,2}"); 520 | grok.add_pattern("MONTH", r"\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\b"); 521 | grok.add_pattern("DAY", r"(?:Mon(?:day)?|Tue(?:sday)?|Wed(?:nesday)?|Thu(?:rsday)?|Fri(?:day)?|Sat(?:urday)?|Sun(?:day)?)"); 522 | grok.add_pattern("USERNAME", r"[a-zA-Z0-9._-]+"); 523 | grok.add_pattern("SPACE", r"\s*"); 524 | 525 | let pattern = grok 526 | .compile( 527 | "%{DAY:day} %{MONTH:month} %{YEAR:year}%{SPACE}%{USERNAME:user}?", 528 | true, 529 | ) 530 | .expect("Error while compiling!"); 531 | let matches = pattern 532 | .match_against("Monday March 2012") 533 | .expect("No matches found!"); 534 | assert_eq!(matches.len(), 4); 535 | let mut found = 0; 536 | for (k, v) in &matches { 537 | match k { 538 | "day" => assert_eq!("Monday", v), 539 | "month" => assert_eq!("March", v), 540 | "year" => assert_eq!("2012", v), 541 | e => panic!("{:?}", e), 542 | } 543 | found += 1; 544 | } 545 | assert_eq!(3, found); 546 | } 547 | 548 | #[test] 549 | fn test_loaded_default_patterns() { 550 | let mut grok = Grok::with_default_patterns(); 551 | let pattern = grok 552 | .compile("%{DAY} %{MONTH} %{YEAR}", false) 553 | .expect("Error while compiling!"); 554 | 555 | let matches = pattern 556 | .match_against("Monday March 2012") 557 | .expect("No matches found!"); 558 | assert_eq!("Monday", matches.get("DAY").unwrap()); 559 | assert_eq!("March", matches.get("MONTH").unwrap()); 560 | assert_eq!("2012", matches.get("YEAR").unwrap()); 561 | assert_eq!(None, matches.get("unknown")); 562 | } 563 | 564 | #[test] 565 | fn test_compilation_of_all_default_patterns() { 566 | let mut grok = Grok::default(); 567 | let mut num_checked = 0; 568 | let mut errors = vec![]; 569 | for &(key, _) in PATTERNS { 570 | let pattern = format!("%{{{}}}", key); 571 | match grok.compile(&pattern, false) { 572 | Ok(_) => (), 573 | Err(e) => errors.push((key, e)), 574 | } 575 | num_checked += 1; 576 | } 577 | assert!(num_checked > 0); 578 | if 
!errors.is_empty() { 579 | for (key, e) in errors { 580 | eprintln!("Pattern {} failed to compile: {}", key, e); 581 | } 582 | panic!("Not all patterns compiled successfully"); 583 | } 584 | } 585 | 586 | #[test] 587 | fn test_adhoc_pattern() { 588 | let mut grok = Grok::default(); 589 | let pattern = grok 590 | .compile(r"\[(?<threadname>[^\]]+)\]", false) 591 | .expect("Error while compiling!"); 592 | 593 | let matches = pattern 594 | .match_against("[thread1]") 595 | .expect("No matches found!"); 596 | assert_eq!("thread1", matches.get("threadname").unwrap()); 597 | assert_eq!(matches.len(), 1); 598 | } 599 | 600 | #[test] 601 | fn test_adhoc_pattern_in_iter() { 602 | let mut grok = Grok::default(); 603 | let pattern = grok 604 | .compile(r"\[(?<threadname>[^\]]+)\]", false) 605 | .expect("Error while compiling!"); 606 | 607 | let matches = pattern 608 | .match_against("[thread1]") 609 | .expect("No matches found!"); 610 | let mut found = 0; 611 | assert_eq!(matches.len(), 1); 612 | for (k, v) in matches.iter() { 613 | assert_eq!("threadname", k); 614 | assert_eq!("thread1", v); 615 | found += 1; 616 | } 617 | assert_eq!(1, found); 618 | } 619 | 620 | #[test] 621 | fn test_capture_names() { 622 | let mut grok = Grok::empty(); 623 | grok.add_pattern("YEAR", r"(\d\d){1,2}"); 624 | grok.add_pattern("MONTH", r"\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\b"); 625 | grok.add_pattern("DAY", r"(?:Mon(?:day)?|Tue(?:sday)?|Wed(?:nesday)?|Thu(?:rsday)?|Fri(?:day)?|Sat(?:urday)?|Sun(?:day)?)"); 626 | grok.add_pattern("USERNAME", r"[a-zA-Z0-9._-]+"); 627 | grok.add_pattern("SPACE", r"\s*"); 628 | 629 | let pattern = grok 630 | .compile("%{YEAR}%{SPACE}%{USERNAME:user}?", false) 631 | .expect("Error while compiling!"); 632 | 633 | let expected = vec!["SPACE", "YEAR", "user"]; 634 | let actual = pattern.capture_names().collect::<Vec<&str>>(); 635 | assert_eq!(expected, actual); 636 | } 637 | 638 | #[test] 639 | fn test_capture_error() { 640 | let mut grok = Grok::with_default_patterns(); 641 | let pattern = grok 642 | .compile("Path: %{PATH}$", false) 643 | .expect("Error while compiling!"); 644 | let matches = pattern 645 | .match_against("Path: /AAAAA/BBBBB/CCCCC/DDDDDDDDDDDDDD EEEEEEEEEEEEEEEEEEEEEEEE/"); 646 | 647 | assert!(matches.is_none()); 648 | } 649 | } 650 | -------------------------------------------------------------------------------- /src/onig.rs: -------------------------------------------------------------------------------- 1 | use crate::Error; 2 | use onig::{MatchParam, Regex, Region, SearchOptions}; 3 | use std::collections::{btree_map, BTreeMap, HashMap}; 4 | 5 | /// The `Pattern` represents a compiled regex, ready to be matched against arbitrary text. 6 | #[derive(Debug)] 7 | pub struct OnigPattern { 8 | regex: Regex, 9 | pub(crate) names: BTreeMap<String, u32>, 10 | } 11 | 12 | impl OnigPattern { 13 | /// Creates a new pattern from a raw regex string and an alias map to identify the 14 | /// fields properly.
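/// 
/// The `alias` map is built by `Grok::compile`: its keys are the user-facing names or aliases from the grok expression (e.g. `user` in `%{USERNAME:user}`) and its values are the generated capture-group names (`name0`, `name1`, ...) that appear in the raw regex, so that matches can be reported under the original keys again.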
15 | pub(crate) fn new(regex: &str, alias: &HashMap<String, String>) -> Result<Self, Error> { 16 | match Regex::new(regex) { 17 | Ok(r) => Ok({ 18 | let mut names = BTreeMap::new(); 19 | r.foreach_name(|cap_name, cap_idx| { 20 | let name = match alias.iter().find(|&(_k, v)| *v == cap_name) { 21 | Some(item) => item.0.clone(), 22 | None => String::from(cap_name), 23 | }; 24 | names.insert(name, cap_idx[0]); 25 | true 26 | }); 27 | Self { regex: r, names } 28 | }), 29 | Err(e) => Err(Error::RegexCompilationFailed(format!( 30 | "Regex compilation failed: {e:?}:\n{regex}" 31 | ))), 32 | } 33 | } 34 | 35 | /// Matches this compiled `Pattern` against the text and returns the matches. 36 | pub fn match_against<'a>(&'a self, text: &'a str) -> Option<OnigMatches<'a>> { 37 | // Inlined version of the onig methods that would otherwise cause an internal panic 38 | let this = &self.regex; 39 | let mut region = Region::new(); 40 | let to = text.len(); 41 | let options = SearchOptions::SEARCH_OPTION_NONE; 42 | let match_param = MatchParam::default(); 43 | let result = this.search_with_param(text, 0, to, options, Some(&mut region), match_param); 44 | 45 | match result { 46 | Ok(r) => r, 47 | Err(_) => None, 48 | } 49 | .map(|_| OnigMatches { 50 | text, 51 | region, 52 | pattern: self, 53 | }) 54 | } 55 | 56 | /// Returns all names this `Pattern` captures. 57 | pub fn capture_names(&self) -> impl Iterator<Item = &str> { 58 | self.names.keys().map(|s| s.as_str()) 59 | } 60 | } 61 | 62 | /// The `Matches` represent matched results from a `Pattern` against a provided text. 63 | #[derive(Debug)] 64 | pub struct OnigMatches<'a> { 65 | text: &'a str, 66 | region: Region, 67 | pattern: &'a crate::onig::OnigPattern, 68 | } 69 | 70 | impl<'a> OnigMatches<'a> { 71 | /// Gets the value for the name (or) alias if found, `None` otherwise. 72 | pub fn get(&self, name_or_alias: &str) -> Option<&str> { 73 | match self.pattern.names.get(name_or_alias) { 74 | Some(found) => self 75 | .region 76 | .pos(*found as usize) 77 | .and_then(|(start, end)| Some(&self.text[start..end])), 78 | None => None, 79 | } 80 | } 81 | 82 | /// Returns the number of matches. 83 | pub fn len(&self) -> usize { 84 | debug_assert_eq!(self.region.len() - 1, self.pattern.names.len()); 85 | self.pattern.names.len() 86 | } 87 | 88 | /// Returns true if there are no matches, false otherwise. 89 | pub fn is_empty(&self) -> bool { 90 | self.len() == 0 91 | } 92 | 93 | /// Returns a tuple of key/value with all the matches found. 94 | /// 95 | /// Note that if no match is found, the value is empty. 96 | pub fn iter(&'a self) -> OnigMatchesIter<'a> { 97 | OnigMatchesIter { 98 | text: &self.text, 99 | region: &self.region, 100 | names: self.pattern.names.iter(), 101 | } 102 | } 103 | } 104 | 105 | impl<'a> IntoIterator for &'a OnigMatches<'a> { 106 | type Item = (&'a str, &'a str); 107 | type IntoIter = OnigMatchesIter<'a>; 108 | 109 | fn into_iter(self) -> Self::IntoIter { 110 | self.iter() 111 | } 112 | } 113 | 114 | /// An `Iterator` over all matches, accessible via `Matches`.
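/// 
/// A short illustrative loop, assuming `matches` was returned by `Pattern::match_against` as in the crate's tests:
/// 
/// ```rs
/// for (name, value) in &matches {
///     println!("{} = {}", name, value);
/// }
/// ```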
115 | pub struct OnigMatchesIter<'a> {
116 |     text: &'a str,
117 |     region: &'a Region,
118 |     names: btree_map::Iter<'a, String, u32>,
119 | }
120 | 
121 | impl<'a> Iterator for OnigMatchesIter<'a> {
122 |     type Item = (&'a str, &'a str);
123 | 
124 |     fn next(&mut self) -> Option<Self::Item> {
125 |         for (k, v) in self.names.by_ref() {
126 |             match self.region.pos(*v as usize) {
127 |                 Some((start, end)) => return Some((k.as_str(), &self.text[start..end])),
128 |                 None => {
129 |                     continue;
130 |                 }
131 |             }
132 |         }
133 |         None
134 |     }
135 | }
136 | 
--------------------------------------------------------------------------------
/src/pcre2.rs:
--------------------------------------------------------------------------------
1 | use crate::Error;
2 | use pcre2::bytes::{Captures, Regex, RegexBuilder};
3 | use std::collections::{btree_map, BTreeMap, HashMap};
4 | 
5 | /// The `Pattern` represents a compiled regex, ready to be matched against arbitrary text.
6 | #[derive(Debug)]
7 | pub struct Pcre2Pattern {
8 |     regex: Regex,
9 |     names: BTreeMap<String, usize>,
10 | }
11 | 
12 | impl Pcre2Pattern {
13 |     /// Creates a new pattern from a raw regex string and an alias map to identify the
14 |     /// fields properly.
15 |     pub(crate) fn new(regex: &str, alias: &HashMap<String, String>) -> Result<Self, Error> {
16 |         let mut builder = RegexBuilder::new();
17 |         builder.jit_if_available(true);
18 |         builder.utf(true);
19 |         match builder.build(regex) {
20 |             Ok(r) => Ok({
21 |                 let mut names = BTreeMap::new();
22 |                 for (i, name) in r.capture_names().iter().enumerate() {
23 |                     if let Some(name) = name {
24 |                         let name = match alias.iter().find(|&(_k, v)| v == name) {
25 |                             Some(item) => item.0.clone(),
26 |                             None => String::from(name),
27 |                         };
28 |                         names.insert(name, i);
29 |                     }
30 |                 }
31 |                 Self { regex: r, names }
32 |             }),
33 |             Err(e) => Err(Error::RegexCompilationFailed(format!(
34 |                 "Regex compilation failed: {e:?}:\n{regex}"
35 |             ))),
36 |         }
37 |     }
38 | 
39 |     /// Matches this compiled `Pattern` against the text and returns the matches.
40 |     pub fn match_against<'a>(&'a self, text: &'a str) -> Option<Pcre2Matches<'a>> {
41 |         self.regex
42 |             .captures(text.as_bytes())
43 |             .ok()
44 |             .flatten()
45 |             .map(|caps| Pcre2Matches {
46 |                 captures: caps,
47 |                 pattern: self,
48 |             })
49 |     }
50 | 
51 |     /// Returns all names this `Pattern` captures.
52 |     pub fn capture_names(&self) -> impl Iterator<Item = &str> {
53 |         self.names.keys().map(|s| s.as_str())
54 |     }
55 | }
56 | 
57 | /// The `Matches` represents the results of matching a `Pattern` against a provided text.
58 | #[derive(Debug)]
59 | pub struct Pcre2Matches<'a> {
60 |     captures: Captures<'a>,
61 |     pattern: &'a Pcre2Pattern,
62 | }
63 | 
64 | impl<'a> Pcre2Matches<'a> {
65 |     /// Gets the value for the given name or alias if found, `None` otherwise.
66 |     pub fn get(&self, name_or_alias: &str) -> Option<&str> {
67 |         self.pattern
68 |             .names
69 |             .get(name_or_alias)
70 |             .and_then(|&idx| self.captures.get(idx))
71 |             .map(|m| std::str::from_utf8(m.as_bytes()).unwrap())
72 |     }
73 | 
74 |     /// Returns the number of matches.
75 |     pub fn len(&self) -> usize {
76 |         self.pattern.names.len()
77 |     }
78 | 
79 |     /// Returns true if there are no matches, false otherwise.
80 |     pub fn is_empty(&self) -> bool {
81 |         self.len() == 0
82 |     }
83 | 
84 |     /// Returns an iterator over `(key, value)` pairs for all the matches found.
85 |     ///
86 |     /// Note that capture groups which did not participate in the match are omitted.
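    ///
    /// Values are converted back to `&str` from the underlying byte captures; this is
    /// expected to be safe because the matched text is a slice of the original UTF-8 input
    /// and the regex is built with UTF mode enabled.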
87 |     pub fn iter(&'a self) -> Pcre2MatchesIter<'a> {
88 |         Pcre2MatchesIter {
89 |             captures: &self.captures,
90 |             names: self.pattern.names.iter(),
91 |         }
92 |     }
93 | }
94 | 
95 | impl<'a> IntoIterator for &'a Pcre2Matches<'a> {
96 |     type Item = (&'a str, &'a str);
97 |     type IntoIter = Pcre2MatchesIter<'a>;
98 | 
99 |     fn into_iter(self) -> Self::IntoIter {
100 |         self.iter()
101 |     }
102 | }
103 | 
104 | /// An `Iterator` over all matches, accessible via `Matches`.
105 | pub struct Pcre2MatchesIter<'a> {
106 |     captures: &'a Captures<'a>,
107 |     names: btree_map::Iter<'a, String, usize>,
108 | }
109 | 
110 | impl<'a> Iterator for Pcre2MatchesIter<'a> {
111 |     type Item = (&'a str, &'a str);
112 | 
113 |     fn next(&mut self) -> Option<Self::Item> {
114 |         for (k, &v) in self.names.by_ref() {
115 |             if let Some(m) = self.captures.get(v) {
116 |                 return Some((k.as_str(), std::str::from_utf8(m.as_bytes()).unwrap()));
117 |             }
118 |         }
119 |         None
120 |     }
121 | }
122 | 
--------------------------------------------------------------------------------
/src/regex.rs:
--------------------------------------------------------------------------------
1 | use crate::Error;
2 | use regex::{Captures, Regex};
3 | use std::collections::{btree_map, BTreeMap, HashMap};
4 | 
5 | /// The `Pattern` represents a compiled regex, ready to be matched against arbitrary text.
6 | #[derive(Debug)]
7 | pub struct RegexPattern {
8 |     regex: Regex,
9 |     pub(crate) names: BTreeMap<String, usize>,
10 | }
11 | 
12 | impl RegexPattern {
13 |     /// Creates a new pattern from a raw regex string and an alias map to identify the
14 |     /// fields properly.
15 |     pub(crate) fn new(regex: &str, alias: &HashMap<String, String>) -> Result<Self, Error> {
16 |         match Regex::new(regex) {
17 |             Ok(r) => Ok({
18 |                 let mut names = BTreeMap::new();
19 |                 for (i, name) in r.capture_names().enumerate() {
20 |                     if let Some(name) = name {
21 |                         let name = match alias.iter().find(|&(_k, v)| *v == name) {
22 |                             Some(item) => item.0.clone(),
23 |                             None => String::from(name),
24 |                         };
25 |                         names.insert(name, i);
26 |                     }
27 |                 }
28 |                 Self { regex: r, names }
29 |             }),
30 |             Err(e) => Err(Error::RegexCompilationFailed(format!(
31 |                 "Regex compilation failed: {e:?}:\n{regex}"
32 |             ))),
33 |         }
34 |     }
35 | 
36 |     /// Matches this compiled `Pattern` against the text and returns the matches.
37 |     pub fn match_against<'a>(&'a self, text: &'a str) -> Option<RegexMatches<'a>> {
38 |         self.regex.captures(text).map(|caps| RegexMatches {
39 |             captures: caps,
40 |             pattern: self,
41 |         })
42 |     }
43 | 
44 |     /// Returns all names this `Pattern` captures.
45 |     pub fn capture_names(&self) -> impl Iterator<Item = &str> {
46 |         self.names.keys().map(|s| s.as_str())
47 |     }
48 | }
49 | 
50 | /// The `Matches` represents the results of matching a `Pattern` against a provided text.
51 | #[derive(Debug)]
52 | pub struct RegexMatches<'a> {
53 |     captures: Captures<'a>,
54 |     pattern: &'a RegexPattern,
55 | }
56 | 
57 | impl<'a> RegexMatches<'a> {
58 |     /// Gets the value for the given name or alias if found, `None` otherwise.
59 |     pub fn get(&self, name_or_alias: &str) -> Option<&str> {
60 |         self.pattern
61 |             .names
62 |             .get(name_or_alias)
63 |             .and_then(|&idx| self.captures.get(idx))
64 |             .map(|m| m.as_str())
65 |     }
66 | 
67 |     /// Returns the number of matches.
68 |     pub fn len(&self) -> usize {
69 |         self.pattern.names.len()
70 |     }
71 | 
72 |     /// Returns true if there are no matches, false otherwise.
73 |     pub fn is_empty(&self) -> bool {
74 |         self.len() == 0
75 |     }
76 | 
77 |     /// Returns an iterator over `(key, value)` pairs for all the matches found.
78 |     ///
79 |     /// Note that capture groups which did not participate in the match are omitted.
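    ///
    /// As with the other backends, pairs are yielded in lexicographical order of the capture
    /// name, since the names are stored in a `BTreeMap`.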
80 |     pub fn iter(&'a self) -> RegexMatchesIter<'a> {
81 |         RegexMatchesIter {
82 |             captures: &self.captures,
83 |             names: self.pattern.names.iter(),
84 |         }
85 |     }
86 | }
87 | 
88 | impl<'a> IntoIterator for &'a RegexMatches<'a> {
89 |     type Item = (&'a str, &'a str);
90 |     type IntoIter = RegexMatchesIter<'a>;
91 | 
92 |     fn into_iter(self) -> Self::IntoIter {
93 |         self.iter()
94 |     }
95 | }
96 | 
97 | /// An `Iterator` over all matches, accessible via `Matches`.
98 | pub struct RegexMatchesIter<'a> {
99 |     captures: &'a Captures<'a>,
100 |     names: btree_map::Iter<'a, String, usize>,
101 | }
102 | 
103 | impl<'a> Iterator for RegexMatchesIter<'a> {
104 |     type Item = (&'a str, &'a str);
105 | 
106 |     fn next(&mut self) -> Option<Self::Item> {
107 |         for (k, &v) in self.names.by_ref() {
108 |             if let Some(m) = self.captures.get(v) {
109 |                 return Some((k.as_str(), m.as_str()));
110 |             }
111 |         }
112 |         None
113 |     }
114 | }
115 | 
--------------------------------------------------------------------------------