├── .gitignore ├── Rakefile ├── NOTICE.TXT ├── .github ├── PULL_REQUEST_TEMPLATE.md ├── ISSUE_TEMPLATE.md └── CONTRIBUTING.md ├── .travis.yml ├── .ci └── performance │ ├── run.sh │ ├── docker-setup.sh │ └── docker-run.sh ├── Gemfile ├── spec ├── spec_helper.rb └── filters │ ├── grok_performance_spec.rb │ └── grok_spec.rb ├── CONTRIBUTORS ├── logstash-filter-grok.gemspec ├── README.md ├── CHANGELOG.md ├── LICENSE ├── docs └── index.asciidoc └── lib └── logstash └── filters └── grok.rb /.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | Gemfile.lock 3 | .bundle 4 | vendor 5 | *.swp 6 | .idea 7 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require "logstash/devutils/rake" 2 | require "logstash/devutils/rake" 3 | -------------------------------------------------------------------------------- /NOTICE.TXT: -------------------------------------------------------------------------------- 1 | Elasticsearch 2 | Copyright 2012-2015 Elasticsearch 3 | 4 | This product includes software developed by The Apache Software 5 | Foundation (http://www.apache.org/). -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | Thanks for contributing to Logstash! 
If you haven't already signed our CLA, here's a handy link: https://www.elastic.co/contributor-agreement/ 2 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | import: 2 | - logstash-plugins/.ci:travis/travis.yml@1.x 3 | 4 | env: 5 | global: 6 | # disabled running performance tests on CI 7 | - HAS_PERFORMANCE_TESTS=0 8 | -------------------------------------------------------------------------------- /.ci/performance/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # This is intended to be run inside the docker container as the command of the docker-compose. 3 | 4 | env 5 | 6 | set -ex 7 | 8 | jruby -rbundler/setup -S rspec -fd --tag performance -------------------------------------------------------------------------------- /.ci/performance/docker-setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This is intended to be run inside the docker container as the command of the docker-compose. 4 | set -ex 5 | 6 | # docker will look for: "./docker-compose.yml" (and "./docker-compose.override.yml") 7 | .ci/docker-setup.sh 8 | -------------------------------------------------------------------------------- /.ci/performance/docker-run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This is intended to be run inside the docker container as the command of the docker-compose. 
4 | set -ex 5 | 6 | cd .ci 7 | 8 | # docker will look for: "./docker-compose.yml" (and "./docker-compose.override.yml") 9 | docker-compose run logstash .ci/performance/run.sh 10 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | Please post all product and debugging questions on our [forum](https://discuss.elastic.co/c/logstash). Your questions will reach our wider community members there, and if we confirm that there is a bug, then we can open a new issue here. 2 | 3 | For all general issues, please provide the following details for fast resolution: 4 | 5 | - Version: 6 | - Operating System: 7 | - Config File (if you have sensitive info, please remove it): 8 | - Sample Data: 9 | - Steps to Reproduce: 10 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | gemspec 4 | 5 | logstash_path = ENV["LOGSTASH_PATH"] || "../../logstash" 6 | use_logstash_source = ENV["LOGSTASH_SOURCE"] && ENV["LOGSTASH_SOURCE"].to_s == "1" 7 | 8 | if Dir.exist?(logstash_path) && use_logstash_source 9 | gem 'logstash-core', :path => "#{logstash_path}/logstash-core" 10 | gem 'logstash-core-plugin-api', :path => "#{logstash_path}/logstash-core-plugin-api" 11 | end 12 | 13 | group :test do 14 | gem 'rspec-benchmark', :require => false if RUBY_VERSION >= '2.3' 15 | gem 'logstash-input-generator', :require => false 16 | gem 'logstash-output-null', :require => false 17 | end 18 | -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | require "logstash/devutils/rspec/spec_helper" 3 | require "stud/temporary" 4 | 5 | module LogStash::Environment 
6 | # running the grok code outside a logstash package means 7 | # LOGSTASH_HOME will not be defined, so let's set it here 8 | # before requiring the grok filter 9 | unless self.const_defined?(:LOGSTASH_HOME) 10 | LOGSTASH_HOME = File.expand_path("../../../", __FILE__) 11 | end 12 | 13 | # also :pattern_path method must exist so we define it too 14 | unless self.method_defined?(:pattern_path) 15 | def pattern_path(path) 16 | ::File.join(LOGSTASH_HOME, "patterns", path) 17 | end 18 | end 19 | end 20 | -------------------------------------------------------------------------------- /CONTRIBUTORS: -------------------------------------------------------------------------------- 1 | The following is a list of people who have contributed ideas, code, bug 2 | reports, or in general have helped logstash along its way. 3 | 4 | Contributors: 5 | * Aaron Mildenstein (untergeek) 6 | * Brad Fritz (bfritz) 7 | * Colin Surprenant (colinsurprenant) 8 | * Dr. Alexander Papaspyrou (lxndrp) 9 | * Ehtesh Choudhury (shurane) 10 | * Greg Brockman (gdb) 11 | * Guillaume ESPANEL (quatre) 12 | * Hugo Lopes Tavares (hltbra) 13 | * Jake Crosby (jakecr) 14 | * James Turnbull (jamtur01) 15 | * Jason Kendall (coolacid) 16 | * Jeff Forcier (bitprophet) 17 | * John E. Vincent (lusis) 18 | * Jordan Sissel (jordansissel) 19 | * João Duarte (jsvd) 20 | * Justin Lambert (jlambert121) 21 | * Kurt Hurtado (kurtado) 22 | * Martijn Heemels (Yggdrasil) 23 | * Neil Prosser (neilprosser) 24 | * Nick Ethier (nickethier) 25 | * Pete Fritchman (fetep) 26 | * Peter Fern (pdf) 27 | * Philippe Weber (wiibaa) 28 | * Pier-Hugues Pellerin (ph) 29 | * Richard Pijnenburg (electrical) 30 | * Suyog Rao (suyograo) 31 | * Yanis Guenane (Spredzy) 32 | * debadair 33 | * piavlo 34 | * yjpa7145 35 | 36 | Note: If you've sent us patches, bug reports, or otherwise contributed to 37 | Logstash, and you aren't on the list above and want to be, please let us know 38 | and we'll make sure you're here. 
Contributions from folks like you are what make 39 | open source awesome. 40 | -------------------------------------------------------------------------------- /logstash-filter-grok.gemspec: -------------------------------------------------------------------------------- 1 | Gem::Specification.new do |s| 2 | s.name = 'logstash-filter-grok' 3 | s.version = '4.4.3' 4 | s.licenses = ['Apache License (2.0)'] 5 | s.summary = "Parses unstructured event data into fields" 6 | s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program" 7 | s.authors = ["Elastic"] 8 | s.email = 'info@elastic.co' 9 | s.homepage = "http://www.elastic.co/guide/en/logstash/current/index.html" 10 | s.require_paths = ["lib"] 11 | 12 | # Files 13 | s.files = Dir["lib/**/*","spec/**/*","*.gemspec","*.md","CONTRIBUTORS","Gemfile","LICENSE","NOTICE.TXT", "vendor/jar-dependencies/**/*.jar", "vendor/jar-dependencies/**/*.rb", "VERSION", "docs/**/*"] 14 | 15 | # Tests 16 | s.test_files = s.files.grep(%r{^(test|spec|features)/}) 17 | 18 | # Special flag to let us know this is actually a logstash plugin 19 | s.metadata = { "logstash_plugin" => "true", "logstash_group" => "filter" } 20 | 21 | # Gem dependencies 22 | s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99" 23 | s.add_runtime_dependency "logstash-core", ">= 5.6.0" 24 | s.add_runtime_dependency 'logstash-mixin-ecs_compatibility_support', '~> 1.2' 25 | 26 | s.add_runtime_dependency 'jls-grok', '~> 0.11.3' 27 | s.add_runtime_dependency 'stud', '~> 0.0.22' 28 | s.add_runtime_dependency 'logstash-patterns-core', '>= 4.3.0', '< 5' 29 | 30 | s.add_development_dependency 'logstash-devutils' 31 | end 32 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # 
Contributing to Logstash 2 | 3 | All contributions are welcome: ideas, patches, documentation, bug reports, 4 | complaints, etc! 5 | 6 | Programming is not a required skill, and there are many ways to help out! 7 | It is more important to us that you are able to contribute. 8 | 9 | That said, some basic guidelines, which you are free to ignore :) 10 | 11 | ## Want to learn? 12 | 13 | Want to lurk about and see what others are doing with Logstash? 14 | 15 | * The irc channel (#logstash on irc.freenode.org) is a good place for this 16 | * The [forum](https://discuss.elastic.co/c/logstash) is also 17 | great for learning from others. 18 | 19 | ## Got Questions? 20 | 21 | Have a problem you want Logstash to solve for you? 22 | 23 | * You can ask a question in the [forum](https://discuss.elastic.co/c/logstash) 24 | * Alternately, you are welcome to join the IRC channel #logstash on 25 | irc.freenode.org and ask for help there! 26 | 27 | ## Have an Idea or Feature Request? 28 | 29 | * File a ticket on [GitHub](https://github.com/elastic/logstash/issues). Please remember that GitHub is used only for issues and feature requests. If you have a general question, the [forum](https://discuss.elastic.co/c/logstash) or IRC would be the best place to ask. 30 | 31 | ## Something Not Working? Found a Bug? 32 | 33 | If you think you found a bug, it probably is a bug. 34 | 35 | * If it is a general Logstash or a pipeline issue, file it in [Logstash GitHub](https://github.com/elasticsearch/logstash/issues) 36 | * If it is specific to a plugin, please file it in the respective repository under [logstash-plugins](https://github.com/logstash-plugins) 37 | * or ask the [forum](https://discuss.elastic.co/c/logstash). 
38 | 39 | # Contributing Documentation and Code Changes 40 | 41 | If you have a bugfix or new feature that you would like to contribute to 42 | logstash, and you think it will take more than a few minutes to produce the fix 43 | (ie; write code), it is worth discussing the change with the Logstash users and developers first! You can reach us via [GitHub](https://github.com/elastic/logstash/issues), the [forum](https://discuss.elastic.co/c/logstash), or via IRC (#logstash on freenode irc) 44 | Please note that Pull Requests without tests will not be merged. If you would like to contribute but do not have experience with writing tests, please ping us on IRC/forum or create a PR and ask our help. 45 | 46 | ## Contributing to plugins 47 | 48 | Check our [documentation](https://www.elastic.co/guide/en/logstash/current/contributing-to-logstash.html) on how to contribute to plugins or write your own! It is super easy! 49 | 50 | ## Contribution Steps 51 | 52 | 1. Test your changes! [Run](https://github.com/elastic/logstash#testing) the test suite 53 | 2. Please make sure you have signed our [Contributor License 54 | Agreement](https://www.elastic.co/contributor-agreement/). We are not 55 | asking you to assign copyright to us, but to give us the right to distribute 56 | your code without restriction. We ask this of all contributors in order to 57 | assure our users of the origin and continuing existence of the code. You 58 | only need to sign the CLA once. 59 | 3. Send a pull request! Push your changes to your fork of the repository and 60 | [submit a pull 61 | request](https://help.github.com/articles/using-pull-requests). In the pull 62 | request, describe what your changes do and mention any bugs/issues related 63 | to the pull request. 
64 | 65 | 66 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Logstash Plugin 2 | 3 | [![Travis Build Status](https://travis-ci.com/logstash-plugins/logstash-filter-grok.svg)](https://travis-ci.com/logstash-plugins/logstash-filter-grok) 4 | 5 | This is a plugin for [Logstash](https://github.com/elastic/logstash). 6 | 7 | It is fully free and fully open source. The license is Apache 2.0, meaning you are pretty much free to use it however you want in whatever way. 8 | 9 | ## Documentation 10 | 11 | Logstash provides infrastructure to automatically generate documentation for this plugin. We use the asciidoc format to write documentation so any comments in the source code will be first converted into asciidoc and then into html. All plugin documentation are placed under one [central location](http://www.elastic.co/guide/en/logstash/current/). 12 | 13 | - For formatting code or config example, you can use the asciidoc `[source,ruby]` directive 14 | - For more asciidoc formatting tips, see the excellent reference here https://github.com/elastic/docs#asciidoc-guide 15 | 16 | ## Need Help? 17 | 18 | Need help? Try #logstash on freenode IRC or the https://discuss.elastic.co/c/logstash discussion forum. 19 | 20 | ## Developing 21 | 22 | ### 1. Plugin Developement and Testing 23 | 24 | #### Code 25 | - To get started, you'll need JRuby with the Bundler gem installed. 26 | 27 | - Create a new plugin or clone and existing from the GitHub [logstash-plugins](https://github.com/logstash-plugins) organization. We also provide [example plugins](https://github.com/logstash-plugins?query=example). 28 | 29 | - Install dependencies 30 | ```sh 31 | bundle install 32 | ``` 33 | 34 | #### Test 35 | 36 | - Update your dependencies 37 | 38 | ```sh 39 | bundle install 40 | ``` 41 | 42 | - Run tests 43 | 44 | ```sh 45 | bundle exec rspec 46 | ``` 47 | 48 | ### 2. 
Running your unpublished Plugin in Logstash 49 | 50 | #### 2.1 Run in a local Logstash clone 51 | 52 | - Edit Logstash `Gemfile` and add the local plugin path, for example: 53 | ```ruby 54 | gem "logstash-filter-awesome", :path => "/your/local/logstash-filter-awesome" 55 | ``` 56 | - Install plugin 57 | ```sh 58 | # Logstash 2.3 and higher 59 | bin/logstash-plugin install --no-verify 60 | 61 | # Prior to Logstash 2.3 62 | bin/plugin install --no-verify 63 | 64 | ``` 65 | - Run Logstash with your plugin 66 | ```sh 67 | bin/logstash -e 'filter {awesome {}}' 68 | ``` 69 | At this point any modifications to the plugin code will be applied to this local Logstash setup. After modifying the plugin, simply rerun Logstash. 70 | 71 | #### 2.2 Run in an installed Logstash 72 | 73 | You can use the same **2.1** method to run your plugin in an installed Logstash by editing its `Gemfile` and pointing the `:path` to your local plugin development directory or you can build the gem and install it using: 74 | 75 | - Build your plugin gem 76 | ```sh 77 | gem build logstash-filter-awesome.gemspec 78 | ``` 79 | - Install the plugin from the Logstash home 80 | ```sh 81 | # Logstash 2.3 and higher 82 | bin/logstash-plugin install --no-verify 83 | 84 | # Prior to Logstash 2.3 85 | bin/plugin install --no-verify 86 | 87 | ``` 88 | - Start Logstash and proceed to test the plugin 89 | 90 | ## Contributing 91 | 92 | All contributions are welcome: ideas, patches, documentation, bug reports, complaints, and even something you drew up on a napkin. 93 | 94 | Programming is not a required skill. Whatever you've seen about open source and maintainers or community members saying "send patches or die" - you will not see that here. 95 | 96 | It is more important to the community that you are able to contribute. 97 | 98 | For more information about contributing, see the [CONTRIBUTING](https://github.com/elastic/logstash/blob/master/CONTRIBUTING.md) file. 
-------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## 4.4.3 2 | - Minor typos in docs examples [#176](https://github.com/logstash-plugins/logstash-filter-grok/pull/176) 3 | 4 | ## 4.4.2 5 | - Clarify the definition of matches that depend on previous captures [#169](https://github.com/logstash-plugins/logstash-filter-grok/pull/169) 6 | 7 | ## 4.4.1 8 | - Added preview of ECS v8 support using existing ECS v1 implementation [#175](https://github.com/logstash-plugins/logstash-filter-grok/pull/175) 9 | 10 | ## 4.4.0 11 | - Feat: ECS compatibility support [#162](https://github.com/logstash-plugins/logstash-filter-grok/pull/162) 12 | 13 | The filter supports using built-in pattern definitions that are fully Elastic Common Schema (ECS) compliant. 14 | 15 | ## 4.3.0 16 | - Added: added target support [#156](https://github.com/logstash-plugins/logstash-filter-grok/pull/156) 17 | 18 | ## 4.2.0 19 | - Added: support for timeout_scope [#153](https://github.com/logstash-plugins/logstash-filter-grok/pull/153) 20 | 21 | ## 4.1.1 22 | - Fix formatting for code sample [#148](https://github.com/logstash-plugins/logstash-filter-grok/pull/148) 23 | 24 | ## 4.1.0 25 | - Changed timeout handling using the Timeout class [#147](https://github.com/logstash-plugins/logstash-filter-grok/pull/147) 26 | 27 | ## 4.0.4 28 | - Added info and link to documentation for logstash-filter-dissect as another option for extracting unstructured event data into fields 29 | [#144](https://github.com/logstash-plugins/logstash-filter-grok/issues/144) 30 | 31 | ## 4.0.3 32 | - Fixed memory leak when run on JRuby 1.x (Logstash 5.x) [#135](https://github.com/logstash-plugins/logstash-filter-grok/issues/135) 33 | 34 | ## 4.0.2 35 | - Fixed resource leak where this plugin might get double initialized during plugin reload, leaking a thread + some objects 36 | 37 | ## 4.0.1 38 | - Fix a 
potential race 39 | 40 | ## 4.0.0 41 | - Major performance improvements due to reduced locking 42 | 43 | ## 3.4.5 44 | - version yanked due to breaking changes within .patch release cause logstash crashes in < 5.6 45 | 46 | ## 3.4.4 47 | - Update gemspec summary 48 | 49 | ## 3.4.3 50 | - Fix some documentation issues 51 | 52 | ## 3.4.1 53 | - Fix subdirectories in a pattern folder causing an exception in some cases 54 | 55 | ## 3.4.0 56 | - Add option to define patterns inline in the filter using `pattern_definitions` configuration. 57 | 58 | ## 3.3.1 59 | - Docs: indicate that grok patterns are loaded when the pipeline is created 60 | 61 | ## 3.3.0 62 | - Allow timeout enforcer to be disabled by setting timeout_millis to nil 63 | - Change default timeout_millis to 30s 64 | 65 | ## 3.2.4 66 | - Fix mutex interruption bug that could crash logstash. See: https://github.com/logstash-plugins/logstash-filter-grok/issues/97 67 | 68 | ## 3.2.3 69 | - No longer use 'trace' log level as it breaks rspec 70 | - Fix race conditions in timeout enforcer 71 | 72 | ## 3.2.3 73 | - Move one log message from info to debug to avoid noise 74 | 75 | ## 3.2.1 76 | - Fix race condition in TimeoutEnforcer that could cause crashes 77 | - Fix shutdown code to close cleanly and properly close the enforcer 78 | 79 | ## 3.2.0 80 | - Add new timeout options to cancel grok execution if a threshold time is exceeded 81 | 82 | ## 3.1.2 83 | - Relax constraint on logstash-core-plugin-api to >= 1.60 <= 2.99 84 | 85 | ## 3.1.1 86 | - Added metrics for failed, matched and number of patters per field. 87 | 88 | ## 3.1.0 89 | - breaking,config: Remove deprecated config `singles`. 90 | - breaking,config: Remove deprecated config `pattern`. Please use `match => { "message" => ""}` syntax. 91 | 92 | ## 3.0.1 93 | - internal: Republish all the gems under jruby. 
94 | 95 | ## 3.0.0 96 | - internal,deps: Update the plugin to the version 2.0 of the plugin api, this change is required for Logstash 5.0 compatibility. See https://github.com/elastic/logstash/issues/5141 97 | 98 | ## 2.0.5 99 | - internal,deps: Depend on logstash-core-plugin-api instead of logstash-core, removing the need to mass update plugins on major releases of logstash 100 | 101 | ## 2.0.4 102 | - internal,deps: New dependency requirements for logstash-core for the 5.0 release 103 | 104 | ## 2.0.3 105 | - internal: fix fieldref assignment to avoid assumption on mutable object 106 | 107 | ## 2.0.0 108 | - internal: Plugins were updated to follow the new shutdown semantic, this mainly allows Logstash to instruct input plugins to terminate gracefully, 109 | instead of using Thread.raise on the plugins' threads. Ref: https://github.com/elastic/logstash/pull/3895 110 | - internal,deps: Dependency on logstash-core update to 2.0 111 | -------------------------------------------------------------------------------- /spec/filters/grok_performance_spec.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | require_relative "../spec_helper" 3 | 4 | begin 5 | require "rspec-benchmark" 6 | rescue LoadError # due testing against LS 5.x 7 | end 8 | RSpec.configure do |config| 9 | config.include RSpec::Benchmark::Matchers if defined? 
RSpec::Benchmark::Matchers 10 | end 11 | 12 | require "logstash/filters/grok" 13 | 14 | describe LogStash::Filters::Grok do 15 | 16 | subject do 17 | described_class.new(config).tap { |filter| filter.register } 18 | end 19 | 20 | EVENT_COUNT = 300_000 21 | 22 | describe "base-line performance", :performance => true do 23 | 24 | EXPECTED_MIN_RATE = 30_000 # per second - based on Travis CI (docker) numbers 25 | 26 | let(:config) do 27 | { 'match' => { "message" => "%{SYSLOGLINE}" }, 'overwrite' => [ "message" ] } 28 | end 29 | 30 | it "matches at least #{EXPECTED_MIN_RATE} events/second" do 31 | max_duration = EVENT_COUNT / EXPECTED_MIN_RATE 32 | message = "Mar 16 00:01:25 evita postfix/smtpd[1713]: connect from camomile.cloud9.net[168.100.1.3]" 33 | expect do 34 | duration = measure do 35 | EVENT_COUNT.times { subject.filter(LogStash::Event.new("message" => message)) } 36 | end 37 | puts "filters/grok parse rate: #{"%02.0f/sec" % (EVENT_COUNT / duration)}, elapsed: #{duration}s" 38 | end.to perform_under(max_duration).warmup(1).sample(2).times 39 | end 40 | 41 | end 42 | 43 | describe "timeout", :performance => true do 44 | 45 | ACCEPTED_TIMEOUT_DEGRADATION = 100 # in % (compared to timeout-less run) 46 | # TODO: with more real-world (pipeline) setup this usually gets bellow 10% on average 47 | 48 | MATCH_PATTERNS = { 49 | "message" => [ 50 | "foo0: %{NUMBER:bar}", "foo1: %{NUMBER:bar}", "foo2: %{NUMBER:bar}", "foo3: %{NUMBER:bar}", "foo4: %{NUMBER:bar}", 51 | "foo5: %{NUMBER:bar}", "foo6: %{NUMBER:bar}", "foo7: %{NUMBER:bar}", "foo8: %{NUMBER:bar}", "foo9: %{NUMBER:bar}", 52 | "%{SYSLOGLINE}" 53 | ] 54 | } 55 | 56 | SAMPLE_MESSAGE = "Mar 16 00:01:25 evita postfix/smtpd[1713]: connect from aaaaaaaa.aaaaaa.net[111.111.11.1]".freeze 57 | 58 | TIMEOUT_MILLIS = 5_000 59 | 60 | let(:config_wout_timeout) do 61 | { 62 | 'match' => MATCH_PATTERNS, 63 | 'timeout_scope' => "event", 64 | 'timeout_millis' => 0 # 0 - disabled timeout 65 | } 66 | end 67 | 68 | 
let(:config_with_timeout) do 69 | { 70 | 'match' => MATCH_PATTERNS, 71 | 'timeout_scope' => "event", 72 | 'timeout_millis' => TIMEOUT_MILLIS 73 | } 74 | end 75 | 76 | SAMPLE_COUNT = 2 77 | 78 | it "has less than #{ACCEPTED_TIMEOUT_DEGRADATION}% overhead" do 79 | filter_wout_timeout = LogStash::Filters::Grok.new(config_wout_timeout).tap(&:register) 80 | wout_timeout_duration = do_sample_filter(filter_wout_timeout) # warmup 81 | puts "filters/grok(timeout => 0) warmed up in #{wout_timeout_duration}" 82 | before_sample! 83 | no_timeout_durations = Array.new(SAMPLE_COUNT).map do 84 | do_sample_filter(filter_wout_timeout) 85 | end 86 | puts "filters/grok(timeout => 0) took #{no_timeout_durations}" 87 | 88 | expected_duration = avg(no_timeout_durations) 89 | expected_duration += (expected_duration / 100) * ACCEPTED_TIMEOUT_DEGRADATION 90 | puts "expected_duration #{expected_duration}" 91 | 92 | filter_with_timeout = LogStash::Filters::Grok.new(config_with_timeout).tap(&:register) 93 | with_timeout_duration = do_sample_filter(filter_with_timeout) # warmup 94 | puts "filters/grok(timeout_scope => event) warmed up in #{with_timeout_duration}" 95 | 96 | try(3) do 97 | before_sample! 
98 | durations = [] 99 | begin 100 | expect do 101 | do_sample_filter(filter_with_timeout).tap { |duration| durations << duration } 102 | end.to perform_under(expected_duration).sample(SAMPLE_COUNT).times 103 | ensure 104 | puts "filters/grok(timeout_scope => event) took #{durations}" 105 | end 106 | end 107 | end 108 | 109 | @private 110 | 111 | def do_sample_filter(filter) 112 | sample_event = { "message" => SAMPLE_MESSAGE } 113 | measure do 114 | for _ in (1..EVENT_COUNT) do # EVENT_COUNT.times without the block cost 115 | filter.filter(LogStash::Event.new(sample_event)) 116 | end 117 | end 118 | end 119 | 120 | end 121 | 122 | @private 123 | 124 | def measure 125 | start = Time.now 126 | yield 127 | Time.now - start 128 | end 129 | 130 | def avg(ary) 131 | ary.inject(0) { |m, i| m + i } / ary.size.to_f 132 | end 133 | 134 | def before_sample! 135 | 2.times { JRuby.gc } 136 | sleep TIMEOUT_MILLIS / 1000 137 | end 138 | 139 | def sleep(seconds) 140 | puts "sleeping for #{seconds} seconds (redundant - potential timeout propagation)" 141 | Kernel.sleep(seconds) 142 | end 143 | 144 | end -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. 
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright 2020 Elastic and contributors 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /docs/index.asciidoc: -------------------------------------------------------------------------------- 1 | :plugin: grok 2 | :type: filter 3 | 4 | /////////////////////////////////////////// 5 | START - GENERATED VARIABLES, DO NOT EDIT! 6 | /////////////////////////////////////////// 7 | :version: %VERSION% 8 | :release_date: %RELEASE_DATE% 9 | :changelog_url: %CHANGELOG_URL% 10 | :include_path: ../../../../logstash/docs/include 11 | /////////////////////////////////////////// 12 | END - GENERATED VARIABLES, DO NOT EDIT! 13 | /////////////////////////////////////////// 14 | 15 | [id="plugins-{type}s-{plugin}"] 16 | 17 | === Grok filter plugin 18 | 19 | include::{include_path}/plugin_header.asciidoc[] 20 | 21 | ==== Description 22 | 23 | Parse arbitrary text and structure it. 24 | 25 | Grok is a great way to parse unstructured log data into something structured and queryable. 
26 | 27 | This tool is perfect for syslog logs, apache and other webserver logs, mysql 28 | logs, and in general, any log format that is generally written for humans 29 | and not computer consumption. 30 | 31 | Logstash ships with about 120 patterns by default. You can find them here: 32 | https://github.com/logstash-plugins/logstash-patterns-core/tree/master/patterns. You can add 33 | your own trivially. (See the `patterns_dir` setting) 34 | 35 | If you need help building patterns to match your logs, try the {kibana-ref}/xpack-grokdebugger.html[Grok debugger] in {kib}. 36 | 37 | ===== Grok or Dissect? Or both? 38 | 39 | The {logstash-ref}/plugins-filters-dissect.html[`dissect`] filter plugin 40 | is another way to extract unstructured event data into fields using delimiters. 41 | 42 | Dissect differs from Grok in that it does not use regular expressions and is faster. 43 | Dissect works well when data is reliably repeated. 44 | Grok is a better choice when the structure of your text varies from line to line. 45 | 46 | You can use both Dissect and Grok for a hybrid use case when a section of the 47 | line is reliably repeated, but the entire line is not. The Dissect filter can 48 | deconstruct the section of the line that is repeated. The Grok filter can process 49 | the remaining field values with more regex predictability. 50 | 51 | ==== Grok Basics 52 | 53 | Grok works by combining text patterns into something that matches your 54 | logs. 55 | 56 | The syntax for a grok pattern is `%{SYNTAX:SEMANTIC}` 57 | 58 | The `SYNTAX` is the name of the pattern that will match your text. For 59 | example, `3.44` will be matched by the `NUMBER` pattern and `55.3.244.1` will 60 | be matched by the `IP` pattern. The syntax is how you match. 61 | 62 | The `SEMANTIC` is the identifier you give to the piece of text being matched. 63 | For example, `3.44` could be the duration of an event, so you could call it 64 | simply `duration`. Further, a string `55.3.244.1` might identify the `client` 65 | making a request. 
66 | 67 | For the above example, your grok filter would look something like this: 68 | [source,ruby] 69 | %{NUMBER:duration} %{IP:client} 70 | 71 | Optionally you can add a data type conversion to your grok pattern. By default 72 | all semantics are saved as strings. If you wish to convert a semantic's data type, 73 | for example change a string to an integer then suffix it with the target data type. 74 | For example `%{NUMBER:num:int}` which converts the `num` semantic from a string to an 75 | integer. Currently the only supported conversions are `int` and `float`. 76 | 77 | .Examples: 78 | 79 | With that idea of a syntax and semantic, we can pull out useful fields from a 80 | sample log like this fictional http request log: 81 | [source,ruby] 82 | 55.3.244.1 GET /index.html 15824 0.043 83 | 84 | The pattern for this could be: 85 | [source,ruby] 86 | %{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration} 87 | 88 | A more realistic example, let's read these logs from a file: 89 | [source,ruby] 90 | input { 91 | file { 92 | path => "/var/log/http.log" 93 | } 94 | } 95 | filter { 96 | grok { 97 | match => { "message" => "%{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}" } 98 | } 99 | } 100 | 101 | After the grok filter, the event will have a few extra fields in it: 102 | 103 | * `client: 55.3.244.1` 104 | * `method: GET` 105 | * `request: /index.html` 106 | * `bytes: 15824` 107 | * `duration: 0.043` 108 | 109 | ==== Regular Expressions 110 | 111 | Grok sits on top of regular expressions, so any regular expressions are valid 112 | in grok as well. The regular expression library is Oniguruma, and you can see 113 | the full supported regexp syntax https://github.com/kkos/oniguruma/blob/master/doc/RE[on the Oniguruma 114 | site]. 115 | 116 | ==== Custom Patterns 117 | 118 | Sometimes logstash doesn't have a pattern you need. For this, you have 119 | a few options. 
120 | 121 | First, you can use the Oniguruma syntax for named capture which will 122 | let you match a piece of text and save it as a field: 123 | [source,ruby] 124 | (?the pattern here) 125 | 126 | For example, postfix logs have a `queue id` that is an 10 or 11-character 127 | hexadecimal value. I can capture that easily like this: 128 | [source,ruby] 129 | (?[0-9A-F]{10,11}) 130 | 131 | Alternately, you can create a custom patterns file. 132 | 133 | * Create a directory called `patterns` with a file in it called `extra` 134 | (the file name doesn't matter, but name it meaningfully for yourself) 135 | * In that file, write the pattern you need as the pattern name, a space, then 136 | the regexp for that pattern. 137 | 138 | For example, doing the postfix queue id example as above: 139 | [source,ruby] 140 | # contents of ./patterns/postfix: 141 | POSTFIX_QUEUEID [0-9A-F]{10,11} 142 | 143 | Then use the `patterns_dir` setting in this plugin to tell logstash where 144 | your custom patterns directory is. Here's a full example with a sample log: 145 | 146 | [source,ruby] 147 | ----- 148 | Jan 1 06:25:43 mailserver14 postfix/cleanup[21403]: BEF25A72965: message-id=<20130101142543.5828399CCAF@mailserver14.example.com> 149 | ----- 150 | 151 | [source,ruby] 152 | ----- 153 | filter { 154 | grok { 155 | patterns_dir => ["./patterns"] 156 | match => { "message" => "%{SYSLOGBASE} %{POSTFIX_QUEUEID:queue_id}: %{GREEDYDATA:syslog_message}" } 157 | } 158 | } 159 | ----- 160 | 161 | The above will match and result in the following fields: 162 | 163 | * `timestamp: Jan 1 06:25:43` 164 | * `logsource: mailserver14` 165 | * `program: postfix/cleanup` 166 | * `pid: 21403` 167 | * `queue_id: BEF25A72965` 168 | * `syslog_message: message-id=<20130101142543.5828399CCAF@mailserver14.example.com>` 169 | 170 | The `timestamp`, `logsource`, `program`, and `pid` fields come from the 171 | `SYSLOGBASE` pattern which itself is defined by other patterns. 
172 | 173 | Another option is to define patterns _inline_ in the filter using `pattern_definitions`. 174 | This is mostly for convenience and allows user to define a pattern which can be used just in that 175 | filter. This newly defined patterns in `pattern_definitions` will not be available outside of that particular `grok` filter. 176 | 177 | [id="plugins-{type}s-{plugin}-ecs"] 178 | ==== Migrating to Elastic Common Schema (ECS) 179 | 180 | To ease migration to the {ecs-ref}[Elastic Common Schema (ECS)], the filter 181 | plugin offers a new set of ECS-compliant patterns in addition to the existing 182 | patterns. The new ECS pattern definitions capture event field names that are 183 | compliant with the schema. 184 | 185 | The ECS pattern set has all of the pattern definitions from the legacy set, and is 186 | a drop-in replacement. Use the <> 187 | setting to switch modes. 188 | 189 | New features and enhancements will be added to the ECS-compliant files. The 190 | legacy patterns may still receive bug fixes which are backwards compatible. 191 | 192 | 193 | [id="plugins-{type}s-{plugin}-options"] 194 | ==== Grok Filter Configuration Options 195 | 196 | This plugin supports the following configuration options plus the <> described later. 197 | 198 | [cols="<,<,<",options="header",] 199 | |======================================================================= 200 | |Setting |Input type|Required 201 | | <> |<>|No 202 | | <> |<>|No 203 | | <> |<>|No 204 | | <> |<>|No 205 | | <> |<>|No 206 | | <> |<>|No 207 | | <> |<>|No 208 | | <> |<>|No 209 | | <> |<>|No 210 | | <> |<>|No 211 | | <> |<>|No 212 | | <> |<>|No 213 | | <> |<>|No 214 | |======================================================================= 215 | 216 | Also see <> for a list of options supported by all 217 | filter plugins. 
218 | 219 |   220 | 221 | [id="plugins-{type}s-{plugin}-break_on_match"] 222 | ===== `break_on_match` 223 | 224 | * Value type is <> 225 | * Default value is `true` 226 | 227 | Break on first match. The first successful match by grok will result in the 228 | filter being finished. If you want grok to try all patterns (maybe you are 229 | parsing different things), then set this to false. 230 | 231 | [id="plugins-{type}s-{plugin}-ecs_compatibility"] 232 | ===== `ecs_compatibility` 233 | 234 | * Value type is <> 235 | * Supported values are: 236 | ** `disabled`: the plugin will load legacy (built-in) pattern definitions 237 | ** `v1`,`v8`: all patterns provided by the plugin will use ECS compliant captures 238 | * Default value depends on which version of Logstash is running: 239 | ** When Logstash provides a `pipeline.ecs_compatibility` setting, its value is used as the default 240 | ** Otherwise, the default value is `disabled`. 241 | 242 | Controls this plugin's compatibility with the {ecs-ref}[Elastic Common Schema (ECS)]. 243 | The value of this setting affects extracted event field names when a composite pattern (such as `HTTPD_COMMONLOG`) is matched. 244 | 245 | [id="plugins-{type}s-{plugin}-keep_empty_captures"] 246 | ===== `keep_empty_captures` 247 | 248 | * Value type is <> 249 | * Default value is `false` 250 | 251 | If `true`, keep empty captures as event fields. 252 | 253 | [id="plugins-{type}s-{plugin}-match"] 254 | ===== `match` 255 | 256 | * Value type is <> 257 | * Default value is `{}` 258 | 259 | A hash that defines the mapping of _where to look_, and with which patterns. 
260 | 261 | For example, the following will match an existing value in the `message` field for the given pattern, and if a match is found will add the field `duration` to the event with the captured value: 262 | [source,ruby] 263 | filter { 264 |     grok { 265 | match => { 266 | "message" => "Duration: %{NUMBER:duration}" 267 | } 268 | } 269 | } 270 | 271 | If you need to match multiple patterns against a single field, the value can be an array of patterns: 272 | [source,ruby] 273 | filter { 274 | grok { 275 | match => { 276 | "message" => [ 277 | "Duration: %{NUMBER:duration}", 278 | "Speed: %{NUMBER:speed}" 279 | ] 280 | } 281 | } 282 | } 283 | 284 | To perform matches on multiple fields just use multiple entries in the `match` hash: 285 | 286 | [source,ruby] 287 | filter { 288 | grok { 289 | match => { 290 | "speed" => "Speed: %{NUMBER:speed}" 291 | "duration" => "Duration: %{NUMBER:duration}" 292 | } 293 | } 294 | } 295 | 296 | However, if one pattern depends on a field created by a previous pattern, separate these into two separate grok filters: 297 | 298 | 299 | [source,ruby] 300 | filter { 301 | grok { 302 | match => { 303 | "message" => "Hi, the rest of the message is: %{GREEDYDATA:rest}" 304 | } 305 | } 306 | grok { 307 | match => { 308 | "rest" => "a number %{NUMBER:number}, and a word %{WORD:word}" 309 | } 310 | } 311 | } 312 | 313 | 314 | [id="plugins-{type}s-{plugin}-named_captures_only"] 315 | ===== `named_captures_only` 316 | 317 | * Value type is <> 318 | * Default value is `true` 319 | 320 | If `true`, only store named captures from grok. 321 | 322 | [id="plugins-{type}s-{plugin}-overwrite"] 323 | ===== `overwrite` 324 | 325 | * Value type is <> 326 | * Default value is `[]` 327 | 328 | The fields to overwrite. 329 | 330 | This allows you to overwrite a value in a field that already exists. 
331 | 332 | For example, if you have a syslog line in the `message` field, you can 333 | overwrite the `message` field with part of the match like so: 334 | [source,ruby] 335 | filter { 336 | grok { 337 | match => { "message" => "%{SYSLOGBASE} %{DATA:message}" } 338 | overwrite => [ "message" ] 339 | } 340 | } 341 | 342 | In this case, a line like `May 29 16:37:11 sadness logger: hello world` 343 | will be parsed and `hello world` will overwrite the original message. 344 | 345 | If you are using a field reference in `overwrite`, you must use the field 346 | reference in the pattern. Example: 347 | [source,ruby] 348 | filter { 349 | grok { 350 | match => { "somefield" => "%{NUMBER} %{GREEDYDATA:[nested][field][test]}" } 351 | overwrite => [ "[nested][field][test]" ] 352 | } 353 | } 354 | 355 | 356 | [id="plugins-{type}s-{plugin}-pattern_definitions"] 357 | ===== `pattern_definitions` 358 | 359 | * Value type is <> 360 | * Default value is `{}` 361 | 362 | A hash of pattern-name and pattern tuples defining custom patterns to be used by 363 | the current filter. Patterns matching existing names will override the pre-existing 364 | definition. Think of this as inline patterns available just for this definition of 365 | grok 366 | 367 | [id="plugins-{type}s-{plugin}-patterns_dir"] 368 | ===== `patterns_dir` 369 | 370 | * Value type is <> 371 | * Default value is `[]` 372 | 373 | 374 | Logstash ships by default with a bunch of patterns, so you don't 375 | necessarily need to define this yourself unless you are adding additional 376 | patterns. You can point to multiple pattern directories using this setting. 377 | Note that Grok will read all files in the directory matching the patterns_files_glob 378 | and assume it's a pattern file (including any tilde backup files). 
379 | [source,ruby] 380 | patterns_dir => ["/opt/logstash/patterns", "/opt/logstash/extra_patterns"] 381 | 382 | Pattern files are plain text with format: 383 | [source,ruby] 384 | NAME PATTERN 385 | 386 | For example: 387 | [source,ruby] 388 | NUMBER \d+ 389 | 390 | The patterns are loaded when the pipeline is created. 391 | 392 | [id="plugins-{type}s-{plugin}-patterns_files_glob"] 393 | ===== `patterns_files_glob` 394 | 395 | * Value type is <> 396 | * Default value is `"*"` 397 | 398 | Glob pattern, used to select the pattern files in the directories 399 | specified by patterns_dir 400 | 401 | [id="plugins-{type}s-{plugin}-tag_on_failure"] 402 | ===== `tag_on_failure` 403 | 404 | * Value type is <> 405 | * Default value is `["_grokparsefailure"]` 406 | 407 | Append values to the `tags` field when there has been no 408 | successful match 409 | 410 | [id="plugins-{type}s-{plugin}-tag_on_timeout"] 411 | ===== `tag_on_timeout` 412 | 413 | * Value type is <> 414 | * Default value is `"_groktimeout"` 415 | 416 | Tag to apply if a grok regexp times out. 417 | 418 | [id="plugins-{type}s-{plugin}-target"] 419 | ===== `target` 420 | 421 | * Value type is <> 422 | * There is no default value for this setting 423 | 424 | Define target namespace for placing matches. 425 | 426 | [id="plugins-{type}s-{plugin}-timeout_millis"] 427 | ===== `timeout_millis` 428 | 429 | * Value type is <> 430 | * Default value is `30000` 431 | 432 | Attempt to terminate regexps after this amount of time. 433 | This applies per pattern if multiple patterns are applied 434 | This will never timeout early, but may take a little longer to timeout. 435 | Actual timeout is approximate based on a 250ms quantization. 
436 | Set to 0 to disable timeouts 437 | 438 | [id="plugins-{type}s-{plugin}-timeout_scope"] 439 | ===== `timeout_scope` 440 | 441 | * Value type is <> 442 | * Default value is `"pattern"` 443 | * Supported values are `"pattern"` and `"event"` 444 | 445 | When multiple patterns are provided to <>, 446 | the timeout has historically applied to _each_ pattern, incurring overhead 447 | for each and every pattern that is attempted; when the grok filter is 448 | configured with `timeout_scope => event`, the plugin instead enforces 449 | a single timeout across all attempted matches on the event, so it can 450 | achieve similar safeguard against runaway matchers with significantly 451 | less overhead. 452 | 453 | It's usually better to scope the timeout for the whole event. 454 | 455 | 456 | [id="plugins-{type}s-{plugin}-common-options"] 457 | include::{include_path}/{type}.asciidoc[] 458 | -------------------------------------------------------------------------------- /lib/logstash/filters/grok.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | require "logstash/filters/base" 3 | require "logstash/namespace" 4 | require "logstash/environment" 5 | require "logstash/patterns/core" 6 | require 'logstash/plugin_mixins/ecs_compatibility_support' 7 | require "grok-pure" # rubygem 'jls-grok' 8 | require "timeout" 9 | 10 | # Parse arbitrary text and structure it. 11 | # 12 | # Grok is currently the best way in Logstash to parse unstructured log 13 | # data into something structured and queryable. 14 | # 15 | # This tool is perfect for syslog logs, apache and other webserver logs, mysql 16 | # logs, and in general, any log format that is generally written for humans 17 | # and not computer consumption. 18 | # 19 | # Logstash ships with about 120 patterns by default. You can find them here: 20 | # . You can add 21 | # your own trivially. 
(See the `patterns_dir` setting) 22 | # 23 | # If you need help building patterns to match your logs, you will find the 24 | # and applications quite useful! 25 | # 26 | # ==== Grok Basics 27 | # 28 | # Grok works by combining text patterns into something that matches your 29 | # logs. 30 | # 31 | # The syntax for a grok pattern is `%{SYNTAX:SEMANTIC}` 32 | # 33 | # The `SYNTAX` is the name of the pattern that will match your text. For 34 | # example, `3.44` will be matched by the `NUMBER` pattern and `55.3.244.1` will 35 | # be matched by the `IP` pattern. The syntax is how you match. 36 | # 37 | # The `SEMANTIC` is the identifier you give to the piece of text being matched. 38 | # For example, `3.44` could be the duration of an event, so you could call it 39 | # simply `duration`. Further, a string `55.3.244.1` might identify the `client` 40 | # making a request. 41 | # 42 | # For the above example, your grok filter would look something like this: 43 | # [source,ruby] 44 | # %{NUMBER:duration} %{IP:client} 45 | # 46 | # Optionally you can add a data type conversion to your grok pattern. By default 47 | # all semantics are saved as strings. If you wish to convert a semantic's data type, 48 | # for example change a string to an integer then suffix it with the target data type. 49 | # For example `%{NUMBER:num:int}` which converts the `num` semantic from a string to an 50 | # integer. Currently the only supported conversions are `int` and `float`. 
51 | # 52 | # .Examples: 53 | # 54 | # With that idea of a syntax and semantic, we can pull out useful fields from a 55 | # sample log like this fictional http request log: 56 | # [source,ruby] 57 | # 55.3.244.1 GET /index.html 15824 0.043 58 | # 59 | # The pattern for this could be: 60 | # [source,ruby] 61 | # %{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration} 62 | # 63 | # A more realistic example, let's read these logs from a file: 64 | # [source,ruby] 65 | # input { 66 | # file { 67 | # path => "/var/log/http.log" 68 | # } 69 | # } 70 | # filter { 71 | # grok { 72 | # match => { "message" => "%{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}" } 73 | # } 74 | # } 75 | # 76 | # After the grok filter, the event will have a few extra fields in it: 77 | # 78 | # * `client: 55.3.244.1` 79 | # * `method: GET` 80 | # * `request: /index.html` 81 | # * `bytes: 15824` 82 | # * `duration: 0.043` 83 | # 84 | # ==== Regular Expressions 85 | # 86 | # Grok sits on top of regular expressions, so any regular expressions are valid 87 | # in grok as well. The regular expression library is Oniguruma, and you can see 88 | # the full supported regexp syntax https://github.com/kkos/oniguruma/blob/master/doc/RE[on the Oniguruma 89 | # site]. 90 | # 91 | # ==== Custom Patterns 92 | # 93 | # Sometimes logstash doesn't have a pattern you need. For this, you have 94 | # a few options. 95 | # 96 | # First, you can use the Oniguruma syntax for named capture which will 97 | # let you match a piece of text and save it as a field: 98 | # [source,ruby] 99 | # (?the pattern here) 100 | # 101 | # For example, postfix logs have a `queue id` that is an 10 or 11-character 102 | # hexadecimal value. I can capture that easily like this: 103 | # [source,ruby] 104 | # (?[0-9A-F]{10,11}) 105 | # 106 | # Alternately, you can create a custom patterns file. 
107 | # 108 | # * Create a directory called `patterns` with a file in it called `extra` 109 | # (the file name doesn't matter, but name it meaningfully for yourself) 110 | # * In that file, write the pattern you need as the pattern name, a space, then 111 | # the regexp for that pattern. 112 | # 113 | # For example, doing the postfix queue id example as above: 114 | # [source,ruby] 115 | # # contents of ./patterns/postfix: 116 | # POSTFIX_QUEUEID [0-9A-F]{10,11} 117 | # 118 | # Then use the `patterns_dir` setting in this plugin to tell logstash where 119 | # your custom patterns directory is. Here's a full example with a sample log: 120 | # [source,ruby] 121 | # Jan 1 06:25:43 mailserver14 postfix/cleanup[21403]: BEF25A72965: message-id=<20130101142543.5828399CCAF@mailserver14.example.com> 122 | # [source,ruby] 123 | # filter { 124 | # grok { 125 | # patterns_dir => ["./patterns"] 126 | # match => { "message" => "%{SYSLOGBASE} %{POSTFIX_QUEUEID:queue_id}: %{GREEDYDATA:syslog_message}" } 127 | # } 128 | # } 129 | # 130 | # The above will match and result in the following fields: 131 | # 132 | # * `timestamp: Jan 1 06:25:43` 133 | # * `logsource: mailserver14` 134 | # * `program: postfix/cleanup` 135 | # * `pid: 21403` 136 | # * `queue_id: BEF25A72965` 137 | # * `syslog_message: message-id=<20130101142543.5828399CCAF@mailserver14.example.com>` 138 | # 139 | # The `timestamp`, `logsource`, `program`, and `pid` fields come from the 140 | # `SYSLOGBASE` pattern which itself is defined by other patterns. 141 | # 142 | # Another option is to define patterns _inline_ in the filter using `pattern_definitions`. 143 | # This is mostly for convenience and allows a user to define a pattern which can be used just in that 144 | # filter. These newly defined patterns in `pattern_definitions` will not be available outside of that particular `grok` filter. 
145 | # 146 | class LogStash::Filters::Grok < LogStash::Filters::Base 147 | include LogStash::PluginMixins::ECSCompatibilitySupport 148 | 149 | config_name "grok" 150 | 151 | # A hash of matches of field => value 152 | # 153 | # For example: 154 | # [source,ruby] 155 | # filter { 156 | # grok { match => { "message" => "Duration: %{NUMBER:duration}" } } 157 | # } 158 | # 159 | # If you need to match multiple patterns against a single field, the value can be an array of patterns 160 | # [source,ruby] 161 | # filter { 162 | # grok { match => { "message" => [ "Duration: %{NUMBER:duration}", "Speed: %{NUMBER:speed}" ] } } 163 | # } 164 | 165 | # 166 | config :match, :validate => :hash, :default => {} 167 | 168 | # 169 | # Logstash ships by default with a bunch of patterns, so you don't 170 | # necessarily need to define this yourself unless you are adding additional 171 | # patterns. You can point to multiple pattern directories using this setting. 172 | # Note that Grok will read all files in the directory matching the patterns_files_glob 173 | # and assume it's a pattern file (including any tilde backup files). 174 | # [source,ruby] 175 | # patterns_dir => ["/opt/logstash/patterns", "/opt/logstash/extra_patterns"] 176 | # 177 | # Pattern files are plain text with format: 178 | # [source,ruby] 179 | # NAME PATTERN 180 | # 181 | # For example: 182 | # [source,ruby] 183 | # NUMBER \d+ 184 | # 185 | # The patterns are loaded when the pipeline is created. 186 | config :patterns_dir, :validate => :array, :default => [] 187 | 188 | # A hash of pattern-name and pattern tuples defining custom patterns to be used by 189 | # the current filter. Patterns matching existing names will override the pre-existing 190 | # definition. 
Think of this as inline patterns available just for this definition of 191 | # grok 192 | config :pattern_definitions, :validate => :hash, :default => {} 193 | 194 | # Glob pattern, used to select the pattern files in the directories 195 | # specified by patterns_dir 196 | config :patterns_files_glob, :validate => :string, :default => "*" 197 | 198 | # Break on first match. The first successful match by grok will result in the 199 | # filter being finished. If you want grok to try all patterns (maybe you are 200 | # parsing different things), then set this to false. 201 | config :break_on_match, :validate => :boolean, :default => true 202 | 203 | # If `true`, only store named captures from grok. 204 | config :named_captures_only, :validate => :boolean, :default => true 205 | 206 | # If `true`, keep empty captures as event fields. 207 | config :keep_empty_captures, :validate => :boolean, :default => false 208 | 209 | # Define the target field for placing the matched captures. 210 | # If this setting is omitted, data gets stored at the root (top level) of the event. 211 | config :target, :validate => :string 212 | 213 | # Append values to the `tags` field when there has been no 214 | # successful match 215 | config :tag_on_failure, :validate => :array, :default => ["_grokparsefailure"] 216 | 217 | # Attempt to terminate regexps after this amount of time. 218 | # This applies per pattern if multiple patterns are applied 219 | # This will never timeout early, but may take a little longer to timeout. 220 | # Actual timeout is approximate based on a 250ms quantization. 
221 | # Set to 0 to disable timeouts 222 | config :timeout_millis, :validate => :number, :default => 30000 223 | 224 | # When multiple patterns are provided to `match`, 225 | # the timeout has historically applied to _each_ pattern, incurring overhead 226 | # for each and every pattern that is attempted; when the grok filter is 227 | # configured with `timeout_scope => 'event'`, the plugin instead enforces 228 | # a single timeout across all attempted matches on the event, so it can 229 | # achieve similar safeguard against runaway matchers with significantly 230 | # less overhead. 231 | # It's usually better to scope the timeout for the whole event. 232 | config :timeout_scope, :validate => %w(pattern event), :default => "pattern" 233 | 234 | # Tag to apply if a grok regexp times out. 235 | config :tag_on_timeout, :validate => :string, :default => '_groktimeout' 236 | 237 | # The fields to overwrite. 238 | # 239 | # This allows you to overwrite a value in a field that already exists. 240 | # 241 | # For example, if you have a syslog line in the `message` field, you can 242 | # overwrite the `message` field with part of the match like so: 243 | # [source,ruby] 244 | # filter { 245 | # grok { 246 | # match => { "message" => "%{SYSLOGBASE} %{DATA:message}" } 247 | # overwrite => [ "message" ] 248 | # } 249 | # } 250 | # 251 | # In this case, a line like `May 29 16:37:11 sadness logger: hello world` 252 | # will be parsed and `hello world` will overwrite the original message. 253 | config :overwrite, :validate => :array, :default => [] 254 | 255 | def register 256 | # a cache of capture name handler methods. 257 | @handlers = {} 258 | 259 | @patternfiles = [] 260 | # Have (default) patterns_path show first. 
Last-in pattern definitions wins 261 | # this will let folks redefine built-in patterns at runtime 262 | @patternfiles += patterns_files_from_paths(patterns_path, "*") 263 | @patternfiles += patterns_files_from_paths(@patterns_dir, @patterns_files_glob) 264 | 265 | @patterns = Hash.new { |h,k| h[k] = [] } 266 | 267 | @logger.debug("Match data", :match => @match) 268 | 269 | @metric_match_fields = metric.namespace(:patterns_per_field) 270 | 271 | @match.each do |field, patterns| 272 | patterns = [patterns] if patterns.is_a?(String) 273 | @metric_match_fields.gauge(field, patterns.length) 274 | 275 | @logger.trace? && @logger.trace("Grok compile", :field => field, :patterns => patterns) 276 | patterns.each do |pattern| 277 | @logger.debug? && @logger.debug("regexp: #{@type}/#{field}", :pattern => pattern) 278 | grok = Grok.new 279 | grok.logger = @logger 280 | add_patterns_from_files(@patternfiles, grok) 281 | add_patterns_from_inline_definition(@pattern_definitions, grok) 282 | grok.compile(pattern, @named_captures_only) 283 | @patterns[field] << grok 284 | end 285 | end # @match.each 286 | @match_counter = metric.counter(:matches) 287 | @failure_counter = metric.counter(:failures) 288 | 289 | @target = "[#{@target.strip}]" if @target && @target !~ /\[.*?\]/ 290 | 291 | @timeout = @timeout_millis > 0.0 ? RubyTimeout.new(@timeout_millis) : NoopTimeout::INSTANCE 292 | @matcher = ( @timeout_scope.eql?('event') ? EventTimeoutMatcher : PatternTimeoutMatcher ).new(self) 293 | end # def register 294 | 295 | def filter(event) 296 | matched = false 297 | 298 | @logger.debug? 
&& @logger.debug("Running grok filter", :event => event.to_hash) 299 | 300 | @patterns.each do |field, groks| 301 | if match(groks, field, event) 302 | matched = true 303 | break if @break_on_match 304 | end 305 | end 306 | 307 | if matched 308 | @match_counter.increment(1) 309 | filter_matched(event) 310 | else 311 | @failure_counter.increment(1) 312 | @tag_on_failure.each {|tag| event.tag(tag)} 313 | end 314 | 315 | @logger.debug? && @logger.debug("Event now: ", :event => event.to_hash) 316 | rescue GrokTimeoutException => e 317 | @logger.warn(e.message) 318 | metric.increment(:timeouts) 319 | event.tag(@tag_on_timeout) 320 | end # def filter 321 | 322 | def close 323 | end 324 | 325 | private 326 | 327 | # The default pattern paths, depending on environment. 328 | def patterns_path 329 | patterns_path = [] 330 | case ecs_compatibility 331 | when :disabled 332 | patterns_path << LogStash::Patterns::Core.path # :legacy 333 | when :v1 334 | patterns_path << LogStash::Patterns::Core.path('ecs-v1') 335 | when :v8 336 | @logger.warn("ECS v8 support is a preview of the unreleased ECS v8, and uses the v1 patterns. When Version 8 of the Elastic Common Schema becomes available, this plugin will need to be updated") 337 | patterns_path << LogStash::Patterns::Core.path('ecs-v1') 338 | else 339 | fail(NotImplementedError, "ECS #{ecs_compatibility} is not supported by this plugin.") 340 | end 341 | # allow plugin to be instantiated outside the LS environment (in tests) 342 | if defined? 
LogStash::Environment.pattern_path 343 | patterns_path << LogStash::Environment.pattern_path("*") 344 | end 345 | patterns_path 346 | end 347 | 348 | def match(groks, field, event) 349 | input = event.get(field) 350 | if input.is_a?(Array) 351 | success = false 352 | input.each do |input| 353 | success |= match_against_groks(groks, field, input, event) 354 | end 355 | return success 356 | else 357 | match_against_groks(groks, field, input, event) 358 | end 359 | rescue StandardError => e 360 | @logger.warn("Grok regexp threw exception", :message => e.message, :exception => e.class, :backtrace => e.backtrace) 361 | return false 362 | end 363 | 364 | def match_against_groks(groks, field, input, event) 365 | # Convert anything else to string (number, hash, etc) 366 | context = GrokContext.new(field, input.to_s) 367 | @matcher.match(context, groks, event, @break_on_match) 368 | end 369 | 370 | # Internal (base) helper to handle the global timeout switch. 371 | # @private 372 | class Matcher 373 | 374 | def initialize(filter) 375 | @filter = filter 376 | end 377 | 378 | def match(context, groks, event, break_on_match) 379 | matched = false 380 | 381 | groks.each do |grok| 382 | context.set_grok(grok) 383 | 384 | matched = execute(context, grok) 385 | if matched 386 | grok.capture(matched) { |field, value| @filter.handle(field, value, event) } 387 | break if break_on_match 388 | end 389 | end 390 | 391 | matched 392 | end 393 | 394 | protected 395 | 396 | def execute(context, grok) 397 | grok.execute(context.input) 398 | end 399 | 400 | end 401 | 402 | # @private 403 | class EventTimeoutMatcher < Matcher 404 | # @override 405 | def match(context, groks, event, break_on_match) 406 | @filter.with_timeout(context) { super } 407 | end 408 | end 409 | 410 | # @private 411 | class PatternTimeoutMatcher < Matcher 412 | # @override 413 | def execute(context, grok) 414 | @filter.with_timeout(context) { super } 415 | end 416 | end 417 | 418 | def handle(field, value, event) 419 | 
return if (value.nil? || (value.is_a?(String) && value.empty?)) unless @keep_empty_captures 420 | 421 | target_field = @target ? "#{@target}[#{field}]" : field 422 | 423 | if @overwrite.include?(field) 424 | event.set(target_field, value) 425 | else 426 | v = event.get(target_field) 427 | if v.nil? 428 | event.set(target_field, value) 429 | elsif v.is_a?(Array) 430 | # do not replace the code below with: 431 | # event[field] << value 432 | # this assumes implementation specific feature of returning a mutable object 433 | # from a field ref which should not be assumed and will change in the future. 434 | v << value 435 | event.set(target_field, v) 436 | elsif v.is_a?(String) 437 | # Promote to array since we aren't overwriting. 438 | event.set(target_field, [v, value]) 439 | else 440 | @logger.debug("Not adding matched value - found existing (#{v.class})", :field => target_field, :value => value) 441 | end 442 | end 443 | end 444 | public :handle 445 | 446 | def patterns_files_from_paths(paths, glob) 447 | patternfiles = [] 448 | @logger.debug("Grok patterns path", :paths => paths) 449 | paths.each do |path| 450 | if File.directory?(path) 451 | path = File.join(path, glob) 452 | end 453 | 454 | Dir.glob(path).each do |file| 455 | @logger.trace("Grok loading patterns from file", :path => file) 456 | if File.directory?(file) 457 | @logger.debug("Skipping path because it is a directory", :path => file) 458 | else 459 | patternfiles << file 460 | end 461 | end 462 | end 463 | patternfiles 464 | end # def patterns_files_from_paths 465 | 466 | def add_patterns_from_files(paths, grok) 467 | paths.each do |path| 468 | if !File.exists?(path) 469 | raise "Grok pattern file does not exist: #{path}" 470 | end 471 | grok.add_patterns_from_file(path) 472 | end 473 | end # def add_patterns_from_files 474 | 475 | def add_patterns_from_inline_definition(pattern_definitions, grok) 476 | pattern_definitions.each do |name, pattern| 477 | next if pattern.nil? 
478 | grok.add_pattern(name, pattern.chomp) 479 | end 480 | end 481 | 482 | class TimeoutError < RuntimeError; end 483 | 484 | class GrokTimeoutException < Exception 485 | attr_reader :grok, :field, :value 486 | 487 | def initialize(grok, field, value) 488 | @grok = grok 489 | @field = field 490 | @value = value 491 | end 492 | 493 | def message 494 | "Timeout executing grok '#{@grok.pattern}' against field '#{field}' with value '#{trunc_value}'!" 495 | end 496 | 497 | def trunc_value 498 | if value.size <= 255 # If no more than 255 chars 499 | value 500 | else 501 | "Value too large to output (#{value.bytesize} bytes)! First 255 chars are: #{value[0..255]}" 502 | end 503 | end 504 | end 505 | 506 | def with_timeout(context, &block) 507 | @timeout.exec(&block) 508 | rescue TimeoutError => error 509 | handle_timeout(context, error) 510 | end 511 | public :with_timeout 512 | 513 | def handle_timeout(context, error) 514 | raise GrokTimeoutException.new(context.grok, context.field, context.input) 515 | end 516 | 517 | # @private 518 | class GrokContext 519 | attr_reader :grok, :field, :input 520 | 521 | def initialize(field, input) 522 | @field = field 523 | @input = input 524 | end 525 | 526 | def set_grok(grok) 527 | @grok = grok 528 | end 529 | end 530 | 531 | # @private 532 | class NoopTimeout 533 | INSTANCE = new 534 | 535 | def exec 536 | yield 537 | end 538 | end 539 | 540 | # @private 541 | class RubyTimeout 542 | def initialize(timeout_millis) 543 | # divide by float to allow fractional seconds, the Timeout class timeout value is in seconds but the underlying 544 | # executor resolution is in microseconds so fractional second parameter down to microseconds is possible. 
545 | # see https://github.com/jruby/jruby/blob/9.2.7.0/core/src/main/java/org/jruby/ext/timeout/Timeout.java#L125 546 | @timeout_seconds = timeout_millis / 1000.0 547 | end 548 | 549 | def exec(&block) 550 | Timeout.timeout(@timeout_seconds, TimeoutError, &block) 551 | end 552 | end 553 | end # class LogStash::Filters::Grok 554 | -------------------------------------------------------------------------------- /spec/filters/grok_spec.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | require_relative "../spec_helper" 3 | require 'logstash/plugin_mixins/ecs_compatibility_support/spec_helper' 4 | require "logstash/filters/grok" 5 | 6 | describe LogStash::Filters::Grok do 7 | subject { described_class.new(config) } 8 | let(:config) { {} } 9 | let(:event) { LogStash::Event.new(data) } 10 | let(:data) { { "message" => message } } 11 | 12 | def self.sample(message, &block) 13 | # mod = RSpec::Core::MemoizedHelpers.module_for(self) 14 | # mod.attr_reader :message 15 | # # mod.__send__(:define_method, :message) { message } 16 | # it("matches: #{message}") { @message = message; block.call } 17 | describe message do 18 | let(:message) { message } 19 | it("groks", &block) 20 | end 21 | end 22 | 23 | describe "in ecs mode", :ecs_compatibility_support, :aggregate_failures do 24 | ecs_compatibility_matrix(:disabled, :v1, :v8 => :v1) do |ecs_select| 25 | before(:each) do 26 | allow_any_instance_of(described_class).to receive(:ecs_compatibility).and_return(ecs_compatibility) 27 | subject.register 28 | subject.filter(event) 29 | end 30 | 31 | describe "simple syslog line" do 32 | let(:message) { 'Mar 16 00:01:25 evita postfix/smtpd[1713]: connect from camomile.cloud9.net[168.100.1.3]' } 33 | 34 | context 'with overwrite' do 35 | let(:config) { { "match" => { "message" => "%{SYSLOGLINE}" }, "overwrite" => [ "message" ] } } 36 | 37 | it "matches pattern" do 38 | expect( event.get("tags") ).to be nil 39 | expect( event.get 
ecs_select[disabled: "[logsource]", v1: "[host][hostname]"] ).to eq "evita" 40 | expect( event.get("timestamp") ).to eq "Mar 16 00:01:25" 41 | expect( event.get("message") ).to eql "connect from camomile.cloud9.net[168.100.1.3]" 42 | expect( event.get ecs_select[disabled: "[program]", v1: "[process][name]"] ).to eq "postfix/smtpd" 43 | expect( event.get(ecs_select[disabled: "[pid]", v1: "[process][pid]"]).to_s ).to eq "1713" 44 | end 45 | end 46 | 47 | context 'with target' do 48 | let(:config) { { "match" => { "message" => "%{SYSLOGLINE}" }, "target" => "grok" } } 49 | 50 | it "matches pattern" do 51 | expect( event.get("message") ).to eql message 52 | expect( event.get("tags") ).to be nil 53 | expect( event.get("grok") ).to_not be nil 54 | expect( event.get("[grok][timestamp]") ).to eql "Mar 16 00:01:25" 55 | expect( event.get("[grok][message]") ).to eql "connect from camomile.cloud9.net[168.100.1.3]" 56 | expect( event.get(ecs_select[disabled: "[grok][pid]", v1: "[grok][process][pid]"]).to_s ).to eq "1713" 57 | 58 | end 59 | end 60 | 61 | context 'with [deep] target' do 62 | let(:config) { { "match" => { "message" => "%{SYSLOGLINE}" }, "target" => "[@metadata][grok]" } } 63 | 64 | it "matches pattern" do 65 | expect( event.get("message") ).to eql message 66 | expect( event.get("tags") ).to be nil 67 | expect( event.get("grok") ).to be nil 68 | expect( event.get ecs_select[disabled: "[@metadata][grok][logsource]", v1: "[@metadata][grok][host][hostname]"] ).to eq "evita" 69 | expect( event.get("[@metadata][grok][message]") ).to eql "connect from camomile.cloud9.net[168.100.1.3]" 70 | expect( event.get(ecs_select[disabled: "[@metadata][grok][pid]", v1: "[@metadata][grok][process][pid]"]).to_s ).to eq "1713" 71 | end 72 | end 73 | end 74 | 75 | describe "ietf 5424 syslog line" do 76 | let(:config) { { "match" => { "message" => "%{SYSLOG5424LINE}" } } } 77 | 78 | sample "<191>1 2009-06-30T18:30:00+02:00 paxton.local grokdebug 4123 - [id1 foo=\"bar\"][id2 
baz=\"something\"] Hello, syslog." do 79 | expect( event.get("tags") ).to be nil 80 | expect( event.get(ecs_select[disabled: "[syslog5424_pri]", v1: "[log][syslog][priority]"]).to_s ).to eq "191" 81 | expect( event.get ecs_select[disabled: "[syslog5424_ver]", v1: "[system][syslog][version]"] ).to eq "1" 82 | expect( event.get ecs_select[disabled: "[syslog5424_ts]", v1: "[timestamp]"] ).to eq "2009-06-30T18:30:00+02:00" 83 | expect( event.get ecs_select[disabled: "[syslog5424_host]", v1: "[host][hostname]"] ).to eq "paxton.local" 84 | expect( event.get ecs_select[disabled: "[syslog5424_app]", v1: "[process][name]"] ).to eq "grokdebug" 85 | expect( event.get(ecs_select[disabled: "[syslog5424_proc]", v1: "[process][pid]"] ).to_s ).to eq "4123" 86 | expect( event.get ecs_select[disabled: "[syslog5424_msgid]", v1: "[log][syslog][msgid]"] ).to eq nil 87 | expect( event.get ecs_select[disabled: "[syslog5424_sd]", v1: "[system][syslog][structured_data]"] ).to eq "[id1 foo=\"bar\"][id2 baz=\"something\"]" 88 | expect( event.get ecs_select[disabled: "[syslog5424_msg]", v1: "[message][1]"] ).to eq "Hello, syslog." 89 | end 90 | 91 | sample "<191>1 2009-06-30T18:30:00+02:00 paxton.local grokdebug - - [id1 foo=\"bar\"] No process ID." 
do 92 | expect( event.get("tags") ).to be nil 93 | expect( event.get(ecs_select[disabled: "[syslog5424_pri]", v1: "[log][syslog][priority]"]).to_s ).to eq "191" 94 | expect( event.get ecs_select[disabled: "[syslog5424_ver]", v1: "[system][syslog][version]"] ).to eq "1" 95 | expect( event.get ecs_select[disabled: "[syslog5424_ts]", v1: "[timestamp]"] ).to eq "2009-06-30T18:30:00+02:00" 96 | expect( event.get ecs_select[disabled: "[syslog5424_host]", v1: "[host][hostname]"] ).to eq "paxton.local" 97 | expect( event.get ecs_select[disabled: "[syslog5424_app]", v1: "[process][name]"] ).to eq "grokdebug" 98 | expect( event.get(ecs_select[disabled: "[syslog5424_proc]", v1: "[process][pid]"] ) ).to eq nil 99 | expect( event.get ecs_select[disabled: "[syslog5424_msgid]", v1: "[log][syslog][msgid]"] ).to eq nil 100 | expect( event.get ecs_select[disabled: "[syslog5424_sd]", v1: "[system][syslog][structured_data]"] ).to eq "[id1 foo=\"bar\"]" 101 | expect( event.get ecs_select[disabled: "[syslog5424_msg]", v1: "[message][1]"] ).to eq "No process ID." 102 | end 103 | 104 | sample "<191>1 2009-06-30T18:30:00+02:00 paxton.local grokdebug 4123 - - No structured data." 
do 105 | expect( event.get("tags") ).to be nil 106 | expect( event.get(ecs_select[disabled: "[syslog5424_pri]", v1: "[log][syslog][priority]"]).to_s ).to eq "191" 107 | expect( event.get ecs_select[disabled: "[syslog5424_ver]", v1: "[system][syslog][version]"] ).to eq "1" 108 | expect( event.get ecs_select[disabled: "[syslog5424_ts]", v1: "[timestamp]"] ).to eq "2009-06-30T18:30:00+02:00" 109 | expect( event.get ecs_select[disabled: "[syslog5424_host]", v1: "[host][hostname]"] ).to eq "paxton.local" 110 | expect( event.get ecs_select[disabled: "[syslog5424_app]", v1: "[process][name]"] ).to eq "grokdebug" 111 | expect( event.get(ecs_select[disabled: "[syslog5424_proc]", v1: "[process][pid]"] ).to_s ).to eq '4123' 112 | expect( event.get ecs_select[disabled: "[syslog5424_msgid]", v1: "[log][syslog][msgid]"] ).to eq nil 113 | expect( event.get ecs_select[disabled: "[syslog5424_sd]", v1: "[system][syslog][structured_data]"] ).to eq nil 114 | expect( event.get ecs_select[disabled: "[syslog5424_msg]", v1: "[message][1]"] ).to eq "No structured data." 115 | end 116 | 117 | sample "<191>1 2009-06-30T18:30:00+02:00 paxton.local grokdebug - - - No PID or SD." 
do 118 | expect( event.get("tags") ).to be nil 119 | expect( event.get(ecs_select[disabled: "[syslog5424_pri]", v1: "[log][syslog][priority]"]).to_s ).to eq "191" 120 | expect( event.get ecs_select[disabled: "[syslog5424_ver]", v1: "[system][syslog][version]"] ).to eq "1" 121 | expect( event.get ecs_select[disabled: "[syslog5424_ts]", v1: "[timestamp]"] ).to eq "2009-06-30T18:30:00+02:00" 122 | expect( event.get ecs_select[disabled: "[syslog5424_host]", v1: "[host][hostname]"] ).to eq "paxton.local" 123 | expect( event.get ecs_select[disabled: "[syslog5424_app]", v1: "[process][name]"] ).to eq "grokdebug" 124 | expect( event.get(ecs_select[disabled: "[syslog5424_proc]", v1: "[process][pid]"] ) ).to eq nil 125 | expect( event.get ecs_select[disabled: "[syslog5424_msgid]", v1: "[log][syslog][msgid]"] ).to eq nil 126 | expect( event.get ecs_select[disabled: "[syslog5424_sd]", v1: "[system][syslog][structured_data]"] ).to eq nil 127 | expect( event.get ecs_select[disabled: "[syslog5424_msg]", v1: "[message][1]"] ).to eq "No PID or SD." 128 | end 129 | 130 | sample "<191>1 2009-06-30T18:30:00+02:00 paxton.local grokdebug 4123 - Missing structured data." do 131 | expect( event.get("tags") ).to be nil 132 | expect( event.get(ecs_select[disabled: "[syslog5424_proc]", v1: "[process][pid]"] ).to_s ).to eq '4123' 133 | expect( event.get ecs_select[disabled: "[syslog5424_msgid]", v1: "[log][syslog][msgid]"] ).to eq nil 134 | expect( event.get ecs_select[disabled: "[syslog5424_sd]", v1: "[system][syslog][structured_data]"] ).to eq nil 135 | expect( event.get ecs_select[disabled: "[syslog5424_msg]", v1: "[message][1]"] ).to eq "Missing structured data." 136 | end 137 | 138 | sample "<191>1 2009-06-30T18:30:00+02:00 paxton.local grokdebug 4123 - - Additional spaces." 
do 139 | expect( event.get("tags") ).to be nil 140 | expect( event.get ecs_select[disabled: "[syslog5424_app]", v1: "[process][name]"] ).to eq "grokdebug" 141 | expect( event.get(ecs_select[disabled: "[syslog5424_proc]", v1: "[process][pid]"] ).to_s ).to eq '4123' 142 | expect( event.get ecs_select[disabled: "[syslog5424_msgid]", v1: "[log][syslog][msgid]"] ).to eq nil 143 | expect( event.get ecs_select[disabled: "[syslog5424_sd]", v1: "[system][syslog][structured_data]"] ).to eq nil 144 | expect( event.get ecs_select[disabled: "[syslog5424_msg]", v1: "[message][1]"] ).to eq "Additional spaces." 145 | end 146 | 147 | sample "<191>1 2009-06-30T18:30:00+02:00 paxton.local grokdebug 4123 - Additional spaces and missing SD." do 148 | expect( event.get("tags") ).to be nil 149 | expect( event.get ecs_select[disabled: "[syslog5424_app]", v1: "[process][name]"] ).to eq "grokdebug" 150 | expect( event.get(ecs_select[disabled: "[syslog5424_proc]", v1: "[process][pid]"] ).to_s ).to eq "4123" 151 | expect( event.get ecs_select[disabled: "[syslog5424_msgid]", v1: "[log][syslog][msgid]"] ).to eq nil 152 | expect( event.get ecs_select[disabled: "[syslog5424_sd]", v1: "[system][syslog][structured_data]"] ).to eq nil 153 | expect( event.get ecs_select[disabled: "[syslog5424_msg]", v1: "[message][1]"] ).to eq "Additional spaces and missing SD." 
154 | end 155 | 156 | sample "<30>1 2014-04-04T16:44:07+02:00 osctrl01 dnsmasq-dhcp 8048 - - Appname contains a dash" do 157 | expect( event.get("tags") ).to be nil 158 | expect( event.get(ecs_select[disabled: "[syslog5424_pri]", v1: "[log][syslog][priority]"]).to_s ).to eq "30" 159 | expect( event.get ecs_select[disabled: "[syslog5424_ver]", v1: "[system][syslog][version]"] ).to eq "1" 160 | expect( event.get ecs_select[disabled: "[syslog5424_ts]", v1: "[timestamp]"] ).to eq "2014-04-04T16:44:07+02:00" 161 | expect( event.get ecs_select[disabled: "[syslog5424_host]", v1: "[host][hostname]"] ).to eq "osctrl01" 162 | expect( event.get ecs_select[disabled: "[syslog5424_app]", v1: "[process][name]"] ).to eq "dnsmasq-dhcp" 163 | expect( event.get(ecs_select[disabled: "[syslog5424_proc]", v1: "[process][pid]"] ).to_s ).to eq "8048" 164 | expect( event.get ecs_select[disabled: "[syslog5424_msgid]", v1: "[log][syslog][msgid]"] ).to eq nil 165 | expect( event.get ecs_select[disabled: "[syslog5424_sd]", v1: "[system][syslog][structured_data]"] ).to eq nil 166 | expect( event.get ecs_select[disabled: "[syslog5424_msg]", v1: "[message][1]"] ).to eq "Appname contains a dash" 167 | end 168 | 169 | sample "<30>1 2014-04-04T16:44:07+02:00 osctrl01 - 8048 - - Appname is nil" do 170 | expect( event.get("tags") ).to be nil 171 | expect( event.get(ecs_select[disabled: "[syslog5424_pri]", v1: "[log][syslog][priority]"]).to_s ).to eq "30" 172 | expect( event.get ecs_select[disabled: "[syslog5424_ver]", v1: "[system][syslog][version]"] ).to eq "1" 173 | expect( event.get ecs_select[disabled: "[syslog5424_ts]", v1: "[timestamp]"] ).to eq "2014-04-04T16:44:07+02:00" 174 | expect( event.get ecs_select[disabled: "[syslog5424_host]", v1: "[host][hostname]"] ).to eq "osctrl01" 175 | expect( event.get ecs_select[disabled: "[syslog5424_app]", v1: "[process][name]"] ).to eq nil 176 | expect( event.get(ecs_select[disabled: "[syslog5424_proc]", v1: "[process][pid]"] ).to_s ).to eq "8048" 177 | 
expect( event.get ecs_select[disabled: "[syslog5424_msgid]", v1: "[log][syslog][msgid]"] ).to eq nil 178 | expect( event.get ecs_select[disabled: "[syslog5424_sd]", v1: "[system][syslog][structured_data]"] ).to eq nil 179 | expect( event.get ecs_select[disabled: "[syslog5424_msg]", v1: "[message][1]"] ).to eq "Appname is nil" 180 | end 181 | end 182 | end 183 | end 184 | 185 | describe "non ecs" do 186 | before(:each) do 187 | subject.register 188 | subject.filter(event) 189 | end 190 | 191 | describe "parsing an event with multiple messages (array of strings)" do 192 | let(:config) { { "match" => { "message" => "(?:hello|world) %{NUMBER:num}" } } } 193 | let(:message) { [ "hello 12345", "world 23456" ] } 194 | 195 | it "matches them all" do 196 | expect( event.get("num") ).to eql [ "12345", "23456" ] 197 | end 198 | end 199 | 200 | describe "coercing matched values" do 201 | let(:config) { { "match" => { "message" => "%{NUMBER:foo:int} %{NUMBER:bar:float}" } } } 202 | let(:message) { '400 454.33' } 203 | 204 | it "coerces matched values" do 205 | expect( event.get("foo") ).to be_a Integer 206 | expect( event.get("foo") ).to eql 400 207 | expect( event.get("bar") ).to be_a Float 208 | expect( event.get("bar") ).to eql 454.33 209 | end 210 | end 211 | 212 | describe "in-line pattern definitions" do 213 | let(:config) { { "match" => { "message" => "%{FIZZLE=\\d+}" }, "named_captures_only" => false } } 214 | 215 | sample "hello 1234" do 216 | expect( event.get("FIZZLE") ).to eql '1234' 217 | end 218 | end 219 | 220 | describe "processing selected fields" do 221 | let(:config) { 222 | { 223 | 'match' => { "message" => "%{WORD:word}", "examplefield" => "%{NUMBER:num}" }, 224 | 'break_on_match' => false 225 | } 226 | } 227 | let(:data) { { "message" => "hello world", "examplefield" => "12345" } } 228 | 229 | it "processes declared matches" do 230 | expect( event.get("word") ).to eql 'hello' 231 | expect( event.get("num") ).to eql '12345' 232 | end 233 | end 234 | 235 | 
describe "adding fields on match" do 236 | let(:config) { 237 | { 238 | 'match' => { "message" => "matchme %{NUMBER:fancy}" }, 239 | 'add_field' => [ "new_field", "%{fancy}" ] 240 | } 241 | } 242 | 243 | sample "matchme 1234" do 244 | expect( event.get("tags") ).to be nil 245 | expect( event.get("new_field") ).to eql "1234" 246 | end 247 | 248 | sample "this will not be matched" do 249 | expect( event.get("tags") ).to include("_grokparsefailure") 250 | expect( event ).not_to include 'new_field' 251 | end 252 | end 253 | 254 | context "empty fields" do 255 | describe "drop by default" do 256 | let(:config) { 257 | { 258 | 'match' => { "message" => "1=%{WORD:foo1} *(2=%{WORD:foo2})?" } 259 | } 260 | } 261 | 262 | sample "1=test" do 263 | expect( event.get("tags") ).to be nil 264 | expect( event ).to include 'foo1' 265 | 266 | # Since 'foo2' was not captured, it must not be present in the event. 267 | expect( event ).not_to include 'foo2' 268 | end 269 | end 270 | 271 | describe "keep if keep_empty_captures is true" do 272 | let(:config) { 273 | { 274 | 'match' => { "message" => "1=%{WORD:foo1} *(2=%{WORD:foo2})?" }, 275 | 'keep_empty_captures' => true 276 | } 277 | } 278 | 279 | sample "1=test" do 280 | expect( event.get("tags") ).to be nil 281 | # use .to_hash for this test, for now, because right now 282 | # the Event.include? returns false for missing fields as well 283 | # as for fields with nil values. 284 | expect( event.to_hash ).to include 'foo1' 285 | expect( event.to_hash ).to include 'foo2' 286 | end 287 | end 288 | end 289 | 290 | describe "when named_captures_only == false" do 291 | let(:config) { 292 | { 293 | 'match' => { "message" => "Hello %{WORD}. %{WORD:foo}" }, 294 | 'named_captures_only' => false 295 | } 296 | } 297 | 298 | sample "Hello World, yo!" 
do 299 | expect( event ).to include 'WORD' 300 | expect( event.get("WORD") ).to eql "World" 301 | expect( event ).to include 'foo' 302 | expect( event.get("foo") ).to eql "yo" 303 | end 304 | end 305 | 306 | describe "using oniguruma named captures (?regex)" do 307 | context "plain regexp" do 308 | let(:config) { 309 | { 310 | 'match' => { "message" => "(?\\w+)" } 311 | } 312 | } 313 | 314 | sample "hello world" do 315 | expect( event.get("tags") ).to be nil 316 | expect( event.get("foo") ).to eql "hello" 317 | end 318 | end 319 | 320 | context "grok patterns" do 321 | let(:config) { 322 | { 323 | 'match' => { "message" => "(?%{DATE_EU} %{TIME})" } 324 | } 325 | } 326 | 327 | sample "fancy 12-12-12 12:12:12" do 328 | expect( event.get("tags") ).to be nil 329 | expect( event.get("timestamp") ).to eql "12-12-12 12:12:12" 330 | end 331 | end 332 | end 333 | 334 | describe "grok on integer types" do 335 | let(:config) { 336 | { 337 | 'match' => { "status" => "^403$" }, 'add_tag' => "four_oh_three" 338 | } 339 | } 340 | let(:data) { Hash({ "status" => 403 }) } 341 | 342 | it "parses" do 343 | expect( event.get("tags") ).not_to include "_grokparsefailure" 344 | expect( event.get("tags") ).to include "four_oh_three" 345 | end 346 | end 347 | 348 | describe "grok on float types" do 349 | let(:config) { 350 | { 351 | 'match' => { "version" => "^1.0$" }, 'add_tag' => "one_point_oh" 352 | } 353 | } 354 | let(:data) { Hash({ "version" => 1.0 }) } 355 | 356 | it "parses" do 357 | expect( event.get("tags") ).not_to include "_grokparsefailure" 358 | expect( event.get("tags") ).to include "one_point_oh" 359 | end 360 | end 361 | 362 | describe "grok on %{LOGLEVEL}" do 363 | let(:config) { 364 | { 365 | 'match' => { "message" => "%{LOGLEVEL:level}: error!" 
} 366 | } 367 | } 368 | 369 | log_level_names = %w( 370 | trace Trace TRACE 371 | debug Debug DEBUG 372 | notice Notice Notice 373 | info Info INFO 374 | warn warning Warn Warning WARN WARNING 375 | err error Err Error ERR ERROR 376 | crit critical Crit Critical CRIT CRITICAL 377 | fatal Fatal FATAL 378 | severe Severe SEVERE 379 | emerg emergency Emerg Emergency EMERG EMERGENCY 380 | ) 381 | log_level_names.each do |level_name| 382 | sample "#{level_name}: error!" do 383 | expect( event.get("level") ).to eql level_name 384 | end 385 | end 386 | end 387 | 388 | describe "timeout on failure" do 389 | let(:config) { 390 | { 391 | 'match' => { "message" => "(.*a){30}" }, 392 | 'timeout_millis' => 100 393 | } 394 | } 395 | 396 | sample "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" do 397 | expect( event.get("tags") ).to include("_groktimeout") 398 | expect( event.get("tags") ).not_to include("_grokparsefailure") 399 | end 400 | end 401 | 402 | describe "no timeout on failure with multiple patterns (when timeout not grouped)" do 403 | let(:config) { 404 | { 405 | 'match' => { 406 | "message" => [ 407 | "(.*f){20}", "(.*e){20}", "(.*d){20}", "(.*c){20}", "(.*b){20}", 408 | "(.*a){25}", "(.*a){24}", "(.*a){23}", "(.*a){22}", "(.*a){21}", 409 | "(.*a){25}", "(.*a){24}", "(.*a){23}", "(.*a){22}", "(.*a){21}", 410 | "(.*a){25}", "(.*a){24}", "(.*a){23}", "(.*a){22}", "(.*a){21}", 411 | "(.*a){20}" 412 | ] 413 | }, 414 | 'timeout_millis' => 750, 415 | 'timeout_scope' => 'pattern' 416 | } 417 | } 418 | 419 | sample( 'b' * 15 + 'c' * 15 + 'd' * 15 + 'e' * 15 + ' ' + 'a' * 20 ) do 420 | expect( event.get("tags") ).to be nil 421 | end 422 | end 423 | 424 | describe "timeout on grouped (multi-pattern) failure" do 425 | let(:config) { 426 | { 427 | 'match' => { 428 | "message" => [ 429 | "(.*f){20}", "(.*e){20}", "(.*d){20}", "(.*c){20}", "(.*b){20}", 430 | "(.*a){25}", "(.*a){24}", "(.*a){23}", "(.*a){22}", "(.*a){21}", 431 | "(.*a){25}", "(.*a){24}", 
"(.*a){23}", "(.*a){22}", "(.*a){21}", 432 | "(.*a){25}", "(.*a){24}", "(.*a){23}", "(.*a){22}", "(.*a){21}", 433 | "(.*a){20}" 434 | ] 435 | }, 436 | 'timeout_millis' => 750, 437 | 'timeout_scope' => 'event' 438 | } 439 | } 440 | 441 | sample( 'b' * 15 + 'c' * 15 + 'd' * 15 + 'e' * 15 + ' ' + 'a' * 20 ) do 442 | expect( event.get("tags") ).to include("_groktimeout") 443 | expect( event.get("tags") ).not_to include("_grokparsefailure") 444 | end 445 | end 446 | 447 | describe "tagging on failure" do 448 | let(:config) { 449 | { 450 | 'match' => { "message" => "matchme %{NUMBER:fancy}" }, 451 | 'tag_on_failure' => 'not_a_match' 452 | } 453 | } 454 | 455 | sample "matchme 1234" do 456 | expect( event.get("tags") ).to be nil 457 | end 458 | 459 | sample "this will not be matched" do 460 | expect( event.get("tags") ).to include("not_a_match") 461 | end 462 | end 463 | 464 | describe "captures named fields even if the whole text matches" do 465 | let(:config) { 466 | { 467 | 'match' => { "message" => "%{DATE_EU:stimestamp}" } 468 | } 469 | } 470 | 471 | sample "11/01/01" do 472 | expect( event.get("stimestamp") ).to eql "11/01/01" 473 | end 474 | end 475 | 476 | describe "allow dashes in capture names" do 477 | let(:config) { 478 | { 479 | 'match' => { "message" => "%{WORD:foo-bar}" } 480 | } 481 | } 482 | 483 | sample "hello world" do 484 | expect( event.get("foo-bar") ).to eql "hello" 485 | end 486 | end 487 | 488 | describe "single value match with duplicate-named fields in pattern" do 489 | let(:config) { 490 | { 491 | 'match' => { "message" => "%{INT:foo}|%{WORD:foo}" } 492 | } 493 | } 494 | 495 | sample "hello world" do 496 | expect( event.get("foo") ).to be_a(String) 497 | end 498 | 499 | sample "123 world" do 500 | expect( event.get("foo") ).to be_a(String) 501 | end 502 | end 503 | 504 | 505 | describe "break_on_match default should be true" do 506 | let(:config) { 507 | { 508 | 'match' => { "message" => "%{INT:foo}", "somefield" => "%{INT:bar}" } 509 | } 510 | 
} 511 | let(:data) { Hash("message" => "hello world 123", "somefield" => "testme abc 999") } 512 | 513 | it 'exits filter after first match' do 514 | expect( event.get("foo") ).to eql '123' 515 | expect( event.get("bar") ).to be nil 516 | end 517 | end 518 | 519 | describe "break_on_match when set to false" do 520 | let(:config) { 521 | { 522 | 'match' => { "message" => "%{INT:foo}", "somefield" => "%{INT:bar}" }, 523 | 'break_on_match' => false 524 | } 525 | } 526 | let(:data) { Hash("message" => "hello world 123", "somefield" => "testme abc 999") } 527 | 528 | it 'should try all patterns' do 529 | expect( event.get("foo") ).to eql '123' 530 | expect( event.get("bar") ).to eql '999' 531 | end 532 | end 533 | 534 | context "break_on_match default for array input with single grok pattern" do 535 | let(:config) { 536 | { 537 | 'match' => { "message" => "%{INT:foo}" }, 538 | 'break_on_match' => false 539 | } 540 | } 541 | 542 | describe 'fully matching input' do 543 | let(:data) { Hash("message" => ["hello world 123", "line 23"]) } # array input -- 544 | it 'matches' do 545 | expect( event.get("foo") ).to eql ["123", "23"] 546 | expect( event.get("tags") ).to be nil 547 | end 548 | end 549 | 550 | describe 'partially matching input' do 551 | let(:data) { Hash("message" => ["hello world 123", "abc"]) } # array input, one of them matches 552 | it 'matches' do 553 | expect( event.get("foo") ).to eql "123" 554 | expect( event.get("tags") ).to be nil 555 | end 556 | end 557 | end 558 | 559 | describe "break_on_match = true (default) for array input with multiple grok pattern" do 560 | let(:config) { 561 | { 562 | 'match' => { "message" => ["%{INT:foo}", "%{WORD:bar}"] } 563 | } 564 | } 565 | 566 | describe 'matching input' do 567 | let(:data) { Hash("message" => ["hello world 123", "line 23"]) } # array input -- 568 | it 'matches' do 569 | expect( event.get("foo") ).to eql ["123", "23"] 570 | expect( event.get("bar") ).to be nil 571 | expect( event.get("tags") ).to be nil 
572 | end 573 | end 574 | 575 | describe 'partially matching input' do 576 | let(:data) { Hash("message" => ["hello world", "line 23"]) } # array input, one of them matches 577 | it 'matches' do 578 | expect( event.get("bar") ).to eql 'hello' 579 | expect( event.get("foo") ).to eql "23" 580 | expect( event.get("tags") ).to be nil 581 | end 582 | end 583 | end 584 | 585 | describe "break_on_match = false for array input with multiple grok pattern" do 586 | let(:config) { 587 | { 588 | 'match' => { "message" => ["%{INT:foo}", "%{WORD:bar}"] }, 589 | 'break_on_match' => false 590 | } 591 | } 592 | 593 | describe 'fully matching input' do 594 | let(:data) { Hash("message" => ["hello world 123", "line 23"]) } # array input -- 595 | it 'matches' do 596 | expect( event.get("foo") ).to eql ["123", "23"] 597 | expect( event.get("bar") ).to eql ["hello", "line"] 598 | expect( event.get("tags") ).to be nil 599 | end 600 | end 601 | 602 | describe 'partially matching input' do 603 | let(:data) { Hash("message" => ["hello world", "line 23"]) } # array input, one of them matches 604 | it 'matches' do 605 | expect( event.get("bar") ).to eql ["hello", "line"] 606 | expect( event.get("foo") ).to eql "23" 607 | expect( event.get("tags") ).to be nil 608 | end 609 | end 610 | end 611 | 612 | describe "grok with unicode" do 613 | let(:config) { 614 | { 615 | #'match' => { "message" => "<%{POSINT:syslog_pri}>%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{PROG:syslog_program}(?:\[%{POSINT:syslog_pid}\])?: %{GREEDYDATA:syslog_message}" } 616 | 'match' => { "message" => "<%{POSINT:syslog_pri}>%{SPACE}%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{PROG:syslog_program}(:?)(?:\\[%{GREEDYDATA:syslog_pid}\\])?(:?) 
%{GREEDYDATA:syslog_message}" } 617 | } 618 | } 619 | 620 | sample "<22>Jan 4 07:50:46 mailmaster postfix/policy-spf[9454]: : SPF permerror (Junk encountered in record 'v=spf1 mx a:mail.domain.no ip4:192.168.0.4 �all'): Envelope-from: email@domain.no" do 621 | expect( event.get("tags") ).to be nil 622 | expect( event.get("syslog_pri") ).to eql "22" 623 | expect( event.get("syslog_program") ).to eql "postfix/policy-spf" 624 | end 625 | end 626 | 627 | describe "grok with nil coerced value" do 628 | let(:config) { 629 | { 630 | 'match' => { "message" => "test (N/A|%{BASE10NUM:duration:float}ms)" } 631 | } 632 | } 633 | 634 | sample "test 28.4ms" do 635 | expect( event.get("duration") ).to eql 28.4 636 | expect( event.get("tags") ).to be nil 637 | end 638 | 639 | sample "test N/A" do 640 | expect( event.to_hash ).not_to include("duration") 641 | expect( event.get("tags") ).to be nil 642 | end 643 | 644 | sample "test abc" do 645 | expect( event.get("duration") ).to be nil 646 | expect( event.get("tags") ).to eql ["_grokparsefailure"] 647 | end 648 | end 649 | 650 | describe "grok with nil coerced value and keep_empty_captures" do 651 | let(:config) { 652 | { 653 | 'match' => { "message" => "test (N/A|%{BASE10NUM:duration:float}ms)" }, 654 | 'keep_empty_captures' => true 655 | } 656 | } 657 | 658 | sample "test N/A" do 659 | expect( event.to_hash ).to include("duration") 660 | expect( event.get("tags") ).to be nil 661 | end 662 | end 663 | 664 | describe "grok with no coercion" do 665 | let(:config) { 666 | { 667 | 'match' => { "message" => "test (N/A|%{BASE10NUM:duration}ms)" }, 668 | } 669 | } 670 | 671 | sample "test 28.4ms" do 672 | expect( event.get("duration") ).to eql '28.4' 673 | expect( event.get("tags") ).to be nil 674 | end 675 | 676 | sample "test N/A" do 677 | expect( event.get("duration") ).to be nil 678 | expect( event.get("tags") ).to be nil 679 | end 680 | end 681 | 682 | describe "opening/closing" do 683 | let(:config) { { "match" => {"message" => 
"A"} } } 684 | let(:message) { 'AAA' } 685 | 686 | it "should close cleanly" do 687 | expect { subject.do_close }.not_to raise_error 688 | end 689 | end 690 | 691 | describe "after grok when the event is JSON serialised the field values are unchanged" do 692 | let(:config) { 693 | { 694 | 'match' => ["message", "Failed password for (invalid user |)%{USERNAME:username} from %{IP:src_ip} port %{BASE10NUM:port}"], 695 | 'remove_field' => ["message","severity"], 696 | 'add_tag' => ["ssh_failure"] 697 | } 698 | } 699 | 700 | sample('{"facility":"auth","message":"Failed password for testuser from 1.1.1.1 port 22"}') do 701 | expect( event.get("username") ).to eql "testuser" 702 | expect( event.get("port") ).to eql "22" 703 | expect( event.get("src_ip") ).to eql "1.1.1.1" 704 | expect( LogStash::Json.dump(event.get('username')) ).to eql "\"testuser\"" 705 | 706 | expect( event.to_json ).to match %r|"src_ip":"1.1.1.1"| 707 | expect( event.to_json ).to match %r|"@timestamp":"#{Regexp.escape(event.get('@timestamp').to_s)}"| 708 | expect( event.to_json ).to match %r|"port":"22"| 709 | expect( event.to_json ).to match %r|"@version":"1"| 710 | expect( event.to_json ).to match %r|"username"|i 711 | expect( event.to_json ).to match %r|"testuser"| 712 | expect( event.to_json ).to match %r|"tags":\["ssh_failure"\]| 713 | end 714 | end 715 | 716 | describe "grok with inline pattern definition successfully extracts fields" do 717 | let(:config) { 718 | { 719 | 'match' => { "message" => "%{APACHE_TIME:timestamp} %{LOGLEVEL:level} %{MY_PATTERN:hindsight}" }, 720 | 'pattern_definitions' => { 721 | "APACHE_TIME" => "%{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{YEAR}", 722 | "MY_PATTERN" => "%{YEAR}" 723 | } 724 | } 725 | } 726 | 727 | sample "Mon Dec 26 16:22:08 2016 error 2020" do 728 | expect( event.get("timestamp") ).to eql "Mon Dec 26 16:22:08 2016" 729 | expect( event.get("level") ).to eql "error" 730 | expect( event.get("hindsight") ).to eql "2020" 731 | end 732 | end 733 | 734 | describe 
"grok with inline pattern definition overwrites existing pattern definition" do 735 | let(:config) { 736 | { 737 | 'match' => { "message" => "%{APACHE_TIME:timestamp} %{LOGLEVEL:level}" }, 738 | # loglevel was previously ([Aa]lert|ALERT|[Tt]... 739 | 'pattern_definitions' => { 740 | "APACHE_TIME" => "%{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{YEAR}", 741 | "LOGLEVEL" => "%{NUMBER}" 742 | } 743 | } 744 | } 745 | 746 | sample "Mon Dec 26 16:22:08 2016 9999" do 747 | expect( event.get("timestamp") ).to eql "Mon Dec 26 16:22:08 2016" 748 | expect( event.get("level") ).to eql "9999" 749 | end 750 | end 751 | 752 | context 'when timeouts are explicitly disabled' do 753 | let(:config) do 754 | { 755 | "timeout_millis" => 0 756 | } 757 | end 758 | 759 | context 'when given a pathological input', slow: true do 760 | let(:message) { "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"} 761 | let(:config) { super().merge("match" => { "message" => "(.*a){30}" }) } 762 | 763 | it 'blocks for at least 3 seconds' do 764 | blocking_exception_class = Class.new(::Exception) # avoid RuntimeError 765 | expect do 766 | Timeout.timeout(3, blocking_exception_class) do 767 | subject.filter(event) 768 | end 769 | end.to raise_exception(blocking_exception_class) 770 | end 771 | end 772 | end 773 | end 774 | 775 | end 776 | 777 | describe LogStash::Filters::Grok do 778 | 779 | subject(:grok_filter) { described_class.new(config) } 780 | let(:config) { {} } 781 | 782 | context 'when initialized with `ecs_compatibility => v8`' do 783 | let(:config) { super().merge("ecs_compatibility" => "v8", "match" => ["message", "%{SYSLOGLINE}"]) } 784 | context '#register' do 785 | let(:logger_stub) { double('Logger').as_null_object } 786 | before(:each) { allow_any_instance_of(described_class).to receive(:logger).and_return(logger_stub)} 787 | 788 | it 'logs a helpful warning about the unreleased v8' do 789 | grok_filter.register 790 | 791 | expect(logger_stub).to 
have_received(:warn).with(a_string_including "preview of the unreleased ECS v8") 792 | end 793 | end 794 | end 795 | end 796 | 797 | describe LogStash::Filters::Grok do 798 | describe "(LEGACY)" do 799 | describe "patterns in the 'patterns/' dir override core patterns" do 800 | 801 | let(:pattern_dir) { File.join(LogStash::Environment::LOGSTASH_HOME, "patterns") } 802 | let(:has_pattern_dir?) { Dir.exist?(pattern_dir) } 803 | 804 | before do 805 | FileUtils.mkdir(pattern_dir) unless has_pattern_dir? 806 | @file = File.new(File.join(pattern_dir, 'grok.pattern'), 'w+') 807 | @file.write('WORD \b[2-5]\b') 808 | @file.close 809 | end 810 | 811 | let(:config) do 812 | 'filter { grok { match => { "message" => "%{WORD:word}" } } }' 813 | end 814 | 815 | sample({"message" => 'hello'}) do 816 | expect(subject.get("tags")).to eql ["_grokparsefailure"] 817 | end 818 | 819 | after do 820 | File.unlink @file 821 | FileUtils.rm_rf(pattern_dir) if has_pattern_dir? 822 | end 823 | end 824 | 825 | describe "patterns in custom dir override those in 'patterns/' dir" do 826 | 827 | let(:tmpdir) { Stud::Temporary.directory } 828 | let(:pattern_dir) { File.join(LogStash::Environment::LOGSTASH_HOME, "patterns") } 829 | let(:has_pattern_dir?) { Dir.exist?(pattern_dir) } 830 | 831 | before do 832 | FileUtils.mkdir(pattern_dir) unless has_pattern_dir? 
833 | @file1 = File.new(File.join(pattern_dir, 'grok.pattern'), 'w+') 834 | @file1.write('WORD \b[2-5]\b') 835 | @file1.close 836 | @file2 = File.new(File.join(tmpdir, 'grok.pattern'), 'w+') 837 | @file2.write('WORD \b[0-1]\b') 838 | @file2.close 839 | end 840 | 841 | let(:config) do 842 | "filter { grok { patterns_dir => \"#{tmpdir}\" match => { \"message\" => \"%{WORD:word}\" } } }" 843 | end 844 | 845 | sample({"message" => '0'}) do 846 | expect(subject.get("tags")).to be nil 847 | end 848 | 849 | after do 850 | File.unlink @file1 851 | File.unlink @file2 852 | FileUtils.remove_entry tmpdir 853 | FileUtils.rm_rf(pattern_dir) unless has_pattern_dir? 854 | end 855 | end 856 | 857 | describe "patterns with file glob" do 858 | 859 | let(:tmpdir) { Stud::Temporary.directory } 860 | 861 | before do 862 | @file3 = File.new(File.join(tmpdir, 'grok.pattern'), 'w+') 863 | @file3.write('WORD \b[0-1]\b') 864 | @file3.close 865 | @file4 = File.new(File.join(tmpdir, 'grok.pattern.old'), 'w+') 866 | @file4.write('WORD \b[2-5]\b') 867 | @file4.close 868 | end 869 | 870 | let(:config) do 871 | "filter { grok { patterns_dir => \"#{tmpdir}\" patterns_files_glob => \"*.pattern\" match => { \"message\" => \"%{WORD:word}\" } } }" 872 | end 873 | 874 | sample({"message" => '0'}) do 875 | expect(subject.get("tags")).to be nil 876 | end 877 | 878 | after do 879 | File.unlink @file3 880 | File.unlink @file4 881 | FileUtils.remove_entry tmpdir 882 | end 883 | end 884 | 885 | describe "patterns with file glob on directory that contains subdirectories" do 886 | 887 | let(:tmpdir) { Stud::Temporary.directory } 888 | 889 | before do 890 | @file3 = File.new(File.join(tmpdir, 'grok.pattern'), 'w+') 891 | @file3.write('WORD \b[0-1]\b') 892 | @file3.close 893 | Dir.mkdir(File.join(tmpdir, "subdir")) 894 | end 895 | 896 | let(:config) do 897 | "filter { grok { patterns_dir => \"#{tmpdir}\" patterns_files_glob => \"*\" match => { \"message\" => \"%{WORD:word}\" } } }" 898 | end 899 | 900 | 
sample({"message" => '0'}) do 901 | expect(subject.get("tags")).to be nil 902 | end 903 | 904 | after do 905 | File.unlink @file3 906 | FileUtils.remove_entry tmpdir 907 | end 908 | end 909 | 910 | describe "LOGSTASH-1547 - break_on_match should work on fields with multiple patterns" do 911 | config <<-CONFIG 912 | filter { 913 | grok { 914 | match => { "message" => ["%{GREEDYDATA:name1}beard", "tree%{GREEDYDATA:name2}"] } 915 | break_on_match => false 916 | } 917 | } 918 | CONFIG 919 | 920 | sample "treebranch" do 921 | expect(subject.get("name2")).to eql "branch" 922 | end 923 | 924 | sample "bushbeard" do 925 | expect(subject.get("name1")).to eql "bush" 926 | end 927 | 928 | sample "treebeard" do 929 | expect(subject.get("name1")).to eql "tree" 930 | expect(subject.get("name2")).to eql "beard" 931 | end 932 | end 933 | end 934 | end 935 | --------------------------------------------------------------------------------