├── .gitignore ├── .travis.yml ├── LICENSE ├── MANIFEST.in ├── README.md ├── README.rst ├── pygrok ├── __init__.py ├── patterns │ ├── aws │ ├── bacula │ ├── bro │ ├── exim │ ├── firewalls │ ├── grok-patterns │ ├── haproxy │ ├── java │ ├── junos │ ├── linux-syslog │ ├── mcollective │ ├── mcollective-patterns │ ├── mongodb │ ├── nagios │ ├── postgresql │ ├── rails │ ├── redis │ └── ruby └── pygrok.py ├── requirements.txt ├── setup.cfg ├── setup.py └── tests ├── test_patterns └── pats └── test_pygrok.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | bin/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # Installer logs 26 | pip-log.txt 27 | pip-delete-this-directory.txt 28 | 29 | # Unit test / coverage reports 30 | htmlcov/ 31 | .tox/ 32 | .coverage 33 | .cache 34 | nosetests.xml 35 | coverage.xml 36 | 37 | # Translations 38 | *.mo 39 | 40 | # Mr Developer 41 | .mr.developer.cfg 42 | .project 43 | .pydevproject 44 | 45 | # Rope 46 | .ropeproject 47 | 48 | # Django stuff: 49 | *.log 50 | *.pot 51 | 52 | # Sphinx documentation 53 | docs/_build/ 54 | 55 | # Environments 56 | .env 57 | .venv 58 | env/ 59 | venv/ 60 | ENV/ 61 | env.bak/ 62 | venv.bak/ 63 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - '2.6' 4 | - '2.7' 5 | - '3.2' 6 | - '3.3' 7 | - '3.4' 8 | - '3.5' 9 | sudo: false 10 | # running in the container-based infrastructure without access to sudo 11 | install: pip install -r requirements.txt 12 | deploy: 13 | provider: pypi 14 | user: garyelephant 15 | # automatically deploy a new version of this package each time you add a new tag on master 16 | on: 17 | tags: true 18 | all_branches: true 19 | password: 20 | secure: FTrWF35Rkog2UVN5tUz+I5cs1Sni9m8ctX+GGLWOjYoRzxgrzYb70AOyNAQCmI6ZD67gNyYQ3O8o2ukx2/YxxGfMaK6BKvGg7LXeLLafJKXstiM8Kb0dtBe+lNS4Ct0KYRXpGIqMz2Ak/Gviagx/S5jXgSOv66VzIzj9n1bDp2U= 21 | script: 22 | - python setup.py install 23 | - python tests/test_pygrok.py 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include README.md 3 | include tests/* 4 | include pygrok/patterns/* 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | pygrok [![Build Status](https://travis-ci.org/garyelephant/pygrok.svg?branch=master)](https://travis-ci.org/garyelephant/pygrok) 2 | ====== 3 | 4 | [![Join the chat at https://gitter.im/garyelephant/pygrok](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/garyelephant/pygrok?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) 5 | 6 | A Python library to parse strings and extract information from structured/unstructured data 7 | 8 | What can I use Grok for? 9 | ------------------------ 10 | * parsing and matching patterns in a string(log, message etc.) 11 | * relieving from complex regular expressions. 12 | * extracting information from structured/unstructured data 13 | 14 | Installation 15 | ------------ 16 | 17 | ```Bash 18 | $ pip install pygrok 19 | ``` 20 | 21 | or download, uncompress and install pygrok from [here](https://github.com/garyelephant/pygrok/releases/latest): 22 | 23 | ```Bash 24 | $ tar zxvf pygrok-xx.tar.gz 25 | $ cd pygrok_dir 26 | $ sudo python setup.py install 27 | ``` 28 | 29 | Getting Started 30 | --------------- 31 | ```Python 32 | from pygrok import Grok 33 | text = 'gary is male, 25 years old and weighs 68.5 kilograms' 34 | pattern = '%{WORD:name} is %{WORD:gender}, %{NUMBER:age} years old and weighs %{NUMBER:weight} kilograms' 35 | grok = Grok(pattern) 36 | print grok.match(text) 37 | 38 | # {'gender': 'male', 'age': '25', 'name': 'gary', 'weight': '68.5'} 39 | ``` 40 | 41 | Pretty Cool ! 42 | 43 | Numbers can be converted from string to `int` or `float` if you use `%{pattern:name:type}` syntax, such as `%{NUMBER:age:int}` 44 | ```Python 45 | from pygrok import Grok 46 | text = 'gary is male, 25 years old and weighs 68.5 kilograms' 47 | pattern = '%{WORD:name} is %{WORD:gender}, %{NUMBER:age:int} years old and weighs %{NUMBER:weight:float} kilograms' 48 | grok = Grok(pattern) 49 | print grok.match(text) 50 | 51 | # {'gender': 'male', 'age': 25, 'name': 'gary', 'weight': 68.5} 52 | ``` 53 | Now `age` is of type `int` and `weight` is of type `float`. 54 | 55 | Awesome ! 56 | 57 | Some of the pattern you can use are listed here: 58 | ``` 59 | `WORD` means \b\w+\b in regular expression. 60 | `NUMBER` means (?:%{BASE10NUM}) 61 | `BASE10NUM` means (?[+-]?(?:(?:[0-9]+(?:\.[0-9]+)?)|(?:\.[0-9]+))) 62 | 63 | other patterns such as `IP`, `HOSTNAME`, `URIPATH`, `DATE`, `TIMESTAMP_ISO8601`, `COMMONAPACHELOG`.. 64 | ``` 65 | See All patterns [here](./pygrok/patterns) 66 | 67 | You can also have custom pattern, see [these codes](https://github.com/garyelephant/pygrok/blob/master/tests/test_pygrok.py#L97). 68 | 69 | 70 | More details 71 | ------------ 72 | Beause python re module does not support regular expression syntax atomic grouping(?>),so pygrok requires [regex](https://pypi.python.org/pypi/regex/2014.06.28) to be installed. 73 | 74 | pygrok is inspired by [Grok](https://github.com/jordansissel/grok) developed by Jordan Sissel. 75 | This is not a wrapper of Jordan Sissel's Grok and totally implemented by me. 76 | 77 | Grok is a simple software that allows you to easily parse strings, logs and other files. With grok, you can turn unstructured log and event data into structured data.Pygrok does the same thing. 78 | 79 | I recommend you to have a look at [logstash filter grok](https://www.elastic.co/guide/en/logstash/current/plugins-filters-grok.html), it explains how Grok-like thing work. 80 | 81 | pattern files come from [logstash filter grok's pattern files](https://github.com/logstash-plugins/logstash-patterns-core/tree/master/patterns) 82 | 83 | Contribute 84 | --- 85 | * You are encouraged to [fork](https://github.com/garyelephant/pygrok/fork), improve the code, then make a pull request. 86 | * [Issue tracker](https://github.com/garyelephant/pygrok/issues) 87 | 88 | Get Help 89 | --- 90 | mail:garygaowork@gmail.com 91 | twitter:@garyelephant 92 | 93 | Contributors 94 | --- 95 | Thanks to [all contributors](https://github.com/garyelephant/pygrok/graphs/contributors) 96 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | pygrok |Build Status| 2 | ===================== 3 | 4 | |Join the chat at https://gitter.im/garyelephant/pygrok| 5 | 6 | A Python library to parse strings and extract information from 7 | structured/unstructured data 8 | 9 | What can I use Grok for? 10 | ------------------------ 11 | 12 | - parsing and matching patterns in a string(log, message etc.) 13 | - relieving from complex regular expressions. 14 | - extracting information from structured/unstructured data 15 | 16 | Installation 17 | ------------ 18 | 19 | .. code:: Bash 20 | 21 | $ pip install pygrok 22 | 23 | or download, uncompress and install pygrok from 24 | `here `__: 25 | 26 | .. code:: Bash 27 | 28 | $ tar zxvf pygrok-xx.tar.gz 29 | $ cd pygrok_dir 30 | $ sudo python setup.py install 31 | 32 | Getting Started 33 | --------------- 34 | 35 | .. code:: Python 36 | 37 | from pygrok import Grok 38 | text = 'gary is male, 25 years old and weighs 68.5 kilograms' 39 | pattern = '%{WORD:name} is %{WORD:gender}, %{NUMBER:age} years old and weighs %{NUMBER:weight} kilograms' 40 | grok = Grok(pattern) 41 | print grok.match(text) 42 | 43 | # {'gender': 'male', 'age': '25', 'name': 'gary', 'weight': '68.5'} 44 | 45 | Pretty Cool ! 46 | 47 | Numbers can be converted from string to ``int`` or ``float`` if you use 48 | ``%{pattern:name:type}`` syntax, such as ``%{NUMBER:age:int}`` 49 | 50 | .. code:: Python 51 | 52 | from pygrok import Grok 53 | text = 'gary is male, 25 years old and weighs 68.5 kilograms' 54 | pattern = '%{WORD:name} is %{WORD:gender}, %{NUMBER:age:int} years old and weighs %{NUMBER:weight:float} kilograms' 55 | grok = Grok(pattern) 56 | print grok.match(text, pattern) 57 | 58 | # {'gender': 'male', 'age': 25, 'name': 'gary', 'weight': 68.5} 59 | 60 | Now ``age`` is of type ``int`` and ``weight`` is of type ``float``. 61 | 62 | Awesome ! 63 | 64 | Some of the pattern you can use are listed here: 65 | 66 | :: 67 | 68 | `WORD` means \b\w+\b in regular expression. 69 | `NUMBER` means (?:%{BASE10NUM}) 70 | `BASE10NUM` means (?[+-]?(?:(?:[0-9]+(?:\.[0-9]+)?)|(?:\.[0-9]+))) 71 | 72 | other patterns such as `IP`, `HOSTNAME`, `URIPATH`, `DATE`, `TIMESTAMP_ISO8601`, `COMMONAPACHELOG`.. 73 | 74 | See All patterns `here <./pygrok/patterns>`__ 75 | 76 | You can also have custom pattern, see `these 77 | codes `__. 78 | 79 | More details 80 | ------------ 81 | 82 | Beause python re module does not support regular expression syntax 83 | atomic grouping(?>),so pygrok requires 84 | `regex `__ to be 85 | installed. 86 | 87 | pygrok is inspired by `Grok `__ 88 | developed by Jordan Sissel. This is not a wrapper of Jordan Sissel's 89 | Grok and totally implemented by me. 90 | 91 | Grok is a simple software that allows you to easily parse strings, logs 92 | and other files. With grok, you can turn unstructured log and event data 93 | into structured data.Pygrok does the same thing. 94 | 95 | I recommend you to have a look at `logstash filter 96 | grok `__, 97 | it explains how Grok-like thing work. 98 | 99 | pattern files come from `logstash filter grok's pattern 100 | files `__ 101 | 102 | Contribute 103 | ---------- 104 | 105 | - You are encouraged to 106 | `fork `__, improve the 107 | code, then make a pull request. 108 | - `Issue tracker `__ 109 | 110 | Get Help 111 | -------- 112 | 113 | :: 114 | 115 | mail:garygaowork@gmail.com 116 | twitter:@garyelephant 117 | 118 | Contributors 119 | ------------ 120 | 121 | Thanks to `all 122 | contributors `__ 123 | 124 | .. |Build Status| image:: https://travis-ci.org/garyelephant/pygrok.svg?branch=master 125 | :target: https://travis-ci.org/garyelephant/pygrok 126 | .. |Join the chat at https://gitter.im/garyelephant/pygrok| image:: https://badges.gitter.im/Join%20Chat.svg 127 | :target: https://gitter.im/garyelephant/pygrok?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge 128 | -------------------------------------------------------------------------------- /pygrok/__init__.py: -------------------------------------------------------------------------------- 1 | from .pygrok import * 2 | -------------------------------------------------------------------------------- /pygrok/patterns/aws: -------------------------------------------------------------------------------- 1 | S3_REQUEST_LINE (?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest}) 2 | 3 | S3_ACCESS_LOG %{WORD:owner} %{NOTSPACE:bucket} \[%{HTTPDATE:timestamp}\] %{IP:clientip} %{NOTSPACE:requester} %{NOTSPACE:request_id} %{NOTSPACE:operation} %{NOTSPACE:key} (?:"%{S3_REQUEST_LINE}"|-) (?:%{INT:response:int}|-) (?:-|%{NOTSPACE:error_code}) (?:%{INT:bytes:int}|-) (?:%{INT:object_size:int}|-) (?:%{INT:request_time_ms:int}|-) (?:%{INT:turnaround_time_ms:int}|-) (?:%{QS:referrer}|-) (?:"?%{QS:agent}"?|-) (?:-|%{NOTSPACE:version_id}) 4 | 5 | ELB_URIPATHPARAM %{URIPATH:path}(?:%{URIPARAM:params})? 6 | 7 | ELB_URI %{URIPROTO:proto}://(?:%{USER}(?::[^@]*)?@)?(?:%{URIHOST:urihost})?(?:%{ELB_URIPATHPARAM})? 8 | 9 | ELB_REQUEST_LINE (?:%{WORD:verb} %{ELB_URI:request}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest}) 10 | 11 | ELB_ACCESS_LOG %{TIMESTAMP_ISO8601:timestamp} %{NOTSPACE:elb} %{IP:clientip}:%{INT:clientport:int} (?:(%{IP:backendip}:?:%{INT:backendport:int})|-) %{NUMBER:request_processing_time:float} %{NUMBER:backend_processing_time:float} %{NUMBER:response_processing_time:float} %{INT:response:int} %{INT:backend_response:int} %{INT:received_bytes:int} %{INT:bytes:int} "%{ELB_REQUEST_LINE}" 12 | -------------------------------------------------------------------------------- /pygrok/patterns/bacula: -------------------------------------------------------------------------------- 1 | BACULA_TIMESTAMP %{MONTHDAY}-%{MONTH} %{HOUR}:%{MINUTE} 2 | BACULA_HOST [a-zA-Z0-9-]+ 3 | BACULA_VOLUME %{USER} 4 | BACULA_DEVICE %{USER} 5 | BACULA_DEVICEPATH %{UNIXPATH} 6 | BACULA_CAPACITY %{INT}{1,3}(,%{INT}{3})* 7 | BACULA_VERSION %{USER} 8 | BACULA_JOB %{USER} 9 | 10 | BACULA_LOG_MAX_CAPACITY User defined maximum volume capacity %{BACULA_CAPACITY} exceeded on device \"%{BACULA_DEVICE:device}\" \(%{BACULA_DEVICEPATH}\) 11 | BACULA_LOG_END_VOLUME End of medium on Volume \"%{BACULA_VOLUME:volume}\" Bytes=%{BACULA_CAPACITY} Blocks=%{BACULA_CAPACITY} at %{MONTHDAY}-%{MONTH}-%{YEAR} %{HOUR}:%{MINUTE}. 12 | BACULA_LOG_NEW_VOLUME Created new Volume \"%{BACULA_VOLUME:volume}\" in catalog. 13 | BACULA_LOG_NEW_LABEL Labeled new Volume \"%{BACULA_VOLUME:volume}\" on device \"%{BACULA_DEVICE:device}\" \(%{BACULA_DEVICEPATH}\). 14 | BACULA_LOG_WROTE_LABEL Wrote label to prelabeled Volume \"%{BACULA_VOLUME:volume}\" on device \"%{BACULA_DEVICE}\" \(%{BACULA_DEVICEPATH}\) 15 | BACULA_LOG_NEW_MOUNT New volume \"%{BACULA_VOLUME:volume}\" mounted on device \"%{BACULA_DEVICE:device}\" \(%{BACULA_DEVICEPATH}\) at %{MONTHDAY}-%{MONTH}-%{YEAR} %{HOUR}:%{MINUTE}. 16 | BACULA_LOG_NOOPEN \s+Cannot open %{DATA}: ERR=%{GREEDYDATA:berror} 17 | BACULA_LOG_NOOPENDIR \s+Could not open directory %{DATA}: ERR=%{GREEDYDATA:berror} 18 | BACULA_LOG_NOSTAT \s+Could not stat %{DATA}: ERR=%{GREEDYDATA:berror} 19 | BACULA_LOG_NOJOBS There are no more Jobs associated with Volume \"%{BACULA_VOLUME:volume}\". Marking it purged. 20 | BACULA_LOG_ALL_RECORDS_PRUNED All records pruned from Volume \"%{BACULA_VOLUME:volume}\"; marking it \"Purged\" 21 | BACULA_LOG_BEGIN_PRUNE_JOBS Begin pruning Jobs older than %{INT} month %{INT} days . 22 | BACULA_LOG_BEGIN_PRUNE_FILES Begin pruning Files. 23 | BACULA_LOG_PRUNED_JOBS Pruned %{INT} Jobs* for client %{BACULA_HOST:client} from catalog. 24 | BACULA_LOG_PRUNED_FILES Pruned Files from %{INT} Jobs* for client %{BACULA_HOST:client} from catalog. 25 | BACULA_LOG_ENDPRUNE End auto prune. 26 | BACULA_LOG_STARTJOB Start Backup JobId %{INT}, Job=%{BACULA_JOB:job} 27 | BACULA_LOG_STARTRESTORE Start Restore Job %{BACULA_JOB:job} 28 | BACULA_LOG_USEDEVICE Using Device \"%{BACULA_DEVICE:device}\" 29 | BACULA_LOG_DIFF_FS \s+%{UNIXPATH} is a different filesystem. Will not descend from %{UNIXPATH} into it. 30 | BACULA_LOG_JOBEND Job write elapsed time = %{DATA:elapsed}, Transfer rate = %{NUMBER} (K|M|G)? Bytes/second 31 | BACULA_LOG_NOPRUNE_JOBS No Jobs found to prune. 32 | BACULA_LOG_NOPRUNE_FILES No Files found to prune. 33 | BACULA_LOG_VOLUME_PREVWRITTEN Volume \"%{BACULA_VOLUME:volume}\" previously written, moving to end of data. 34 | BACULA_LOG_READYAPPEND Ready to append to end of Volume \"%{BACULA_VOLUME:volume}\" size=%{INT} 35 | BACULA_LOG_CANCELLING Cancelling duplicate JobId=%{INT}. 36 | BACULA_LOG_MARKCANCEL JobId %{INT}, Job %{BACULA_JOB:job} marked to be canceled. 37 | BACULA_LOG_CLIENT_RBJ shell command: run ClientRunBeforeJob \"%{GREEDYDATA:runjob}\" 38 | BACULA_LOG_VSS (Generate )?VSS (Writer)? 39 | BACULA_LOG_MAXSTART Fatal error: Job canceled because max start delay time exceeded. 40 | BACULA_LOG_DUPLICATE Fatal error: JobId %{INT:duplicate} already running. Duplicate job not allowed. 41 | BACULA_LOG_NOJOBSTAT Fatal error: No Job status returned from FD. 42 | BACULA_LOG_FATAL_CONN Fatal error: bsock.c:133 Unable to connect to (Client: %{BACULA_HOST:client}|Storage daemon) on %{HOSTNAME}:%{POSINT}. ERR=(?%{GREEDYDATA}) 43 | BACULA_LOG_NO_CONNECT Warning: bsock.c:127 Could not connect to (Client: %{BACULA_HOST:client}|Storage daemon) on %{HOSTNAME}:%{POSINT}. ERR=(?%{GREEDYDATA}) 44 | BACULA_LOG_NO_AUTH Fatal error: Unable to authenticate with File daemon at %{HOSTNAME}. Possible causes: 45 | BACULA_LOG_NOSUIT No prior or suitable Full backup found in catalog. Doing FULL backup. 46 | BACULA_LOG_NOPRIOR No prior Full backup Job record found. 47 | 48 | BACULA_LOG_JOB (Error: )?Bacula %{BACULA_HOST} %{BACULA_VERSION} \(%{BACULA_VERSION}\): 49 | 50 | BACULA_LOGLINE %{BACULA_TIMESTAMP:bts} %{BACULA_HOST:hostname} JobId %{INT:jobid}: (%{BACULA_LOG_MAX_CAPACITY}|%{BACULA_LOG_END_VOLUME}|%{BACULA_LOG_NEW_VOLUME}|%{BACULA_LOG_NEW_LABEL}|%{BACULA_LOG_WROTE_LABEL}|%{BACULA_LOG_NEW_MOUNT}|%{BACULA_LOG_NOOPEN}|%{BACULA_LOG_NOOPENDIR}|%{BACULA_LOG_NOSTAT}|%{BACULA_LOG_NOJOBS}|%{BACULA_LOG_ALL_RECORDS_PRUNED}|%{BACULA_LOG_BEGIN_PRUNE_JOBS}|%{BACULA_LOG_BEGIN_PRUNE_FILES}|%{BACULA_LOG_PRUNED_JOBS}|%{BACULA_LOG_PRUNED_FILES}|%{BACULA_LOG_ENDPRUNE}|%{BACULA_LOG_STARTJOB}|%{BACULA_LOG_STARTRESTORE}|%{BACULA_LOG_USEDEVICE}|%{BACULA_LOG_DIFF_FS}|%{BACULA_LOG_JOBEND}|%{BACULA_LOG_NOPRUNE_JOBS}|%{BACULA_LOG_NOPRUNE_FILES}|%{BACULA_LOG_VOLUME_PREVWRITTEN}|%{BACULA_LOG_READYAPPEND}|%{BACULA_LOG_CANCELLING}|%{BACULA_LOG_MARKCANCEL}|%{BACULA_LOG_CLIENT_RBJ}|%{BACULA_LOG_VSS}|%{BACULA_LOG_MAXSTART}|%{BACULA_LOG_DUPLICATE}|%{BACULA_LOG_NOJOBSTAT}|%{BACULA_LOG_FATAL_CONN}|%{BACULA_LOG_NO_CONNECT}|%{BACULA_LOG_NO_AUTH}|%{BACULA_LOG_NOSUIT}|%{BACULA_LOG_JOB}|%{BACULA_LOG_NOPRIOR}) 51 | -------------------------------------------------------------------------------- /pygrok/patterns/bro: -------------------------------------------------------------------------------- 1 | # https://www.bro.org/sphinx/script-reference/log-files.html 2 | 3 | # http.log 4 | BRO_HTTP %{NUMBER:ts}\t%{NOTSPACE:uid}\t%{IP:orig_h}\t%{INT:orig_p}\t%{IP:resp_h}\t%{INT:resp_p}\t%{INT:trans_depth}\t%{GREEDYDATA:method}\t%{GREEDYDATA:domain}\t%{GREEDYDATA:uri}\t%{GREEDYDATA:referrer}\t%{GREEDYDATA:user_agent}\t%{NUMBER:request_body_len}\t%{NUMBER:response_body_len}\t%{GREEDYDATA:status_code}\t%{GREEDYDATA:status_msg}\t%{GREEDYDATA:info_code}\t%{GREEDYDATA:info_msg}\t%{GREEDYDATA:filename}\t%{GREEDYDATA:bro_tags}\t%{GREEDYDATA:username}\t%{GREEDYDATA:password}\t%{GREEDYDATA:proxied}\t%{GREEDYDATA:orig_fuids}\t%{GREEDYDATA:orig_mime_types}\t%{GREEDYDATA:resp_fuids}\t%{GREEDYDATA:resp_mime_types} 5 | 6 | # dns.log 7 | BRO_DNS %{NUMBER:ts}\t%{NOTSPACE:uid}\t%{IP:orig_h}\t%{INT:orig_p}\t%{IP:resp_h}\t%{INT:resp_p}\t%{WORD:proto}\t%{INT:trans_id}\t%{GREEDYDATA:query}\t%{GREEDYDATA:qclass}\t%{GREEDYDATA:qclass_name}\t%{GREEDYDATA:qtype}\t%{GREEDYDATA:qtype_name}\t%{GREEDYDATA:rcode}\t%{GREEDYDATA:rcode_name}\t%{GREEDYDATA:AA}\t%{GREEDYDATA:TC}\t%{GREEDYDATA:RD}\t%{GREEDYDATA:RA}\t%{GREEDYDATA:Z}\t%{GREEDYDATA:answers}\t%{GREEDYDATA:TTLs}\t%{GREEDYDATA:rejected} 8 | 9 | # conn.log 10 | BRO_CONN %{NUMBER:ts}\t%{NOTSPACE:uid}\t%{IP:orig_h}\t%{INT:orig_p}\t%{IP:resp_h}\t%{INT:resp_p}\t%{WORD:proto}\t%{GREEDYDATA:service}\t%{NUMBER:duration}\t%{NUMBER:orig_bytes}\t%{NUMBER:resp_bytes}\t%{GREEDYDATA:conn_state}\t%{GREEDYDATA:local_orig}\t%{GREEDYDATA:missed_bytes}\t%{GREEDYDATA:history}\t%{GREEDYDATA:orig_pkts}\t%{GREEDYDATA:orig_ip_bytes}\t%{GREEDYDATA:resp_pkts}\t%{GREEDYDATA:resp_ip_bytes}\t%{GREEDYDATA:tunnel_parents} 11 | 12 | # files.log 13 | BRO_FILES %{NUMBER:ts}\t%{NOTSPACE:fuid}\t%{IP:tx_hosts}\t%{IP:rx_hosts}\t%{NOTSPACE:conn_uids}\t%{GREEDYDATA:source}\t%{GREEDYDATA:depth}\t%{GREEDYDATA:analyzers}\t%{GREEDYDATA:mime_type}\t%{GREEDYDATA:filename}\t%{GREEDYDATA:duration}\t%{GREEDYDATA:local_orig}\t%{GREEDYDATA:is_orig}\t%{GREEDYDATA:seen_bytes}\t%{GREEDYDATA:total_bytes}\t%{GREEDYDATA:missing_bytes}\t%{GREEDYDATA:overflow_bytes}\t%{GREEDYDATA:timedout}\t%{GREEDYDATA:parent_fuid}\t%{GREEDYDATA:md5}\t%{GREEDYDATA:sha1}\t%{GREEDYDATA:sha256}\t%{GREEDYDATA:extracted} 14 | -------------------------------------------------------------------------------- /pygrok/patterns/exim: -------------------------------------------------------------------------------- 1 | EXIM_MSGID [0-9A-Za-z]{6}-[0-9A-Za-z]{6}-[0-9A-Za-z]{2} 2 | EXIM_FLAGS (<=|[-=>*]>|[*]{2}|==) 3 | EXIM_DATE %{YEAR:exim_year}-%{MONTHNUM:exim_month}-%{MONTHDAY:exim_day} %{TIME:exim_time} 4 | EXIM_PID \[%{POSINT}\] 5 | EXIM_QT ((\d+y)?(\d+w)?(\d+d)?(\d+h)?(\d+m)?(\d+s)?) 6 | EXIM_EXCLUDE_TERMS (Message is frozen|(Start|End) queue run| Warning: | retry time not reached | no (IP address|host name) found for (IP address|host) | unexpected disconnection while reading SMTP command | no immediate delivery: |another process is handling this message) 7 | EXIM_REMOTE_HOST (H=(%{NOTSPACE:remote_hostname} )?(\(%{NOTSPACE:remote_heloname}\) )?\[%{IP:remote_host}\]) 8 | EXIM_INTERFACE (I=\[%{IP:exim_interface}\](:%{NUMBER:exim_interface_port})) 9 | EXIM_PROTOCOL (P=%{NOTSPACE:protocol}) 10 | EXIM_MSG_SIZE (S=%{NUMBER:exim_msg_size}) 11 | EXIM_HEADER_ID (id=%{NOTSPACE:exim_header_id}) 12 | EXIM_SUBJECT (T=%{QS:exim_subject}) 13 | 14 | -------------------------------------------------------------------------------- /pygrok/patterns/firewalls: -------------------------------------------------------------------------------- 1 | # NetScreen firewall logs 2 | NETSCREENSESSIONLOG %{SYSLOGTIMESTAMP:date} %{IPORHOST:device} %{IPORHOST}: NetScreen device_id=%{WORD:device_id}%{DATA}: start_time=%{QUOTEDSTRING:start_time} duration=%{INT:duration} policy_id=%{INT:policy_id} service=%{DATA:service} proto=%{INT:proto} src zone=%{WORD:src_zone} dst zone=%{WORD:dst_zone} action=%{WORD:action} sent=%{INT:sent} rcvd=%{INT:rcvd} src=%{IPORHOST:src_ip} dst=%{IPORHOST:dst_ip} src_port=%{INT:src_port} dst_port=%{INT:dst_port} src-xlated ip=%{IPORHOST:src_xlated_ip} port=%{INT:src_xlated_port} dst-xlated ip=%{IPORHOST:dst_xlated_ip} port=%{INT:dst_xlated_port} session_id=%{INT:session_id} reason=%{GREEDYDATA:reason} 3 | 4 | #== Cisco ASA == 5 | CISCO_TAGGED_SYSLOG ^<%{POSINT:syslog_pri}>%{CISCOTIMESTAMP:timestamp}( %{SYSLOGHOST:sysloghost})? ?: %%{CISCOTAG:ciscotag}: 6 | CISCOTIMESTAMP %{MONTH} +%{MONTHDAY}(?: %{YEAR})? %{TIME} 7 | CISCOTAG [A-Z0-9]+-%{INT}-(?:[A-Z0-9_]+) 8 | # Common Particles 9 | CISCO_ACTION Built|Teardown|Deny|Denied|denied|requested|permitted|denied by ACL|discarded|est-allowed|Dropping|created|deleted 10 | CISCO_REASON Duplicate TCP SYN|Failed to locate egress interface|Invalid transport field|No matching connection|DNS Response|DNS Query|(?:%{WORD}\s*)* 11 | CISCO_DIRECTION Inbound|inbound|Outbound|outbound 12 | CISCO_INTERVAL first hit|%{INT}-second interval 13 | CISCO_XLATE_TYPE static|dynamic 14 | # ASA-1-104001 15 | CISCOFW104001 \((?:Primary|Secondary)\) Switching to ACTIVE - %{GREEDYDATA:switch_reason} 16 | # ASA-1-104002 17 | CISCOFW104002 \((?:Primary|Secondary)\) Switching to STANDBY - %{GREEDYDATA:switch_reason} 18 | # ASA-1-104003 19 | CISCOFW104003 \((?:Primary|Secondary)\) Switching to FAILED\. 20 | # ASA-1-104004 21 | CISCOFW104004 \((?:Primary|Secondary)\) Switching to OK\. 22 | # ASA-1-105003 23 | CISCOFW105003 \((?:Primary|Secondary)\) Monitoring on [Ii]nterface %{GREEDYDATA:interface_name} waiting 24 | # ASA-1-105004 25 | CISCOFW105004 \((?:Primary|Secondary)\) Monitoring on [Ii]nterface %{GREEDYDATA:interface_name} normal 26 | # ASA-1-105005 27 | CISCOFW105005 \((?:Primary|Secondary)\) Lost Failover communications with mate on [Ii]nterface %{GREEDYDATA:interface_name} 28 | # ASA-1-105008 29 | CISCOFW105008 \((?:Primary|Secondary)\) Testing [Ii]nterface %{GREEDYDATA:interface_name} 30 | # ASA-1-105009 31 | CISCOFW105009 \((?:Primary|Secondary)\) Testing on [Ii]nterface %{GREEDYDATA:interface_name} (?:Passed|Failed) 32 | # ASA-2-106001 33 | CISCOFW106001 %{CISCO_DIRECTION:direction} %{WORD:protocol} connection %{CISCO_ACTION:action} from %{IP:src_ip}/%{INT:src_port} to %{IP:dst_ip}/%{INT:dst_port} flags %{GREEDYDATA:tcp_flags} on interface %{GREEDYDATA:interface} 34 | # ASA-2-106006, ASA-2-106007, ASA-2-106010 35 | CISCOFW106006_106007_106010 %{CISCO_ACTION:action} %{CISCO_DIRECTION:direction} %{WORD:protocol} (?:from|src) %{IP:src_ip}/%{INT:src_port}(\(%{DATA:src_fwuser}\))? (?:to|dst) %{IP:dst_ip}/%{INT:dst_port}(\(%{DATA:dst_fwuser}\))? (?:on interface %{DATA:interface}|due to %{CISCO_REASON:reason}) 36 | # ASA-3-106014 37 | CISCOFW106014 %{CISCO_ACTION:action} %{CISCO_DIRECTION:direction} %{WORD:protocol} src %{DATA:src_interface}:%{IP:src_ip}(\(%{DATA:src_fwuser}\))? dst %{DATA:dst_interface}:%{IP:dst_ip}(\(%{DATA:dst_fwuser}\))? \(type %{INT:icmp_type}, code %{INT:icmp_code}\) 38 | # ASA-6-106015 39 | CISCOFW106015 %{CISCO_ACTION:action} %{WORD:protocol} \(%{DATA:policy_id}\) from %{IP:src_ip}/%{INT:src_port} to %{IP:dst_ip}/%{INT:dst_port} flags %{DATA:tcp_flags} on interface %{GREEDYDATA:interface} 40 | # ASA-1-106021 41 | CISCOFW106021 %{CISCO_ACTION:action} %{WORD:protocol} reverse path check from %{IP:src_ip} to %{IP:dst_ip} on interface %{GREEDYDATA:interface} 42 | # ASA-4-106023 43 | CISCOFW106023 %{CISCO_ACTION:action}( protocol)? %{WORD:protocol} src %{DATA:src_interface}:%{DATA:src_ip}(/%{INT:src_port})?(\(%{DATA:src_fwuser}\))? dst %{DATA:dst_interface}:%{DATA:dst_ip}(/%{INT:dst_port})?(\(%{DATA:dst_fwuser}\))?( \(type %{INT:icmp_type}, code %{INT:icmp_code}\))? by access-group "?%{DATA:policy_id}"? \[%{DATA:hashcode1}, %{DATA:hashcode2}\] 44 | # ASA-4-106100, ASA-4-106102, ASA-4-106103 45 | CISCOFW106100_2_3 access-list %{NOTSPACE:policy_id} %{CISCO_ACTION:action} %{WORD:protocol} for user '%{DATA:src_fwuser}' %{DATA:src_interface}/%{IP:src_ip}\(%{INT:src_port}\) -> %{DATA:dst_interface}/%{IP:dst_ip}\(%{INT:dst_port}\) hit-cnt %{INT:hit_count} %{CISCO_INTERVAL:interval} \[%{DATA:hashcode1}, %{DATA:hashcode2}\] 46 | # ASA-5-106100 47 | CISCOFW106100 access-list %{NOTSPACE:policy_id} %{CISCO_ACTION:action} %{WORD:protocol} %{DATA:src_interface}/%{IP:src_ip}\(%{INT:src_port}\)(\(%{DATA:src_fwuser}\))? -> %{DATA:dst_interface}/%{IP:dst_ip}\(%{INT:dst_port}\)(\(%{DATA:src_fwuser}\))? hit-cnt %{INT:hit_count} %{CISCO_INTERVAL:interval} \[%{DATA:hashcode1}, %{DATA:hashcode2}\] 48 | # ASA-6-110002 49 | CISCOFW110002 %{CISCO_REASON:reason} for %{WORD:protocol} from %{DATA:src_interface}:%{IP:src_ip}/%{INT:src_port} to %{IP:dst_ip}/%{INT:dst_port} 50 | # ASA-6-302010 51 | CISCOFW302010 %{INT:connection_count} in use, %{INT:connection_count_max} most used 52 | # ASA-6-302013, ASA-6-302014, ASA-6-302015, ASA-6-302016 53 | CISCOFW302013_302014_302015_302016 %{CISCO_ACTION:action}(?: %{CISCO_DIRECTION:direction})? %{WORD:protocol} connection %{INT:connection_id} for %{DATA:src_interface}:%{IP:src_ip}/%{INT:src_port}( \(%{IP:src_mapped_ip}/%{INT:src_mapped_port}\))?(\(%{DATA:src_fwuser}\))? to %{DATA:dst_interface}:%{IP:dst_ip}/%{INT:dst_port}( \(%{IP:dst_mapped_ip}/%{INT:dst_mapped_port}\))?(\(%{DATA:dst_fwuser}\))?( duration %{TIME:duration} bytes %{INT:bytes})?(?: %{CISCO_REASON:reason})?( \(%{DATA:user}\))? 54 | # ASA-6-302020, ASA-6-302021 55 | CISCOFW302020_302021 %{CISCO_ACTION:action}(?: %{CISCO_DIRECTION:direction})? %{WORD:protocol} connection for faddr %{IP:dst_ip}/%{INT:icmp_seq_num}(?:\(%{DATA:fwuser}\))? gaddr %{IP:src_xlated_ip}/%{INT:icmp_code_xlated} laddr %{IP:src_ip}/%{INT:icmp_code}( \(%{DATA:user}\))? 56 | # ASA-6-305011 57 | CISCOFW305011 %{CISCO_ACTION:action} %{CISCO_XLATE_TYPE:xlate_type} %{WORD:protocol} translation from %{DATA:src_interface}:%{IP:src_ip}(/%{INT:src_port})?(\(%{DATA:src_fwuser}\))? to %{DATA:src_xlated_interface}:%{IP:src_xlated_ip}/%{DATA:src_xlated_port} 58 | # ASA-3-313001, ASA-3-313004, ASA-3-313008 59 | CISCOFW313001_313004_313008 %{CISCO_ACTION:action} %{WORD:protocol} type=%{INT:icmp_type}, code=%{INT:icmp_code} from %{IP:src_ip} on interface %{DATA:interface}( to %{IP:dst_ip})? 60 | # ASA-4-313005 61 | CISCOFW313005 %{CISCO_REASON:reason} for %{WORD:protocol} error message: %{WORD:err_protocol} src %{DATA:err_src_interface}:%{IP:err_src_ip}(\(%{DATA:err_src_fwuser}\))? dst %{DATA:err_dst_interface}:%{IP:err_dst_ip}(\(%{DATA:err_dst_fwuser}\))? \(type %{INT:err_icmp_type}, code %{INT:err_icmp_code}\) on %{DATA:interface} interface\. Original IP payload: %{WORD:protocol} src %{IP:orig_src_ip}/%{INT:orig_src_port}(\(%{DATA:orig_src_fwuser}\))? dst %{IP:orig_dst_ip}/%{INT:orig_dst_port}(\(%{DATA:orig_dst_fwuser}\))? 62 | # ASA-5-321001 63 | CISCOFW321001 Resource '%{WORD:resource_name}' limit of %{POSINT:resource_limit} reached for system 64 | # ASA-4-402117 65 | CISCOFW402117 %{WORD:protocol}: Received a non-IPSec packet \(protocol= %{WORD:orig_protocol}\) from %{IP:src_ip} to %{IP:dst_ip} 66 | # ASA-4-402119 67 | CISCOFW402119 %{WORD:protocol}: Received an %{WORD:orig_protocol} packet \(SPI= %{DATA:spi}, sequence number= %{DATA:seq_num}\) from %{IP:src_ip} \(user= %{DATA:user}\) to %{IP:dst_ip} that failed anti-replay checking 68 | # ASA-4-419001 69 | CISCOFW419001 %{CISCO_ACTION:action} %{WORD:protocol} packet from %{DATA:src_interface}:%{IP:src_ip}/%{INT:src_port} to %{DATA:dst_interface}:%{IP:dst_ip}/%{INT:dst_port}, reason: %{GREEDYDATA:reason} 70 | # ASA-4-419002 71 | CISCOFW419002 %{CISCO_REASON:reason} from %{DATA:src_interface}:%{IP:src_ip}/%{INT:src_port} to %{DATA:dst_interface}:%{IP:dst_ip}/%{INT:dst_port} with different initial sequence number 72 | # ASA-4-500004 73 | CISCOFW500004 %{CISCO_REASON:reason} for protocol=%{WORD:protocol}, from %{IP:src_ip}/%{INT:src_port} to %{IP:dst_ip}/%{INT:dst_port} 74 | # ASA-6-602303, ASA-6-602304 75 | CISCOFW602303_602304 %{WORD:protocol}: An %{CISCO_DIRECTION:direction} %{GREEDYDATA:tunnel_type} SA \(SPI= %{DATA:spi}\) between %{IP:src_ip} and %{IP:dst_ip} \(user= %{DATA:user}\) has been %{CISCO_ACTION:action} 76 | # ASA-7-710001, ASA-7-710002, ASA-7-710003, ASA-7-710005, ASA-7-710006 77 | CISCOFW710001_710002_710003_710005_710006 %{WORD:protocol} (?:request|access) %{CISCO_ACTION:action} from %{IP:src_ip}/%{INT:src_port} to %{DATA:dst_interface}:%{IP:dst_ip}/%{INT:dst_port} 78 | # ASA-6-713172 79 | CISCOFW713172 Group = %{GREEDYDATA:group}, IP = %{IP:src_ip}, Automatic NAT Detection Status:\s+Remote end\s*%{DATA:is_remote_natted}\s*behind a NAT device\s+This\s+end\s*%{DATA:is_local_natted}\s*behind a NAT device 80 | # ASA-4-733100 81 | CISCOFW733100 \[\s*%{DATA:drop_type}\s*\] drop %{DATA:drop_rate_id} exceeded. Current burst rate is %{INT:drop_rate_current_burst} per second, max configured rate is %{INT:drop_rate_max_burst}; Current average rate is %{INT:drop_rate_current_avg} per second, max configured rate is %{INT:drop_rate_max_avg}; Cumulative total count is %{INT:drop_total_count} 82 | #== End Cisco ASA == 83 | 84 | # Shorewall firewall logs 85 | SHOREWALL (%{SYSLOGTIMESTAMP:timestamp}) (%{WORD:nf_host}) kernel:.*Shorewall:(%{WORD:nf_action1})?:(%{WORD:nf_action2})?.*IN=(%{USERNAME:nf_in_interface})?.*(OUT= *MAC=(%{COMMONMAC:nf_dst_mac}):(%{COMMONMAC:nf_src_mac})?|OUT=%{USERNAME:nf_out_interface}).*SRC=(%{IPV4:nf_src_ip}).*DST=(%{IPV4:nf_dst_ip}).*LEN=(%{WORD:nf_len}).?*TOS=(%{WORD:nf_tos}).?*PREC=(%{WORD:nf_prec}).?*TTL=(%{INT:nf_ttl}).?*ID=(%{INT:nf_id}).?*PROTO=(%{WORD:nf_protocol}).?*SPT=(%{INT:nf_src_port}?.*DPT=%{INT:nf_dst_port}?.*) 86 | #== End Shorewall 87 | -------------------------------------------------------------------------------- /pygrok/patterns/grok-patterns: -------------------------------------------------------------------------------- 1 | USERNAME [a-zA-Z0-9._-]+ 2 | USER %{USERNAME} 3 | EMAILLOCALPART [a-zA-Z][a-zA-Z0-9_.+-=:]+ 4 | EMAILADDRESS %{EMAILLOCALPART}@%{HOSTNAME} 5 | HTTPDUSER %{EMAILADDRESS}|%{USER} 6 | INT (?:[+-]?(?:[0-9]+)) 7 | BASE10NUM (?[+-]?(?:(?:[0-9]+(?:\.[0-9]+)?)|(?:\.[0-9]+))) 8 | NUMBER (?:%{BASE10NUM}) 9 | BASE16NUM (?(?"(?>\\.|[^\\"]+)+"|""|(?>'(?>\\.|[^\\']+)+')|''|(?>`(?>\\.|[^\\`]+)+`)|``)) 20 | UUID [A-Fa-f0-9]{8}-(?:[A-Fa-f0-9]{4}-){3}[A-Fa-f0-9]{12} 21 | 22 | # Networking 23 | MAC (?:%{CISCOMAC}|%{WINDOWSMAC}|%{COMMONMAC}) 24 | CISCOMAC (?:(?:[A-Fa-f0-9]{4}\.){2}[A-Fa-f0-9]{4}) 25 | WINDOWSMAC (?:(?:[A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2}) 26 | COMMONMAC (?:(?:[A-Fa-f0-9]{2}:){5}[A-Fa-f0-9]{2}) 27 | IPV6 ((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)? 28 | IPV4 (?[A-Za-z]+:|\\)(?:\\[^\\?*]*)+ 39 | URIPROTO [A-Za-z]+(\+[A-Za-z+]+)? 40 | URIHOST %{IPORHOST}(?::%{POSINT:port})? 41 | # uripath comes loosely from RFC1738, but mostly from what Firefox 42 | # doesn't turn into %XX 43 | URIPATH (?:/[A-Za-z0-9$.+!*'(){},~:;=@#%_\-]*)+ 44 | #URIPARAM \?(?:[A-Za-z0-9]+(?:=(?:[^&]*))?(?:&(?:[A-Za-z0-9]+(?:=(?:[^&]*))?)?)*)? 45 | URIPARAM \?[A-Za-z0-9$.+!*'|(){},~@#%&/=:;_?\-\[\]<>]* 46 | URIPATHPARAM %{URIPATH}(?:%{URIPARAM})? 47 | URI %{URIPROTO}://(?:%{USER}(?::[^@]*)?@)?(?:%{URIHOST})?(?:%{URIPATHPARAM})? 48 | 49 | # Months: January, Feb, 3, 03, 12, December 50 | MONTH \b(?:Jan(?:uary|uar)?|Feb(?:ruary|ruar)?|M(?:a|ä)?r(?:ch|z)?|Apr(?:il)?|Ma(?:y|i)?|Jun(?:e|i)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|O(?:c|k)?t(?:ober)?|Nov(?:ember)?|De(?:c|z)(?:ember)?)\b 51 | MONTHNUM (?:0?[1-9]|1[0-2]) 52 | MONTHNUM2 (?:0[1-9]|1[0-2]) 53 | MONTHDAY (?:(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9]) 54 | 55 | # Days: Monday, Tue, Thu, etc... 56 | DAY (?:Mon(?:day)?|Tue(?:sday)?|Wed(?:nesday)?|Thu(?:rsday)?|Fri(?:day)?|Sat(?:urday)?|Sun(?:day)?) 57 | 58 | # Years? 59 | YEAR (?>\d\d){1,2} 60 | HOUR (?:2[0123]|[01]?[0-9]) 61 | MINUTE (?:[0-5][0-9]) 62 | # '60' is a leap second in most time standards and thus is valid. 63 | SECOND (?:(?:[0-5]?[0-9]|60)(?:[:.,][0-9]+)?) 64 | TIME (?!<[0-9])%{HOUR}:%{MINUTE}(?::%{SECOND})(?![0-9]) 65 | # datestamp is YYYY/MM/DD-HH:MM:SS.UUUU (or something like it) 66 | DATE_US %{MONTHNUM}[/-]%{MONTHDAY}[/-]%{YEAR} 67 | DATE_EU %{MONTHDAY}[./-]%{MONTHNUM}[./-]%{YEAR} 68 | ISO8601_TIMEZONE (?:Z|[+-]%{HOUR}(?::?%{MINUTE})) 69 | ISO8601_SECOND (?:%{SECOND}|60) 70 | TIMESTAMP_ISO8601 %{YEAR}-%{MONTHNUM}-%{MONTHDAY}[T ]%{HOUR}:?%{MINUTE}(?::?%{SECOND})?%{ISO8601_TIMEZONE}? 71 | DATE %{DATE_US}|%{DATE_EU} 72 | DATESTAMP %{DATE}[- ]%{TIME} 73 | TZ (?:[PMCE][SD]T|UTC) 74 | DATESTAMP_RFC822 %{DAY} %{MONTH} %{MONTHDAY} %{YEAR} %{TIME} %{TZ} 75 | DATESTAMP_RFC2822 %{DAY}, %{MONTHDAY} %{MONTH} %{YEAR} %{TIME} %{ISO8601_TIMEZONE} 76 | DATESTAMP_OTHER %{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{TZ} %{YEAR} 77 | DATESTAMP_EVENTLOG %{YEAR}%{MONTHNUM2}%{MONTHDAY}%{HOUR}%{MINUTE}%{SECOND} 78 | HTTPDERROR_DATE %{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{YEAR} 79 | 80 | # Syslog Dates: Month Day HH:MM:SS 81 | SYSLOGTIMESTAMP %{MONTH} +%{MONTHDAY} %{TIME} 82 | PROG [\x21-\x5a\x5c\x5e-\x7e]+ 83 | SYSLOGPROG %{PROG:program}(?:\[%{POSINT:pid}\])? 84 | SYSLOGHOST %{IPORHOST} 85 | SYSLOGFACILITY <%{NONNEGINT:facility}.%{NONNEGINT:priority}> 86 | HTTPDATE %{MONTHDAY}/%{MONTH}/%{YEAR}:%{TIME} %{INT} 87 | 88 | # Shortcuts 89 | QS %{QUOTEDSTRING} 90 | 91 | # Log formats 92 | SYSLOGBASE %{SYSLOGTIMESTAMP:timestamp} (?:%{SYSLOGFACILITY} )?%{SYSLOGHOST:logsource} %{SYSLOGPROG}: 93 | COMMONAPACHELOG %{IPORHOST:clientip} %{HTTPDUSER:ident} %{USER:auth} \[%{HTTPDATE:timestamp}\] "(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest})" %{NUMBER:response} (?:%{NUMBER:bytes}|-) 94 | COMBINEDAPACHELOG %{COMMONAPACHELOG} %{QS:referrer} %{QS:agent} 95 | HTTPD20_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{LOGLEVEL:loglevel}\] (?:\[client %{IPORHOST:clientip}\] ){0,1}%{GREEDYDATA:errormsg} 96 | HTTPD24_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{WORD:module}:%{LOGLEVEL:loglevel}\] \[pid %{POSINT:pid}:tid %{NUMBER:tid}\]( \(%{POSINT:proxy_errorcode}\)%{DATA:proxy_errormessage}:)?( \[client %{IPORHOST:client}:%{POSINT:clientport}\])? %{DATA:errorcode}: %{GREEDYDATA:message} 97 | HTTPD_ERRORLOG %{HTTPD20_ERRORLOG}|%{HTTPD24_ERRORLOG} 98 | 99 | 100 | # Log Levels 101 | LOGLEVEL ([Aa]lert|ALERT|[Tt]race|TRACE|[Dd]ebug|DEBUG|[Nn]otice|NOTICE|[Ii]nfo|INFO|[Ww]arn?(?:ing)?|WARN?(?:ING)?|[Ee]rr?(?:or)?|ERR?(?:OR)?|[Cc]rit?(?:ical)?|CRIT?(?:ICAL)?|[Ff]atal|FATAL|[Ss]evere|SEVERE|EMERG(?:ENCY)?|[Ee]merg(?:ency)?) 102 | -------------------------------------------------------------------------------- /pygrok/patterns/haproxy: -------------------------------------------------------------------------------- 1 | ## These patterns were tested w/ haproxy-1.4.15 2 | 3 | ## Documentation of the haproxy log formats can be found at the following links: 4 | ## http://code.google.com/p/haproxy-docs/wiki/HTTPLogFormat 5 | ## http://code.google.com/p/haproxy-docs/wiki/TCPLogFormat 6 | 7 | HAPROXYTIME (?!<[0-9])%{HOUR:haproxy_hour}:%{MINUTE:haproxy_minute}(?::%{SECOND:haproxy_second})(?![0-9]) 8 | HAPROXYDATE %{MONTHDAY:haproxy_monthday}/%{MONTH:haproxy_month}/%{YEAR:haproxy_year}:%{HAPROXYTIME:haproxy_time}.%{INT:haproxy_milliseconds} 9 | 10 | # Override these default patterns to parse out what is captured in your haproxy.cfg 11 | HAPROXYCAPTUREDREQUESTHEADERS %{DATA:captured_request_headers} 12 | HAPROXYCAPTUREDRESPONSEHEADERS %{DATA:captured_response_headers} 13 | 14 | # Example: 15 | # These haproxy config lines will add data to the logs that are captured 16 | # by the patterns below. Place them in your custom patterns directory to 17 | # override the defaults. 18 | # 19 | # capture request header Host len 40 20 | # capture request header X-Forwarded-For len 50 21 | # capture request header Accept-Language len 50 22 | # capture request header Referer len 200 23 | # capture request header User-Agent len 200 24 | # 25 | # capture response header Content-Type len 30 26 | # capture response header Content-Encoding len 10 27 | # capture response header Cache-Control len 200 28 | # capture response header Last-Modified len 200 29 | # 30 | # HAPROXYCAPTUREDREQUESTHEADERS %{DATA:request_header_host}\|%{DATA:request_header_x_forwarded_for}\|%{DATA:request_header_accept_language}\|%{DATA:request_header_referer}\|%{DATA:request_header_user_agent} 31 | # HAPROXYCAPTUREDRESPONSEHEADERS %{DATA:response_header_content_type}\|%{DATA:response_header_content_encoding}\|%{DATA:response_header_cache_control}\|%{DATA:response_header_last_modified} 32 | 33 | # parse a haproxy 'httplog' line 34 | HAPROXYHTTPBASE %{IP:client_ip}:%{INT:client_port} \[%{HAPROXYDATE:accept_date}\] %{NOTSPACE:frontend_name} %{NOTSPACE:backend_name}/%{NOTSPACE:server_name} %{INT:time_request}/%{INT:time_queue}/%{INT:time_backend_connect}/%{INT:time_backend_response}/%{NOTSPACE:time_duration} %{INT:http_status_code} %{NOTSPACE:bytes_read} %{DATA:captured_request_cookie} %{DATA:captured_response_cookie} %{NOTSPACE:termination_state} %{INT:actconn}/%{INT:feconn}/%{INT:beconn}/%{INT:srvconn}/%{NOTSPACE:retries} %{INT:srv_queue}/%{INT:backend_queue} (\{%{HAPROXYCAPTUREDREQUESTHEADERS}\})?( )?(\{%{HAPROXYCAPTUREDRESPONSEHEADERS}\})?( )?"(|(%{WORD:http_verb} (%{URIPROTO:http_proto}://)?(?:%{USER:http_user}(?::[^@]*)?@)?(?:%{URIHOST:http_host})?(?:%{URIPATHPARAM:http_request})?( HTTP/%{NUMBER:http_version})?))?" 35 | 36 | HAPROXYHTTP (?:%{SYSLOGTIMESTAMP:syslog_timestamp}|%{TIMESTAMP_ISO8601:timestamp8601}) %{IPORHOST:syslog_server} %{SYSLOGPROG}: %{HAPROXYHTTPBASE} 37 | 38 | # parse a haproxy 'tcplog' line 39 | HAPROXYTCP (?:%{SYSLOGTIMESTAMP:syslog_timestamp}|%{TIMESTAMP_ISO8601:timestamp8601}) %{IPORHOST:syslog_server} %{SYSLOGPROG}: %{IP:client_ip}:%{INT:client_port} \[%{HAPROXYDATE:accept_date}\] %{NOTSPACE:frontend_name} %{NOTSPACE:backend_name}/%{NOTSPACE:server_name} %{INT:time_queue}/%{INT:time_backend_connect}/%{NOTSPACE:time_duration} %{NOTSPACE:bytes_read} %{NOTSPACE:termination_state} %{INT:actconn}/%{INT:feconn}/%{INT:beconn}/%{INT:srvconn}/%{NOTSPACE:retries} %{INT:srv_queue}/%{INT:backend_queue} 40 | -------------------------------------------------------------------------------- /pygrok/patterns/java: -------------------------------------------------------------------------------- 1 | JAVACLASS (?:[a-zA-Z$_][a-zA-Z$_0-9]*\.)*[a-zA-Z$_][a-zA-Z$_0-9]* 2 | #Space is an allowed character to match special cases like 'Native Method' or 'Unknown Source' 3 | JAVAFILE (?:[A-Za-z0-9_. -]+) 4 | #Allow special method 5 | JAVAMETHOD (?:()|[a-zA-Z$_][a-zA-Z$_0-9]*) 6 | #Line number is optional in special cases 'Native method' or 'Unknown source' 7 | JAVASTACKTRACEPART %{SPACE}at %{JAVACLASS:class}\.%{JAVAMETHOD:method}\(%{JAVAFILE:file}(?::%{NUMBER:line})?\) 8 | # Java Logs 9 | JAVATHREAD (?:[A-Z]{2}-Processor[\d]+) 10 | JAVACLASS (?:[a-zA-Z0-9-]+\.)+[A-Za-z0-9$]+ 11 | JAVAFILE (?:[A-Za-z0-9_.-]+) 12 | JAVASTACKTRACEPART at %{JAVACLASS:class}\.%{WORD:method}\(%{JAVAFILE:file}:%{NUMBER:line}\) 13 | JAVALOGMESSAGE (.*) 14 | # MMM dd, yyyy HH:mm:ss eg: Jan 9, 2014 7:13:13 AM 15 | CATALINA_DATESTAMP %{MONTH} %{MONTHDAY}, 20%{YEAR} %{HOUR}:?%{MINUTE}(?::?%{SECOND}) (?:AM|PM) 16 | # yyyy-MM-dd HH:mm:ss,SSS ZZZ eg: 2014-01-09 17:32:25,527 -0800 17 | TOMCAT_DATESTAMP 20%{YEAR}-%{MONTHNUM}-%{MONTHDAY} %{HOUR}:?%{MINUTE}(?::?%{SECOND}) %{ISO8601_TIMEZONE} 18 | CATALINALOG %{CATALINA_DATESTAMP:timestamp} %{JAVACLASS:class} %{JAVALOGMESSAGE:logmessage} 19 | # 2014-01-09 20:03:28,269 -0800 | ERROR | com.example.service.ExampleService - something compeletely unexpected happened... 20 | TOMCATLOG %{TOMCAT_DATESTAMP:timestamp} \| %{LOGLEVEL:level} \| %{JAVACLASS:class} - %{JAVALOGMESSAGE:logmessage} 21 | -------------------------------------------------------------------------------- /pygrok/patterns/junos: -------------------------------------------------------------------------------- 1 | # JUNOS 11.4 RT_FLOW patterns 2 | RT_FLOW_EVENT (RT_FLOW_SESSION_CREATE|RT_FLOW_SESSION_CLOSE|RT_FLOW_SESSION_DENY) 3 | 4 | RT_FLOW1 %{RT_FLOW_EVENT:event}: %{GREEDYDATA:close-reason}: %{IP:src-ip}/%{INT:src-port}->%{IP:dst-ip}/%{INT:dst-port} %{DATA:service} %{IP:nat-src-ip}/%{INT:nat-src-port}->%{IP:nat-dst-ip}/%{INT:nat-dst-port} %{DATA:src-nat-rule-name} %{DATA:dst-nat-rule-name} %{INT:protocol-id} %{DATA:policy-name} %{DATA:from-zone} %{DATA:to-zone} %{INT:session-id} \d+\(%{DATA:sent}\) \d+\(%{DATA:received}\) %{INT:elapsed-time} .* 5 | 6 | RT_FLOW2 %{RT_FLOW_EVENT:event}: session created %{IP:src-ip}/%{INT:src-port}->%{IP:dst-ip}/%{INT:dst-port} %{DATA:service} %{IP:nat-src-ip}/%{INT:nat-src-port}->%{IP:nat-dst-ip}/%{INT:nat-dst-port} %{DATA:src-nat-rule-name} %{DATA:dst-nat-rule-name} %{INT:protocol-id} %{DATA:policy-name} %{DATA:from-zone} %{DATA:to-zone} %{INT:session-id} .* 7 | 8 | RT_FLOW3 %{RT_FLOW_EVENT:event}: session denied %{IP:src-ip}/%{INT:src-port}->%{IP:dst-ip}/%{INT:dst-port} %{DATA:service} %{INT:protocol-id}\(\d\) %{DATA:policy-name} %{DATA:from-zone} %{DATA:to-zone} .* 9 | 10 | -------------------------------------------------------------------------------- /pygrok/patterns/linux-syslog: -------------------------------------------------------------------------------- 1 | SYSLOG5424PRINTASCII [!-~]+ 2 | 3 | SYSLOGBASE2 (?:%{SYSLOGTIMESTAMP:timestamp}|%{TIMESTAMP_ISO8601:timestamp8601}) (?:%{SYSLOGFACILITY} )?%{SYSLOGHOST:logsource}+(?: %{SYSLOGPROG}:|) 4 | SYSLOGPAMSESSION %{SYSLOGBASE} (?=%{GREEDYDATA:message})%{WORD:pam_module}\(%{DATA:pam_caller}\): session %{WORD:pam_session_state} for user %{USERNAME:username}(?: by %{GREEDYDATA:pam_by})? 5 | 6 | CRON_ACTION [A-Z ]+ 7 | CRONLOG %{SYSLOGBASE} \(%{USER:user}\) %{CRON_ACTION:action} \(%{DATA:message}\) 8 | 9 | SYSLOGLINE %{SYSLOGBASE2} %{GREEDYDATA:message} 10 | 11 | # IETF 5424 syslog(8) format (see http://www.rfc-editor.org/info/rfc5424) 12 | SYSLOG5424PRI <%{NONNEGINT:syslog5424_pri}> 13 | SYSLOG5424SD \[%{DATA}\]+ 14 | SYSLOG5424BASE %{SYSLOG5424PRI}%{NONNEGINT:syslog5424_ver} +(?:%{TIMESTAMP_ISO8601:syslog5424_ts}|-) +(?:%{HOSTNAME:syslog5424_host}|-) +(-|%{SYSLOG5424PRINTASCII:syslog5424_app}) +(-|%{SYSLOG5424PRINTASCII:syslog5424_proc}) +(-|%{SYSLOG5424PRINTASCII:syslog5424_msgid}) +(?:%{SYSLOG5424SD:syslog5424_sd}|-|) 15 | 16 | SYSLOG5424LINE %{SYSLOG5424BASE} +%{GREEDYDATA:syslog5424_msg} 17 | -------------------------------------------------------------------------------- /pygrok/patterns/mcollective: -------------------------------------------------------------------------------- 1 | MCOLLECTIVEAUDIT %{TIMESTAMP_ISO8601:timestamp}: 2 | -------------------------------------------------------------------------------- /pygrok/patterns/mcollective-patterns: -------------------------------------------------------------------------------- 1 | # Remember, these can be multi-line events. 2 | MCOLLECTIVE ., \[%{TIMESTAMP_ISO8601:timestamp} #%{POSINT:pid}\]%{SPACE}%{LOGLEVEL:event_level} 3 | 4 | MCOLLECTIVEAUDIT %{TIMESTAMP_ISO8601:timestamp}: 5 | -------------------------------------------------------------------------------- /pygrok/patterns/mongodb: -------------------------------------------------------------------------------- 1 | MONGO_LOG %{SYSLOGTIMESTAMP:timestamp} \[%{WORD:component}\] %{GREEDYDATA:message} 2 | MONGO_QUERY \{ (?<={ ).*(?= } ntoreturn:) \} 3 | MONGO_SLOWQUERY %{WORD} %{MONGO_WORDDASH:database}\.%{MONGO_WORDDASH:collection} %{WORD}: %{MONGO_QUERY:query} %{WORD}:%{NONNEGINT:ntoreturn} %{WORD}:%{NONNEGINT:ntoskip} %{WORD}:%{NONNEGINT:nscanned}.*nreturned:%{NONNEGINT:nreturned}..+ (?[0-9]+)ms 4 | MONGO_WORDDASH \b[\w-]+\b 5 | MONGO3_SEVERITY \w 6 | MONGO3_COMPONENT %{WORD}|- 7 | MONGO3_LOG %{TIMESTAMP_ISO8601:timestamp} %{MONGO3_SEVERITY:severity} %{MONGO3_COMPONENT:component}%{SPACE}(?:\[%{DATA:context}\])? %{GREEDYDATA:message} 8 | -------------------------------------------------------------------------------- /pygrok/patterns/nagios: -------------------------------------------------------------------------------- 1 | ################################################################################## 2 | ################################################################################## 3 | # Chop Nagios log files to smithereens! 4 | # 5 | # A set of GROK filters to process logfiles generated by Nagios. 6 | # While it does not, this set intends to cover all possible Nagios logs. 7 | # 8 | # Some more work needs to be done to cover all External Commands: 9 | # http://old.nagios.org/developerinfo/externalcommands/commandlist.php 10 | # 11 | # If you need some support on these rules please contact: 12 | # Jelle Smet http://smetj.net 13 | # 14 | ################################################################################# 15 | ################################################################################# 16 | 17 | NAGIOSTIME \[%{NUMBER:nagios_epoch}\] 18 | 19 | ############################################### 20 | ######## Begin nagios log types 21 | ############################################### 22 | NAGIOS_TYPE_CURRENT_SERVICE_STATE CURRENT SERVICE STATE 23 | NAGIOS_TYPE_CURRENT_HOST_STATE CURRENT HOST STATE 24 | 25 | NAGIOS_TYPE_SERVICE_NOTIFICATION SERVICE NOTIFICATION 26 | NAGIOS_TYPE_HOST_NOTIFICATION HOST NOTIFICATION 27 | 28 | NAGIOS_TYPE_SERVICE_ALERT SERVICE ALERT 29 | NAGIOS_TYPE_HOST_ALERT HOST ALERT 30 | 31 | NAGIOS_TYPE_SERVICE_FLAPPING_ALERT SERVICE FLAPPING ALERT 32 | NAGIOS_TYPE_HOST_FLAPPING_ALERT HOST FLAPPING ALERT 33 | 34 | NAGIOS_TYPE_SERVICE_DOWNTIME_ALERT SERVICE DOWNTIME ALERT 35 | NAGIOS_TYPE_HOST_DOWNTIME_ALERT HOST DOWNTIME ALERT 36 | 37 | NAGIOS_TYPE_PASSIVE_SERVICE_CHECK PASSIVE SERVICE CHECK 38 | NAGIOS_TYPE_PASSIVE_HOST_CHECK PASSIVE HOST CHECK 39 | 40 | NAGIOS_TYPE_SERVICE_EVENT_HANDLER SERVICE EVENT HANDLER 41 | NAGIOS_TYPE_HOST_EVENT_HANDLER HOST EVENT HANDLER 42 | 43 | NAGIOS_TYPE_EXTERNAL_COMMAND EXTERNAL COMMAND 44 | NAGIOS_TYPE_TIMEPERIOD_TRANSITION TIMEPERIOD TRANSITION 45 | ############################################### 46 | ######## End nagios log types 47 | ############################################### 48 | 49 | ############################################### 50 | ######## Begin external check types 51 | ############################################### 52 | NAGIOS_EC_DISABLE_SVC_CHECK DISABLE_SVC_CHECK 53 | NAGIOS_EC_ENABLE_SVC_CHECK ENABLE_SVC_CHECK 54 | NAGIOS_EC_DISABLE_HOST_CHECK DISABLE_HOST_CHECK 55 | NAGIOS_EC_ENABLE_HOST_CHECK ENABLE_HOST_CHECK 56 | NAGIOS_EC_PROCESS_SERVICE_CHECK_RESULT PROCESS_SERVICE_CHECK_RESULT 57 | NAGIOS_EC_PROCESS_HOST_CHECK_RESULT PROCESS_HOST_CHECK_RESULT 58 | NAGIOS_EC_SCHEDULE_SERVICE_DOWNTIME SCHEDULE_SERVICE_DOWNTIME 59 | NAGIOS_EC_SCHEDULE_HOST_DOWNTIME SCHEDULE_HOST_DOWNTIME 60 | NAGIOS_EC_DISABLE_HOST_SVC_NOTIFICATIONS DISABLE_HOST_SVC_NOTIFICATIONS 61 | NAGIOS_EC_ENABLE_HOST_SVC_NOTIFICATIONS ENABLE_HOST_SVC_NOTIFICATIONS 62 | NAGIOS_EC_DISABLE_HOST_NOTIFICATIONS DISABLE_HOST_NOTIFICATIONS 63 | NAGIOS_EC_ENABLE_HOST_NOTIFICATIONS ENABLE_HOST_NOTIFICATIONS 64 | NAGIOS_EC_DISABLE_SVC_NOTIFICATIONS DISABLE_SVC_NOTIFICATIONS 65 | NAGIOS_EC_ENABLE_SVC_NOTIFICATIONS ENABLE_SVC_NOTIFICATIONS 66 | ############################################### 67 | ######## End external check types 68 | ############################################### 69 | NAGIOS_WARNING Warning:%{SPACE}%{GREEDYDATA:nagios_message} 70 | 71 | NAGIOS_CURRENT_SERVICE_STATE %{NAGIOS_TYPE_CURRENT_SERVICE_STATE:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_service};%{DATA:nagios_state};%{DATA:nagios_statetype};%{DATA:nagios_statecode};%{GREEDYDATA:nagios_message} 72 | NAGIOS_CURRENT_HOST_STATE %{NAGIOS_TYPE_CURRENT_HOST_STATE:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_state};%{DATA:nagios_statetype};%{DATA:nagios_statecode};%{GREEDYDATA:nagios_message} 73 | 74 | NAGIOS_SERVICE_NOTIFICATION %{NAGIOS_TYPE_SERVICE_NOTIFICATION:nagios_type}: %{DATA:nagios_notifyname};%{DATA:nagios_hostname};%{DATA:nagios_service};%{DATA:nagios_state};%{DATA:nagios_contact};%{GREEDYDATA:nagios_message} 75 | NAGIOS_HOST_NOTIFICATION %{NAGIOS_TYPE_HOST_NOTIFICATION:nagios_type}: %{DATA:nagios_notifyname};%{DATA:nagios_hostname};%{DATA:nagios_state};%{DATA:nagios_contact};%{GREEDYDATA:nagios_message} 76 | 77 | NAGIOS_SERVICE_ALERT %{NAGIOS_TYPE_SERVICE_ALERT:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_service};%{DATA:nagios_state};%{DATA:nagios_statelevel};%{NUMBER:nagios_attempt};%{GREEDYDATA:nagios_message} 78 | NAGIOS_HOST_ALERT %{NAGIOS_TYPE_HOST_ALERT:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_state};%{DATA:nagios_statelevel};%{NUMBER:nagios_attempt};%{GREEDYDATA:nagios_message} 79 | 80 | NAGIOS_SERVICE_FLAPPING_ALERT %{NAGIOS_TYPE_SERVICE_FLAPPING_ALERT:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_service};%{DATA:nagios_state};%{GREEDYDATA:nagios_message} 81 | NAGIOS_HOST_FLAPPING_ALERT %{NAGIOS_TYPE_HOST_FLAPPING_ALERT:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_state};%{GREEDYDATA:nagios_message} 82 | 83 | NAGIOS_SERVICE_DOWNTIME_ALERT %{NAGIOS_TYPE_SERVICE_DOWNTIME_ALERT:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_service};%{DATA:nagios_state};%{GREEDYDATA:nagios_comment} 84 | NAGIOS_HOST_DOWNTIME_ALERT %{NAGIOS_TYPE_HOST_DOWNTIME_ALERT:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_state};%{GREEDYDATA:nagios_comment} 85 | 86 | NAGIOS_PASSIVE_SERVICE_CHECK %{NAGIOS_TYPE_PASSIVE_SERVICE_CHECK:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_service};%{DATA:nagios_state};%{GREEDYDATA:nagios_comment} 87 | NAGIOS_PASSIVE_HOST_CHECK %{NAGIOS_TYPE_PASSIVE_HOST_CHECK:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_state};%{GREEDYDATA:nagios_comment} 88 | 89 | NAGIOS_SERVICE_EVENT_HANDLER %{NAGIOS_TYPE_SERVICE_EVENT_HANDLER:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_service};%{DATA:nagios_state};%{DATA:nagios_statelevel};%{DATA:nagios_event_handler_name} 90 | NAGIOS_HOST_EVENT_HANDLER %{NAGIOS_TYPE_HOST_EVENT_HANDLER:nagios_type}: %{DATA:nagios_hostname};%{DATA:nagios_state};%{DATA:nagios_statelevel};%{DATA:nagios_event_handler_name} 91 | 92 | NAGIOS_TIMEPERIOD_TRANSITION %{NAGIOS_TYPE_TIMEPERIOD_TRANSITION:nagios_type}: %{DATA:nagios_service};%{DATA:nagios_unknown1};%{DATA:nagios_unknown2} 93 | 94 | #################### 95 | #### External checks 96 | #################### 97 | 98 | #Disable host & service check 99 | NAGIOS_EC_LINE_DISABLE_SVC_CHECK %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_DISABLE_SVC_CHECK:nagios_command};%{DATA:nagios_hostname};%{DATA:nagios_service} 100 | NAGIOS_EC_LINE_DISABLE_HOST_CHECK %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_DISABLE_HOST_CHECK:nagios_command};%{DATA:nagios_hostname} 101 | 102 | #Enable host & service check 103 | NAGIOS_EC_LINE_ENABLE_SVC_CHECK %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_ENABLE_SVC_CHECK:nagios_command};%{DATA:nagios_hostname};%{DATA:nagios_service} 104 | NAGIOS_EC_LINE_ENABLE_HOST_CHECK %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_ENABLE_HOST_CHECK:nagios_command};%{DATA:nagios_hostname} 105 | 106 | #Process host & service check 107 | NAGIOS_EC_LINE_PROCESS_SERVICE_CHECK_RESULT %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_PROCESS_SERVICE_CHECK_RESULT:nagios_command};%{DATA:nagios_hostname};%{DATA:nagios_service};%{DATA:nagios_state};%{GREEDYDATA:nagios_check_result} 108 | NAGIOS_EC_LINE_PROCESS_HOST_CHECK_RESULT %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_PROCESS_HOST_CHECK_RESULT:nagios_command};%{DATA:nagios_hostname};%{DATA:nagios_state};%{GREEDYDATA:nagios_check_result} 109 | 110 | #Disable host & service notifications 111 | NAGIOS_EC_LINE_DISABLE_HOST_SVC_NOTIFICATIONS %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_DISABLE_HOST_SVC_NOTIFICATIONS:nagios_command};%{GREEDYDATA:nagios_hostname} 112 | NAGIOS_EC_LINE_DISABLE_HOST_NOTIFICATIONS %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_DISABLE_HOST_NOTIFICATIONS:nagios_command};%{GREEDYDATA:nagios_hostname} 113 | NAGIOS_EC_LINE_DISABLE_SVC_NOTIFICATIONS %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_DISABLE_SVC_NOTIFICATIONS:nagios_command};%{DATA:nagios_hostname};%{GREEDYDATA:nagios_service} 114 | 115 | #Enable host & service notifications 116 | NAGIOS_EC_LINE_ENABLE_HOST_SVC_NOTIFICATIONS %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_ENABLE_HOST_SVC_NOTIFICATIONS:nagios_command};%{GREEDYDATA:nagios_hostname} 117 | NAGIOS_EC_LINE_ENABLE_HOST_NOTIFICATIONS %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_ENABLE_HOST_NOTIFICATIONS:nagios_command};%{GREEDYDATA:nagios_hostname} 118 | NAGIOS_EC_LINE_ENABLE_SVC_NOTIFICATIONS %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_ENABLE_SVC_NOTIFICATIONS:nagios_command};%{DATA:nagios_hostname};%{GREEDYDATA:nagios_service} 119 | 120 | #Schedule host & service downtime 121 | NAGIOS_EC_LINE_SCHEDULE_HOST_DOWNTIME %{NAGIOS_TYPE_EXTERNAL_COMMAND:nagios_type}: %{NAGIOS_EC_SCHEDULE_HOST_DOWNTIME:nagios_command};%{DATA:nagios_hostname};%{NUMBER:nagios_start_time};%{NUMBER:nagios_end_time};%{NUMBER:nagios_fixed};%{NUMBER:nagios_trigger_id};%{NUMBER:nagios_duration};%{DATA:author};%{DATA:comment} 122 | 123 | #End matching line 124 | NAGIOSLOGLINE %{NAGIOSTIME} (?:%{NAGIOS_WARNING}|%{NAGIOS_CURRENT_SERVICE_STATE}|%{NAGIOS_CURRENT_HOST_STATE}|%{NAGIOS_SERVICE_NOTIFICATION}|%{NAGIOS_HOST_NOTIFICATION}|%{NAGIOS_SERVICE_ALERT}|%{NAGIOS_HOST_ALERT}|%{NAGIOS_SERVICE_FLAPPING_ALERT}|%{NAGIOS_HOST_FLAPPING_ALERT}|%{NAGIOS_SERVICE_DOWNTIME_ALERT}|%{NAGIOS_HOST_DOWNTIME_ALERT}|%{NAGIOS_PASSIVE_SERVICE_CHECK}|%{NAGIOS_PASSIVE_HOST_CHECK}|%{NAGIOS_SERVICE_EVENT_HANDLER}|%{NAGIOS_HOST_EVENT_HANDLER}|%{NAGIOS_TIMEPERIOD_TRANSITION}|%{NAGIOS_EC_LINE_DISABLE_SVC_CHECK}|%{NAGIOS_EC_LINE_ENABLE_SVC_CHECK}|%{NAGIOS_EC_LINE_DISABLE_HOST_CHECK}|%{NAGIOS_EC_LINE_ENABLE_HOST_CHECK}|%{NAGIOS_EC_LINE_PROCESS_HOST_CHECK_RESULT}|%{NAGIOS_EC_LINE_PROCESS_SERVICE_CHECK_RESULT}|%{NAGIOS_EC_LINE_SCHEDULE_HOST_DOWNTIME}|%{NAGIOS_EC_LINE_DISABLE_HOST_SVC_NOTIFICATIONS}|%{NAGIOS_EC_LINE_ENABLE_HOST_SVC_NOTIFICATIONS}|%{NAGIOS_EC_LINE_DISABLE_HOST_NOTIFICATIONS}|%{NAGIOS_EC_LINE_ENABLE_HOST_NOTIFICATIONS}|%{NAGIOS_EC_LINE_DISABLE_SVC_NOTIFICATIONS}|%{NAGIOS_EC_LINE_ENABLE_SVC_NOTIFICATIONS}) 125 | -------------------------------------------------------------------------------- /pygrok/patterns/postgresql: -------------------------------------------------------------------------------- 1 | # Default postgresql pg_log format pattern 2 | POSTGRESQL %{DATESTAMP:timestamp} %{TZ} %{DATA:user_id} %{GREEDYDATA:connection_id} %{POSINT:pid} 3 | 4 | -------------------------------------------------------------------------------- /pygrok/patterns/rails: -------------------------------------------------------------------------------- 1 | RUUID \h{32} 2 | # rails controller with action 3 | RCONTROLLER (?[^#]+)#(?\w+) 4 | 5 | # this will often be the only line: 6 | RAILS3HEAD (?m)Started %{WORD:verb} "%{URIPATHPARAM:request}" for %{IPORHOST:clientip} at (?%{YEAR}-%{MONTHNUM}-%{MONTHDAY} %{HOUR}:%{MINUTE}:%{SECOND} %{ISO8601_TIMEZONE}) 7 | # for some a strange reason, params are stripped of {} - not sure that's a good idea. 8 | RPROCESSING \W*Processing by %{RCONTROLLER} as (?\S+)(?:\W*Parameters: {%{DATA:params}}\W*)? 9 | RAILS3FOOT Completed %{NUMBER:response}%{DATA} in %{NUMBER:totalms}ms %{RAILS3PROFILE}%{GREEDYDATA} 10 | RAILS3PROFILE (?:\(Views: %{NUMBER:viewms}ms \| ActiveRecord: %{NUMBER:activerecordms}ms|\(ActiveRecord: %{NUMBER:activerecordms}ms)? 11 | 12 | # putting it all together 13 | RAILS3 %{RAILS3HEAD}(?:%{RPROCESSING})?(?(?:%{DATA}\n)*)(?:%{RAILS3FOOT})? 14 | -------------------------------------------------------------------------------- /pygrok/patterns/redis: -------------------------------------------------------------------------------- 1 | REDISTIMESTAMP %{MONTHDAY} %{MONTH} %{TIME} 2 | REDISLOG \[%{POSINT:pid}\] %{REDISTIMESTAMP:timestamp} \* 3 | 4 | -------------------------------------------------------------------------------- /pygrok/patterns/ruby: -------------------------------------------------------------------------------- 1 | RUBY_LOGLEVEL (?:DEBUG|FATAL|ERROR|WARN|INFO) 2 | RUBY_LOGGER [DFEWI], \[%{TIMESTAMP_ISO8601:timestamp} #%{POSINT:pid}\] *%{RUBY_LOGLEVEL:loglevel} -- +%{DATA:progname}: %{GREEDYDATA:message} 3 | -------------------------------------------------------------------------------- /pygrok/pygrok.py: -------------------------------------------------------------------------------- 1 | try: 2 | import regex as re 3 | except ImportError as e: 4 | # If you import re, grok_match can't handle regular expression containing atomic group(?>) 5 | import re 6 | import codecs 7 | import os 8 | import pkg_resources 9 | 10 | DEFAULT_PATTERNS_DIRS = [pkg_resources.resource_filename(__name__, 'patterns')] 11 | 12 | 13 | class Grok(object): 14 | def __init__(self, pattern, custom_patterns_dir=None, custom_patterns={}, fullmatch=True): 15 | self.pattern = pattern 16 | self.custom_patterns_dir = custom_patterns_dir 17 | self.predefined_patterns = _reload_patterns(DEFAULT_PATTERNS_DIRS) 18 | self.fullmatch = fullmatch 19 | 20 | custom_pats = {} 21 | if custom_patterns_dir is not None: 22 | custom_pats = _reload_patterns([custom_patterns_dir]) 23 | 24 | for pat_name, regex_str in custom_patterns.items(): 25 | custom_pats[pat_name] = Pattern(pat_name, regex_str) 26 | 27 | if len(custom_pats) > 0: 28 | self.predefined_patterns.update(custom_pats) 29 | 30 | self._load_search_pattern() 31 | 32 | 33 | def match(self, text): 34 | """If text is matched with pattern, return variable names specified(%{pattern:variable name}) 35 | in pattern and their corresponding values.If not matched, return None. 36 | custom patterns can be passed in by custom_patterns(pattern name, pattern regular expression pair) 37 | or custom_patterns_dir. 38 | """ 39 | 40 | match_obj = None 41 | if self.fullmatch: 42 | match_obj = self.regex_obj.fullmatch(text) 43 | else: 44 | match_obj = self.regex_obj.search(text) 45 | 46 | if match_obj == None: 47 | return None 48 | matches = match_obj.groupdict() 49 | for key,match in matches.items(): 50 | try: 51 | if self.type_mapper[key] == 'int': 52 | matches[key] = int(match) 53 | if self.type_mapper[key] == 'float': 54 | matches[key] = float(match) 55 | except (TypeError, KeyError) as e: 56 | pass 57 | return matches 58 | 59 | def set_search_pattern(self, pattern=None): 60 | if type(pattern) is not str : 61 | raise ValueError("Please supply a valid pattern") 62 | self.pattern = pattern 63 | self._load_search_pattern() 64 | 65 | def _load_search_pattern(self): 66 | self.type_mapper = {} 67 | py_regex_pattern = self.pattern 68 | while True: 69 | # Finding all types specified in the groks 70 | m = re.findall(r'%{(\w+):(\w+):(\w+)}', py_regex_pattern) 71 | for n in m: 72 | self.type_mapper[n[1]] = n[2] 73 | #replace %{pattern_name:custom_name} (or %{pattern_name:custom_name:type} 74 | # with regex and regex group name 75 | 76 | py_regex_pattern = re.sub(r'%{(\w+):(\w+)(?::\w+)?}', 77 | lambda m: "(?P<" + m.group(2) + ">" + self.predefined_patterns[m.group(1)].regex_str + ")", 78 | py_regex_pattern) 79 | 80 | #replace %{pattern_name} with regex 81 | py_regex_pattern = re.sub(r'%{(\w+)}', 82 | lambda m: "(" + self.predefined_patterns[m.group(1)].regex_str + ")", 83 | py_regex_pattern) 84 | 85 | if re.search('%{\w+(:\w+)?}', py_regex_pattern) is None: 86 | break 87 | 88 | self.regex_obj = re.compile(py_regex_pattern) 89 | 90 | def _wrap_pattern_name(pat_name): 91 | return '%{' + pat_name + '}' 92 | 93 | def _reload_patterns(patterns_dirs): 94 | """ 95 | """ 96 | all_patterns = {} 97 | for dir in patterns_dirs: 98 | for f in os.listdir(dir): 99 | patterns = _load_patterns_from_file(os.path.join(dir, f)) 100 | all_patterns.update(patterns) 101 | 102 | return all_patterns 103 | 104 | 105 | def _load_patterns_from_file(file): 106 | """ 107 | """ 108 | patterns = {} 109 | with codecs.open(file, 'r', encoding='utf-8') as f: 110 | for l in f: 111 | l = l.strip() 112 | if l == '' or l.startswith('#'): 113 | continue 114 | 115 | sep = l.find(' ') 116 | pat_name = l[:sep] 117 | regex_str = l[sep:].strip() 118 | pat = Pattern(pat_name, regex_str) 119 | patterns[pat.pattern_name] = pat 120 | return patterns 121 | 122 | 123 | class Pattern(object): 124 | """ 125 | """ 126 | def __init__(self, pattern_name, regex_str, sub_patterns = {}): 127 | self.pattern_name = pattern_name 128 | self.regex_str = regex_str 129 | self.sub_patterns = sub_patterns # sub_pattern name list 130 | 131 | def __str__(self): 132 | return '' % (self.pattern_name, self.regex_str, self.sub_patterns) 133 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | regex==2015.10.29 -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | import os 3 | 4 | long_desc = '' 5 | 6 | if os.path.exists('README.rst'): 7 | long_desc = open('README.rst').read() 8 | 9 | setup(name = 'pygrok', 10 | version = '1.0.0', 11 | description = 'A Python library to parse strings and' 12 | + ' extract information from structured/unstructured data', 13 | long_description= long_desc, 14 | url = 'https://github.com/garyelephant/pygrok', 15 | author = 'garyelephant', 16 | author_email = 'garygaowork@gmail.com', 17 | license = 'MIT', 18 | packages = ['pygrok'], 19 | include_package_data = True, 20 | zip_safe = True, 21 | keywords = ['python grok', 'regex'], # arbitrary keywords 22 | download_url = 'https://github.com/garyelephant/pygrok/tarball/v1.0.0', 23 | install_requires=['regex'] 24 | ) 25 | -------------------------------------------------------------------------------- /tests/test_patterns/pats: -------------------------------------------------------------------------------- 1 | ID %{WORD}-%{INT} 2 | -------------------------------------------------------------------------------- /tests/test_pygrok.py: -------------------------------------------------------------------------------- 1 | from pygrok import Grok 2 | 3 | 4 | def test_one_pat(): 5 | text = '1024' 6 | pat = '%{INT:test_int}' 7 | grok = Grok(pat) 8 | m = grok.match(text) 9 | assert m['test_int'] == '1024', 'grok match failed:%s, %s' % (text, pat, ) 10 | 11 | text = '1024' 12 | pat = '%{NUMBER:test_num}' 13 | grok = Grok(pat) 14 | m = grok.match(text) 15 | assert m['test_num'] == '1024', 'grok match failed:%s, %s' % (text, pat, ) 16 | 17 | text = 'garyelephant ' 18 | pat = '%{WORD:name} ' 19 | grok = Grok(pat) 20 | m = grok.match(text) 21 | assert m['name'] == text.strip(), 'grok match failed:%s, %s' % (text, pat, ) 22 | 23 | text = '192.168.1.1' 24 | pat = '%{IP:ip}' 25 | grok = Grok(pat) 26 | m = grok.match(text) 27 | assert m['ip'] == text.strip(), 'grok match failed:%s, %s' % (text, pat, ) 28 | 29 | text = 'github.com' 30 | pat = '%{HOSTNAME:website}' 31 | grok = Grok(pat) 32 | m = grok.match(text) 33 | assert m['website'] == text.strip(), 'grok match failed:%s, %s' % (text, pat, ) 34 | 35 | text = '1989-11-04 05:33:02+0800' 36 | pat = '%{TIMESTAMP_ISO8601:ts}' 37 | grok = Grok(pat) 38 | m = grok.match(text) 39 | assert m['ts'] == text.strip(), 'grok match failed:%s, %s' % (text, pat, ) 40 | 41 | text = 'github' 42 | pat = '%{WORD}' 43 | grok = Grok(pat) 44 | m = grok.match(text) 45 | assert m == {}, 'grok match failed:%s, %s' % (text, pat, ) 46 | # you get nothing because variable name is not set, compare "%{WORD}" and "%{WORD:variable_name}" 47 | 48 | text = 'github' 49 | pat = '%{NUMBER:test_num}' 50 | grok = Grok(pat) 51 | m = grok.match(text) 52 | assert m is None, 'grok match failed:%s, %s' % (text, pat, ) 53 | #not match 54 | 55 | text = '1989' 56 | pat = '%{NUMBER:birthyear:int}' 57 | grok = Grok(pat) 58 | m = grok.match(text) 59 | assert m == {'birthyear': 1989}, 'grok match failed:%s, %s' % (text, pat, ) 60 | 61 | 62 | def test_multiple_pats(): 63 | text = 'gary 25 "never quit"' 64 | pat = '%{WORD:name} %{INT:age} %{QUOTEDSTRING:motto}' 65 | grok = Grok(pat) 66 | m = grok.match(text) 67 | assert m['name'] == 'gary' and m['age'] == '25' and m['motto'] == '"never quit"', \ 68 | 'grok match failed:%s, %s' % (text, pat, ) 69 | 70 | # variable names are not set 71 | text = 'gary 25 "never quit"' 72 | pat = '%{WORD} %{INT} %{QUOTEDSTRING}' 73 | grok = Grok(pat) 74 | m = grok.match(text) 75 | assert m == {}, 'grok match failed:%s, %s' % (text, pat, ) 76 | 77 | #"male" is not INT 78 | text = 'gary male "never quit"' 79 | pat = '%{WORD:name} %{INT:age} %{QUOTEDSTRING:motto}' 80 | grok = Grok(pat) 81 | m = grok.match(text) 82 | assert m is None, 'grok match failed:%s, %s' % (text, pat, ) 83 | 84 | # nginx log 85 | text = 'edge.v.iask.com.edge.sinastorage.com 14.18.243.65 6.032s - [21/Jul/2014:16:00:02 +0800]' \ 86 | + ' "GET /edge.v.iask.com/125880034.hlv HTTP/1.0" 200 70528990 "-"' \ 87 | + ' "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)' \ 88 | + ' Chrome/36.0.1985.125 Safari/537.36"' 89 | pat = '%{HOSTNAME:host} %{IP:client_ip} %{NUMBER:delay}s - \[%{DATA:time_stamp}\]' \ 90 | + ' "%{WORD:verb} %{URIPATHPARAM:uri_path} HTTP/%{NUMBER:http_ver}" %{INT:http_status} %{INT:bytes} %{QS}' \ 91 | + ' %{QS:client}' 92 | grok = Grok(pat) 93 | m = grok.match(text) 94 | assert m['host'] == 'edge.v.iask.com.edge.sinastorage.com' and m['client_ip'] == '14.18.243.65' \ 95 | and m['delay'] == '6.032' and m['time_stamp'] == '21/Jul/2014:16:00:02 +0800' and m['verb'] == 'GET' \ 96 | and m['uri_path'] == '/edge.v.iask.com/125880034.hlv' and m['http_ver'] == '1.0' \ 97 | and m['http_status'] == '200' and m['bytes'] == '70528990' \ 98 | and m['client'] == '"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)' \ 99 | + ' Chrome/36.0.1985.125 Safari/537.36"', 'grok match failed:%s, %s' % (text, pat, ) 100 | 101 | text = '1989/02/23' 102 | pat = '%{NUMBER:birthyear:int}/%{NUMBER:birthmonth:int}/%{NUMBER:birthday:int}' 103 | grok = Grok(pat) 104 | m = grok.match(text) 105 | assert m == {'birthyear': 1989, 'birthmonth': 2, 'birthday': 23}, 'grok match failed:%s, %s' % (text, pat, ) 106 | 107 | text = 'load average: 1.88, 1.73, 1.49' 108 | pat = 'load average: %{NUMBER:load_1:float}, %{NUMBER:load_2:float}, %{NUMBER:load_3:float}' 109 | grok = Grok(pat) 110 | m = grok.match(text) 111 | assert m == {'load_1': 1.88, 'load_2': 1.73, 'load_3': 1.49}, 'grok match failed:%s, %s' % (text, pat, ) 112 | 113 | 114 | def test_custom_pats(): 115 | custom_pats = {'ID': '%{WORD}-%{INT}'} 116 | text = 'Beijing-1104,gary 25 "never quit"' 117 | pat = '%{ID:user_id},%{WORD:name} %{INT:age} %{QUOTEDSTRING:motto}' 118 | grok = Grok(pat, custom_patterns=custom_pats) 119 | m = grok.match(text) 120 | assert m['user_id'] == 'Beijing-1104' and m['name'] == 'gary' and m['age'] == '25' \ 121 | and m['motto'] == '"never quit"', 'grok match failed:%s, %s' % (text, pat, ) 122 | 123 | 124 | def test_custom_pat_files(): 125 | import os.path 126 | pats_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'test_patterns') 127 | text = 'Beijing-1104,gary 25 "never quit"' 128 | # pattern "ID" is defined in ./test_patterns/pats 129 | pat = '%{ID:user_id},%{WORD:name} %{INT:age} %{QUOTEDSTRING:motto}' 130 | grok = Grok(pat, custom_patterns_dir=pats_dir) 131 | m = grok.match(text) 132 | assert m['user_id'] == 'Beijing-1104' and m['name'] == 'gary' and m['age'] == '25' \ 133 | and m['motto'] == '"never quit"', 'grok match failed:%s, %s' % (text, pat, ) 134 | 135 | 136 | def test_hotloading_pats(): 137 | text = 'github' 138 | pat = '%{WORD:test_word}' 139 | grok = Grok(pat) 140 | m = grok.match(text) 141 | assert m['test_word'] == 'github', 'grok match failed:%s, %s' % (text, pat, ) 142 | # matches 143 | 144 | text = '1989' 145 | pat = '%{NUMBER:birthyear:int}' 146 | grok.set_search_pattern(pat) 147 | m = grok.match(text) 148 | assert m == {'birthyear': 1989}, 'grok match failed:%s, %s' % (text, pat, ) 149 | 150 | 151 | if __name__ == '__main__': 152 | test_one_pat() 153 | test_multiple_pats() 154 | test_custom_pats() 155 | test_custom_pat_files() 156 | test_hotloading_pats() 157 | --------------------------------------------------------------------------------