").addClass("plain-text-link").append(plain_text_view)
149 | div.insertBefore(this);
150 | $(this).addClass("has-view-text-link code");
151 | });
152 | };
153 |
154 | jQuery.fn.linkify = function(selector) {
155 | this.delegate(selector, "click", function(event) {
156 | event.preventDefault();
157 | $("a:first-of-type", this).each(function(i, el) {
158 | document.location = $(el).attr("href");
159 | });
160 | });
161 | };
162 |
163 | });
164 |
--------------------------------------------------------------------------------
/recipes/apache-json-logs/apache-lumberjack.conf:
--------------------------------------------------------------------------------
1 | CustomLog "||/opt/lumberjack/bin/lumberjack.sh --host remote.logstash.instance.example.com --port 6782 --ssl-ca-path /etc/logstash/lumberjack.crt -" json_event_log
2 |
3 | ErrorLog "||/opt/lumberjack/bin/lumberjack.sh --host remote.logstash.instance.example.com --port 6783 --ssl-ca-path /etc/logstash/lumberjack.crt -"
4 |
--------------------------------------------------------------------------------
/recipes/apache-json-logs/apache.conf:
--------------------------------------------------------------------------------
1 | # Create a log format called 'logstash_json' that emits, in json, the parts of an http
2 | # request I care about. For more details on the features of the 'LogFormat'
3 | # directive, see the apache docs:
4 | # http://httpd.apache.org/docs/2.2/mod/mod_log_config.html#formats
5 | LogFormat "{ \"@timestamp\": \"%{%Y-%m-%dT%H:%M:%S%z}t\", \"@fields\": { \"client\": \"%a\", \"duration_usec\": %D, \"status\": %s, \"request\": \"%U%q\", \"method\": \"%m\", \"referrer\": \"%{Referer}i\" } }" logstash_json
6 |
7 | LogFormat "{ \"@timestamp\": \"%{%Y-%m-%dT%H:%M:%S%z}t\", \"@message\": \"%r\", \"@fields\": { \"user-agent\": \"%{User-agent}i\", \"client\": \"%a\", \"duration_usec\": %D, \"duration_sec\": %T, \"status\": %s, \"request_path\": \"%U\", \"request\": \"%U%q\", \"method\": \"%m\", \"referrer\": \"%{Referer}i\" } }" logstash_ext_json
8 |
9 | # Write our 'logstash_json' logs to logs/access_json.log
10 | CustomLog logs/access_json.log logstash_json
11 |
12 |
--------------------------------------------------------------------------------
/recipes/apache-json-logs/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: article
3 | title: make apache log in json
4 | tags: apache, httpd, json
5 | ---
6 |
7 | * Goal: Log directly to a structured format in apache.
8 | * Audience: Folks looking to improve their log formats.
9 |
10 | # preface: why json?
11 |
12 | The default log format offered by apache is only semi-structured. To a human
13 | reading it, it appears to have some kind of reasonable structure. However,
14 | processing it with the logstash grok filter requires a complex and expensive
15 | regular expression.
16 |
17 | The best case for log formats is if you can simply emit them in a structured
18 | format from the application itself. This will reduce any extra parsing in the
19 | future!
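
For a sense of what this recipe avoids, parsing the stock combined access log with grok looks something like this (a sketch using the COMBINEDAPACHELOG pattern that ships with logstash); it is exactly this regex work that emitting JSON makes unnecessary:

    filter {
      grok {
        type => "apache"
        # one large regular expression evaluated against every single line
        pattern => "%{COMBINEDAPACHELOG}"
      }
    }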
20 |
21 | # agenda
22 |
23 | * Configure apache to emit json to a logfile
24 | * Configure logstash to read the file
25 |
26 | ## apache config
27 |
28 | First, we'll need to tell apache about our new log format. You'll put this in
29 | your httpd.conf:
30 |
31 | {% include_code apache.conf %}
32 |
33 | Keeping in mind that the goal here is to dump these logs into logstash, the
34 | json schema I provided is specific to how logstash forms its own events.
35 |
36 | Reload the apache config, and I now see things like this in my logs:
37 |
38 | { "@timestamp": "2012-08-22T14:35:19-0700", "client": "127.0.0.1", "duration_usec": 532, "status": 404, "request": "/favicon.ico", "method": "GET", "referrer": "-" }
39 |
40 | Apache's documentation explains how/why it escapes values:
41 |
42 | > For security reasons, starting with version 2.0.46, non-printable and other
43 | > special characters in %r, %i and %o are escaped using \xhh sequences, where hh
44 | > stands for the hexadecimal representation of the raw byte. Exceptions from this
45 | > rule are " and \, which are escaped by prepending a backslash, and all
46 | > whitespace characters, which are written in their C-style notation (\n, \t,
47 | > etc). In versions prior to 2.0.46, no escaping was performed on these strings
48 | > so you had to be quite careful when dealing with raw log files.
49 | > (from [mod_log_config's format
50 | > notes](http://httpd.apache.org/docs/2.2/mod/mod_log_config.html#format-notes))
51 |
52 | This should ensure that our log format always produces valid JSON, since apache
53 | escapes most/all of the characters that JSON requires to be escaped :)
54 |
55 | ## logstash config
56 |
57 | The config now is pretty simple. We simply tell logstash to expect 'logstash
58 | json' events from the given apache log file. No filters are required because
59 | we are already emitting proper logstash json events!
60 |
61 | {% include_code logstash.conf %}
62 |
63 | Running logstash with the above config:
64 |
65 | % java -jar logstash.jar agent -f logstash.conf
66 | {
67 | "@source" => "pork.example.com",
68 | "@type" => "apache",
69 | "@tags" => [],
70 | "@fields" => {
71 | "client" => "127.0.0.1",
72 | "duration_usec" => 240,
73 | "status" => 404,
74 | "request" => "/favicon.ico",
75 | "method" => "GET",
76 | "referrer" => "-"
77 | },
78 | "@timestamp" => "2012-08-22T14:53:47-0700"
79 | }
80 |
81 | Voila!
82 |
83 | ## Simplify with mod_macro
84 |
85 | We can greatly simplify our setup by using [mod_macro](https://people.apache.org/~fabien/mod_macro/)
86 | to generate the [LogFormat](http://httpd.apache.org/docs/current/mod/mod_log_config.html#logformat) and
87 | [CustomLog](http://httpd.apache.org/docs/current/mod/mod_log_config.html#customlog) at the same time.
88 |
89 | If our [VirtualHost](http://httpd.apache.org/docs/current/mod/core.html#virtualhost)'s
90 | [DocumentRoots](http://httpd.apache.org/docs/current/mod/core.html#documentroot) are consistently laid out,
91 | we can predictably build our configuration as follows:
92 |
93 | {% include_code macro.conf %}
94 |
95 | We can now create a VirtualHost that uses this macro:
96 |
97 |     <VirtualHost *:80>
98 |       ServerName www.example.com
99 |       DocumentRoot /srv/web/example.com/www/htdocs
100 |       Use logstash_log www.example.com prod-web137.dmz01.dc03.acme.com
101 |     </VirtualHost>
102 |
103 | ## Using lumberjack as a shipper
104 |
105 | A lightweight alternative is to use
106 | [lumberjack](https://github.com/jordansissel/lumberjack/) to send your Apache
107 | logs to a logstash server on another host. This doesn't cover lumberjack
108 | installation details. See [the github project's
109 | README](https://github.com/jordansissel/lumberjack/#readme) for that.
110 |
111 | Apache can be configured to [pipe logs to an external
112 | program](http://httpd.apache.org/docs/2.2/logs.html#piped). The nice thing
113 | about this option (the only one we'll cover here) is that the piped program runs
114 | under the supervision of the apache master process: there is no initscript/daemon
115 | to take care of, and apache will restart the process in the event it crashes.
116 |
117 | This is done with the first of the following lines:
118 |
119 | {% include_code apache-lumberjack.conf %}
120 |
121 | The second line ships Apache's unformatted error logs (see below). In both cases,
122 | don't miss the trailing dash at the end of the lumberjack command line, which stands
123 | for "read log messages from standard input".
124 |
125 | Then define lumberjack inputs on your logstash central server:
126 |
127 | {% include_code logstash-lumberjack.conf %}
128 |
129 | Note that the "format" is set to "json_event" in the first case. Since Apache
130 | has no option for formatting its error log, we have to set up a second input,
131 | listening on another port, with the "format" set to "plain".
132 |
133 | The parts related to error logs can of course be skipped if you're only
134 | interested in the web server's access logs.
135 |
136 | ### SELinux
137 |
138 | One caveat with our setup when using SELinux is that lumberjack needs to change
139 | some system limits and connect to the logstash server, both of which are blocked
140 | by default inside the httpd context (see `httpd_selinux(8)` for details). Just
141 | run:
142 |
143 | sudo setsebool -P httpd_setrlimit 1
144 | sudo setsebool -P httpd_can_network_connect 1
145 |
146 | in case you see these messages appear:
147 |
148 | # tail /var/log/audit/audit.log
149 | type=AVC msg=audit(1367837376.591:136095): avc: denied { setrlimit } for pid=17814 comm="lumberjack" scontext=unconfined_u:system_r:httpd_t:s0 tcontext=unconfined_u:system_r:httpd_t:s0 tclass=process
150 | type=AVC msg=audit(1367928599.621:829): avc: denied { name_connect } for pid=11398 comm="lumberjack" dest=6782 scontext=unconfined_u:system_r:httpd_t:s0 tcontext=system_u:object_r:cyphesis_port_t:s0 tclass=tcp_socket
151 |
152 | # tail /var/log/httpd/error_log
153 | Assertion failed lumberjack.c:111 in set_resource_limits(), insist(rc != -1): setrlimit(RLIMIT_NOFILE, ... 103) failed: Permission denied
154 |
155 | ## Is it safe?
156 |
157 | Well, I tested with Apache/2.2.22 and found it appears quite safe.
158 |
159 | What is safe? Here, "safe" means that apache generates valid JSON.
160 |
161 | To test this, I made a simple apache config and two scripts; the first script
162 | spams apache with some pretty unsavory http requests, and the second script
163 | reads the apache log and verifies that all the entries parse as valid JSON.
164 |
165 | % sh run.sh
166 | Starting apache
167 | Spamming apache with requests
168 | Verifying valid JSON
169 | Successful: 10000
170 |
171 | Technically, what is verified above is that the ruby JSON parser can process
172 | the data. Since apache uses '\xNN' notation for escaping special characters,
173 | it is technically invalid 'JSON', but I've found that many JSON parsers happily
174 | accept it.
175 |
176 | You can see the code for this test here: [apache.conf](test/apache.conf),
177 | [spam.rb](test/spam.rb), [check.rb](test/check.rb).
178 |
--------------------------------------------------------------------------------
/recipes/apache-json-logs/logstash-lumberjack.conf:
--------------------------------------------------------------------------------
1 | input {
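  # Access logs arrive on port 6782, already formatted as logstash json_event.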
2 | lumberjack {
3 | port => 6782
4 | tags => ["apache_access_log"]
5 | format => "json_event"
6 | ssl_certificate => "/etc/logstash/lumberjack.crt"
7 | ssl_key => "/etc/logstash/lumberjack.key"
8 | type => "lumberjack"
9 | }
10 |
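  # Error logs arrive on port 6783 as plain text, since apache cannot
  # format its error log.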
11 | lumberjack {
12 | port => 6783
13 | tags => ["apache_error_log"]
14 | format => "plain"
15 | ssl_certificate => "/etc/logstash/lumberjack.crt"
16 | ssl_key => "/etc/logstash/lumberjack.key"
17 | type => "lumberjack"
18 | }
19 | }
20 |
--------------------------------------------------------------------------------
/recipes/apache-json-logs/logstash.conf:
--------------------------------------------------------------------------------
1 | input {
2 | file {
3 | path => "/var/log/httpd/access_json.log"
4 | type => apache
5 |
6 | # This format tells logstash to expect 'logstash' json events from the file.
7 | format => json_event
8 | }
9 | }
10 |
11 | output {
12 | stdout { debug => true }
13 | }
14 |
--------------------------------------------------------------------------------
/recipes/apache-json-logs/macro.conf:
--------------------------------------------------------------------------------
1 | # Create a Macro named logstash_log that is used in the VirtualHost.
2 | # It defines, on the fly, a log format for the specific vhost $servername
3 | # and anchors its @source, @source_host and @source_path.
4 | #
5 | <Macro logstash_log $servername $hostname>
6 | LogFormat "{ \
7 | \"@source\":\"file://${hostname}//var/log/httpd/${servername}-access_log\",\"@source_host\": \"${hostname}\", \
8 | \"@source_path\": \"/var/log/httpd/${servername}-access_log\", \
9 | \"@tags\":[\"${servername}\"], \
10 | \"@message\": \"%h %l %u %t \\\"%r\\\" %>s %b\", \
11 | \"@fields\": { \
12 | \"timestamp\": \"%{%Y-%m-%dT%H:%M:%S%z}t\", \
13 | \"clientip\": \"%a\", \
14 | \"duration\": %D, \
15 | \"status\": %>s, \
16 | \"request\": \"%U%q\", \
17 | \"urlpath\": \"%U\", \
18 | \"urlquery\": \"%q\", \
19 | \"method\": \"%m\", \
20 | \"bytes\": %B, \
21 | \"vhost\": \"%v\" \
22 | } \
23 | }" logstash_apache_json
24 |
25 | CustomLog /var/log/httpd/${servername}-access_log logstash_apache_json
26 | </Macro>
27 |
--------------------------------------------------------------------------------
/recipes/apache-json-logs/test/README.md:
--------------------------------------------------------------------------------
1 | # Testing Apache logs in JSON
2 |
3 | Run:
4 |
5 | * `sh run.sh`
6 |
7 | This will run `httpd` using the apache.conf here, then run `spam.rb` and finally `check.rb`.
8 |
9 | ## apache.conf
10 |
11 | A minimal apache config that spits out JSON events, one per line.
12 |
13 | ## spam.rb
14 |
15 | Generates random (and possibly unsavory) http requests for apache to parse and log.
16 |
17 | ## check.rb
18 |
19 | Parses the `access_json.log` and verifies it's all valid.
20 |
--------------------------------------------------------------------------------
/recipes/apache-json-logs/test/apache.conf:
--------------------------------------------------------------------------------
1 |
2 | # Create a log format called 'logstash_json' that emits, in json, the parts of an http
3 | # request I care about. For more details on the features of the 'LogFormat'
4 | # directive, see the apache docs:
5 | # http://httpd.apache.org/docs/2.2/mod/mod_log_config.html#formats
6 | LogFormat "{ \"@timestamp\": \"%{%Y-%m-%dT%H:%M:%S%z}t\", \"@fields\": { \"client\": \"%a\", \"duration_usec\": %D, \"status\": %s, \"request\": \"%U%q\", \"method\": \"%m\", \"referrer\": \"%{Referer}i\" } }" logstash_json
7 |
8 | # Write our 'logstash_json' logs to logs/access_json.log
9 | CustomLog access_json.log logstash_json
10 | ErrorLog error.log
11 | PidFile apache.pid
12 |
13 | LoadModule log_config_module modules/mod_log_config.so
14 | LoadModule logio_module modules/mod_logio.so
15 | KeepAlive On
16 | ServerRoot .
17 | DocumentRoot .
18 | Listen 9396
19 |
--------------------------------------------------------------------------------
/recipes/apache-json-logs/test/check.rb:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env ruby
2 |
3 | require "json"
4 |
5 | count = 0
6 | failures = 0
7 | # run.sh passes the log path as an argument; default to the local file.
8 | path = ARGV[0] || "access_json.log"
9 | File.new(path).each_line do |line|
10 |   begin
11 |     JSON.parse(line)
12 |     count += 1
13 |   rescue => e
14 |     puts "Failed to parse: #{line.inspect}"
15 |     puts e
16 |     failures += 1
17 |   end
18 | end
19 |
20 | puts "Successful: #{count}"
21 | # run.sh checks our exit status, so fail if anything didn't parse.
22 | exit(failures == 0 ? 0 : 1)
--------------------------------------------------------------------------------
/recipes/apache-json-logs/test/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | echo "Starting apache"
4 | httpd -f $PWD/apache.conf -k start > /dev/null 2>&1
5 |
6 | echo "Spamming apache with requests"
7 | ruby spam.rb http://localhost:9396/
8 |
9 | echo "Verifying valid JSON"
10 | ruby check.rb access_json.log
11 | if [ $? -ne 0 ] ; then
12 | echo "Failure!"
13 | exit 1
14 | fi
15 |
16 |
--------------------------------------------------------------------------------
/recipes/apache-json-logs/test/spam.rb:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env ruby
2 |
3 | require "socket"
4 |
5 | 10000.times do
6 | path = 50.times.collect { rand(256).chr }.join("").gsub("\n", "")
7 | query = 50.times.collect { rand(256).chr }.join("").gsub("\n", "")
8 | referrer = 50.times.collect { rand(256).chr }.join("").gsub("\n", "")
9 | agent = 50.times.collect { rand(256).chr }.join("").gsub("\n", "")
10 |
11 | conn = TCPSocket.new("localhost", 9396)
12 | conn.write("GET /#{path}?#{query} HTTP/1.1\r\n")
13 | conn.write("Host: localhost\r\n")
14 | conn.write("User-Agent: #{agent}\r\n")
15 | conn.write("Referer: #{referrer}\r\n")
16 | conn.write("Connection: close\r\n")
17 | conn.write("\r\n")
18 | conn.read rescue nil
19 | conn.close rescue nil
20 | end
21 |
--------------------------------------------------------------------------------
/recipes/central-syslog/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: article
3 | title: parsing a central syslog server
4 | tags: getting started, syslog
5 | ---
6 |
7 | * Goal: Parse syslog messages on an existing centralized server
8 | * Audience: Anyone with an existing centralized syslog server.
9 |
10 | # introduction
11 |
12 | Many organizations already have a centralized syslog server and are comfortable with collecting logs this way. There are many good reasons to do this even if you want to use a system like logstash.
13 |
14 | It is assumed that you know how to set up, and already have, a working syslog server filled with tasty logs. It is also assumed that you will be using redis for a message queue _(combine the configs and remove the redis output / input to run on the same server)_.
15 |
16 | For the sake of simplicity this example assumes that all messages are in standard syslog format. If you have apache or other logs being pushed through, you may need to modify the file input locations (and set appropriate types) and add appropriate filters to process them.
17 |
18 | If you have a large set of logs to slurp, you may want to set `maxmemory 500mb` in your `/etc/redis.conf` to ensure redis can't overrun your system.
19 |
20 | # agenda
21 |
22 | * Prerequisites
23 | * Syslog Server - File Input
24 | * Logstash Indexer - Processing and Output to ES
25 | * Logstash Indexer - Additional Processing
26 |
27 | # prerequisites
28 |
29 | * This recipe requires logstash 1.2.1 or newer.
30 | * This recipe assumes a standard syslog format (a PRI prefix is not needed, but it does yield richer results)
31 | * This recipe assumes you have a logstash-indexer running redis for queueing.
32 |
33 |
34 | # Syslog Server - File Input
35 |
36 | The config on your syslog server should look like below. Pretty simple stuff: we're just declaring a file input and pushing to redis. I chose not to do any filtering here, as I want logstash to simply act as an agent on this server.
37 |
38 | {% include_code syslog-server.conf %}
39 |
40 | # Logstash Indexer - Processing and Output to ES
41 |
42 | Here's the workhorse. It looks like a lot of processing, but I have [tested this](https://gist.github.com/4513552) to > 20,000 messages/second using the generator input and 8 filter workers on a single box.
43 |
44 | {% include_code logstash-indexer.conf %}
45 |
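If you want to reproduce that kind of benchmark, you can temporarily swap the redis input for logstash's generator input, which floods the pipeline with a canned message (the sample line below is only an illustration):

    input {
      generator {
        type => "syslog"
        message => "<164>Oct 26 15:19:25 1.2.3.4 sshd[1234]: example payload for benchmarking"
        count => 1000000
      }
    }
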
46 | # Logstash Indexer - Additional Processing
47 |
48 | Using grep we can easily check each syslog message for particular patterns and add a tag to them if they match. This allows us to add more context to known log types. The example below is the format for NAT logs on my linux firewalls.
49 |
50 | {% include_code logstash-indexer_NAT.conf %}
51 |
52 |
53 |
--------------------------------------------------------------------------------
/recipes/central-syslog/logstash-indexer.conf:
--------------------------------------------------------------------------------
1 | input {
2 | redis {
3 | host => "127.0.0.1"
4 | data_type => "list"
5 | key => "logstash"
6 | }
7 | }
8 |
9 | filter {
10 | if [type] == "syslog" {
11 | grok {
12 | overwrite => "message"
13 | match => {
14 | "message" => "^(?:<%{POSINT:syslog_pri}>)?%{SYSLOGTIMESTAMP:timestamp} %{IPORHOST:host} (?:%{PROG:program}(?:\[%{POSINT:pid}\])?: )?%{GREEDYDATA:message}"
15 | }
16 | }
17 | syslog_pri { }
18 | date {
19 | # season to taste for your own syslog format(s)
20 | match => [ "timestamp", "MMM d HH:mm:ss", "MMM dd HH:mm:ss", "ISO8601" ]
21 | }
22 | }
23 | }
24 |
25 | output {
26 | elasticsearch { }
27 | }
28 |
--------------------------------------------------------------------------------
/recipes/central-syslog/logstash-indexer_NAT.conf:
--------------------------------------------------------------------------------
1 | # Example of extending the syslog filters for a specific syslog format.
2 | # This example matches the NAT logs on my linux firewalls.
3 | # Slip this into the logstash-indexer.conf just before the filter stanza is closed.
4 |
5 |
6 | grep {
7 | type => "syslog"
8 | match => ["@message","^RULE"]
9 | add_tag => "is_Linux_NAT"
10 | drop => false
11 | }
12 | kv {
13 | type => "syslog"
14 | tags => [ "is_Linux_NAT" ]
15 | prefix => "nat_"
16 | }
17 | grok {
18 | type => "syslog"
19 | tags => [ "is_Linux_NAT" ]
20 | pattern => [ "^RULE %{NUMBER:nat_Rule} -- %{DATA:nat_Action} %{GREEDYDATA:message_remainder}" ]
21 | }
22 | mutate {
23 | type => "syslog"
24 | tags => [ "is_Linux_NAT" ]
25 | replace => [ "@message", "NAT - %{nat_Action} -- %{nat_SRC}:%{nat_SPT} -> %{nat_DST}:%{nat_DPT}" ]
26 | }
27 | mutate {
28 | # XXX must not be combined with replacement which uses same field
29 | type => "syslog"
30 | tags => [ "is_Linux_NAT" ]
31 | remove => [ "message_remainder" ]
32 | }
33 |
--------------------------------------------------------------------------------
/recipes/central-syslog/syslog-server.conf:
--------------------------------------------------------------------------------
1 | input {
2 | file {
3 | type => "syslog"
4 | # modify to path to suit your local syslog configuration.
5 | # The below will recursively grab all files in /var/log/rsyslog that end in .log
6 | path => ["/var/log/rsyslog/**/*.log"]
7 | # comment out below after logstash has slurped in all of your existing logs, otherwise
8 | # you risk double indexing if you lose your sincedb file.
9 | start_position => "beginning"
10 | }
11 | }
12 |
13 | output {
14 | redis {
15 | # change below to the hostname or ip address of your redis server. can add more than one redis host.
16 | host => [ "ip.add.of.redis" ]
17 | data_type => 'list'
18 | key => 'logstash'
19 | batch => true
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/recipes/central-syslog/test.rb:
--------------------------------------------------------------------------------
1 | require "test_utils"
2 |
3 | describe "parse syslog" do
4 | extend LogStash::RSpec
5 |
6 | config File.read(File.join(File.dirname(__FILE__), "logstash-indexer.conf"))
7 |
8 | sample("message" => "<164>Oct 26 15:19:25 1.2.3.4 %ASA-4-106023: Deny udp src DRAC:10.1.2.3/43434 dst outside:192.168.0.1/53 by access-group \"acl_drac\" [0x0, 0x0]", "type" => "syslog") do
9 | insist { subject["type"] } == "syslog"
10 | insist { subject["syslog_pri"] } == "164"
11 | insist { subject["message"] } == "Deny udp src DRAC:10.1.2.3/43434 dst outside:192.168.0.1/53 by access-group \"acl_drac\" [0x0, 0x0]"
12 | end
13 |
14 | # Single digit day
15 | sample("message" => "<164>Oct 6 15:19:25 1.2.3.4 %ASA-4-106023: Deny udp src DRAC:10.1.2.3/43434 dst outside:192.168.0.1/53 by access-group \"acl_drac\" [0x0, 0x0]", "type" => "syslog") do
16 | insist { subject["type"] } == "syslog"
17 | insist { subject["syslog_pri"] } == "164"
18 | insist { subject["message"] } == "Deny udp src DRAC:10.1.2.3/43434 dst outside:192.168.0.1/53 by access-group \"acl_drac\" [0x0, 0x0]"
19 | end
20 | end
21 |
--------------------------------------------------------------------------------
/recipes/chef-cookbook/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: article
3 | title: Chef Cookbooks
4 | tags: chef, automation
5 | ---
6 |
7 | * Goal: Learn how to configure logstash with Chef
8 | * Audience: Folks who want to automate their logging setup
9 |
10 | # cookbooks
11 |
12 | * Logstash
13 | * [lusis/chef-logstash](https://github.com/lusis/chef-logstash)
14 | includes recipes for beaver, pyshipper, and others
15 | * Elasticsearch
16 | * [elasticsearch/cookbook-elasticsearch](https://github.com/elasticsearch/cookbook-elasticsearch)
17 | * Graphite
18 | * [hw-cookbooks/graphite](https://github.com/hw-cookbooks/graphite)
19 | * Kibana
20 | * lusis' [Kibana3 cookbook](https://github.com/lusis/chef-kibana)
21 | * Peter Donald's
22 | [kibana cookbook](http://community.opscode.com/cookbooks/kibana)
23 |
24 | This
25 | [blog post](http://devopsanywhere.blogspot.it/2012/07/stash-those-logs-set-up-logstash.html)
26 | explains how to get up and running quickly with these cookbooks.
27 |
--------------------------------------------------------------------------------
/recipes/cisco-asa/extending.conf:
--------------------------------------------------------------------------------
1 | grok {
2 | patterns_dir => "/path/to/your/patterns/directory"
3 | match => [
4 | ...
5 | "cisco_message", "%{YOUR_CUSTOM_PATTERN}"
6 | ]
7 | }
8 |
--------------------------------------------------------------------------------
/recipes/cisco-asa/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: article
3 | title: Cisco ASA
4 | tags: cisco asa firewall
5 | ---
6 |
7 | * Goal: Demonstrate how to use the built-in Grok patterns to index syslog messages from a Cisco ASA
8 | * Audience: Anyone who has a Cisco ASA firewall or similar appliance
9 |
10 | # Required Version
11 |
12 | These Grok patterns were added to the core of LogStash in version 1.2.0.
13 | If you are running an older version and don't want to upgrade, you can copy
14 | the relevant patterns into a local pattern file and reference the file in
15 | your configuration.
16 |
17 | # The Basic Configuration
18 |
19 | The following configuration shows how you would accept syslog messages on the
20 | default port (UDP/514) from the firewall and parse the message formats included
21 | in LogStash as of version 1.2.0.
22 |
23 | {% include_code logstash.conf %}
24 |
25 | # Extending the Configuration with Custom Message Formats
26 |
27 | If you want to add new formats that aren't included in LogStash, you can easily
28 | do so by tweaking your grok filter as follows. If you take the time to write a
29 | Grok expression for another Cisco ASA log message, please consider contributing
30 | it back to the project so it can be added to the core distribution.
31 |
32 | {% include_code extending.conf %}
33 |
--------------------------------------------------------------------------------
/recipes/cisco-asa/logstash.conf:
--------------------------------------------------------------------------------
1 | input {
2 | udp { port => 514 }
3 | }
4 |
5 | filter {
6 | # Pull the syslog part and Cisco tag out of the message
7 | grok {
8 | match => ["message", "%{CISCO_TAGGED_SYSLOG} %{GREEDYDATA:cisco_message}"]
9 | }
10 |
11 | # Parse the syslog severity and facility
12 | syslog_pri { }
13 |
14 | # Extract fields from each of the detailed message types.
15 | # The patterns provided below are included in the core of LogStash 1.2.0.
16 | grok {
17 | match => [
18 | "cisco_message", "%{CISCOFW106001}",
19 | "cisco_message", "%{CISCOFW106006_106007_106010}",
20 | "cisco_message", "%{CISCOFW106014}",
21 | "cisco_message", "%{CISCOFW106015}",
22 | "cisco_message", "%{CISCOFW106021}",
23 | "cisco_message", "%{CISCOFW106023}",
24 | "cisco_message", "%{CISCOFW106100}",
25 | "cisco_message", "%{CISCOFW110002}",
26 | "cisco_message", "%{CISCOFW302010}",
27 | "cisco_message", "%{CISCOFW302013_302014_302015_302016}",
28 | "cisco_message", "%{CISCOFW302020_302021}",
29 | "cisco_message", "%{CISCOFW305011}",
30 | "cisco_message", "%{CISCOFW313001_313004_313008}",
31 | "cisco_message", "%{CISCOFW313005}",
32 | "cisco_message", "%{CISCOFW402117}",
33 | "cisco_message", "%{CISCOFW402119}",
34 | "cisco_message", "%{CISCOFW419001}",
35 | "cisco_message", "%{CISCOFW419002}",
36 | "cisco_message", "%{CISCOFW500004}",
37 | "cisco_message", "%{CISCOFW602303_602304}",
38 | "cisco_message", "%{CISCOFW710001_710002_710003_710005_710006}",
39 | "cisco_message", "%{CISCOFW713172}",
40 | "cisco_message", "%{CISCOFW733100}"
41 | ]
42 | }
43 |
44 | # Parse the date
45 | date {
46 | match => ["timestamp",
47 | "MMM dd HH:mm:ss",
48 | "MMM d HH:mm:ss",
49 | "MMM dd yyyy HH:mm:ss",
50 | "MMM d yyyy HH:mm:ss"
51 | ]
52 | }
53 | }
54 |
55 | output {
56 | elasticsearch_http { host => "localhost" }
57 | }
58 |
--------------------------------------------------------------------------------
/recipes/color-codes/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: article
3 | title: removing color codes from linux logs
4 | tags: linux
5 | ---
6 |
7 | * Goal: Remove color codes from Linux logs.
8 | * Audience: People who are trying to parse logs that contain color codes (Unicorn for example)
9 |
10 | # preface: color codes
11 |
12 | Color codes are added to various log files by loggers (Ruby in this case) and are useful when
13 | viewing logs on a Linux server. They cause issues, however, when the logs are forwarded to Logstash by
14 | these servers. The color codes are displayed as normal text but contain non-printable characters,
15 | which will stop patterns from matching lines from these logs.
16 |
17 | # example
18 |
19 | For this example, we will be using these strings from a Ruby log file:
20 |
21 | D, [2013-01-17T14:59:52.415396 #10324] DEBUG -- : ESC[1mESC[36mProduct Load (0.8ms)ESC[0m ESC[1mSELECT `product`.* FROM `products` WHERE `products`.`id` = 14 LIMIT 1ESC[0m
22 | D, [2013-01-17T14:59:52.436057 #10324] DEBUG -- : ESC[1mESC[35mUser Load (1.0ms)ESC[0m SELECT `users`.* FROM `users` WHERE `users`.`id` = '123' LIMIT 1
23 |
24 | The ESC characters are non-printable and will cause patterns to fail. Adding a mutate on the
25 | incoming message like this:
26 |
27 | # Get rid of color codes
28 | mutate {
29 | gsub => ["@message", "\x1B\[([0-9]{1,2}(;[0-9]{1,2})?)?[m|K]", ""]
30 | }
31 |
32 | will result in the Ruby logs becoming this:
33 |
34 | D, [2013-01-17T14:59:52.415396 #10324] DEBUG -- : Product Load (0.8ms) SELECT `product`.* FROM `products` WHERE `products`.`id` = 14 LIMIT 1
35 | D, [2013-01-17T14:59:52.436057 #10324] DEBUG -- : User Load (1.0ms) SELECT `users`.* FROM `users` WHERE `users`.`id` = '123' LIMIT 1
36 |
37 | These logs are then more easily grokked as the logs are now in a predictable format.
--------------------------------------------------------------------------------
/recipes/config-snippets/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: article
3 | title: Config snippets
4 | tags: config snippets
5 | ---
6 |
7 | * Goal: Collect different config snippets that people use
8 | * Audience: Anyone
9 |
10 | Here we can paste all kinds of config snippets that might be useful for other people.
11 |
12 |
13 | # multiline
14 |
15 | ## ModSec
16 |
17 | # merge all modsec events for a given entity into the same event.
18 | multiline {
19 | type => "modsec"
20 | pattern => "^--[a-fA-F0-9]{8}-A--$"
21 | negate => true
22 | what => previous
23 | }
24 |
25 | # Grok
26 |
27 | ## Stunnel
28 |
29 | # Grok patterns for stunnel:
30 | STUNNELPREFIX ^LOG%{NUMBER:level:int}\[%{NUMBER:pid:int}:%{NUMBER:thread_id:int}\]:
31 | STUNNELSERVICE %{STUNNELPREFIX} Service %{WORD:service} %{GREEDYDATA:message} %{IPORHOST:from_host}:%{NUMBER:from_port:int}
32 | STUNNELCONNECT %{STUNNELPREFIX} %{GREEDYDATA:message} %{IPORHOST:to_host}:%{NUMBER:to_port:int}
33 | STUNNELGENERAL %{STUNNELPREFIX} %{GREEDYDATA:message}
34 |
35 |
36 | # Use it in grok:
37 | filter {
38 | grok {
39 | pattern => [ "%{STUNNELSERVICE}", "%{STUNNELCONNECT}", "%{STUNNELGENERAL}" ]
40 | }
41 | }
42 |
43 | ## Squid
44 |
45 | Pattern: %{SYSLOGTIMESTAMP} %{SYSLOGHOST:sourcehost} %{SYSLOGPROG}: %{NUMBER:timestamp} \s+ %{NUMBER:request_msec:float} %{IPORHOST:client} %{WORD:cache_result}/%{NUMBER:status:int} %{NUMBER:size:int} %{WORD:http_type} %{URI:url} - %{WORD:request_type}/%{IPORHOST:forwarded_to} %{GREEDYDATA:content_type}
46 |
47 | ## iptables
48 |
49 | # Grok patterns for iptables:
50 | NETFILTERMAC %{COMMONMAC:dst_mac}:%{COMMONMAC:src_mac}:%{ETHTYPE:ethtype}
51 | ETHTYPE (?:(?:[A-Fa-f0-9]{2}):(?:[A-Fa-f0-9]{2}))
52 | IPTABLES1 (?:IN=%{WORD:in_device} OUT=(%{WORD:out_device})? MAC=%{NETFILTERMAC} SRC=%{IP:src_ip} DST=%{IP:dst_ip}.*(TTL=%{INT:ttl})?.*PROTO=%{WORD:proto}?.*SPT=%{INT:src_port}?.*DPT=%{INT:dst_port}?.*)
53 | IPTABLES2 (?:IN=%{WORD:in_device} OUT=(%{WORD:out_device})? MAC=%{NETFILTERMAC} SRC=%{IP:src_ip} DST=%{IP:dst_ip}.*(TTL=%{INT:ttl})?.*PROTO=%{INT:proto}?.*)
54 | IPTABLES (?:%{IPTABLES1}|%{IPTABLES2})
55 |
56 | # Use it in grok:
57 | filter {
58 | grok {
59 | pattern => "%{IPTABLES}"
60 | }
61 | }
62 |
63 | ## Jenkins logs
64 |
65 | # logstash snippet to remove ConsoleNote stuff in Jenkins logs
66 | # note: there are some escape characters you might not see -- check the source of this cookbook
67 | filter {
68 | mutate {
69 | gsub => ["@message", "\[8mha.*==\[0m", ""]
70 | }
71 | }
72 |
--------------------------------------------------------------------------------
/recipes/debug-config/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: article
3 | title: debug config/filters
4 | tags: tools, debug, filter, config
5 | ---
6 |
7 | * Goal: debug your (failing) config/filters
8 | * Audience: Folks who want to know why their config breaks
9 |
10 | # Workflow
11 |
12 | This howto will show you how you can debug your logstash-config in a linux-terminal.
13 |
14 | ## initial setup
15 |
16 | You need a console window running logstash, plus your editor; you should be able to switch between them in a reasonably short time :-)
17 |
18 | In the console, start logstash with a clean config file that only contains the filter you want to debug:
19 | ```bash
20 | java -jar /home/logstash/logstash-1.1.10-flatjar.jar agent -e "$(cat /usr/local/etc/logstash/conf-available/50-filtertest.conf)"
21 | ```
22 | You will have to change the paths in the command according to your setup.
23 |
24 | ## debugging
25 |
26 | The config (`50-filtertest.conf`) should only contain a simple filter:
27 | ```
28 | filter {
29 | grok {
30 | type => "stdin"
31 | patterns_dir => [ "/usr/local/etc/logstash/patterns" ]
32 | pattern => "%{FOO_LOG}"
33 | }
34 | }
35 | ```
36 |
37 | Here we load additional patterns from `/usr/local/etc/logstash/patterns`, and try to match the pattern `FOO_LOG`.
38 |
39 | You now have to paste one or more lines into the console where logstash is running. This makes logstash run the pasted data through the specified filter.
40 | Try to break the pattern down into something simpler when you get parse errors.
41 | You have to restart logstash (press `CTRL-c`, `arrow up`, `enter`) every time you change something in the config and want to test it.
42 |
43 | Adding debug to your stdout output can additionally help with debugging your patterns and fields:
44 | ```
45 | output { stdout { debug => true } }
46 | ```
47 |
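When the config given via `-e` contains only a filter, logstash falls back to a stdin input and a stdout output. You can also spell this out explicitly (a minimal sketch matching the filter above):
```
input { stdin { type => "stdin" } }
filter {
  grok {
    type => "stdin"
    patterns_dir => [ "/usr/local/etc/logstash/patterns" ]
    pattern => "%{FOO_LOG}"
  }
}
output { stdout { debug => true } }
```
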
48 | ## example with URIPATHPARAM
49 |
50 | Let's say you have a problem matching `%{URIPATHPARAM}`. You should then try to match the following (going from big rules to smaller ones):
51 |
52 | 1. `(?:/[A-Za-z0-9$.+!*'(){},~:;=#%_\-]*)+` (paste only the first part of the whole line that should match URIPATHPARAM); when that works, try the next part of `%{URIPATHPARAM}`
53 | 2. `\?[A-Za-z0-9$.+!*'|(){},~#%&/=:;_?\-\[\]]*` (paste the last part of the line)
54 |
55 | ## example with SYSLOGBASE
56 | Again try to match every single part of a syslogline:
57 |
58 | 1. `%{MONTH} +%{MONTHDAY} %{TIME}` (%{SYSLOGTIMESTAMP})
59 | 2. `<%{NONNEGINT:facility}.%{NONNEGINT:priority}>` (%{SYSLOGFACILITY})
60 | [...]
61 |
62 | When all single parts match without error, try to combine the parts, beginning with the first and the second; then continue adding parts and testing again.
63 |
--------------------------------------------------------------------------------
/recipes/enable-ipv6-dns-lookups/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: article
3 | title: Running Logstash in a dual stack ip v4/v6 environment
4 | tags:
5 | ---
6 |
7 | * Goal: Launch logstash with ipv6 dns lookup support
8 | * Audience: Logstash developers, administrators
9 |
10 | # baseline
11 |
12 | On my box, a default startup does not perform ipv6 dns lookups, as java defaults to preferring ipv4 over ipv6. So starting logstash with the following option:
13 |
14 | java -Xmx32m -jar /usr/local/logstash/logstash-1.1.1-monolithic.jar agent -f /etc/logstash/logstash.conf -l /var/log/logstash/logstash.log
15 |
16 | results in dns lookups resolving to ipv4 instead of doing ipv6 -> ipv4.
17 |
18 | # baseline + ipv6 enable
19 |
20 | To enable ipv6 dns lookups with logstash you need to enable the following java startup option:
21 |
22 | java -Djava.net.preferIPv6Addresses=true -Xmx32m -jar /usr/local/logstash/logstash-1.1.1-monolithic.jar agent -f /etc/logstash/logstash.conf -l /var/log/logstash/logstash.log
23 |
24 | or just add the option to _JAVA_OPTIONS:
25 |
26 | export _JAVA_OPTIONS='-Djava.net.preferIPv6Addresses=true'
27 |
28 | which will be picked up by java on startup.
29 |
30 | Here is a description of what that option does, taken from the
31 | [java networking properties docs](http://docs.oracle.com/javase/1.4.2/docs/guide/net/properties.html):
32 |
33 | “java.net.preferIPv6Addresses (default: false)
34 |
35 | If IPv6 is available on the operating system the default preference is to prefer an IPv4-mapped address over an IPv6 address. This is for backward compatibility reasons – for example applications that depend on access to an IPv4 only service or applications that depend on the %d.%d.%d.%d representation of an IP address. This property can be set to try to change the preferences to use IPv6 addresses over IPv4 addresses. This allows applications to be tested and deployed in environments where the application is expected to connect to IPv6 services.”
36 |
37 |
--------------------------------------------------------------------------------
/recipes/example/example.txt:
--------------------------------------------------------------------------------
1 | this is a nice example file
2 |
3 | Hello world!
4 |
--------------------------------------------------------------------------------
/recipes/example/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: article
3 | title: sample article title
4 | tags: relevant article tags
5 | ---
6 |
7 | * Goal: What will reading this article accomplish?
8 | * Audience: Who should be reading this?
9 |
10 | # First Header
11 |
12 | Some introductory content. Describe some background, give info on the problem
13 | you're trying to solve, and include any relevant links that will help.
14 |
15 | # Second Header
16 |
17 | Get rolling with your topic.
18 |
19 | You can use `{% include_code example.txt %}` on a line by itself to display
20 | files like config files, etc, right in the page. It'll show up like this:
21 |
22 | {% include_code example.txt %}
23 |
24 | # Conclusions
25 |
26 | Don't forget to conclude!
27 |
--------------------------------------------------------------------------------
/recipes/example/index.md.txt:
--------------------------------------------------------------------------------
1 |
2 | ---
3 | layout: article
4 | title: sample article title
5 | tags: relevant article tags
6 | ---
7 |
8 | * Goal: What will reading this article accomplish?
9 | * Audience: Who should be reading this?
10 |
11 | # First Header
12 |
13 | Some introductory content. Describe some background, give info on the problem
14 | you're trying to solve, and include any relevant links that will help.
15 |
16 | # Second Header
17 |
18 | Get rolling with your topic.
19 |
20 | You can use `{% include_code example.txt %}` on a line by itself to display
21 | files like config files, etc, right in the page. It'll show up like this:
22 |
23 | {% include_code example.txt %}
24 |
25 | # Conclusions
26 |
27 | Don't forget to conclude!
28 |
--------------------------------------------------------------------------------
/recipes/log-from-cron/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: article
3 | title: logging from cron
4 | tags: cron
5 | ---
6 |
7 | * Goal: Ship cron logs to logstash.
8 | * Audience: Folks who use cron and want to direct log output somewhere
9 |
10 | # preface: email sucks
11 |
12 | The most common way I see folks logging from cron is to use the MAILTO= setting
13 | to direct job output to email. Unfortunately, people easily train themselves
14 | and their mail programs to ignore such mail.
15 |
16 | You probably want to save the logs somewhere so you can debug and analyze them later,
17 | and the best place for that is probably not your inbox.
18 |
19 | # using logger, instead
20 |
21 | logger is a tool that reads data on standard input and submits it to syslog. Here's an example use:
22 |
23 | echo "Hello world" | logger -t sample
24 |
25 | Logger uses the syslog(3) POSIX api, so your local syslog daemon should pick it
26 | up, and you can usually see the result in /var/log/messages:
27 |
28 | Feb 26 23:07:08 seven sample: Hello world
29 |
30 | With this usage in mind, you should now see that you can add '| logger ...' to
31 | your cron jobs and you will get output captured to syslog.
32 |
33 | Once you are using the `logger` tool, you can have your local syslog agent
34 | (syslog-ng, etc) ship those logs to logstash directly. Alternately, you can use
35 | logstash's file input to pick this data up from wherever the logs end up (often
36 | /var/log/messages)
37 |
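For instance, a minimal file input for that case might look like this (the path is an assumption; point it wherever your syslog daemon writes):

    input {
      file {
        type => "syslog"
        path => [ "/var/log/messages" ]
      }
    }
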
38 | # example
39 |
40 | For this example, we will be logging output of an old-file cleaner:
41 |
42 | 0 0 * * * find /data/cache -mtime +7 -print0 | xargs -0tn100 rm 2>&1 | logger -t "file cleaner"
43 |
44 | Because the above uses the '-t' flag on xargs, each command xargs invokes is
45 | printed to stderr; the `2>&1` sends that trace down the pipe so logger captures and logs it.
46 |
47 | # caveats
48 |
49 | The 'logger' tool that ships with most linux distros will chop messages into
50 | 1024-byte lengths. This causes longer messages to be spread across multiple
51 | events. Frankly, this is pretty annoying and makes searching for longer logs
52 | quite difficult.
53 |
54 | Additionally, on Linux, syslog(3) writes to /dev/log which is a shared datagram
55 | socket with a limited (but tunable) buffer size. At high log-rates, you *will*
56 | lose events.
57 |
58 | If you have short log messages and your system logs at moderate rates or less,
59 | logger is a decent tool for solving this problem.
60 |
61 | # further reading
62 |
63 | If you're interested in other good cron practices, check out this
64 | [sysadvent article](http://sysadvent.blogspot.com/2009/12/cron-practices.html) about cron practices.
--------------------------------------------------------------------------------
/recipes/log-shippers/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: article
3 | title: log shippers
4 | tags: tools, transport
5 | ---
6 |
7 | * Goal: Learn about options for shipping logs
8 | * Audience: Folks who are looking for alternatives to the logstash agent in log transport.
9 |
10 | # tools!
11 |
12 | There are lots of ways to ship logs off of your servers -
13 |
14 | * [beaver](https://github.com/josegonzalez/beaver) - python, multiple outputs
15 | * [woodchuck](https://github.com/danryan/woodchuck) - ruby, multiple outputs
16 | * [awesant](https://github.com/bloonix/awesant) - perl, multiple outputs supported
17 | * [lumberjack](https://github.com/jordansissel/lumberjack) - C, encrypted+compressed transport
18 | * [syslog-shipper](https://github.com/jordansissel/syslog-shipper) - ruby, syslog tcp
19 | * [remote_syslog](https://github.com/papertrail/remote_syslog) - ruby, syslog tcp/tls
20 | * [Message::Passing](https://metacpan.org/module/Message::Passing) - perl, multiple inputs and outputs
21 | * [nxlog](http://nxlog-ce.sourceforge.net) - C, multi platform including windows, tcp/udp/ssl/tls
22 | * [logtail](https://github.com/shtouff/logtail) - perl, from flat files to redis
23 | * [node-logstash](https://github.com/bpaquet/node-logstash) - JavaScript, NodeJS implementation of Logstash
24 |
25 | In general, all of the above perform the task of taking logs from files and
26 | shipping them off in a form that logstash can consume.
27 |
28 |
--------------------------------------------------------------------------------
/recipes/logging-from-nodejs/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: article
3 | title: logging from NodeJS
4 | tags: node nodejs bucker
5 | ---
6 |
7 | * Goal: Get NodeJS apps to output logstash compatible log files.
8 | * Audience: Folks who have NodeJS apps and want an easy path to logstash
9 |
10 | # Introduction
11 |
12 | Logging from NodeJS is normally done by sending console output to
13 | a file. Most of the time this is done via upstart or runit to
14 | brute-force capture the console stream.
15 |
16 | Some applications use logging modules but the vast majority of those
17 | wrap the console output to a file with minimal fanfare.
18 |
19 | At [And Yet](http://andyet.com) we use [bucker](https://npmjs.org/package/bucker)
20 | for that - it's a module that we created for internal use and
21 | have since open-sourced. This gives you all the normal things you expect from
22 | a logging tool: levels, exception handling, middleware integration - and then
23 | it also adds multiple transport options.
24 |
25 | Bucker just gained the ability to output directly to logstash via either
26 | UDP or Redis pub/sub, emitting json_event data that logstash can consume directly.
27 |
28 | # Logging to UDP
29 |
30 | {
31 | logstash: {
32 | udp: true, // send directly over UDP
33 | host: '127.0.0.1', // defaults to localhost
34 | port: 9999 // defaults to 6379 for redis, 9999 for udp
35 | }
36 | }
37 |
38 | # Logging to Redis
39 |
40 | {
41 | logstash: {
42 | redis: true, // send as redis pubsub messages
43 | host: '127.0.0.1', // defaults to localhost
44 | port: 6379, // defaults to 6379 for redis
45 | key: 'bucker_logs' // defaults to 'bucker', this is only used for the redis transport
46 | }
47 | }
48 |
49 | NOTE: you can only send to Redis OR UDP, not both
50 |
51 | # Logstash side
52 |
53 | udp {
54 | host => "127.0.0.1"
55 | type => "udp"
56 | format => "json_event"
57 | }
58 | redis {
59 | host => "127.0.0.1"
60 | type => "redis-channel"
61 | data_type => "channel"
62 | key => "bucker" # bucker will only ever send to this key
63 | }
64 |
--------------------------------------------------------------------------------
/recipes/logging-from-python/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: article
3 | title: logging from python
4 | tags: python
5 | ---
6 |
7 | * Goal: Get python apps to output logstash compatible log files.
8 | * Audience: Folks who have python apps and want an easy path to logstash
9 |
10 | # preface: python logging
11 |
12 | Python logging is organised around a few key concepts:
13 |
14 | * _loggers_ are sinks for logs
15 | * _handlers_ are storage endpoints for loggers
16 | * _formatters_ generate the actual payload to store in handlers
17 |
18 | The logging path could be represented this way:
19 |
20 | 
21 |
22 | # the proposed architecture
23 |
24 | In this document we will push towards letting standard python handlers store
25 | the logs, as opposed to talking directly to elasticsearch, redis, or zmq, as
26 | this might be too particular for some use cases.
27 |
28 | The alternative proposed is to use a log formatter which outputs in a format
29 | meant to be directly consumable by the `json_event` format in logstash.
30 |
31 | On the logstash side a simple file input with the `json_event` format will then
32 | be sufficient.
33 |
34 | # python setup
35 |
36 | A pip package, `logstash_formatter`, is available for formatting logs correctly
37 | (see https://github.com/exoscale/python-logstash-formatter); all other
38 | dependencies are provided by python itself.
39 |
40 | The first step in your application will be to create an appropriate logger;
41 | for instance, the following code will create a logger ready to output to a file:
42 |
43 | import logging
44 | import logstash_formatter
45 |
46 | logger = logging.getLogger()
47 | handler = logging.FileHandler('/var/log/myapp.log')
48 | formatter = logstash_formatter.LogstashFormatter()
49 |
50 | handler.setFormatter(formatter)
51 | logger.addHandler(handler)
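
logger.setLevel(logging.INFO)  # the root logger defaults to WARNING

# with the handler attached, each record logged below lands in
# /var/log/myapp.log as one JSON document per line:
logger.info("myapp starting")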
52 |
53 | # logstash setup
54 |
55 | Provided the above logger is used to output messages, the following
56 | logstash input can be used to gather messages in logstash:
57 |
58 | file {
59 | sincedb_path => "/var/lib/logstash/sincedb"
60 | format => "json_event"
61 | type => "myapp"
62 | path => [ "/var/log/myapp.log" ]
63 | }
64 |
--------------------------------------------------------------------------------
/recipes/logging-from-python/python_logging.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/logstash/cookbook/caff4ff5189ffb0b113061b6d97ca511f1a08b78/recipes/logging-from-python/python_logging.png
--------------------------------------------------------------------------------
/recipes/mysql/mysql-general.conf:
--------------------------------------------------------------------------------
1 | filter {
2 | grok {
3 | type => "mysql-general"
4 | # Some lines start with a timestamp: 121003 12:00:06
5 | # Some lines start with a tab (no timestamp)
6 | pattern => "^(?:(?:\s)|(?:%{NUMBER} %{TIME}))\s%{NUMBER:id}"
7 | }
8 |
9 | multiline {
10 | type => "mysql-general"
11 | pattern => "^(?:(?:\s)|(?:%{NUMBER} %{TIME}))\s%{NUMBER:id}"
12 | negate => true
13 | what => previous
14 | }
15 | }
16 |
--------------------------------------------------------------------------------
/recipes/mysql/mysql-slow.conf:
--------------------------------------------------------------------------------
1 | filter {
2 | grok {
3 | type => "mysql-slow"
4 | singles => true
5 | pattern => [
6 | "^# User@Host: %{USER:user}(?>\[[^\]]+\])? @ %{HOST:host} \[%{IP:ip}?\]",
7 | "^# Query_time: %{NUMBER:duration:float} Lock_time: %{NUMBER:lock_wait:float} Rows_sent: %{NUMBER:results:int} \s*Rows_examined: %{NUMBER:scanned:int}",
8 | "^SET timestamp=%{NUMBER:timestamp};"
9 | ]
10 | }
11 |
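# "# Time: " header lines are attached to the query event that follows them.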
12 | multiline {
13 | type => "mysql-slow"
14 | pattern => "^# Time: "
15 | what => next
16 | }
17 |
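# Anything that is not a new "# User@Host: " header continues the previous event.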
18 | multiline {
19 | type => "mysql-slow"
20 | pattern => "^# User@Host: "
21 | negate => true
22 | what => previous
23 | }
24 |
25 | date {
26 | type => "mysql-slow"
27 | timestamp => UNIX
28 | }
29 |
30 | #mutate {
31 | #type => "mysql-slow"
32 | #remove => "timestamp"
33 | #}
34 | }
35 |
--------------------------------------------------------------------------------
/recipes/mysql/test.rb:
--------------------------------------------------------------------------------
1 | describe "mysql logs" do
2 | extend LogStash::RSpec
3 |
4 | describe "slow query log" do
5 | # The logstash config goes here.
6 | # At this time, only filters are supported.
7 | config File.read(File.join(File.dirname(__FILE__), "mysql-slow.conf"))
8 |
9 | data = File.open(__FILE__)
10 | data.each { |line| break if line == "__END__\n" }
11 |
12 | events = data.collect do |line|
13 | LogStash::Event.new("@message" => line.chomp, "@type" => "mysql-slow")
14 | end
15 |
16 | sample events do
17 | p subject
18 | event = subject.first
19 | insist { subject["user"] } == "amavis"
20 | insist { subject["host"] } == "randomhost.local"
21 | insist { subject["ip"] } == "10.1.22.33"
22 | insist { subject["duration"] } == 114
23 | insist { subject["lock_wait"] } == 0
24 | insist { subject["results"] } == 25856
25 | insist { subject["scanned"] } == 10864578
26 | insist { subject.timestamp } == "2012-10-25T15:40:26.000Z"
27 | end
28 | end
29 | end
30 |
31 | __END__
32 | # Time: 121025 15:40:26
33 | # User@Host: amavis[amavis] @ randomhost.local [10.1.22.33]
34 | # Query_time: 114 Lock_time: 0 Rows_sent: 25856 Rows_examined: 10864578
35 | SET timestamp=1351204826;
36 | /* amavis cleanquarantine N:D:S:Amavis 175 */ select msg_headers.id, filename from msg_headers, users where (users.id=msg_headers.user_id) and (date_add(storetime, interval retention day) < now());
37 |
38 |
--------------------------------------------------------------------------------
/recipes/packaging/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: article
3 | title: Packaging Logstash
4 | tags: rpm,init.d,logrotate,redhat,centos,fedora
5 | ---
6 |
7 | * Goal: Packaging Logstash to install and update it easily via package manager
8 | * Audience: Folks who want to install logstash from a distribution repository
9 |
10 | # Packaging for RedHat/CentOS/Fedora
11 |
12 | * [logstash-pkgs](https://github.com/bloonix/logstash-pkgs) - Spec file, logrotate, init script, sysconfig
13 |
14 |
--------------------------------------------------------------------------------
/recipes/puppet-modules/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: article
3 | title: Puppet Modules
4 | tags: puppet, automation
5 | ---
6 |
7 | * Goal: Learn about different puppet modules
8 | * Audience: Folks who want to automate their central logging setup
9 |
10 | # Logstash
11 |
12 | * [electrical/puppet-logstash](https://github.com/electrical/puppet-logstash)
13 |
14 | # Elasticsearch
15 |
16 | * [electrical/puppet-elasticsearch](https://github.com/electrical/puppet-elasticsearch)
17 |
18 | # Bucky
19 |
20 | Bucky is a python replacement of statsd which also supports collectd and metricsd.
21 |
22 | * [electrical/puppet-bucky](https://github.com/electrical/puppet-bucky)
23 |
24 | # Graphite
25 |
26 | * [electrical/puppet-graphite](https://github.com/electrical/puppet-graphite)
27 |
28 | # Beaver
29 |
30 | * [electrical/puppet-beaver](https://github.com/electrical/puppet-beaver)
31 |
32 | # Lumberjack
33 |
34 | * [electrical/puppet-lumberjack](https://github.com/electrical/puppet-lumberjack)
35 |
36 | # Kibana
37 |
38 | * [electrical/puppet-kibana](https://github.com/electrical/puppet-kibana)
39 |
--------------------------------------------------------------------------------
/recipes/rsyslog-agent/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: article
3 | title: shipping to logstash with rsyslog
4 | tags: getting started, rsyslog
5 | ---
6 |
7 | * Goal: Use rsyslog as a local agent to ship logs to logstash.
8 | * Audience: Folks who cannot or will not deploy the logstash agent to some or
9 | all servers.
10 |
11 | # preface: why rsyslog?
12 |
13 | It's an alternative to using logstash on your nodes to ship logs.
14 |
15 | The logstash agent, when run from java, can incur significant overhead. The
16 | minimum memory footprint I have been able to achieve is about 100mb. On tiny
17 | virtual machines, this may not be acceptable, so you need an alternative.
18 |
19 | This document is more about solving the "logstash agent is too big" problem.
20 |
21 | I have no preference for rsyslog, but it happens to be a reasonable alternative
22 | to the logstash agent on your edge servers given it ships by default with some
23 | OS distributions.
24 |
25 | # agenda
26 |
27 | * Configure rsyslog
28 | * Configure logstash to accept syslog messages
29 |
30 | # configure rsyslog
31 |
32 | The rsyslog daemon is useful both for accepting local syslog messages and
33 | for pulling logs from files.
34 |
35 | To watch files with rsyslog, you want to use the
36 | [imfile](http://www.rsyslog.com/doc/imfile.html) rsyslog module.
37 |
38 | For example, let's say we want to forward local syslog as well as apache and
39 | mysql log files to logstash.
40 |
41 | {% include_code rsyslog.conf %}
42 |
43 | # configure logstash
44 |
45 | Now, logstash needs to be told to accept syslog input. This is simple enough.
46 | Here is an example config that takes syslog and emits it to stdout:
47 |
48 | {% include_code logstash.conf %}
49 |
50 | The above sets up logstash to listen on port 5544 for syslog messages.
51 |
--------------------------------------------------------------------------------
/recipes/rsyslog-agent/logstash.conf:
--------------------------------------------------------------------------------
1 | input {
2 | syslog {
3 | type => syslog
4 | port => 5544
5 | }
6 | }
7 |
8 | output {
9 | stdout { }
10 | }
11 |
--------------------------------------------------------------------------------
/recipes/rsyslog-agent/rsyslog.conf:
--------------------------------------------------------------------------------
1 | $ModLoad imfile # Load the imfile input module
2 | $ModLoad imklog # for reading kernel log messages
3 | $ModLoad imuxsock # for reading local syslog messages
4 |
5 | # Watch /var/log/apache2/access.log
6 | $InputFileName /var/log/apache2/access.log
7 | $InputFileTag apache-access:
8 | $InputFileStateFile state-apache-access
9 | $InputRunFileMonitor
10 |
11 | # Watch /var/log/apache2/error.log
12 | $InputFileName /var/log/apache2/error.log
13 | $InputFileTag apache-error:
14 | $InputFileStateFile state-apache-error
15 | $InputRunFileMonitor
16 |
17 | # Watch /var/log/mysql/mysql.log
18 | $InputFileName /var/log/mysql/mysql.log
19 | $InputFileTag mysql:
20 | $InputFileStateFile state-mysql
21 | $InputRunFileMonitor
22 |
23 | # Send everything to a logstash server named 'myserver' on port 5544.
24 | # ('@@' means TCP; a single '@' would send via UDP.)
25 | *.* @@myserver:5544
25 |
--------------------------------------------------------------------------------
/recipes/statsd-metrics/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: article
3 | title: StatsD plugin metrics explanation
4 | tags: getting started, statsd, metrics, logstash
5 | ---
6 |
7 | * Goal: Help people understand the StatsD metrics
8 | * Audience: Everyone who wants to use the StatsD plugin
9 |
10 | # preface: StatsD Metrics
11 |
12 | I will show you, with examples, what the following StatsD metrics mean: **count**, **increment**, **timing**.
13 |
14 | # Sample Case 1 (count metric)
15 |
16 | We receive a number each second for a period of 10 seconds. Let's say that the starting moment is "t1".
17 |
18 | As a table, it looks like this:
19 |
20 | Moment | Number
21 | ---- |:------------
22 | t1 | 1
23 | t1+1s | 2
24 | t1+2s | 3
25 | t1+3s | 4
26 | t1+4s | 5
27 | t1+5s | 6
28 | t1+6s | 7
29 | t1+7s | 8
30 | t1+8s | 9
31 | t1+9s | 10
32 |
33 | Now the **count** metric will be equal to the sum of all numbers for our period of 10 seconds:
34 |
35 | 1+2+3+4+5+6+7+8+9+10 = 55
36 |
37 | # Sample Case 2 (increment metric)
38 |
39 | We receive a status each second for a period of 10 seconds. **status** is just a number.
40 |
41 | For example, let's take HTTP status codes: *200*, *404*, *302*. Let's say that the starting moment is "t1".
42 |
43 | As a table, it looks like this:
44 |
45 | Moment | Status
46 | ---- |:------------
47 | t1 | 200
48 | t1+1s | 200
49 | t1+2s | 404
50 | t1+3s | 200
51 | t1+4s | 200
52 | t1+5s | 302
53 | t1+6s | 200
54 | t1+7s | 302
55 | t1+8s | 200
56 | t1+9s | 200
57 |
58 | Now the **increment** metric will show how many times a given status is received for our period of 10 seconds.
59 |
60 | In our case this will be:
61 |
62 | * Status 200: 7 times
63 | * Status 404: 1 time
64 | * Status 302: 2 times
65 |
66 | # Sample Case 3 (timing metric)
67 |
68 | This case is a combination of cases 1 and 2. We receive a status each second for a period of 10 seconds,
69 | but each status comes with a second number. Let's imagine that this number is the response time for an HTTP request.
70 |
71 | As a table, it looks like this:
72 |
73 | Moment | Status |Response Time
74 | ---- |:------ |:-------------
75 | t1 | 200 | 15ms
76 | t1+1s | 200 | 10ms
77 | t1+2s | 404 | 10ms
78 | t1+3s | 200 | 20ms
79 | t1+4s | 200 | 30ms
80 | t1+5s | 302 | 10ms
81 | t1+6s | 200 | 15ms
82 | t1+7s | 302 | 10ms
83 | t1+8s | 200 | 10ms
84 | t1+9s | 200 | 20ms
85 |
86 | Now the **timing** metric will show things like the highest, lowest, and mean response time for all requests for our period of 10 seconds.
87 |
88 | In this case it doesn't matter what the status number is.
89 |
90 | In our case this will be:
91 |
92 | * Highest: 30ms
93 | * Lowest: 10ms
94 | * Mean: (15+10+10+20+30+10+15+10+10+20) / 10 = 150 / 10 = 15ms
95 |
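95 | To make the three aggregations concrete, here is a small Python sketch that
96 | reproduces the results above from the raw samples (an illustration of the
97 | math only, not StatsD's actual implementation):
98 |
99 |     numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
100 |     statuses = [200, 200, 404, 200, 200, 302, 200, 302, 200, 200]
101 |     timings = [15, 10, 10, 20, 30, 10, 15, 10, 10, 20]
102 |
103 |     # count: the sum of all received values in the period
104 |     print "count:", sum(numbers)                       # 55
105 |
106 |     # increment: how many times each distinct key was received
107 |     seen = {}
108 |     for status in statuses:
109 |         seen[status] = seen.get(status, 0) + 1
110 |     print "increment:", seen                           # {200: 7, 302: 2, 404: 1}
111 |
112 |     # timing: highest, lowest, and mean over the period
113 |     print "timing upper:", max(timings)                # 30
114 |     print "timing lower:", min(timings)                # 10
115 |     print "timing mean:", sum(timings) / len(timings)  # 15
116 |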
96 | # Sample Case 4 (apache log)
97 |
98 | Now let's take the following excerpt from an apache access log file:
99 |
100 | #Remote_host Request_time Request Status Response_bytes Response_time
101 |
102 | 10.10.10.1 [13/Feb/2013:10:27:02 +0200] "GET / HTTP/1.1" 200 "566" 10000
103 | 10.10.10.1 [13/Feb/2013:10:27:02 +0200] "GET /icons/blank.gif HTTP/1.1" 304 "195" 5000
104 | 10.10.10.1 [13/Feb/2013:10:27:02 +0200] "GET /icons/folder.gif HTTP/1.1" 304 "123" 4000
105 | 10.10.10.1 [13/Feb/2013:10:27:03 +0200] "GET / HTTP/1.1" 200 "520" 11000
106 | 10.10.10.1 [13/Feb/2013:10:27:03 +0200] "GET /icons/folder.gif HTTP/1.1" 304 "151" 6000
107 | 10.10.10.1 [13/Feb/2013:10:27:03 +0200] "GET /icons/blank.gif HTTP/1.1" 304 "158" 5000
108 | 10.10.10.1 [13/Feb/2013:10:27:03 +0200] "GET / HTTP/1.1" 200 "502" 12000
109 | 10.10.10.1 [13/Feb/2013:10:27:03 +0200] "GET /icons/folder.gif HTTP/1.1" 304 "226" 4000
110 | 10.10.10.1 [13/Feb/2013:10:27:03 +0200] "GET /icons/blank.gif HTTP/1.1" 304 "107" 5000
111 |
112 | Let's have this excerpt from a logstash configuration:
113 |
114 | {% include_code logstash.conf %}
115 |
116 | where:
117 |
118 | * reqmusecst - the value in column "Response_time"
119 | * response - the value in column "Status"
120 | * bytes - the value in column "Response_bytes"
121 | * sitename - has a value "site1"
122 |
123 | StatsD will produce the following data for the 10-second period from 10:27:00 to 10:27:10:
124 |
125 | * stats_count.logstash.10_10_10_1.apache.site1.response.200 = 3 (we have received 3 times status 200 for our period of 10 seconds)
126 | * stats_count.logstash.10_10_10_1.apache.site1.response.304 = 6 (we have received 6 times status 304 for our period of 10 seconds)
127 | * stats_count.logstash.10_10_10_1.apache.site1.bytes = 566+195+123+520+151+158+502+226+107 = 2548 bytes
128 | * stats.timers.logstash.10_10_10_1.apache.site1.lower = 4000 (lowest response time is 4000 ms for our period of 10 seconds)
129 | * stats.timers.logstash.10_10_10_1.apache.site1.upper = 12000 (highest response time is 12000 ms for our period of 10 seconds)
130 | * stats.timers.logstash.10_10_10_1.apache.site1.mean = (10000 + 5000 + 4000 + 11000 + 6000 + 5000 + 12000 + 4000 + 5000) / 9 = 62000 / 9 ≈ 6888.9 (mean response time for our period of 10 seconds)
131 | * stats.timers.logstash.10_10_10_1.apache.site1.count = 9 ( total number of responses for our period of 10 seconds)
132 |
133 | StatsD calculates some additional data:
134 |
135 | * stats.logstash.10_10_10_1.apache.site1.response.200 = 3 / 10 = 0.3 (number of responses with status 200 per second)
136 | * stats.logstash.10_10_10_1.apache.site1.response.304 = 6 / 10 = 0.6 (number of responses with status 304 per second)
137 | * stats.logstash.10_10_10_1.apache.site1.bytes = 2548 / 10 = 254.8 (bytes per second)
138 | * stats.timers.logstash.10_10_10_1.apache.site1.count_ps = 9 / 10 = 0.9 (responses per second)
139 |
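140 | As a sanity check, the derived numbers above can be reproduced with a few
141 | lines of Python (values transcribed from the log excerpt; the 10-second
142 | flush interval is an assumption of this example):
143 |
144 |     bytes_sent = [566, 195, 123, 520, 151, 158, 502, 226, 107]
145 |     times = [10000, 5000, 4000, 11000, 6000, 5000, 12000, 4000, 5000]
146 |     flush_interval = 10.0
147 |
148 |     print "bytes count:", sum(bytes_sent)                        # 2548
149 |     print "bytes per second:", sum(bytes_sent) / flush_interval  # 254.8
150 |     print "timer lower:", min(times)                             # 4000
151 |     print "timer upper:", max(times)                             # 12000
152 |     print "timer mean:", sum(times) / float(len(times))          # ~6888.9
153 |     print "count_ps:", len(times) / flush_interval               # 0.9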
--------------------------------------------------------------------------------
/recipes/statsd-metrics/logstash.conf:
--------------------------------------------------------------------------------
1 | output {
2 | statsd {
3 | type => "apache-access-ext"
4 | host => "localhost"
5 | port => 8125
6 | namespace => "logstash"
7 | timing => [ "apache.%{sitename}.servetime", "%{reqmusecst}" ]
8 | increment => "apache.%{sitename}.response.%{response}"
9 | count => [ "apache.%{sitename}.bytes", "%{bytes}" ]
10 | }
11 | }
12 |
--------------------------------------------------------------------------------
/recipes/syslog-pri/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: article
3 | title: parsing syslog messages
4 | tags: getting started, syslog
5 | ---
6 |
7 | * Goal: Parse the syslog facility, severity, timestamp, host, and program
8 | * Audience: Anyone sending RFC3164 syslog messages to logstash
9 |
10 | # preface: parsing syslog
11 |
12 | Until logstash 1.2 there was a native syslog input plugin bundled with
13 | logstash. It was deprecated because different syslog servers use slightly
14 | different formats, so more often than not the syslog input didn't work.
15 |
16 | It turns out that most of the work of parsing a syslog line can be done with
17 | native logstash `grok` filters. This recipe shows how to do that, which means
18 | if your syslog server isn't quite following RFC 3164 you should still be able
19 | to cope.
20 |
21 | # introduction
22 |
23 | A standard format syslog line contains a Header with facility, severity,
24 | timestamp, and host, and then the Body of the message. We want logstash to
25 | treat the message body as the complete log message, and have fields created
26 | for the other metadata items.
27 |
28 | # agenda
29 |
30 | * Prerequisites
31 | * Parsing BSD Syslog format
32 | * How it works
33 | * If it doesn't work
34 |
35 | # prerequisites
36 |
37 | This recipe requires logstash 1.1.1 or newer.
38 |
39 | # parsing bsd syslog format
40 |
41 | The goal is to turn a BSD syslog line into a sensible Logstash event. To
42 | this end we need to parse the message, modify a few things, then clean
43 | up.
44 |
45 | Here's what the complete configuration looks like:
46 |
47 | {% include_code syslog.conf %}
48 |
49 | # How it works
50 |
51 | Most of the heavy lifting is done by the `grok` filter at the start. It
52 | takes care of parsing the entire syslog message into various fields in
53 | the Logstash event. It also copies some information, like the time Logstash
54 | received the event, into new fields.
55 |
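56 | For example, given this raw input line (adapted from the example in RFC
57 | 3164; the pid is invented for illustration):
58 |
59 |     <34>Oct 11 22:14:15 webserver su[1024]: 'su root' failed for lonvick
60 |
61 | the pattern yields syslog_pri=34, syslog_timestamp="Oct 11 22:14:15",
62 | syslog_hostname="webserver", syslog_program="su", syslog_pid="1024", and
63 | syslog_message containing the rest of the line.
64 |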
56 | The `syslog_pri` filter takes care of parsing the priority and facility
57 | number from the Logstash event. By default it looks for a field called
58 | `syslog_pri` and parses it to create new fields that contain the severity and
59 | facility.
60 |
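61 | The arithmetic behind this is fixed by RFC 3164: the priority value is
62 | facility * 8 + severity. A tiny Python sketch of the decoding:
63 |
64 |     pri = 34
65 |     facility, severity = divmod(pri, 8)
66 |     print facility, severity    # 4 (security/auth), 2 (critical)
67 |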
61 | Converting the timestamp of the syslog message is the task of the `date`
62 | filter. It looks for the `syslog_timestamp` field and tries to parse it
63 | using the formats given. If the parsing is successful, it will replace
64 | the `@timestamp` field of the Logstash event with time of the syslog
65 | message.
66 |
67 | The first `mutate` filter replaces the Logstash event metadata with the
68 | correct data from the syslog message. Note that it only executes if the
69 | `grok` filter was successful. This preserves the original message if
70 | parsing fails.
71 |
72 | The second `mutate` filter removes redundant fields from the logstash
73 | event. This helps save storage and reduces confusion for people looking
74 | at the event stream. It's not combined with the previous `mutate` filter
75 | because there's no guaranteed order of operations within a single filter:
76 | the removes could happen before the replaces, resulting in missing data.
77 |
78 | # if it doesn't work
79 |
80 | It may be that your syslog uses a format different from RFC 3164. For example
81 | the timestamp might be in a different format, or the facility and priority
82 | might not be encoded numerically.
83 |
84 | By changing the `grok` filters above, and in particular the regular
85 | expressions and
86 | [`patterns`](https://github.com/logstash/logstash/blob/master/patterns/grok-patterns
87 | "grok patterns at github") used to extract fields, you can easily cope with
88 | different formats.
89 |
90 | You can also receive logs from different syslog servers using different
91 | formats, simply by tying a series of filters to a `type` or `tag`.
92 |
93 | Please ask on the logstash IRC channel if you need any help.
94 |
95 |
--------------------------------------------------------------------------------
/recipes/syslog-pri/syslog.conf:
--------------------------------------------------------------------------------
1 | input {
2 | tcp {
3 | port => 5000
4 | type => syslog
5 | }
6 | udp {
7 | port => 5000
8 | type => syslog
9 | }
10 | }
11 |
12 | filter {
13 | if [type] == "syslog" {
14 | grok {
15 | match => { "message" => "<%{POSINT:syslog_pri}>%{SYSLOGTIMESTAMP:syslog_timestamp} %{SYSLOGHOST:syslog_hostname} %{DATA:syslog_program}(?:\[%{POSINT:syslog_pid}\])?: %{GREEDYDATA:syslog_message}" }
16 | add_field => [ "received_at", "%{@timestamp}" ]
17 | add_field => [ "received_from", "%{host}" ]
18 | }
19 | syslog_pri { }
20 | date {
21 | match => [ "syslog_timestamp", "MMM d HH:mm:ss", "MMM dd HH:mm:ss" ]
22 | }
23 | if !("_grokparsefailure" in [tags]) {
24 | mutate {
25 | replace => [ "@source_host", "%{syslog_hostname}" ]
26 | replace => [ "@message", "%{syslog_message}" ]
27 | }
28 | }
29 | mutate {
30 | remove_field => [ "syslog_hostname", "syslog_message", "syslog_timestamp" ]
31 | }
32 | }
33 | }
34 |
35 | output {
36 | # Example just to output to elasticsearch
37 | elasticsearch { }
38 | }
39 |
--------------------------------------------------------------------------------
/recipes/using-init/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: article
3 | title: running logstash with init
4 | tags: init,rhel
5 | ---
6 |
7 | * Goal: Run the logstash agent as a service using an init script.
8 | * Target audience: Users who have RHEL based servers.
9 |
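10 | Install the script below as `/etc/init.d/logstash`, make it executable, and
11 | register it with `chkconfig --add logstash` so it starts on boot. After that,
12 | `service logstash start` and `service logstash status` work as usual.
13 |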
10 | {% include_code logstash.sh %}
11 |
12 |
--------------------------------------------------------------------------------
/recipes/using-init/logstash.sh:
--------------------------------------------------------------------------------
1 | #! /bin/sh
2 | #
3 | # /etc/rc.d/init.d/logstash
4 | #
5 | # Starts Logstash as a daemon
6 | #
7 | # chkconfig: 2345 20 80
8 | # description: Starts Logstash as a daemon
9 | # pidfile: /var/run/logstash-agent.pid
10 |
11 | ### BEGIN INIT INFO
12 | # Provides: logstash
13 | # Required-Start: $local_fs $remote_fs
14 | # Required-Stop: $local_fs $remote_fs
15 | # Default-Start: 2 3 4 5
16 | # Default-Stop: S 0 1 6
17 | # Short-Description: Logstash
18 | # Description: Starts Logstash as a daemon.
19 | # Modified originally from https://gist.github.com/2228905#file_logstash.sh
20 |
21 | ### END INIT INFO
22 |
23 | # Amount of memory for Java
24 | #JAVAMEM=256M
25 |
26 | # Location of logstash files
27 | LOCATION=/opt/logstash
28 |
29 | PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
30 | DESC="Logstash Daemon"
31 | NAME=java
32 | DAEMON=$(which java)
33 | CONFIG_DIR=/opt/logstash/logstash.conf
34 | LOGFILE=/opt/logstash/logstash.log
35 | JARNAME=logstash-monolithic.jar
36 | #ARGS="-Xmx$JAVAMEM -Xms$JAVAMEM -jar ${JARNAME} agent --config ${CONFIG_DIR} --log ${LOGFILE} --grok-patterns-path ${PATTERNSPATH}"
37 | ARGS="-jar ${JARNAME} agent --config ${CONFIG_DIR} --log ${LOGFILE}"
38 | SCRIPTNAME=/etc/init.d/logstash
39 | PIDFILE=/var/run/logstash.pid
40 | base=logstash
41 |
42 | # Exit if the package is not installed
43 | if [ ! -x "$DAEMON" ]; then
44 |   echo "Couldn't find $DAEMON"
45 |   exit 99
46 | fi
49 |
50 | . /etc/init.d/functions
51 |
52 | #
53 | # Function that starts the daemon/service
54 | #
55 | do_start()
56 | {
57 | cd $LOCATION && \
58 | ($DAEMON $ARGS &) \
59 | && success || failure
60 | }
61 |
62 | set_pidfile()
63 | {
64 | pgrep -f "$DAEMON[[:space:]]*$ARGS" > $PIDFILE
65 | }
66 |
67 | #
68 | # Function that stops the daemon/service
69 | #
70 | do_stop()
71 | {
72 | pid=`cat $PIDFILE`
73 | if checkpid $pid 2>&1; then
74 |     # TERM first, then KILL if not dead ($delay is never set, so default to 3s)
75 |     kill -TERM $pid >/dev/null 2>&1
76 |     usleep 100000
77 |     if checkpid $pid && sleep 1 &&
78 |       checkpid $pid && sleep ${delay:-3} &&
79 | checkpid $pid ; then
80 | kill -KILL $pid >/dev/null 2>&1
81 | usleep 100000
82 | fi
83 | fi
84 | checkpid $pid
85 | RC=$?
86 | [ "$RC" -eq 0 ] && failure $"$base shutdown" || success $"$base shutdown"
87 |
88 | }
89 |
90 | case "$1" in
91 | start)
92 | echo -n "Starting $DESC: "
93 | do_start
94 | touch /var/lock/subsys/$JARNAME
95 | set_pidfile
96 | ;;
97 | stop)
98 | echo -n "Stopping $DESC: "
99 | do_stop
100 | rm /var/lock/subsys/$JARNAME
101 | rm $PIDFILE
102 | ;;
103 | restart|reload)
104 | echo -n "Restarting $DESC: "
105 | do_stop
106 | do_start
107 | touch /var/lock/subsys/$JARNAME
108 | set_pidfile
109 | ;;
110 | status)
111 | status -p $PIDFILE
112 | ;;
113 | *)
114 | echo "Usage: $SCRIPTNAME {start|stop|status|restart}" >&2
115 | exit 3
116 | ;;
117 | esac
118 |
119 | echo
120 | exit 0
121 |
--------------------------------------------------------------------------------
/recipes/using-upstart/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: article
3 | title: running logstash with upstart
4 | tags: upstart,ubuntu
5 | ---
6 |
7 | * Goal: Run the logstash agent and elasticsearch through the upstart system daemon.
8 | * Target audience: Users who have servers with upstart.
9 |
10 | # what is upstart?
11 |
12 | Upstart ships with most modern Ubuntu distros. Here are the steps to use it:
13 |
14 | # upstart logstash config
15 |
16 | 1. Put the config below in `/etc/init/logstash-agent.conf`
17 | 2. To start it: `sudo initctl start logstash-agent`
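18 | 3. To check on it: `sudo initctl status logstash-agent`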
18 |
19 | {% include_code logstash-agent.conf %}
20 |
21 | # Upstart 1.5+
22 |
23 | Starting with upstart 1.5 you can use upstart's own semantics in place of most of the boilerplate you see above.
24 | Here's an example for an indexer:
25 |
26 | {% include_code logstash-indexer.conf %}
27 |
--------------------------------------------------------------------------------
/recipes/using-upstart/logstash-agent.conf:
--------------------------------------------------------------------------------
1 | # logstash - agent instance
2 | #
3 |
4 | description "logstash agent instance"
5 |
6 | start on virtual-filesystems
7 | stop on runlevel [06]
8 |
9 | # Respawn it if the process exits
10 | respawn
11 | respawn limit 5 30
12 | limit nofile 65550 65550
13 | expect fork
14 |
15 | # You need to chdir somewhere writable because logstash needs to unpack a few
16 | # temporary files on startup.
17 | chdir /home/logstash
18 |
19 | script
20 |
21 | # This runs logstash agent as the 'logstash' user
22 | su -s /bin/sh -c 'exec "$0" "$@"' logstash -- /usr/bin/java -jar logstash.jar agent -f /etc/logstash/agent.conf --log /var/log/logstash.log &
23 | initctl emit logstash-agent-running
24 | end script
25 |
--------------------------------------------------------------------------------
/recipes/using-upstart/logstash-indexer.conf:
--------------------------------------------------------------------------------
1 | # logstash - indexer instance
2 | #
3 |
4 | description "logstash indexer instance"
5 |
6 | start on virtual-filesystems
7 | stop on runlevel [06]
8 |
9 | respawn
10 | respawn limit 5 30
11 | limit nofile 65550 65550
12 |
13 | # set HOME to point to where you want the embedded elasticsearch
14 | # data directory to be created and ensure /opt/logstash is owned
15 | # by logstash:adm
16 |
17 | #env HOME=/opt/logstash
18 |
19 | #env JAVA_OPTS='-Xms512m -Xmx512m'
20 |
21 | chdir /opt/logstash
22 | setuid logstash
23 | setgid adm
24 | console log
25 |
26 | # for versions 1.1.1 - 1.1.4 the internal web service crashes when touched
27 | # and the current workaround is to just not run it and run Kibana instead
28 |
29 | script
30 | exec java -jar logstash.jar agent -f /etc/indexer.conf --log /var/log/logstash-indexer.out
31 | end script
32 |
33 |
--------------------------------------------------------------------------------
/recipes/version-updater/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: article
3 | title: Version Updater
4 | tags: python,script,automate,version,update
5 | ---
6 |
7 | * Goal: Keep your logstash.jar up to date with the latest monolithic jar!
8 | * Audience: Those of us who want to keep it fresh.
9 |
10 | # Requirements
11 |
12 | * Python 2.7+
13 | * sh module
14 | * lxml module
15 |
16 | # Quick guide
17 |
18 | If you don't have the required modules install them with pip:
19 |
20 | pip install sh lxml
21 |
22 | If that complains about missing dependencies and you are running Ubuntu try installing these:
23 |
24 | apt-get -y install python-dev python-pip libxml2-dev libxslt-dev
25 |
26 | # Using the updater
27 |
28 | Below is the usage as printed by help.
29 |
30 | usage: version-updater.py [-h] [-d DIR] [-v VERSION]
31 |
32 | optional arguments:
33 | -h, --help show this help message and exit
34 | -d DIR, --dir DIR Give the location of your logstash directory, ex:
35 | /opt/logstash
36 | -v VERSION, --version VERSION
37 |                         Which version do you want to install, ex: 1.1.9 or latest
38 |
39 | As an example here is how to get version 1.1.8 downloaded to /opt/logstash:
40 |
41 | python version-updater.py -d /opt/logstash -v 1.1.8
42 |
43 | You can set the defaults inside of the script if you want to run it without passing any arguments.
44 |
45 | # Code
46 |
47 | {% include_code version-updater.py %}
48 |
--------------------------------------------------------------------------------
/recipes/version-updater/version-updater.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | import re
3 | import sh
4 | import io
5 | import os
6 | import sys
7 | import urllib
8 | import argparse
9 |
10 | from lxml import objectify, etree
11 | from distutils.version import StrictVersion
12 |
13 |
14 | def get_args():
15 | parser = argparse.ArgumentParser()
16 | parser.add_argument("-d", "--dir", dest="dir", required=False, default="/opt/logstash",
17 | help="Give the location of your logstash directory, ex: /opt/logstash")
18 | parser.add_argument("-v", "--version", dest="version", required=False, default="latest",
19 | help="Which version do you want to install, ex: 1.1.9 or latest")
20 | return parser.parse_args()
21 |
22 |
23 | def version_info(url, dir):
24 | """ This method checks the available versions from the remote url as well
25 | as the local version if it exists. """
26 |
27 | # Checking the Logstash version might take a while, be patient.
28 |     response = urllib.urlopen(url).read()
29 |     # Strip the S3 namespace so the element lookups below stay simple.
30 |     response = response.replace(" xmlns=\"http://s3.amazonaws.com/doc/2006-03-01/\"", "")
30 |
31 | xml = objectify.fromstring(response)
32 | xmltree = xml.getchildren()
33 |
34 |     try:
35 |         open('%s/logstash.jar' % dir)
36 |     except IOError, e:
37 |         if e.errno == 13:
38 |             sys.exit("We don't have permission to read/write the file")
39 |         elif e.errno == 2:
40 |             print("The file %s/logstash.jar does not exist, let's figure out which one to get." % dir)
41 |         return xmltree
42 |     else:
43 |         # The jar exists, so ask it what version it is.
44 |         current_version = sh.java("-jar", "%s/logstash.jar" % dir, "agent", "-V").split(" ")[1].strip('\n')
45 |         return xmltree, current_version
46 |
47 |
48 | def updater(url, xmltree, current_version, args):
49 | # Version as key, with a dict as value. Example:
50 | # {'1.1.1': {'releasename': 'release/logstash-1.1.1-monolithic.jar'}}
51 | available_versions = {'latest': {'releasename': None, 'version': '0.0.0'}}
52 |
53 | version_parser = re.compile("release\/logstash-(.*)-monolithic\.jar")
54 |
55 |     # args.version is either an explicit version string or the default "latest".
56 |     user_version = args.version
62 |
63 | # Iterating over the releases and putting them in a dict.
64 | for subtree in xmltree:
65 | for release in subtree.iterchildren():
66 | if release.tag == "Key":
67 | release = str(release)
68 | matching = re.search(version_parser, release)
69 | if matching:
70 | available_versions[matching.group(1)] = {'releasename': release, 'version': matching.group(1)}
71 |
72 | # Determine which version is the latest and assign it to available_versions['latest']
73 | for version in available_versions:
74 | if version != "latest":
75 | if StrictVersion(version) > StrictVersion(available_versions['latest']['version']):
76 | available_versions['latest'] = available_versions[version]
77 |
78 |     try:
79 |         target = available_versions[user_version]
80 |         if user_version == "latest":
81 |             print "Let's get the latest one, which is %s" % target['version']
82 |             if current_version != "0.0.0" and StrictVersion(current_version) >= StrictVersion(target['version']):
83 |                 print "You already have version %s, nothing to do." % current_version
84 |                 return
85 |         else:
86 |             print "You want the %s version, ok." % user_version
87 |         new_version_url = url + target['releasename']
88 |         urllib.urlretrieve(new_version_url, filename="%s/%s" % (args.dir, "logstash.jar"))
89 |     except KeyError:
90 |         sys.exit("Not sure that version is available.")
91 |     except Exception, e:
92 |         sys.exit(e)
96 |
97 |
98 | def run():
99 | args = get_args()
100 | url = "https://logstash.objects.dreamhost.com/" # xml listing of releases
101 |
102 | logstash_info = version_info(url, args.dir)
103 |
104 |     # The first element of this tuple is the xmltree, the second is the version string
105 | if isinstance(logstash_info, tuple):
106 | updater(url=url, xmltree=logstash_info[0], current_version=logstash_info[1], args=args)
107 | else:
108 | updater(url=url, xmltree=logstash_info, current_version="0.0.0", args=args)
109 |
110 | if __name__ == '__main__':
111 | run()
112 |
113 |
--------------------------------------------------------------------------------
/recipes/windows-service/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: article
3 | title: running logstash as a windows service
4 | tags: service,windows
5 | ---
6 |
7 | * Goal: Start logstash on boot up.
8 | * Target audience: Users who have logstash agents on windows servers.
9 |
10 | # Service wrapper installation
11 |
12 | - Download NSSM - the Non-Sucking Service Manager from http://nssm.cc and copy nssm.exe to a directory in your path like c:\windows.
13 | - Copy logstash jar into c:\logstash
14 | - Create directory c:\logstash\sincedb
15 | - Create a batch file c:\logstash\logstash.bat
16 | - Create c:\logstash\logstash.conf file
17 | - Create the windows service by typing the following on the command line: nssm install Logstash C:\logstash\logstash.bat
18 |
19 | Verify you have a service named "Logstash", and set its startup type to "Automatic" or "Automatic (delayed start)".
20 |
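21 | To start it immediately, run "net start Logstash" from a command prompt, or
22 | use the Services control panel.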
21 |
22 | {% include_code logstash.bat %}
23 |
24 |
25 | {% include_code logstash.conf %}
26 |
27 |
28 | # Other Java service wrappers
29 |
30 | http://winrun4j.sourceforge.net/
31 |
32 | http://yajsw.sourceforge.net/
33 |
--------------------------------------------------------------------------------
/recipes/windows-service/logstash.bat:
--------------------------------------------------------------------------------
1 | C:
2 | cd \logstash
3 |
4 | rem The 'file' input keeps its sincedb state under %HOME% by default.
5 | set HOME=c:/logstash/sincedb
5 |
6 | "c:\Program Files\Java\jdk1.6.0_37\bin\java.exe" -Xmx256m -jar logstash-1.1.7-monolithic.jar agent --config logstash.conf --log logstash.log
7 |
8 |
--------------------------------------------------------------------------------
/recipes/windows-service/logstash.conf:
--------------------------------------------------------------------------------
1 | input {
2 |
3 | file {
4 | path => "C:/inetpub/logs/app/logfile.log"
5 | type => log4j_xml
6 | }
7 | }
8 |
9 |
10 | output {
11 | redis {
12 |
13 | host => "your-redis-server"
14 | key => "logstash-key"
15 | data_type => "list"
16 | }
17 |
18 | }
19 |
20 |
--------------------------------------------------------------------------------
/regen.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | . $HOME/.rvm/scripts/rvm
4 | git pull --rebase
5 | jekyll --kramdown --no-auto
6 | compass compile --css-dir _site/css
7 |
--------------------------------------------------------------------------------
/robots.txt:
--------------------------------------------------------------------------------
1 | # www.robotstxt.org/
2 | # http://code.google.com/web/controlcrawlindex/
3 |
4 | User-agent: *
5 |
--------------------------------------------------------------------------------
/sass/_palette.scss:
--------------------------------------------------------------------------------
1 | $boxminwidth: 300px;
2 | $margin: 1.282%;
3 |
4 | /* to convert from colorschemedesigner css to variables, copy the CSS given and run this:
5 | :.!xclip -o | sed -re 's/ .*:/:/; /^ *$/d' | tr . '$' | sed -e 's/ }/;/'
6 | */
7 | // From http://colorschemedesigner.com/#3341Tw0w0w0w0
8 | $primary-1: #3EA024;
9 | $primary-2: #417B32;
10 | $primary-3: #216C0D;
11 | $primary-4: #71D057;
12 | $primary-5: #8BD078;
13 | $secondary-a-1: #1B7969;
14 | $secondary-a-2: #255D53;
15 | $secondary-a-3: #0A5145;
16 | $secondary-a-4: #4FBDAA;
17 | $secondary-a-5: #6DBDAF;
18 | $secondary-b-1: #BE662A;
19 | $secondary-b-2: #925E3B;
20 | $secondary-b-3: #803D0F;
21 | $secondary-b-4: #DF925E;
22 | $secondary-b-5: #DFA781;
23 | $complement-1: #B62939;
24 | $complement-2: #8C3842;
25 | $complement-3: #7B0F1B;
26 | $complement-4: #DB5C6B;
27 | $complement-5: #DB7F89;
28 |
29 | .primary-1 { background-color: $primary-1 }
30 | .primary-2 { background-color: $primary-2 }
31 | .primary-3 { background-color: $primary-3 }
32 | .primary-4 { background-color: $primary-4 }
33 | .primary-5 { background-color: $primary-5 }
34 | .secondary-a-1 { background-color: $secondary-a-1 }
35 | .secondary-a-2 { background-color: $secondary-a-2 }
36 | .secondary-a-3 { background-color: $secondary-a-3 }
37 | .secondary-a-4 { background-color: $secondary-a-4 }
38 | .secondary-a-5 { background-color: $secondary-a-5 }
39 | .secondary-b-1 { background-color: $secondary-b-1 }
40 | .secondary-b-2 { background-color: $secondary-b-2 }
41 | .secondary-b-3 { background-color: $secondary-b-3 }
42 | .secondary-b-4 { background-color: $secondary-b-4 }
43 | .secondary-b-5 { background-color: $secondary-b-5 }
44 | .complement-1 { background-color: $complement-1 }
45 | .complement-2 { background-color: $complement-2 }
46 | .complement-3 { background-color: $complement-3 }
47 | .complement-4 { background-color: $complement-4 }
48 | .complement-5 { background-color: $complement-5 }
49 |
--------------------------------------------------------------------------------
/sass/style.scss:
--------------------------------------------------------------------------------
1 | @import "compass";
2 | @import "palette";
3 |
4 | @media all and (max-width: 699px) and (min-width: 520px), (min-width: 1151px) {
5 | }
6 |
7 | html {
8 | height: 100%;
9 | margin: 0px;
10 | padding: 0px;
11 | @extend .secondary-a-3;
12 |
13 | body {
14 | box-shadow: -0px -5px 20px 5px $primary_2;
15 | @extend .primary-3;
16 | color: #FEE;
17 | margin-top: 0;
18 | max-width: 1000px;
19 | min-height: 100%;
20 | margin-left: auto;
21 | margin-right: auto;
22 | border-left: 1px solid black;
23 | border-right: 1px solid black;
24 | font-family: "Gudea", Helvetica, Arial, sans-serif;
25 | @media (max-width: 400px) {
26 | /* iphones and other small screens */
27 | font-size: 10pt;
28 | }
29 | font-size: 12pt;
30 | } /* body */
31 | } /* html */
32 |
33 | .readmargin {
34 | padding-left: 10px;
35 | padding-right: 10px;
36 | }
37 |
38 | .splash {
39 | border-bottom: 2px solid black;
40 | overflow: auto;
41 | width: 100%;
42 |
43 | background-color: $primary-1 !important;
44 | .inner {
45 | overflow: auto;
46 |
47 | padding: 40px;
48 | @media (max-width: 600px) {
49 | background-color: rgba(red($primary-1), green($primary-2), blue($primary-2), 0.7 );
50 | }
51 | @media (max-width: 400px) {
52 | padding: 5px !important;
53 | }
54 | }
55 | h1 {
56 | text-shadow: black 1px 1px 8px;
57 | margin: 0px;
58 | font-size: 300%;
59 | line-height: 1;
60 | letter-spacing: -1px;
61 | }
62 |
63 | p {
64 | font-size: 18px;
65 | font-weight: 200;
66 | line-height: 27px;
67 | color: inherit;
68 | }
69 | } /* splash */
70 |
71 | .featurelet {
72 | min-width: $boxminwidth;
73 | float: left;
74 | margin-top: 10px !important;
75 | color: white;
76 |
77 | // 3 columns
78 | width: ((100 - ($margin * 4)) / 3);
79 | margin-left: $margin;
80 | .inner {
81 | @extend .secondary-b-2;
82 | @include background-image(linear-gradient(top, lighten($secondary-b-2, 5%) 0%,darken($secondary-b-2, 10%) 100%));
83 | width: 100%;
84 | height: 100%;
85 | border: 2px solid black !important;
86 | border-radius: 15px !important;
87 | height: 10em;
88 | padding-bottom: $margin;
89 | overflow: hidden;
90 | @include transition("box-shadow, border, background", 0.5s);
91 |
92 | h1, > h2, > h3 {
93 | text-shadow: black 2px 2px 3px;
94 | margin: 0;
95 | }
96 |
97 | > p {
98 | text-shadow: black 1px 1px 2px;
99 | }
100 |
101 | a {
102 | color: inherit;
103 | text-decoration: none;
104 | }
105 |
106 | > * {
107 | padding-left: 15px;
108 | padding-right: 15px;
109 | }
110 |
111 | > *:first-child {
112 | padding-top: 10px;
113 | }
114 |
115 | > *:last-child {
116 | padding-bottom: 15px;
117 | }
118 |
119 | }
120 | } /* .featurelet */
121 |
122 | .featurelet .inner:hover {
123 | @extend .secondary-b-3;
124 | box-shadow: 0 0 15px 3px $complement-5;
125 | border: 2px solid $secondary-b-5 !important;
126 | @include background-image(linear-gradient(top, lighten($secondary-b-2, 5%) 0%,darken($secondary-b-2, 20%) 100%));
127 | cursor: pointer;
128 | background-position: 0% 5%;
129 | }
130 |
131 | .features {
132 | overflow: hidden;
133 | padding-bottom: 1em;
134 | width: 100%;
135 | @include background-image(linear-gradient(top, lighten($primary-2, 5%) 0%,darken($primary-2, 10%) 100%));
136 | border-bottom: 2px solid black;
137 | border-bottom-left-radius: 15px;
138 | border-bottom-right-radius: 15px;
139 | }
140 |
141 | .details {
142 | overflow: hidden;
143 | padding-bottom: 1em;
144 | width: 100%;
145 | }
146 |
147 | .details .info {
148 | min-width: $boxminwidth;
149 | margin-left: $margin;
150 |
151 | // two columns.
152 | width: ((100 - ($margin * 3)) / 2);
153 |
154 | float: left;
155 | margin-top: 10px;
156 | color: white;
157 |
158 | > *:last-child {
159 | border-left: 0px;
160 | }
161 |
162 | .inner {
163 | width: 100%;
164 | }
165 | }
166 |
167 | header {
168 | @extend .secondary-a-1;
169 | box-shadow: -0px -5px 40px 15px $secondary_a_4;
170 | border-bottom: 2px solid black;
171 |
172 | .logo a {
173 | font-family: 'Noticia Text', serif;
174 | font-size: 2em;
175 | color: #FFFFFF;
176 | font-weight: bold;
177 | text-shadow: #A62F00 1px 1px 3px;
178 | width: 100%;
179 | @include transition("color", 0.5s);
180 | }
181 | .logo a:hover {
182 | color: #FFAD40;
183 | text-decoration: none;
184 | }
185 |
186 | .logo {
187 | }
188 | .edit-this-page {
189 | text-align: right;
190 | }
191 |
192 | .search {
193 | float: right;
194 |
195 | input[type="submit"] {
196 | display: none;
197 | }
198 | }
199 |
200 | .float-right {
201 | float: right;
202 | }
203 | padding: 1em;
204 | }
205 |
206 | a {
207 | color: lighten($complement-5, 20%);
208 | font-weight: bold;
209 | text-decoration: none;
210 | }
211 |
212 | a:hover {
213 | color: #BDF;
214 | text-decoration: underline;
215 | }
216 |
217 | h1, h2, h3 {
218 | font-family: "Gudea", sans-serif !important;
219 | }
220 |
221 | #content {
222 | padding-bottom: 1em;
223 | }
224 |
225 | .article-splash {
226 | border-bottom: 2px solid black;
227 | @extend .primary-1;
228 |
229 | .inner {
230 | @media (max-width: 400px) {
231 | padding: 5px;
232 | }
233 | padding: 40px;
234 | padding-top: 1em;
235 | padding-bottom: 1em;
236 | }
237 |
238 | h1 {
239 | text-shadow: black 1px 1px 8px;
240 | margin: 0px;
241 | font-size: 300%;
242 | line-height: 1;
243 | letter-spacing: -1px;
244 | display: inline;
245 | }
246 | } /* article-splash */
247 |
248 | .article-splash ~ * {
249 | margin-left: 20px;
250 | margin-right: 20px;
251 | }
252 |
253 | .table-of-contents {
254 | border: 1px solid black;
255 | min-width: $boxminwidth;
256 | @media (max-width: 400px) {
257 | min-width: ($boxminwidth - 40);
258 | }
259 | border-radius: 15px;
260 | box-shadow: -0px -0px 20px 1px $primary_4;
261 | @extend .secondary-a-3;
262 | float: right;
263 | margin: $margin;
264 | padding: 1em;
265 | width: 30%;
266 |
267 | ul, ol {
268 | margin: 0;
269 | padding-left: 1em;
270 | }
271 |
272 |
273 | h1 {
274 | font-size: 130%;
275 | padding: 0;
276 | margin: 0;
277 | }
278 |
279 | .topic-h1 { margin-left: 1em; }
280 | .topic-h2 { margin-left: 2em; }
281 | .topic-h3 { margin-left: 3em; }
282 | } /* .table-of-contents */
283 |
284 | .article {
285 | .bullet-list {
286 | margin: 0;
287 | margin-top: 1.5em;
288 | padding: $margin;
289 | @extend .primary-2;
290 | border-radius: 15px;
291 | box-shadow: 0px 0px 7px 7px $primary_2;
292 | max-width: 50%;
293 |
294 | min-width: $boxminwidth;
295 | @media (max-width: 400px) {
296 | min-width: ($boxminwidth - 40);
297 | }
298 |
299 | opacity: 0.9;
300 |
301 | li.bullet:last {
302 | margin-bottom: 0em;
303 | }
304 |
305 | li.bullet {
306 | font-size: 110%;
307 | margin: 0;
308 | list-style-type: none;
309 | padding-top: 1px;
310 | padding-bottom: 1px;
311 | }
312 |
313 | .bullet-title {
314 | font-weight: bold;
315 | padding-right: 5px;
316 | }
317 | }
318 |
319 | pre.code-header {
320 | margin-top: 0px; /* for 'view plain text' link */
321 | margin-bottom: 0px;
322 | border-top-right-radius: 0px;
323 | border-bottom-left-radius: 0px;
324 | border-bottom-right-radius: 0px;
325 | padding: 2px;
326 | padding-left: 10px;
327 | overflow: hidden;
328 | @extend .secondary-a-4;
329 | color: black;
330 | * {
331 | color: black;
332 | }
333 | }
334 |
335 | pre.code {
336 | margin-top: 0px;
337 | border-top-left-radius: 0px;
338 | border-top-right-radius: 0px;
339 | }
340 |
341 | pre.wrap {
342 | white-space: pre-wrap;
343 | overflow: auto;
344 | }
345 |
346 | .has-view-text-link {
347 | margin-top: 0;
348 | }
349 |
350 | pre {
351 | max-width: 100%;
352 | border: 1px solid $primary-5;
353 | color: white;
354 | overflow-x: auto;
355 | margin-left: 0px;
356 | margin-right: 0px;
357 | border-top-right-radius: 0px;
358 | overflow-x: auto;
359 | margin-left: 1em;
360 | margin-right: 1em;
361 | @extend .primary-2;
362 | border-radius: 8px;
363 | padding: 10px;
364 | }
365 |
366 | /* The 'view plain text' boxes */
367 | div.plain-text-link {
368 | @media (min-width: 400px) {
369 | //display: none;
370 | }
371 | text-align: right;
372 | width: 100%;
373 | margin-bottom: -1px;
374 | padding: 0px;
375 | /* The 'view plain text' link */
376 | a {
377 | padding-left: 1em;
378 | padding-right: 1em;
379 | @media (min-width: 400px) {
380 |         margin-right: 1em; /* match the pre tag's margin */
381 | }
382 | @include background-image(linear-gradient(top, darken($primary-2, 8%) 0%, $primary-2 100%));
383 | border-left: 1px solid $primary-5;
384 | border-right: 1px solid $primary-5;
385 | border-top: 1px solid $primary-5;
386 | border-top-left-radius: 8px;
387 | border-top-right-radius: 8px;
388 | text-decoration: none;
389 | font-weight: bold;
390 | }
391 | }
392 |
393 | a {
394 | color: inherit;
395 | text-decoration: underline;
396 | }
397 |
398 | } /* .article */
399 |
--------------------------------------------------------------------------------
/search.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: search
3 | title: search results
4 | ---
5 |
6 |
--------------------------------------------------------------------------------
/studies/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: landing
3 | title: log studies
4 | ---
5 |
6 | # Logs in the Wild
7 |
8 | it's like cultural anthropology, but for logs.
9 |
10 | ## [mysql](mysql)
11 |
12 | Go deep into mysql's logs. It has many to show you.
13 |
14 | ## [Your Study Here!](https://github.com/logstash/cookbook)
15 |
18 | ### Log Studies?
19 |
20 | In order to find common terminology, patterns, and anti-patterns, it's useful
21 | to study existing systems and note how, what, and why they log.
22 |
23 | ### Got Suggestions?
24 |
25 | Got an application you'd like studied? You can submit your own study or just
26 | recommend an application for studying! File a pull request or
27 | [ticket](https://github.com/logstash/cookbook/issues) :)
28 |
--------------------------------------------------------------------------------
/studies/mysql/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | layout: article
3 | title: mysql logs - a study
4 | ---
5 |
6 | * Goal: Learn from mistakes and successes in mysql's log systems.
7 | * Audience: Folks interested in learning from live examples!
8 |
9 | # MySQL Log Study
10 |
11 | According to [mysql's docs][mysql-docs], there are about five different kinds of logs:
12 |
13 | [mysql-docs]: http://dev.mysql.com/doc/refman/5.5/en/server-logs.html
14 |
15 | * error log
16 | * general query log
17 | * binary log
18 | * relay log
19 | * slow query log
20 |
21 | Depending on storage engines used, you'll have other log files as well (innodb
22 | log file, etc).
23 |
24 | In most mysql deployments, there are three possible actors: the mysql server,
25 | mysql clients, and replication partners. The logs tend to cover interactions
26 | between pairs of these actors; roughly, the purpose of each log is:
27 |
28 | * internal mysql server: error log
29 | * replication: binary and relay logs
30 | * clients: general query and slow query logs
31 |
32 | ## Error Log
33 |
34 | Relevant my.cnf settings: `log-error`, `log-warnings`.
35 |
36 | According to mysql's docs, the error log generally contains information about
37 | mysqld's status; generally start and stop messages as well as critical errors.
38 | Unfortunately, mysql also logs warnings to the error log by default.
39 |
40 | What the docs don't say is that pretty much anything not data-related is logged
41 | here. This log file is a complete mess. Most of what I see in the sample below
42 | has nothing to do with errors or warnings.
43 |
44 | 120707 0:37:09 [Note] Plugin 'FEDERATED' is disabled.
45 | 120707 0:37:09 InnoDB: The InnoDB memory heap is disabled
46 | 120707 0:37:09 InnoDB: Mutexes and rw_locks use GCC atomic builtins
47 | 120707 0:37:09 InnoDB: Compressed tables use zlib 1.2.5
48 | 120707 0:37:09 InnoDB: Using Linux native AIO
49 | 120707 0:37:09 InnoDB: Initializing buffer pool, size = 128.0M
50 | 120707 0:37:09 InnoDB: Completed initialization of buffer pool
51 | 120707 0:37:09 InnoDB: highest supported file format is Barracuda.
52 | 120707 0:37:09 InnoDB: Waiting for the background threads to start
53 | 120707 0:37:10 InnoDB: 1.1.8 started; log sequence number 1595675
54 | 120707 0:37:10 [Note] Server hostname (bind-address): '(null)'; port: 3306
55 | 120707 0:37:10 [Note] - '(null)' resolves to '0.0.0.0';
56 | 120707 0:37:10 [Note] - '(null)' resolves to '::';
57 | 120707 0:37:10 [Note] Server socket created on IP: '0.0.0.0'.
58 | 120707 0:37:10 [Note] Event Scheduler: Loaded 0 events
59 | 120707 0:37:10 [Note] /usr/libexec/mysqld: ready for connections.
60 | Version: '5.5.24-log' socket: 'mysql.sock' port: 3306 MySQL Community Server (GPL)
61 |
62 | The format of this log file is a bit confusing and pretty inconsistent. I'm not
63 | sure how "highest supported file format is Barracuda." is an error or a
64 | warning, but it's in the error log anyway. ☹
65 |
66 | ### Mistakes
67 |
68 | This log format is a bit strange. The timestamp seems consistent, even if it is
69 | odd. The message seems to have no consistency, meaning 'grep' and other
70 | pattern/text tools may be your chief weapons against this beast.
71 |
72 | I wouldn't expect to achieve much in the ways of data mining from this log.
73 |
74 | ## General Query Log
75 |
76 | This log appears to contain every request sent to the mysql server. Here's what
77 | shows up when I login and run a few commands:
78 |
79 | % mysql -uroot
80 | > select * from mysql.user;
81 | ...
82 | > hello world;
83 | ... (syntax error ... )
84 |
85 | Contents of general query log:
86 |
87 | 120707 0:40:34 4 Connect root@localhost on
88 | 4 Query select @@version_comment limit 1
89 | 120707 0:40:45 4 Query select * from mysql.user
90 | 120707 0:41:18 5 Query hello world
91 |
92 | Notes:
93 |
94 | * Column-oriented visual format.
95 | * Poor timestamp: No timezone. No subsecond precision.
96 | * ANSI control codes and other oddities are not escaped. Fun and profit here.
97 |
98 | ### Hacks
99 |
100 | Because the general query log literally logs all requests sent to mysqld, you could
101 | mark major events (schema changes, upgrades, etc) by sending bad requests:
102 |
103 | mysql> NOTE: SCHEMA VERSION 12345 DEPLOYED;
104 |
105 | And in the general query log:
106 |
107 | 120707 0:49:47 5 Query NOTE: SCHEMA VERSION 12345 DEPLOYED
108 |
109 | ## Slow Query Log
110 |
111 | The slow query log contains slow queries as well as "possibly slow" queries
112 | (ones executed without using an index).
113 |
114 | The default format is the long format:
115 |
116 | ```
117 | /usr/local/Cellar/mysql/5.5.27/bin/mysqld, Version: 5.5.27-log (Source distribution). started with:
118 | Tcp port: 0 Unix socket: (null)
119 | Time Id Command Argument
120 | # Time: 130912 13:14:51
121 | # User@Host: root[root] @ localhost []
122 | # Query_time: 3.003669 Lock_time: 0.000245 Rows_sent: 3 Rows_examined: 3
123 | use mysql;
124 | SET timestamp=1379006091;
125 | select sleep(1) from db limit 3;
126 | # Time: 130912 13:17:36
127 | # User@Host: root[root] @ localhost []
128 | # Query_time: 4.003400 Lock_time: 0.000076 Rows_sent: 4 Rows_examined: 4
129 | SET timestamp=1379006256;
130 | select sleep(1) from db limit 4;
131 | ```
132 |
133 | The short format looks like this:
134 |
135 | ```
136 | /usr/local/Cellar/mysql/5.5.27/bin/mysqld, Version: 5.5.27-log (Source distribution). started with:
137 | Tcp port: 0 Unix socket: (null)
138 | Time Id Command Argument
139 | # Query_time: 4.005010 Lock_time: 0.000129 Rows_sent: 4 Rows_examined: 4
140 | use mysql;
141 | SET timestamp=1379007812;
142 | select sleep(1) from db limit 4;
143 | # Query_time: 2.001827 Lock_time: 0.000066 Rows_sent: 2 Rows_examined: 2
144 | SET timestamp=1379007829;
145 | select sleep(1) from db limit 2;
146 | ```
147 |
148 | ## Binary Log
149 |
150 | ## Relay Log
151 |
152 |
--------------------------------------------------------------------------------