├── .gitignore ├── .gitmodules ├── CHANGELOG.md ├── LICENSE ├── README.md ├── Vagrantfile ├── environments └── production │ └── .gitkeep ├── examples ├── complex_logstash.md ├── curator_cleanup.md ├── curator_snapshot_complex.md ├── field_cleanup.md ├── filebeat.md ├── grokking_rules.md ├── grokking_syslog.md ├── large_distributed_source.md ├── large_routed_router.md ├── multiple_filter.md ├── multiple_filter_lines.md ├── output_hipchat.md ├── threading_example_parallel.md ├── threading_example_singlethread.md └── winlogbeat.md ├── hiera.yaml ├── manifests └── site.pp ├── modules ├── profiles │ ├── files │ │ ├── apache │ │ │ └── sysadmin1138-net.basis │ │ ├── base │ │ │ └── hcl.vim │ │ └── logstash │ │ │ └── templates │ │ │ └── onebox.json │ ├── manifests │ │ ├── apache_stub.pp │ │ ├── base.pp │ │ ├── curator.pp │ │ ├── curator_job.pp │ │ ├── elastic_key.pp │ │ ├── escluster.pp │ │ ├── init.pp │ │ ├── kibana_local.pp │ │ ├── kibana_network.pp │ │ ├── logredis.pp │ │ ├── logstash.pp │ │ ├── logstash │ │ │ ├── filter_apache.pp │ │ │ ├── filter_syslog.pp │ │ │ ├── input_apache.pp │ │ │ ├── input_nasa_feeds.pp │ │ │ ├── input_redis.pp │ │ │ ├── input_syslog_file.pp │ │ │ ├── input_syslog_server.pp │ │ │ ├── output_escluster.pp │ │ │ ├── output_onebox.pp │ │ │ └── output_redis.pp │ │ └── onebox_es.pp │ └── templates │ │ ├── base │ │ └── bash_aliases │ │ ├── curator │ │ ├── config.yml │ │ └── onebox_delete.yml │ │ └── logstash │ │ ├── filter │ │ ├── apache │ │ └── syslog │ │ ├── input │ │ ├── apache │ │ ├── journald │ │ ├── nasa_feeds │ │ ├── redis │ │ ├── syslog_file │ │ └── syslog_server │ │ └── output │ │ ├── escluster │ │ ├── onebox_es │ │ └── redis └── roles │ └── manifests │ ├── apache.pp │ ├── escluster.pp │ ├── init.pp │ ├── mdcluster.pp │ ├── onebox_nasa.pp │ └── onebox_syslog.pp ├── prep_environment ├── puppet-hiera ├── common.yaml ├── env-type │ ├── medium.yaml │ └── small.yaml ├── hiera.yaml └── node-type │ └── onebox_nasa.yaml └── vup /.gitignore: -------------------------------------------------------------------------------- 1 | .vagrant 2 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "modules/elasticsearch"] 2 | path = modules/elasticsearch 3 | url = https://github.com/elastic/puppet-elasticsearch.git 4 | branch = tag/5.4.3 5 | [submodule "modules/stdlib"] 6 | path = modules/stdlib 7 | url = https://github.com/puppetlabs/puppetlabs-stdlib.git 8 | branch = releases/tag/4.19.0 9 | [submodule "modules/apt"] 10 | path = modules/apt 11 | url = https://github.com/puppetlabs/puppetlabs-apt.git 12 | branch = release/tag/4.1.0 13 | [submodule "modules/yum"] 14 | path = modules/yum 15 | url = https://github.com/CERIT-SC/puppet-yum.git 16 | branch = releases/tag/0.9.8 17 | [submodule "modules/datacat"] 18 | path = modules/datacat 19 | url = https://github.com/richardc/puppet-datacat.git 20 | branch = 5a6114cd41823160bdb01dad6eb0e2af85a8fa69 21 | [submodule "modules/logstash"] 22 | path = modules/logstash 23 | url = https://github.com/elastic/puppet-logstash.git 24 | branch = releases/tag/5.2.1 25 | [submodule "modules/kibana4"] 26 | path = modules/kibana4 27 | url = https://github.com/lesaux/puppet-kibana4.git 28 | branch = v1.0.17 29 | [submodule "modules/kibana"] 30 | path = modules/kibana 31 | url = https://github.com/elastic/puppet-kibana.git 32 | branch = releases/tag/5.1.0 33 | [submodule "modules/apache"] 34 | path = modules/apache 35 | 
url = https://github.com/puppetlabs/puppetlabs-apache.git 36 | branch = 1.10.0 37 | [submodule "modules/redis"] 38 | path = modules/redis 39 | url = https://github.com/arioch/puppet-redis.git 40 | branch = 1.2.3 41 | [submodule "modules/inifile"] 42 | path = modules/inifile 43 | url = https://github.com/puppetlabs/puppetlabs-inifile.git 44 | branch = 1.6.0 45 | [submodule "modules/concat"] 46 | path = modules/concat 47 | url = https://github.com/puppetlabs/puppetlabs-concat.git 48 | branch = 4.0.1 49 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## v0.2.1 [2017-09-02] 4 | 5 | #### Release Notes 6 | This is mostly a documentation update. No significant technical changes. 7 | 8 | * Update examples under `examples/` 9 | 10 | ## v0.2.0 [2017-06-18] 11 | 12 | #### Release Notes 13 | This updates the existing repository to support the following: 14 | 15 | * Puppet 4 for provisioning 16 | * Updated Vagrant provisioning to reflect the Puppet 4 changes 17 | * Update the base box used from Ubuntu 14.04 to Ubuntu 16.10 18 | * Update the ElasticSearch, LogStash, and Kibana versions to 2.4 to 5.4 19 | 20 | ## v0.1.0 [2016-10-16] 21 | 22 | #### Release Notes 23 | Initial release, shipped with the LISA 2016 tutorial USB key. 24 | 25 | This release contains definitions for the following Vagrant boxes: 26 | 27 | * `onebox_nasa`: An all-in-one box to demo the twitter logstash integration, with Kibana. 28 | * `onebox_syslog`: An all-in-one box to demo a syslog-server, with curator and Kibana. 29 | * `small_escluster`: An all-in-one ElasticSearch cluster-box, with minimal on-box Logstash. 30 | * `small_apache`: An emulated Apache server, with logstash, that ships data to the `small_escluster` box. With Kibana. 31 | * `medium_mdcluster`: A combined ElasticSearch cluster-box with, redis, and complex Logstash parsing rules. 32 | * `medium_apache`: Like `small_apache`, but only ships events to the redis server on `medium_mdcluster`. Hosts Kibana. 33 | 34 | At this time, this repo supports the following versions of Elastic products: 35 | 36 | * **Logstash**: 2.4 37 | * **ElasticSearch**: 2.4 38 | * **Kibana**: 4.6 39 | * **Curator**: 4.1.2 40 | 41 | ElasticStack 5.0 is in beta, and not yet supported by all of the puppet components. 42 | As a result, 5 is not yet implemented here. By the time of the LISA conference, 43 | this support may have been added in. 44 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (C) 2016 hellosign.com 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Logstash Fundamentals 2 | This repo is intended to be a learning tool for [Logstash by Elastic.co](https://www.elastic.co/guide/en/logstash/current/index.html). 3 | The examples here should get you familiar with the basic structure of Logstash, 4 | and on the way to a working proof-of-concept. 5 | 6 | This uses [Puppet 3.8](https://docs.puppet.com/puppet/3.8/reference/) for provisioning 7 | the vagrant boxes. The intent is to also show how Logstash could be managed through 8 | a configuration management product, and to break down the installation components. 9 | By using modules from Puppet Forge, this allows quick setup. 10 | 11 | ## Requirements 12 | 13 | * Vagrant version 1.7 or newer. 14 | * Virtual Box, or VMware Fusion. 15 | * At least 10GB of free disk-space for boxes. 16 | * Internet connection capable of downloading ~300MB (Java and Logstash/ElasticSearch packages) each vagrant run without you losing interest. 17 | 18 | ## Setup 19 | All of the demos use an Ubuntu Trusty (14.04) box. You will need Linux skills 20 | to move around the filesystem and examine files. 21 | 22 | ### Linux and Mac 23 | 1. Clone this repo. 24 | 1. While in the repo, run `./prep_environment` 25 | * This will checkout the submodules and copy the Hiera details to the parent directory, where Vagrant will use them. 26 | * This was done to allow you to make your own changes without worrying about committing secrets. 27 | 28 | ### Windows 29 | 1. Clone this repo. 30 | 1. While in the repo, run `git submodule init`, or equivalent. 31 | 1. Then run, `git submodule update`, or equivalent. 32 | 1. Copy the `puppet-hiera` directory to the parent directory of this repo. 33 | * This allows Vagrant to use it for its work, and to allow you to make your own changes without worrying about committing secrets. 34 | 35 | ## License 36 | ``` 37 | The MIT License (MIT) 38 | 39 | Copyright (C) 2016 hellosign.com 40 | 41 | Permission is hereby granted, free of charge, to any person obtaining a copy 42 | of this software and associated documentation files (the "Software"), to deal 43 | in the Software without restriction, including without limitation the rights 44 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 45 | copies of the Software, and to permit persons to whom the Software is 46 | furnished to do so, subject to the following conditions: 47 | 48 | The above copyright notice and this permission notice shall be included in all 49 | copies or substantial portions of the Software. 50 | 51 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 52 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 53 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 54 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 55 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 56 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 57 | SOFTWARE. 58 | ``` 59 | 60 | -------------------------------------------------------------------------------- /Vagrantfile: -------------------------------------------------------------------------------- 1 | # vi: set syntax=ruby 2 | Vagrant.require_version ">= 1.7.0" 3 | 4 | # Create some happy little VMs to demo on. 5 | 6 | small_environment = { 7 | :escluster => '192.168.99.10', 8 | :apache => '192.168.99.20' 9 | } 10 | 11 | medium_environment = { 12 | :mdcluster => '192.168.99.10', 13 | :apache => '192.168.99.20' 14 | } 15 | 16 | Vagrant.configure("2") do |config| 17 | 18 | # Define the base box we want to play with, and some always-on-everything 19 | # items. 20 | config.vm.box = 'bento/ubuntu-16.10' 21 | config.vm.provision "shell", :path => "vup" 22 | # Define the Hiera directory 23 | # Use this for your own. 24 | config.vm.synced_folder "../puppet-hiera", "/etc/puppet-hiera" 25 | # Use this for the repo's version of hiera 26 | # config.vm.synced_folder "hiera/" "/etc/puppet-hiera" 27 | 28 | puppet_common = proc do |puppet| 29 | puppet.manifests_path = "manifests" 30 | puppet.manifest_file = "site.pp" 31 | puppet.module_path = "modules" 32 | puppet.environment_path = "environments" 33 | puppet.environment = "production" 34 | puppet.hiera_config_path = "hiera.yaml" 35 | puppet.working_directory = "/tmp/vagrant-puppet" 36 | end 37 | 38 | config.vm.define :onebox_nasa do |onebox_nasa| 39 | onebox_nasa.vm.hostname = 'oneboxnasa' 40 | onebox_nasa.vm.network "private_network", ip: "192.168.99.20" 41 | onebox_nasa.vm.provision :puppet do |puppet| 42 | puppet_common.call(puppet) 43 | puppet.facter = { 44 | "node_type" => 'onebox_nasa', 45 | "hostname" => 'onebox_nasa', 46 | "env_type" => 'onebox' 47 | } 48 | end 49 | onebox_nasa.vm.provider :virtualbox do |vb| 50 | vb.memory = '1256' 51 | vb.cpus = 2 52 | vb.customize ["modifyvm", :id, "--nictype1", "virtio"] 53 | end 54 | onebox_nasa.vm.provider :vmware_fusion do |vb| 55 | vb.vmx["memsize"] = '1256' 56 | vb.vmx["numvcpus"] = 2 57 | end 58 | end 59 | 60 | config.vm.define :onebox_syslog do |onebox_syslog| 61 | onebox_syslog.vm.hostname = 'oneboxsyslog' 62 | onebox_syslog.vm.network "private_network", ip: "192.168.99.20" 63 | onebox_syslog.vm.provision :puppet do |puppet| 64 | puppet_common.call(puppet) 65 | puppet.facter = { 66 | "node_type" => 'onebox_syslog', 67 | "hostname" => 'onebox_syslog', 68 | "env_type" => 'onebox' 69 | } 70 | end 71 | onebox_syslog.vm.provider :virtualbox do |vb| 72 | vb.memory = '1256' 73 | vb.cpus = 2 74 | vb.customize ["modifyvm", :id, "--nictype1", "virtio"] 75 | end 76 | onebox_syslog.vm.provider :vmware_fusion do |vb| 77 | vb.vmx["memsize"] = '1256' 78 | vb.vmx["numvcpus"] = 2 79 | end 80 | end 81 | 82 | # This iterator builds the Vagrant definitions for all of the small_environment 83 | # machines, defined at the top. 
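  # With the hash above, this defines "small_escluster" (192.168.99.10) and
  # "small_apache" (192.168.99.20), each provisioned with the shared puppet_common
  # settings plus per-node "node_type" and "env_type" facter facts.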
84 | small_environment.keys.each do |node_name| 85 | config.vm.define "small_#{node_name}" do |node| 86 | node.vm.hostname = "#{node_name}" 87 | node.vm.network :private_network, ip: small_environment[node_name] 88 | node.vm.provision :puppet do |puppet| 89 | puppet_common.call(puppet) 90 | puppet.facter = { 91 | "node_type" => "#{node_name}", 92 | "env_type" => "small" 93 | } 94 | end 95 | node.vm.provider :virtualbox do |vb| 96 | vb.memory = '1256' 97 | vb.cpus = '2' 98 | vb.customize ["modifyvm", :id, "--nictype1", "virtio"] 99 | end 100 | node.vm.provider :vmware_fusion do |vb| 101 | vb.vmx["memsize"] = '1024' 102 | vb.vmx["numcpus"] = 2 103 | end 104 | end 105 | end 106 | 107 | # This iterator builds the Vagrant definitions for all of the medium_environment 108 | # machines, defined at the top. 109 | medium_environment.keys.each do |node_name| 110 | config.vm.define "medium_#{node_name}" do |node| 111 | node.vm.hostname = "#{node_name}" 112 | node.vm.network :private_network, ip: medium_environment[node_name] 113 | node.vm.provision :puppet do |puppet| 114 | puppet_common.call(puppet) 115 | puppet.facter = { 116 | "node_type" => "#{node_name}", 117 | "env_type" => "medium" 118 | } 119 | end 120 | node.vm.provider :virtualbox do |vb| 121 | vb.memory = '1256' 122 | vb.cpus = '2' 123 | vb.customize ["modifyvm", :id, "--nictype1", "virtio"] 124 | end 125 | node.vm.provider :vmware_fusion do |vb| 126 | vb.vmx["memsize"] = '1024' 127 | vb.vmx["numcpus"] = 2 128 | end 129 | end 130 | end 131 | 132 | end 133 | -------------------------------------------------------------------------------- /environments/production/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellosign/logstash-fundamentals/d99189b93d4c57312eaaeb8150435a2eefd41a27/environments/production/.gitkeep -------------------------------------------------------------------------------- /examples/complex_logstash.md: -------------------------------------------------------------------------------- 1 | # Example Complex LogStash File 2 | In this example, we're loading data from N sources, applying several filters, 3 | and outputting it to two different sources. This example includes some organic 4 | cruft! Like a real, live config-file would. 5 | 6 | 7 | ```ruby 8 | # Pull in syslog data 9 | input { 10 | file { 11 | path => [ 12 | "/var/log/syslog", 13 | "/var/log/auth.log" 14 | ] 15 | type => "syslog" 16 | } 17 | } 18 | 19 | # Pull in application-log data. They emit data in JSON form. 20 | input { 21 | file { 22 | path => [ 23 | "/var/log/app/worker_info.log", 24 | "/var/log/app/broker_info.log", 25 | "/var/log/app/supervisor.log" 26 | ] 27 | exclude => "*.gz" 28 | type => "applog" 29 | codec => "json" 30 | } 31 | } 32 | 33 | # Set up a couple of UDP listeners for network-based logging. 34 | # Perhaps we're experimenting with not logging to files! 35 | input { 36 | udp { 37 | port => "8192" 38 | host => "localhost" 39 | type => "applog" 40 | codec => "json" 41 | } 42 | udp { 43 | port => "8193" 44 | host => "localhost" 45 | type => "controllog" 46 | codec => "json" 47 | } 48 | } 49 | 50 | filter { 51 | # The broad filter on Syslog. 52 | if [type] == "syslog" { 53 | grok { 54 | match => { 55 | "message" => "%{SYSLOGBASE}%{SPACE}%{GREEDYDATA:SYSLOGMESSAGE}" 56 | } 57 | } 58 | 59 | # Turn the log timestamp into a true event timestamp. 
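    # (The date filter writes the parsed value into @timestamp by default, so the
    # event is indexed by when it was logged rather than when Logstash read it.)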
60 | date { 61 | match => [ "timestamp", "MMM d HH:mm:ss", "MMM dd HH:mm:ss" ] 62 | } 63 | } 64 | 65 | # App-logs are already formatted thanks to JSON, so much less grokking. 66 | # But we still have to do a few things. 67 | 68 | # Parse the timestamp in the network inputs. 69 | if [type] == "applog" OR [type] == "controllog" { 70 | date { 71 | match => [ "info.timestamp", "ISO8601" ] 72 | } 73 | } 74 | 75 | # Drop the debug lines in the info feeds. 76 | if [type] == "applog" AND [info][message] =~ "^DEBUG:" { 77 | drop {} 78 | } 79 | 80 | # Parse the metrics data encoded in a field. 81 | if [type] == "applog" AND [info][message] =~ "^metrics: " { 82 | grok { 83 | match => { 84 | "info.message" => "^metrics: %{GREEDYDATA:metrics_raw}$" 85 | } 86 | tag => [ "metrics" ] 87 | } 88 | } 89 | 90 | # Parse that key-value field we just found. And drop the 'raw' field. 91 | if "metrics" in [tags] { 92 | kv { 93 | source => "metrics_raw" 94 | target => "metrics" 95 | remove_field => "metrics_raw" 96 | } 97 | } 98 | 99 | if ([type] == "applog" OR [type] == "controllog") AND [supervisor][event_type] == "auth" { 100 | mutate { 101 | add_tag => [ "audit" ] 102 | } 103 | } else { 104 | mutate { 105 | add_tag => [ "logline" ] 106 | } 107 | } 108 | } 109 | 110 | # Finally, the outputs 111 | output { 112 | 113 | if "logline" in [tags] { 114 | elasticsearch { 115 | hosts => [ 116 | "localhost", 117 | "logelastic.prod.internal" 118 | ] 119 | template_name => "logstash" 120 | index => "logstash-{+YYYY.MM.dd}" 121 | } 122 | } else if "audit" in [tags] { 123 | elasticsearch { 124 | hosts => [ 125 | "localhost", 126 | "logelastic.prod.internal" 127 | ] 128 | template_name => "audit" 129 | index => "audit-{+xxxx.ww}" 130 | } 131 | } 132 | 133 | if "metrics" in [tags] { 134 | influxdb { 135 | host => "influx.prod.internal" 136 | db => "logstash" 137 | measurement => "appstats" 138 | # This next bit only works because it is already a hash. 139 | data_points => "%{metrics}" 140 | } 141 | } 142 | } 143 | 144 | ``` 145 | -------------------------------------------------------------------------------- /examples/curator_cleanup.md: -------------------------------------------------------------------------------- 1 | # Curator Cleanup 2 | In this example, old indices and snapshots are cleaned up from the ElasticSearch cluster. 3 | 4 | ```yaml 5 | actions: 6 | 1: 7 | action: delete_indices 8 | description: "Removes logstash indices older than 28 days." 9 | filters: 10 | - filtertype: pattern 11 | kind: prefix 12 | value: logstash- 13 | - filtertype: age 14 | source: name 15 | timestring: '%Y.%m.%d' 16 | direction: older 17 | unit: days 18 | unit_count: 28 19 | 2: 20 | action: delete_snapshot 21 | description: "Remove logstash backups older than 6 months" 22 | options: 23 | repository: logstash_backup 24 | filters: 25 | - filtertype: pattern 26 | kind: prefix 27 | value: 'logstash-' 28 | - filtertype: age 29 | source: creation_date 30 | direction: older 31 | unit: months 32 | unit_count: 6 33 | ``` 34 | 35 | -------------------------------------------------------------------------------- /examples/curator_snapshot_complex.md: -------------------------------------------------------------------------------- 1 | # Two Curator examples. A snapshot-yesterday example, and a snapshot-hourly. 2 | 3 | This uses a trick of filtering to figure out what 'yesterday' is, and snapshot 4 | that. Makes sure you're snapshotting a quiet index! 
5 | ```yaml 6 | actions: 7 | 1: 8 | action: snapshot 9 | description: Snapshot the yesterday index for onebox. 10 | options: 11 | repository: logstash_backup 12 | name: onebox-%Y.%m.%d 13 | filters: 14 | - filtertype: pattern 15 | kind: prefix 16 | value: 'onebox-' 17 | - filtertype: age 18 | source: name 19 | timestring: %Y.%m.%d 20 | direction: older 21 | unit: days 22 | unit_count: 1 23 | - filtertype: count 24 | count: 1 25 | reverse: True 26 | ``` 27 | This works by filtering on the pattern (`onebox-`) to get just the indices we 28 | care about. Then by filtering on the age of the index, to get the list of indices 29 | older than a day. Finally, we pull exactly one index out, which is the newest 30 | index in that list of old indexes. 31 | 32 | This next example, an hourly snapshot is taken of the 'audit' index, and 33 | a second job to delete the old ones. To make it interesting, the 'audit' index 34 | rotates weekly, not daily. 35 | 36 | ```yaml 37 | actions: 38 | 1: 39 | action: snapshot 40 | description: "Hourly snapshot of the audit index" 41 | options: 42 | repository: logstash_backup 43 | name: hraudit-%Y%m%d%H 44 | filters: 45 | - filtertype: pattern 46 | kind: timestring 47 | value: '%G.%V' 48 | - filtertype: pattern 49 | kind: prefix 50 | value: 'audit-' 51 | 2: 52 | action: delete_snapshot 53 | description: "Remove old hourly snapshots of the audit index" 54 | options: 55 | repository: logstash_backup 56 | filters: 57 | - filtertype: pattern 58 | kind: prefix 59 | value: 'hraudit-' 60 | - filtertype: age 61 | source: creation_date 62 | direction: older 63 | unit: hours 64 | unit_count: 26 65 | ``` 66 | Then another one to snapshot it after the next week has started. As these have 67 | a seven year retention period (ick), there is no snapshot-removal step. 68 | 69 | ```yaml 70 | actions: 71 | 1: 72 | action: snapshot 73 | description: "Snapshot the last-week index for audit" 74 | options: 75 | repository: logstash_backup 76 | name: audit-%G.%V 77 | filters: 78 | - filtertype: pattern 79 | kind: prefix 80 | value: 'audit-' 81 | - filtertype: age 82 | source: name 83 | timestring: %G.%V 84 | direction: older 85 | unit: weeks 86 | unit_count: 1 87 | - filtertype: count 88 | count: 1 89 | reverse: True 90 | 91 | ``` 92 | These would be launched through cron. The executions would look something like: 93 | 94 | ```shell 95 | /usr/local/bin/curator --config /etc/curator/curator.yml /etc/curator/snap_audit-hourly.yml 96 | /usr/local/bin/curator --config /etc/curator/curator.yml /etc/curator/snap_audit-weekly.yml 97 | ``` 98 | 99 | ## Restoring those snapshots 100 | You can restore with curator as well! Here is an example of restoring from 101 | the most recent hourly backup of that audit index. It's smart enough to 102 | know that you want the 'most recent' unless told otherwise. 103 | 104 | ```yaml 105 | actions: 106 | 1: 107 | action: restore 108 | description: "Restore the most recent 'audit' snapshot." 109 | options: 110 | repository: logstash-backup 111 | partial: False 112 | filters: 113 | - filtertype: pattern 114 | kind: prefix 115 | value: 'hraudit-' 116 | - filtertype: state 117 | state: SUCCESS 118 | ``` 119 | You can set this yaml file somewhere for use in your disaster-recovery 120 | runbooks. 
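For reference, the `curator.yml` passed via `--config` in the cron commands above holds
only client-connection and logging settings; the actions live in the per-job files. A
minimal sketch (the host, port, and log level here are placeholders, not values from
this repo):

```yaml
client:
  hosts:
    - 127.0.0.1
  port: 9200
  timeout: 30
logging:
  loglevel: INFO
```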
121 | -------------------------------------------------------------------------------- /examples/field_cleanup.md: -------------------------------------------------------------------------------- 1 | # Field Cleanup Example 2 | In this example, we show a filter-block that casts various fields to specific 3 | data-types. This is done to ensure type-conversions are handled correctly, and 4 | to ensure that the generated index can be loaded after a future ElasticSearch 5 | 2.x upgrade. 6 | 7 | ```ruby 8 | filter { 9 | 10 | mutate { 11 | convert => { 12 | "priority" => "string", 13 | "value" => "float" 14 | "response_code" => "long" 15 | } 16 | } 17 | 18 | if "metric" in [tags] { 19 | mutate { 20 | convert => { "metric_value" => "float" } 21 | } 22 | } 23 | 24 | if [type] == "cheese_api" { 25 | mutate { 26 | convert => { 27 | "status_code" => "long", 28 | "runtime" => "float" 29 | } 30 | remove_field => [ "subtype" ] 31 | } 32 | } 33 | 34 | } 35 | ``` 36 | 37 | Put this type of block at the end of your filter-chains, the last step before 38 | the pipeline enters the `output {}` stage. This is a cleanup step. For larger 39 | environments, you'll still need this even after getting to ES 2.x, simply to 40 | catch problems earlier. 41 | -------------------------------------------------------------------------------- /examples/filebeat.md: -------------------------------------------------------------------------------- 1 | # FileBeat Examples 2 | [FileBeat is part of the Beats framework, designed to replace the `file` input 3 | on logstash](https://www.elastic.co/guide/en/beats/filebeat/current/index.html). 4 | Here are a couple of examples of FileBeat configurations. 5 | 6 | ```yaml 7 | filebeat: 8 | prospectors: 9 | - 10 | paths: 11 | - "/var/log/syslog" 12 | - "/var/log/auth.log" 13 | input_type: log 14 | document_type: syslog 15 | - 16 | paths: 17 | - "/var/log/apache2/*.log" 18 | input_type: log 19 | document_type: apache 20 | output: 21 | logstash: 22 | hosts: [ "prodstash.prod.internal:5044" ] 23 | 24 | ``` 25 | This configuration monitors two system logfiles, setting their LogStash type to 26 | be `syslog`, and monitors apache logs, setting their LogStash type to `apache`. 27 | Completed events are then sent to a LogStash instance running the `beats` input 28 | on port 5044. 29 | 30 | ```yaml 31 | filebeat: 32 | prospectors: 33 | - 34 | paths: 35 | - "/var/log/syslog" 36 | - "/var/log/auth.log" 37 | input_type: log 38 | document_type: syslog 39 | - 40 | paths: 41 | - "/var/log/app/api_callback*" 42 | input_type: log 43 | document_type: applog 44 | exclude_files: [ '\.gz$' ] 45 | exclude_lines: [ '^DEBUG:.*' ] 46 | fields: 47 | application: "myapp" 48 | app_component: "callbacks" 49 | - 50 | paths: 51 | - "/var/log/app/workers/*.log" 52 | input_type: log 53 | document_type: applog 54 | exclude_files: [ '\.gz$' ] 55 | exclude_lines: [ '^DEBUG:.*' ] 56 | fields: 57 | application: "myapp" 58 | app_component: "workers" 59 | output: 60 | redis: 61 | host: "logredis.prod.internal:6379" 62 | index: "filebeat_prod" 63 | ``` 64 | This more complex example pulls some system log information, like the above 65 | example, but also pulls in some application-specific logs. It then uses FileBeat 66 | filters to configure it to reject `DEBUG` loglines, and not parse logfiles that 67 | have been gzipped. It then adds appropriate fields to the events. This uses the 68 | redis output, dumping events into the `filebeat_prod` key. 
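On the consuming side, a Logstash `redis` input reads those events back out of the list.
A minimal sketch, assuming the same host and the `filebeat_prod` key as above (the `type`
value is only an illustrative choice):

```ruby
input {
  redis {
    host      => "logredis.prod.internal"
    port      => 6379
    data_type => "list"          # FileBeat pushes each event onto a Redis list
    key       => "filebeat_prod"
    codec     => "json"          # events arrive as JSON documents
    type      => "filebeat"
  }
}
```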
69 | -------------------------------------------------------------------------------- /examples/grokking_rules.md: -------------------------------------------------------------------------------- 1 | # Grok Performance Rules 2 | 3 | 1. Parse failures are very expensive. Structure your filters to avoid them. 4 | 1. Do your broad captures early in your filter sections. 5 | 1. Refine broadly captured fields in later grok filters. 6 | 1. Anchor your regexes. [This reduces substring searching.](https://www.elastic.co/blog/do-you-grok-grok) 7 | 1. Only use `%{GREEDYDATA}` at the end of a capture. Reduces back-tracking. 8 | 1. Use an internal-log format standard. Greatly eases grok construction. 9 | 1. Convert your plain language `Created account #{x} in zone #{y} with email #{z}` log-statements to something machine parseable. 10 | 11 | ## Avoid the dictionary anti-pattern: 12 | 13 | Don't do this: 14 | 15 | ```ruby 16 | filter { 17 | if [type] == "applog" 18 | grok { 19 | match => { 20 | "message" => [ 21 | "%{SYSLOGTIMESTAMP} \[%{WORD:component}\] %{WORD:acct_action} account %{BASE16NUM:acct_num} in zone %{BASE16NUM:zone_id} with email %{EMAILADDRESS:email_address}", 22 | "%{SYSLOGTIMESTAMP} \[%{WORD:component}\] Account %{BASE16NUM:acct_num} %{WORD:acct_action} from zone %{BASE16NUM:zone_id}", 23 | "%{SYSLOGTIMESTAMP} \[%{WORD:component}\] %{WORD:acct_action} zone %{BASE16NUM:zone_id} account %{BASE16NUM:acct_num} for %{GREEDYDATA:suspension_reason}", 24 | "%{SYSLOGTIMESTAMP} \[%{WORD:component}\] %{WORD:zone_action} new zone: %{BASE16NUM:zone_id}", 25 | "%{SYSLOGTIMESTAMP} \[%{WORD:component}\] Zone %{BASE16NUM:zone_id} %{WORD:zone_action}", 26 | "%{SYSLOGTIMESTAMP} \[%{WORD:component}\] %{GREEDYDATA:app_logline}" 27 | ] 28 | } 29 | } 30 | } 31 | ``` 32 | The temptation with grok matches is to treat it like a dictionary. Since matches 33 | are run in order, start with the most specific filters and get broader, finishing 34 | with a catch-all statement to sweep up the remainers. *This will destroy your performance*. 35 | Remember, each grok-miss is expensive. Constructing it like this ensures that most 36 | log-lines will get missed several times before getting matched. 37 | 38 | This is terse, and shows your intent. However, it's *really bad*. 39 | 40 | The above can be made to perform much better without modifying the log-format. 41 | At the very least, move the prefix in each capture to it's own expression, and 42 | grok on the remaining ones in a later dictionary: 43 | 44 | ```ruby 45 | filter { 46 | if [type] == "applog" 47 | grok { 48 | match => { 49 | "message" => [ 50 | "^%{SYSLOGTIMESTAMP} \[%WORD:component}\] %{GREEDYDATA:app_logline}$" 51 | ] 52 | } 53 | } 54 | if [component] == "account" { 55 | grok { 56 | match => { 57 | "app_logline" => [ 58 | "^%{WORD:acct_action} account %{BASE16NUM:acct_num} in zone %{BASE16NUM:zone_id} with email %{EMAILADDRESS:email_address}$", 59 | "^%{SYSLOGTIMESTAMP} \[%{WORD:component}\] Account %{BASE16NUM:acct_num} %{WORD:acct_action} from zone %{BASE16NUM:zone_id}$", 60 | "^%{WORD:acct_action} zone %{BASE16NUM:zone_id} account %{BASE16NUM:acct_num} for %{GREEDYDATA:suspension_reason}$" 61 | ] 62 | } 63 | } 64 | } 65 | if [component] == "zone" { 66 | grok { 67 | match => { 68 | "app_logline" => [ 69 | "^%{WORD:zone_action} new zone: %{BASE16NUM:zone_id}$", 70 | "^Zone %{BASE16NUM:zone_id} %{WORD:zone_action}$" 71 | ] 72 | } 73 | } 74 | } 75 | } 76 | } 77 | ``` 78 | 79 | This is much longer, but it will perform *much* better. 
While it still uses 80 | dictionaries, the grok expressions are now anchored (see the `^` and `$` 81 | characters) which will improve performance. Also, we use conditional statements 82 | to avoid grok-parsing lines against patterns we already know won't match. 83 | 84 | If we move towards grok-ready logging statements, we can make these: 85 | ``` 86 | Created account #{x} in zone #{y} with email #{z} 87 | Account #{x} deleted from zone #{y} 88 | Suspended zone #{y} account #{x} for #{s} 89 | Created new zone #{y} 90 | Zone #{y} deleted 91 | ``` 92 | Into these easier to parse versions: 93 | ``` 94 | [account] Created account #{x} in zone #{y} with email #{z} 95 | [account] Deleted account #{x} in zone #{y} 96 | [account] Suspended account #{x} in zone #{y} for #{s} 97 | [zone] Created #{y} 98 | [zone] Deleted #{y} 99 | ``` 100 | Which means we can do away with dictionaries entirely: 101 | 102 | ```ruby 103 | filter { 104 | if [type] == "applog" 105 | grok { 106 | match => { 107 | "message" => [ 108 | "^%{SYSLOGTIMESTAMP} \[%WORD:component}\] %{GREEDYDATA:app_logline}$" 109 | ] 110 | } 111 | } 112 | if [component] == "account" { 113 | grok { 114 | match => { 115 | "app_logline" => [ 116 | "^{WORD:acct_action} %{BASE16NUM:acct_num} in zone %{BASE16NUM:zone_id}( %{GREEDYDATA:acct_extra})$" 117 | ] 118 | } 119 | } 120 | } else if [component] == "zone" { 121 | grok { 122 | match => { 123 | "app_logline" => [ 124 | "^%{WORD:zone_action} %{BASE16NUM:zone_id}$" 125 | ] 126 | } 127 | } 128 | } 129 | if [acct_action] == "Created" { 130 | grok { 131 | match => { 132 | "acct_extra" => [ 133 | "with email address %{EMAILADDRESS:email_address}$" 134 | ] 135 | } 136 | } 137 | mutate { 138 | remove_field => [ "acct_extra" ] 139 | } 140 | } else if [acct_action] == "Suspended" { 141 | grok { 142 | match => { 143 | "acct_extra" => [ 144 | "for %{GREEDYDATA:suspension_reason}$" 145 | ] 146 | } 147 | } 148 | mutate { 149 | remove_field => [ "acct_extra" ] 150 | } 151 | } 152 | } 153 | } 154 | ``` 155 | This version avoids dictionaries all together, and uses conditionals to ensure 156 | that each grok-expression is only matched against a string that is highly 157 | likely to match. 158 | 159 | ## Skipping to the end to see what the closing paragraph is 160 | 161 | 1. Use regex-anchors. `^` and `$` will give you the biggest bang for your performance optimization time. 162 | 2. Tiering your matches lets you group filter-statements on a component in ways that are easier for an engineering group to maintain. 163 | -------------------------------------------------------------------------------- /examples/grokking_syslog.md: -------------------------------------------------------------------------------- 1 | # Grokking Syslog 2 | In this example, we progressively build some syslog grokking. We are looking 3 | for output from backup scripts. The output of the scripts is well known, which 4 | allows us to build some simple grok expressions and give our events rich data 5 | to work with. 6 | 7 | It all begins with syslog parsing. This is taken from [one of the filters we use in vagrant builds](modules/profiles/templates/logstash/filter/syslog_file) 8 | 9 | ```ruby 10 | filter { 11 | if [type] == "syslog-file" { 12 | # Syslog parsing is handled through Grok. 13 | # Documentation: https://www.elastic.co/guide/en/logstash/2.4/plugins-filters-grok.html 14 | grok { 15 | # This will create a new field called SYSLOGMESSAGE, that contains the 16 | # data part of a syslog line. 
17 | # 18 | # If given a line like: 19 | # Sep 9 19:09:50 ip-192-0-2-153 dhclient: bound to 192.0.2.153 -- renewal in 1367 seconds. 20 | # SYSLOGMESSAGE will equal "bound to 192.0.2.153 -- renewal in 1367 seconds." 21 | # 22 | match => { 23 | "message" => "%{SYSLOGBASE}%{SPACE}%{GREEDYDATA:SYSLOGMESSAGE}" 24 | } 25 | } 26 | } 27 | } 28 | ``` 29 | This will give us a variety of fields to work with, such as: 30 | 31 | * `program`: The program that issued the log-line. 32 | * `pid`: The PID of the program. 33 | * `logsource`: The machine that recorded the message. 34 | * `SYSLOGMESSAGE`: The message-part of the syslog line 35 | 36 | If we are given some log-lines such as these: 37 | 38 | ``` 39 | May 19 19:22:06 ip-172-16-2-4 pii-repo-backup[4982]: ALARM Unable to isolate framulator, backup not taken. 40 | May 20 07:01:02 ip-172-16-2-4 pii-repo-backup[5122]: OK Hourly backup success. 41 | ``` 42 | 43 | We can construct patterns to match these. We already know that `SYSLOGMESSAGE` 44 | will be set to `ALARM Unable to isolate framulator, backup not taken.` So let's 45 | construct a pattern to extract the meaningful information. 46 | 47 | A good tool for figuring out how to grok this is [grokdebug.herokuapp.com](http://grokdebug.herokuapp.com/). 48 | It allows you to paste in a log-line, and progressively build your expression. 49 | Remember, you can name your fields by using either `%{PATTERN:field_name}` or 50 | `(?regex)`. Use the former if you're using a built in pattern, the 51 | latter, if you're building your own regex. 52 | 53 | A simple capture for these events could be this: 54 | ```ruby 55 | %{WORD:backup_state} %{GREEDYDATA:backup_message} 56 | ``` 57 | It's not all that efficient, but it gets the job done. However, the internal 58 | standard for backup-output has only a few states defined. A more targeted capture 59 | would look like this: 60 | ```ruby 61 | ^(?OK|WARN|ALARM|CRIT) %{GREEDYDATA:backup_message}$ 62 | ``` 63 | We can now start building our Grok expression. 64 | 65 | For best efficiency, we need to place this Grok expression *after* the above expression. 66 | This allows us to filter on a specific field and reduce the per-cycle computational 67 | overhead. Since we know all of our backup scripts end with "-backup": 68 | ```ruby 69 | if [program] =~ "-backup$" { 70 | grok { 71 | match => { 72 | "SYSLOGMESSAGE" => "^(?OK|WARN|ALARM|CRIT) %{GREEDYDATA:backup_message}$" 73 | "program" => "^%{DATA:backup_name}-backup$" 74 | } 75 | add_tag => [ "backup_output" ] 76 | } 77 | } 78 | ``` 79 | The conditional looks for strings ending with `-backup`, and then applies the 80 | grok expression to it. We use two matches; one on `SYSLOGMESSAGE` to pull out the 81 | `backup_state` and `backup_message` fields, and a second on the `program` field 82 | to pull out the `backup_name` field. Finally, we tag the event with `backup_output` 83 | for use later on and to ease finding the event in reporting. 84 | 85 | * `backup_name`: pii-repo 86 | * `backup_state`: ALARM 87 | * `backup_message`: Unable to isolate framulator, backup not taken. 88 | * `tags`: [ "backup_output" ] 89 | * `type`: syslog 90 | 91 | --- 92 | 93 | With fields defined in this way, we can use them for outputs: 94 | ```ruby 95 | output { 96 | if "backup_output" in [tags] AND [backup_state] != "OK" { 97 | pagerduty { 98 | service_key => "secrets" 99 | event_type => "trigger" 100 | incident_key => "logstash/%{backup_name}/%{backup_state}" 101 | description => "Backup failure on %{backup_name}. RPO is not being met." 
102 | details => "%{backup_state}: %{backup_message}" 103 | } 104 | } 105 | } 106 | ``` 107 | Which will issue a PagerDuty incident in the event of a failed backup. The 108 | fields we populated in the grok expression are used to provide information in the 109 | incident. This usage would be much harder if we had to extract the text we wanted 110 | from within large fields. 111 | -------------------------------------------------------------------------------- /examples/large_distributed_source.md: -------------------------------------------------------------------------------- 1 | # Large Distributed Source 2 | This example configuration shows what a logstash configuration on a log-producing 3 | node might look like in a large, distributed environment. 4 | 5 | ```ruby 6 | input { 7 | file { 8 | paths => [ "/var/log/syslog" ] 9 | type => "syslog" 10 | } 11 | file { 12 | paths => [ 13 | "/var/log/auth.log", 14 | "/var/log/audit/audit.log" 15 | ] 16 | type => "audit" 17 | } 18 | } 19 | 20 | input { 21 | file { 22 | paths => [ "/var/log/product-a/*.log" ] 23 | type => "product-a" 24 | } 25 | file { 26 | paths => [ "/var/log/product-q/*.log" ] 27 | type => "product-q" 28 | } 29 | } 30 | 31 | output { 32 | if [type] == "audit" { 33 | redis { 34 | host => [ "audit.security.internal" ] 35 | data_type => "list" 36 | key => "audit_log" 37 | } 38 | } else { 39 | kafka { 40 | broker_list => "keeper1.devops.internal:9092,keeper2.devops.internal:9092" 41 | topic_id => "%{type}" 42 | } 43 | } 44 | } 45 | ``` 46 | 47 | -------------------------------------------------------------------------------- /examples/large_routed_router.md: -------------------------------------------------------------------------------- 1 | # Large Routed - Router 2 | This example configuration shows what the logstash routing-tier could look 3 | like in an environment that has a routing tier. 4 | 5 | ```ruby 6 | input { 7 | kafka { 8 | zk_connect => "keeper1.devops.internal:9092,keeper2.devops.internal:9092" 9 | topic_id => "logstash_ingest" 10 | } 11 | } 12 | 13 | filter { 14 | if [type] == "syslog" and ( [source] == "/var/log/auth.log" or [source] == "/var/log/audit/audit.log" ) { 15 | mutate { 16 | add_tag => [ "audit" ] 17 | } 18 | } 19 | 20 | if [type] == "audit" { 21 | mutate { 22 | add_tag => [ "audit" ] 23 | } 24 | } 25 | } 26 | 27 | output { 28 | if "audit" in [tags] { 29 | redis { 30 | host => [ "audit.security.internal" ] 31 | data_type => "list" 32 | key => "audit_log" 33 | } 34 | } else { 35 | kafka { 36 | broker_list => "keeper1.devops.internal:9092,keeper2.devops.internal:9092" 37 | topic_id => "%{type}" 38 | } 39 | } 40 | } 41 | 42 | ``` 43 | -------------------------------------------------------------------------------- /examples/multiple_filter.md: -------------------------------------------------------------------------------- 1 | # A Filter Example with Multiple Filter Blocks. 2 | In this example, we have two `filter { }` blocks, which manipulate similar fields. 
3 | 4 | ```ruby 5 | # Parse an authentication header and get details 6 | filter { 7 | if [message] =~ "Authentication_request" { 8 | grok { 9 | match => { 10 | message => "Authentication_request: %{GREEDYDATA:auth_message}$" 11 | } 12 | } 13 | add_field => { 14 | "sub_type" => "authentication" 15 | } 16 | } 17 | } 18 | 19 | filter { 20 | if [sub_type] == "authentication" { 21 | grok { 22 | match => { 23 | auth_message => "%{WORD:auth_type} / %{WORD:auth_user} / %{WORD:application}" 24 | } 25 | } 26 | } 27 | } 28 | 29 | ``` 30 | How does the order of declaration change the order of logic? 31 | -------------------------------------------------------------------------------- /examples/multiple_filter_lines.md: -------------------------------------------------------------------------------- 1 | # A Filter Example with Single Filter Block. 2 | In this example, we have one `filter { }` block, which manipulate similar fields. 3 | 4 | ```ruby 5 | # Parse an authentication header and get details 6 | filter { 7 | if [message] =~ "Authentication_request: " { 8 | grok { 9 | match => { 10 | message => "Authentication_request: %{GREEDYDATA:auth_message}" 11 | } 12 | } 13 | add_field => { 14 | "sub_type" => "authentication" 15 | } 16 | } 17 | 18 | # Parse messages like "auth_type=saml auth_user=hildegard@example.com application=testapp" 19 | if [sub_type] == "authentication" { 20 | kv { 21 | source => "auth_message" 22 | } 23 | } 24 | } 25 | 26 | ``` 27 | How does the order of declaration change the order of logic? 28 | -------------------------------------------------------------------------------- /examples/output_hipchat.md: -------------------------------------------------------------------------------- 1 | # Example Output for HipChat 2 | Sends a notice to HipChat when a type of queue gets clogged. 3 | 4 | ```ruby 5 | output { 6 | if [queue_size] > 30 AND [queue_name] =~ "system_*" { 7 | hipchat { 8 | room_id => "12932" 9 | token => "secrets" 10 | from => "%{queue_name}" 11 | format => "This queue has %{queue_size} jobs in it. It probably needs a good kicking." 12 | color => "yellow" 13 | trigger_notify => true 14 | } 15 | } 16 | } 17 | 18 | ``` 19 | 20 | This one sends a notice when a backup is finished. 21 | ```ruby 22 | output { 23 | if [backup_status] == "finished" { 24 | hipchat { 25 | room_id => "12932" 26 | token => "secrets" 27 | from => "backup_events" 28 | format => "The backup job %{backup_job_id} on %{backup_node} has finished with %{backup_size/1024}GB in the VTL." 29 | color => "green" 30 | } 31 | } 32 | } 33 | ``` 34 | -------------------------------------------------------------------------------- /examples/threading_example_parallel.md: -------------------------------------------------------------------------------- 1 | # Threading Example: Parallelized 2 | In this example logstash config, care has been taken to ensure that 3 | multiple threads can be used for maximum throughput. Compare it to 4 | [the singlethreaded version](examples/threading_example_singlethread.md). 
5 | 6 | ```ruby 7 | input { 8 | file { 9 | path => [ '/var/log/applogs/*.log' ] 10 | tags => [ 'applogs' ] 11 | } 12 | } 13 | 14 | input { 15 | file { 16 | path => [ '/var/log/debuglogs/*.log' ] 17 | tags => [ 'applogs', 'debuglogs' ] 18 | } 19 | } 20 | 21 | input { 22 | file { 23 | path => [ '/var/log/syslog' ] 24 | tags => [ 'syslog' ] 25 | } 26 | } 27 | 28 | input { 29 | file { 30 | path => [ '/var/log/apache2/*.log' ] 31 | tags => [ 'apache' ] 32 | } 33 | } 34 | 35 | output { 36 | elasticsearch { 37 | hosts => [ 'elastic.prod.internal' ] 38 | } 39 | } 40 | 41 | output{ 42 | # use IAM credentials to bypass credential-in-cleartext problem. 43 | s3 { 44 | bucket => "mycorp_logging_bucket" 45 | size_file => 1024 46 | time_file => 10 47 | } 48 | } 49 | 50 | ``` 51 | 52 | ## Questions: 53 | 54 | * How does performance differ between logstash 1.5 and 2.2 and higher? 55 | * Does the lack of a filter-stage affect performance? 56 | -------------------------------------------------------------------------------- /examples/threading_example_singlethread.md: -------------------------------------------------------------------------------- 1 | # Threading Example: Single threaded 2 | In this example logstash config, little care has been taken for the threading 3 | model. Compare it to 4 | [the parallelized version](examples/threading_example_parallel.md). 5 | 6 | ```ruby 7 | input { 8 | file { 9 | path => [ 10 | '/var/log/applogs/*.log', 11 | '/var/log/debuglogs/*.log', 12 | '/var/log/syslog', 13 | '/var/log/apache2/*.log' 14 | ] 15 | } 16 | } 17 | 18 | filter { 19 | if [path] =~ '/var/log/applogs' { 20 | mutate { 21 | add_tag => [ 'applogs' ] 22 | } 23 | } else if [path] =~ '/var/log/debuglogs' { 24 | mutate { 25 | add_tag => [ 'applogs' 'debuglogs' ] 26 | } 27 | } else if [path] =~ '/var/log/apache' { 28 | mutate { 29 | add_tag => [ 'apache' ] 30 | } 31 | } else if [path] =~ '/var/log/syslog' { 32 | mutate { 33 | add_tag => [ 'syslog' ] 34 | } 35 | } 36 | } 37 | 38 | output { 39 | elasticsearch { 40 | hosts => [ 'elastic.prod.internal' ] 41 | } 42 | 43 | # use IAM credentials to bypass credential-in-cleartext problem. 44 | s3 { 45 | bucket => "mycorp_logging_bucket" 46 | size_file => 1024 47 | time_file => 10 48 | } 49 | } 50 | 51 | ``` -------------------------------------------------------------------------------- /examples/winlogbeat.md: -------------------------------------------------------------------------------- 1 | # WinLogBeat Examples 2 | [WinLogBeat is part of the Beats framework, designed to replace the `eventlog` input 3 | on logstash](https://www.elastic.co/guide/en/beats/winlogbeat/current/_overview.html). 4 | Here are a couple of examples of FileBeat configurations. 5 | 6 | ```yaml 7 | winlogbeat: 8 | registry_file: C:/ProgramData/winlogbeat/.winlogbeat.yml 9 | 10 | event_logs: 11 | - name: ForwardedEvents 12 | 13 | 14 | output: 15 | logstash: 16 | hosts: [ "prodstash.prod.internal:5044" ] 17 | 18 | ``` 19 | This configuration monitors the 'ForwardedEvents' event-log, outputting to a 20 | LogStash instance running the `beats` input on port 5044. The ForwardedEvents 21 | event-log is where a system configured to [forward events](https://msdn.microsoft.com/en-us/library/bb870973(v=vs.85).aspx) 22 | deposits its logs. This can be useful if you are not allowed to install 23 | WinLogBeat on your Domain Controllers. 
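A second, sketched example for hosts where you can install WinLogBeat directly. The
event-log names are the standard Windows ones, but the Redis host and key are
placeholders, following the same layout as the FileBeat Redis example in this repo:

```yaml
winlogbeat:
  registry_file: C:/ProgramData/winlogbeat/.winlogbeat.yml

  event_logs:
    - name: Security
    - name: Application
    - name: System

output:
  redis:
    host: "logredis.prod.internal:6379"
    index: "winlogbeat_prod"

```
This monitors the three standard event-logs and ships the events to a Redis list, where
a downstream Logstash `redis` input can pick them up.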
24 | 25 | -------------------------------------------------------------------------------- /hiera.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | version: 5 3 | defaults: 4 | datadir: /etc/puppet-hiera/ 5 | data_hash: yaml_data 6 | hierarchy: 7 | - name: "Yaml lookup hierarchy" 8 | paths: 9 | - "node-type/%{node_type}.yaml" 10 | - "env-type/%{env_type}.yaml" 11 | - common 12 | -------------------------------------------------------------------------------- /manifests/site.pp: -------------------------------------------------------------------------------- 1 | # The base defines for the various node-types. 2 | 3 | node default { 4 | 5 | case $::node_type { 6 | 'onebox_nasa': { include roles::onebox_nasa } 7 | 'onebox_syslog': { include roles::onebox_syslog } 8 | 'apache': { include roles::apache } 9 | 'escluster': { include roles::escluster } 10 | 'mdcluster': { include roles::mdcluster } 11 | } 12 | 13 | } 14 | -------------------------------------------------------------------------------- /modules/profiles/files/apache/sysadmin1138-net.basis: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellosign/logstash-fundamentals/d99189b93d4c57312eaaeb8150435a2eefd41a27/modules/profiles/files/apache/sysadmin1138-net.basis -------------------------------------------------------------------------------- /modules/profiles/files/base/hcl.vim: -------------------------------------------------------------------------------- 1 | 2 | if exists("b:current_syntax") 3 | finish 4 | endif 5 | 6 | syn match hclEqual '=' 7 | syn match hclSimpleString '"[^\"]*"' 8 | syn region hclComment display oneline start='\%\(^\|\s\)#' end='$' 9 | syn region hclInterpolation display oneline start='(' end=')' contains=hclInterpolation,hclSimpleString 10 | syn region hclSmartString display oneline start='"' end='"\s*$' contains=hclInterpolation 11 | 12 | syn keyword hclRootKeywords variable provider resource nextgroup=hclString,hclString skipwhite 13 | syn keyword hclRootKeywords default nextgroup=hclEquals skipwhite 14 | 15 | 16 | syn keyword hclAwsResourcesKeywords availability_zones desired_capacity force_delete health_check_grace_period health_check_type launch_configuration load_balancers max_size min_size name vpc_zone_identifier nextgroup=hclEquals,hclString skipwhite 17 | syn keyword hclAwsResourcesKeywords allocated_storage availability_zone backup_retention_period backup_window db_subnet_group_name engine engine_version final_snapshot_identifier identifier instance_class iops maintenance_window multi_az name password port publicly_accessible security_group_names skip_final_snapshot username vpc_security_group_ids nextgroup=hclEquals,hclString skipwhite 18 | syn keyword hclAwsResourcesKeywords cidr description ingress name security_group_id security_group_name security_group_owner_id source_security_group_id nextgroup=hclEquals,hclString skipwhite 19 | syn keyword hclAwsResourcesKeywords description name subnet_ids nextgroup=hclEquals,hclString skipwhite 20 | syn keyword hclAwsResourcesKeywords instance vpc nextgroup=hclEquals,hclString skipwhite 21 | syn keyword hclAwsResourcesKeywords availability_zones health_check healthy_threshold instance_port instance_protocol instances internal interval lb_port lb_protocol listener name security_groups ssl_certificate_id subnets target timeout unhealthy_threshold nextgroup=hclEquals,hclString skipwhite 22 | syn keyword hclAwsResourcesKeywords ami 
associate_public_ip_address availability_zone ebs_optimized iam_instance_profile instance_type key_name private_ip security_groups source_dest_check subnet_id tags user_data nextgroup=hclEquals,hclString skipwhite 23 | syn keyword hclAwsResourcesKeywords vpc_id nextgroup=hclEquals,hclString skipwhite 24 | syn keyword hclAwsResourcesKeywords iam_instance_profile image_id instance_type key_name name name_prefix security_groups user_data nextgroup=hclEquals,hclString skipwhite 25 | syn keyword hclAwsResourcesKeywords name records ttl type zone_id nextgroup=hclEquals,hclString skipwhite 26 | syn keyword hclAwsResourcesKeywords name nextgroup=hclEquals,hclString skipwhite 27 | syn keyword hclAwsResourcesKeywords route_table_id subnet_id nextgroup=hclEquals,hclString skipwhite 28 | syn keyword hclAwsResourcesKeywords cidr_block gateway_id instance_id route vpc_id nextgroup=hclEquals,hclString skipwhite 29 | syn keyword hclAwsResourcesKeywords acl bucket nextgroup=hclEquals,hclString skipwhite 30 | syn keyword hclAwsResourcesKeywords cidr_blocks description from_port ingress name owner_id protocol security_groups self tags to_port vpc_id nextgroup=hclEquals,hclString skipwhite 31 | syn keyword hclAwsResourcesKeywords availability_zone- cidr_block map_public_ip_on_launch vpc_id nextgroup=hclEquals,hclString skipwhite 32 | syn keyword hclAwsResourcesKeywords cidr_block enable_dns_hostnames enable_dns_support tags nextgroup=hclEquals,hclString skipwhite 33 | 34 | 35 | hi def link hclComment Comment 36 | hi def link hclEqual Operator 37 | hi def link hclRootKeywords Statement 38 | hi def link hclAwsResourcesKeywords Type 39 | hi def link hclSmartString String 40 | hi def link hclInterpolation String 41 | hi def link hclSimpleString PreProc 42 | 43 | let b:current_syntax = "hcl" 44 | -------------------------------------------------------------------------------- /modules/profiles/files/logstash/templates/onebox.json: -------------------------------------------------------------------------------- 1 | { 2 | "template" : "onebox-*", 3 | "settings" : { 4 | "index" : { 5 | "refresh_interval" : "5s" 6 | } 7 | }, 8 | "mappings" : { 9 | "_default_" : { 10 | "dynamic_templates" : [ { 11 | "message_field" : { 12 | "mapping" : { 13 | "index" : "analyzed", 14 | "omit_norms" : true, 15 | "fielddata" : { 16 | "format" : "disabled" 17 | }, 18 | "type" : "string" 19 | }, 20 | "match_mapping_type" : "string", 21 | "match" : "message" 22 | } 23 | }, { 24 | "string_fields" : { 25 | "mapping" : { 26 | "index" : "analyzed", 27 | "omit_norms" : true, 28 | "fielddata" : { 29 | "format" : "disabled" 30 | }, 31 | "type" : "string", 32 | "fields" : { 33 | "raw" : { 34 | "index" : "not_analyzed", 35 | "ignore_above" : 256, 36 | "type" : "string" 37 | } 38 | } 39 | }, 40 | "match_mapping_type" : "string", 41 | "match" : "*" 42 | } 43 | } ], 44 | "properties" : { 45 | "@timestamp" : { 46 | "type" : "date" 47 | }, 48 | "geoip" : { 49 | "dynamic" : true, 50 | "properties" : { 51 | "location" : { 52 | "type" : "geo_point" 53 | }, 54 | "longitude" : { 55 | "type" : "float" 56 | }, 57 | "latitude" : { 58 | "type" : "float" 59 | }, 60 | "ip" : { 61 | "type" : "ip" 62 | } 63 | } 64 | }, 65 | "user_mentions" : { 66 | "properties" : { 67 | "id" : { 68 | "type": "integer" 69 | } 70 | } 71 | }, 72 | "@version" : { 73 | "index" : "not_analyzed", 74 | "type" : "string" 75 | } 76 | }, 77 | "_all" : { 78 | "enabled" : true, 79 | "omit_norms" : true 80 | } 81 | } 82 | }, 83 | "aliases" : { } 84 | } 85 | 86 | 
-------------------------------------------------------------------------------- /modules/profiles/manifests/apache_stub.pp: -------------------------------------------------------------------------------- 1 | # Creates a stub of the apache log-files, without actually installing apache. 2 | class profiles::apache_stub { 3 | 4 | file { 5 | '/var/log/apache2': 6 | ensure => directory, 7 | owner => 'root', 8 | group => 'root', 9 | mode => '0755'; 10 | '/var/log/apache2/vhosts': 11 | ensure => directory, 12 | owner => 'root', 13 | group => 'root', 14 | mode => '0755'; 15 | '/var/log/apache2/vhosts/sysadmin1138-net.log': 16 | ensure => present, 17 | owner => 'root', 18 | group => 'root', 19 | mode => '0644', 20 | before => Service['logstash']; 21 | '/var/log/apache2/vhosts/sysadmin1138-net.basis': 22 | ensure => present, 23 | owner => 'root', 24 | group => 'root', 25 | mode => '0644', 26 | source => 'puppet:///modules/profiles/apache/sysadmin1138-net.basis', 27 | require => Package['logstash'], 28 | before => Service['logstash']; 29 | } 30 | 31 | # This sneakiness is to ensure this log-file is parsed by logstash. 32 | # Don't do this in prod, kids. 33 | exec { 'Wait 30s for logstash to launch': 34 | command => 'sleep 1', 35 | path => [ '/usr/bin', '/bin', '/usr/sbin' ], 36 | notify => Exec['dumplogs'], 37 | require => [ Service['logstash'], File['/var/log/apache2/vhosts/sysadmin1138-net.basis'] ], 38 | } 39 | 40 | exec { 'dumplogs': 41 | command => "sleep 30; bzcat /var/log/apache2/vhosts/sysadmin1138-net.basis >> /var/log/apache2/vhosts/sysadmin1138-net.log", 42 | path => [ '/usr/bin', '/bin', '/usr/sbin' ], 43 | refreshonly => true, 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /modules/profiles/manifests/base.pp: -------------------------------------------------------------------------------- 1 | # Provides the base modifications that need to happen on all nodes. 2 | class profiles::base { 3 | 4 | include apt 5 | 6 | case $virtual { 7 | 'xenu': { 8 | $vm_user = 'ubuntu' 9 | } 10 | [ 'virtualbox', 'kvm' ]: { 11 | $vm_user = 'vagrant' 12 | } 13 | default: { 14 | $vm_user = 'vagrant' 15 | } 16 | } 17 | 18 | file { [ "/home/${vm_user}/.bash_aliases", 19 | '/root/.bash_aliases' ]: 20 | owner => $vm_user, 21 | group => $vm_user, 22 | mode => '0640', 23 | content => template('profiles/base/bash_aliases'), 24 | } 25 | 26 | # Provide vim syntax hilighting for classes. 
27 | file { '/usr/share/vim/vim74/syntax/hcl.vim': 28 | owner => 'root', 29 | group => 'root', 30 | mode => '0644', 31 | source => 'puppet:///modules/profiles/base/hcl.vim', 32 | } 33 | 34 | service { [ 'puppet-agent', 'pxp-agent', 'mcollective' ]: 35 | enable => false, 36 | ensure => stopped 37 | } 38 | 39 | } 40 | -------------------------------------------------------------------------------- /modules/profiles/manifests/curator.pp: -------------------------------------------------------------------------------- 1 | # Ensure curator is installed 2 | class profiles::curator ( 3 | $elastic_host = '127.0.0.1' 4 | ) { 5 | 6 | apt::source { 'curator': 7 | ensure => present, 8 | location => "http://packages.elastic.co/curator/4/debian", 9 | release => 'stable', 10 | repos => 'main', 11 | include => { 12 | 'source' => false 13 | }, 14 | require => Apt::Key['elastic'], 15 | notify => Exec['apt_update'] 16 | } 17 | 18 | ensure_packages ( 'elasticsearch-curator', { require => Exec['apt_update'] } ) 19 | 20 | file { 21 | '/etc/curator': 22 | ensure => directory, 23 | owner => 'root', 24 | group => 'root', 25 | mode => '0750'; 26 | '/etc/curator/curator.yml': 27 | ensure => file, 28 | owner => 'root', 29 | group => 'root', 30 | mode => '0640', 31 | content => template('profiles/curator/config.yml'); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /modules/profiles/manifests/curator_job.pp: -------------------------------------------------------------------------------- 1 | # Sets up a curator job 2 | define profiles::curator_job ( 3 | $jobfile, 4 | $cron_weekday = '*', 5 | $cron_hour = 1, 6 | $cron_minute = 10, 7 | ) { 8 | 9 | $clean_name = shell_escape($name) 10 | 11 | file { "/etc/curator/${clean_name}.yml": 12 | ensure => present, 13 | owner => 'root', 14 | group => 'root', 15 | mode => '0640', 16 | content => $jobfile, 17 | } 18 | 19 | cron { $name: 20 | command => "/usr/local/bin/curator --config /etc/curator/curator.yml /etc/curator/${clean_name}.yml", 21 | user => 'root', 22 | weekday => $cron_weekday, 23 | hour => $cron_hour, 24 | minute => $cron_minute, 25 | } 26 | 27 | 28 | } 29 | -------------------------------------------------------------------------------- /modules/profiles/manifests/elastic_key.pp: -------------------------------------------------------------------------------- 1 | # Installs the elasticsearch GPG key for apt, to avoid dependency cycles. 2 | class profiles::elastic_key { 3 | 4 | apt::key { 'elastic': 5 | id => '46095ACC8548582C1A2699A9D27D666CD88E42B4', 6 | source => 'http://packages.elasticsearch.org/GPG-KEY-elasticsearch' 7 | } 8 | 9 | apt::source { 'elasticsearch': 10 | ensure => present, 11 | location => "https://artifacts.elastic.co/packages/5.x/apt", 12 | release => 'stable', 13 | repos => 'main', 14 | include => { 15 | 'source' => false 16 | }, 17 | require => Apt::Key['elastic'], 18 | notify => Exec['apt_update'] 19 | } 20 | 21 | } 22 | -------------------------------------------------------------------------------- /modules/profiles/manifests/escluster.pp: -------------------------------------------------------------------------------- 1 | # Installs a network facing elasticsearch that does everything 2 | class profiles::escluster ( 3 | $instance_name = logstash 4 | ) { 5 | 6 | include profiles::elastic_key 7 | 8 | # We need java, this gets it. 
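  # (Elasticsearch 5.x requires Java 8, hence openjdk-8-jre-headless.)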
9 | ensure_packages ( 'openjdk-8-jre-headless', { require => Exec['apt_update'] } ) 10 | 11 | # Get the 'half of RAM' number, to be used as the heap-size for ElasticSearch. 12 | # Why half? The other half is used for block-cache. 13 | # $heap_size = inline_template("<%= (@memorysize_mb.to_f / 2).to_i %>") 14 | $heap_size = $memory['system']['available_bytes'] / 2 15 | 16 | # This construct is needed to tell elasticsearch "Bind where we can see you". 17 | # This can't be 0.0.0.0 because this address is advertised. 18 | $es_config = { 19 | 'network' => { 20 | 'host' => $networking['interfaces']['enp0s8']['ip'] 21 | } 22 | } 23 | 24 | # Installs the elasticsearch base install, but not an instance. 25 | class { 'elasticsearch': 26 | # version => '5.4.1', 27 | manage_repo => false, 28 | repo_version => '5.x', 29 | api_protocol => 'http', 30 | config => $es_config, 31 | jvm_options => [ 32 | "-Xms${heap_size}", 33 | "-Xmx${heap_size}" 34 | ], 35 | require => Exec['apt_update'] 36 | } 37 | 38 | # Installs a specific instance. This puppet module allows installing multiple 39 | # ES instances on the same host. 'service elasticsearch-logstash stop' will 40 | # stop it. 41 | elasticsearch::instance { $instance_name: } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /modules/profiles/manifests/init.pp: -------------------------------------------------------------------------------- 1 | # Roots the profiles class 2 | class profiles {} 3 | -------------------------------------------------------------------------------- /modules/profiles/manifests/kibana_local.pp: -------------------------------------------------------------------------------- 1 | # Installs Kibana, in local mode with no proxies. 2 | class profiles::kibana_local { 3 | 4 | class { '::kibana': 5 | ensure => '5.4.1', 6 | config => { 7 | 'server.port' => 3010, 8 | 'server.host' => $networking['interfaces']['enp0s8']['ip'], 9 | 'elasticsearch.url' => 'http://localhost:9200', 10 | } 11 | } 12 | 13 | } 14 | -------------------------------------------------------------------------------- /modules/profiles/manifests/kibana_network.pp: -------------------------------------------------------------------------------- 1 | # Installs Kibana, in network mode with no proxies. 2 | class profiles::kibana_network { 3 | 4 | $elasticsearch_ip = lookup('escluster_ip', { default_value => 'localhost' } ) 5 | 6 | class { '::kibana': 7 | manage_repo => true, 8 | config => { 9 | 'server.port' => 3010, 10 | 'server.host' => $networking['interfaces']['enp0s8']['ip'], 11 | 'elasticsearch.url' => "http://${elasticsearch_ip}:9200" 12 | } 13 | } 14 | 15 | } 16 | -------------------------------------------------------------------------------- /modules/profiles/manifests/logredis.pp: -------------------------------------------------------------------------------- 1 | # Creates a redis server for use with logstash. 2 | class profiles::logredis { 3 | 4 | apt::ppa { 'ppa:chris-lea/redis-server': } 5 | 6 | class { 'redis' : 7 | bind => '0.0.0.0', 8 | service_ensure => 'running', 9 | package_name => 'redis-server', 10 | require => Exec['apt_update'] 11 | } 12 | 13 | # Part of hotrodding Redis is to set vm.overcommit_memory to 1. 
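  # (Without it, Redis background saves can fail under memory pressure, because
  #  the kernel may refuse to over-commit memory for the fork() that BGSAVE uses.)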
14 |   file_line { 'redis_overcommit':
15 |     path   => '/etc/sysctl.conf',
16 |     line   => 'vm.overcommit_memory = 1',
17 |     notify => Exec['vm_overcommit'],
18 |   }
19 | 
20 |   exec { 'vm_overcommit':
21 |     command     => 'sysctl vm.overcommit_memory=1',
22 |     path        => ['/sbin', '/usr/sbin', '/bin', '/usr/bin'],
23 |     refreshonly => true
24 |   }
25 | 
26 |   # The next step is to set transparent_hugepage to never, as this has a severe
27 |   # impact on performance when redis is loaded.
28 |   ini_subsetting { 'set-grub-hugepages':
29 |     ensure     => present,
30 |     path       => '/etc/default/grub',
31 |     setting    => 'GRUB_CMDLINE_LINUX',
32 |     subsetting => 'transparent_hugepage',
33 |     value      => "=never",
34 |     notify     => Exec['update-grub-hugepages'],
35 |   }
36 | 
37 |   exec { 'update-grub-hugepages':
38 |     command     => '/usr/sbin/update-grub',
39 |     refreshonly => true,
40 |     notify      => Exec['onetime-hugepages'],
41 |   }
42 | 
43 |   exec { 'onetime-hugepages':
44 |     command     => 'echo never > /sys/kernel/mm/transparent_hugepage/enabled',
45 |     path        => ['/sbin', '/usr/sbin', '/bin', '/usr/bin'],
46 |     refreshonly => true,
47 |   }
48 | 
49 | }
50 | 
--------------------------------------------------------------------------------
/modules/profiles/manifests/logstash.pp:
--------------------------------------------------------------------------------
1 | # Installs the base logstash. Config-items will be handled in other profiles.
2 | # Example: logstash::configfile { 'get_tweetstream': }
3 | class profiles::logstash (
4 |   $run_as_root = false,
5 |   $workers     = false,
6 |   $ls_heap     = '256m'
7 | ) {
8 | 
9 |   include profiles::elastic_key
10 | 
11 |   ensure_packages ( 'openjdk-8-jre-headless', { require => Exec['apt_update'] } )
12 | 
13 |   if $run_as_root {
14 |     $ls_user  = 'root'
15 |     $ls_group = 'root'
16 |   } else {
17 |     $ls_user  = 'logstash'
18 |     $ls_group = 'logstash'
19 |   }
20 | 
21 |   if $workers {
22 |     $ls_opts = "-w ${workers}"
23 |   } else {
24 |     $ls_opts = '-w 1'
25 |   }
26 | 
27 |   $config_hash = {
28 |     'LS_USER'      => $ls_user,
29 |     'LS_GROUP'     => $ls_group,
30 |     'LS_OPTS'      => $ls_opts,
31 |     'LS_HEAP_SIZE' => $ls_heap
32 |   }
33 | 
34 |   class { '::logstash':
35 |     manage_repo     => false,
36 |     # version         => '1:5.4.1-1',
37 |     startup_options => $config_hash,
38 |     require         => Exec['apt_update']
39 |   }
40 | 
41 | }
42 | 
--------------------------------------------------------------------------------
/modules/profiles/manifests/logstash/filter_apache.pp:
--------------------------------------------------------------------------------
1 | # Deploys the apache-log parsing filter for logstash
2 | class profiles::logstash::filter_apache {
3 | 
4 |   logstash::configfile { '40-filter_apache':
5 |     content => template('profiles/logstash/filter/apache'),
6 |   }
7 | 
8 | }
9 | 
--------------------------------------------------------------------------------
/modules/profiles/manifests/logstash/filter_syslog.pp:
--------------------------------------------------------------------------------
1 | # Deploys the logstash filter for parsing syslog-file entries
2 | class profiles::logstash::filter_syslog {
3 | 
4 |   logstash::configfile { '40-filter_syslog':
5 |     content => template('profiles/logstash/filter/syslog'),
6 |   }
7 | 
8 | }
9 | 
--------------------------------------------------------------------------------
/modules/profiles/manifests/logstash/input_apache.pp:
--------------------------------------------------------------------------------
1 | # Deploys the apache file-fetcher for Logstash
2 | class profiles::logstash::input_apache {
3 | 
4 |   logstash::configfile { '30-input_apache':
5 |     content => template('profiles/logstash/input/apache'),
6 |   }
7 | 
8 | }
9 | 
--------------------------------------------------------------------------------
/modules/profiles/manifests/logstash/input_nasa_feeds.pp:
--------------------------------------------------------------------------------
1 | # This uses the twitter input to gather a bunch of NASA twitter feeds.
2 | class profiles::logstash::input_nasa_feeds {
3 | 
4 |   $logstash_twitter           = lookup( 'logstash::twitter', { merge => deep, default_value => {} } )
5 |   $twitter_consumer_key       = $logstash_twitter['consumer_key']
6 |   $twitter_consumer_secret    = $logstash_twitter['consumer_secret']
7 |   $twitter_oauth_token        = $logstash_twitter['oauth_token']
8 |   $twitter_oauth_token_secret = $logstash_twitter['oauth_token_secret']
9 |   $nasa_keywords              = $logstash_twitter['keywords']
10 | 
11 |   logstash::configfile { '30-input_nasa_feeds':
12 |     content => template('profiles/logstash/input/nasa_feeds'),
13 |   }
14 | 
15 | }
16 | 
--------------------------------------------------------------------------------
/modules/profiles/manifests/logstash/input_redis.pp:
--------------------------------------------------------------------------------
1 | # An input that listens to a redis-list. Used for parsing-nodes.
2 | class profiles::logstash::input_redis {
3 | 
4 |   $redis_ip = lookup('redis_ip', { default_value => '127.0.0.1' } )
5 | 
6 |   logstash::configfile { '30-input_redis':
7 |     content => template('profiles/logstash/input/redis'),
8 |   }
9 | 
10 | }
11 | 
--------------------------------------------------------------------------------
/modules/profiles/manifests/logstash/input_syslog_file.pp:
--------------------------------------------------------------------------------
1 | # Deploys the Logstash input for pulling syslog file data.
2 | class profiles::logstash::input_syslog_file {
3 | 
4 |   logstash::configfile { '30-input_syslog_file':
5 |     content => template('profiles/logstash/input/syslog_file'),
6 |   }
7 | 
8 | }
9 | 
--------------------------------------------------------------------------------
/modules/profiles/manifests/logstash/input_syslog_server.pp:
--------------------------------------------------------------------------------
1 | # Runs the 'syslog' input for logstash
2 | class profiles::logstash::input_syslog_server {
3 | 
4 |   logstash::configfile { '30-input_syslog_server':
5 |     content => template('profiles/logstash/input/syslog_server'),
6 |   }
7 | 
8 | }
9 | 
--------------------------------------------------------------------------------
/modules/profiles/manifests/logstash/output_escluster.pp:
--------------------------------------------------------------------------------
1 | # Outputs logstash events to a given IP address, instead of locally.
2 | class profiles::logstash::output_escluster {
3 | 
4 |   $escluster_ip = lookup('escluster_ip', { default_value => '127.0.0.1' } )
5 | 
6 |   logstash::configfile { '50-output_direct_es':
7 |     content => template('profiles/logstash/output/escluster'),
8 |   }
9 | 
10 | }
11 | 
--------------------------------------------------------------------------------
/modules/profiles/manifests/logstash/output_onebox.pp:
--------------------------------------------------------------------------------
1 | # Outputs to the local ES repo for onebox installs.
2 | class profiles::logstash::output_onebox {
3 | 
4 |   # Because onebox uses an index named something other than 'logstash', we
5 |   # have to import our own template.
This copies the template-file to the 6 | # local file-system, and the configfile fragment will use that to 7 | # update the ES Cluster templates. New indices will get those mappings. 8 | 9 | file { '/etc/logstash/logstash.json': 10 | owner => 'logstash', 11 | group => 'logstash', 12 | source => 'puppet:///modules/profiles/logstash/templates/onebox.json', 13 | require => Logstash::Configfile['50-output_onebox_es'], 14 | notify => Service['logstash'], 15 | } 16 | 17 | logstash::configfile { '50-output_onebox_es': 18 | content => template('profiles/logstash/output/onebox_es'), 19 | } 20 | 21 | } 22 | -------------------------------------------------------------------------------- /modules/profiles/manifests/logstash/output_redis.pp: -------------------------------------------------------------------------------- 1 | # Outputs to a redis server for queuing 2 | class profiles::logstash::output_redis { 3 | 4 | $redis_ip = lookup('redis_ip', { default_value => '127.0.0.1' } ) 5 | 6 | logstash::configfile { '50-output_redis': 7 | content => template('profiles/logstash/output/redis'), 8 | } 9 | 10 | } 11 | -------------------------------------------------------------------------------- /modules/profiles/manifests/onebox_es.pp: -------------------------------------------------------------------------------- 1 | # Installs the onebox elasticsearch server and instance. 2 | class profiles::onebox_es ( 3 | $instance_name = 'logstash' 4 | ) { 5 | 6 | include profiles::elastic_key 7 | 8 | ensure_packages ( 'openjdk-8-jre-headless', { 9 | require => Exec['apt_update'], 10 | } ) 11 | 12 | # Installs the elasticsearch base install, but not an instance. 13 | class { 'elasticsearch': 14 | version => '5.4.1', 15 | manage_repo => false, 16 | repo_version => '5.x', 17 | api_host => 'localhost', 18 | api_protocol => 'http', 19 | jvm_options => [ 20 | '-Xms512m', 21 | '-Xmx512m' 22 | ], 23 | require => Exec['apt_update'], 24 | } 25 | 26 | # Installs a specific instance. This puppet module allows installing multiple 27 | # ES instances on the same host. 'service elasticsearch-onebox_nasa stop' will 28 | # stop it. 29 | elasticsearch::instance { $instance_name: } 30 | 31 | # Needed later, ensures the given instance is up and running before it 32 | # passes. 
33 | # es_instance_conn_validator { $instance_name: 34 | # server => 'localhost', 35 | # port => '9200', 36 | # require => [ Elasticsearch::Instance[$instance_name] ], 37 | # } 38 | 39 | } 40 | -------------------------------------------------------------------------------- /modules/profiles/templates/base/bash_aliases: -------------------------------------------------------------------------------- 1 | PS1='(<%= @node_type %>) \u@\H:\w\$ ' 2 | 3 | -------------------------------------------------------------------------------- /modules/profiles/templates/curator/config.yml: -------------------------------------------------------------------------------- 1 | client: 2 | hosts: 3 | - <%= @elastic_host %> 4 | port: 9200 5 | use_ssl: False 6 | timeout: 30 7 | master_only: False 8 | 9 | logging: 10 | loglevel: INFO 11 | logfile: /var/log/curator.log 12 | logformat: json 13 | -------------------------------------------------------------------------------- /modules/profiles/templates/curator/onebox_delete.yml: -------------------------------------------------------------------------------- 1 | actions: 2 | 1: 3 | action: delete_indices 4 | description: Remove old indices 5 | filters: 6 | - filtertype: pattern 7 | kind: prefix 8 | value: onebox- 9 | - filtertype: age 10 | source: name 11 | direction: older 12 | timestring: '%Y.%m.%d' 13 | unit: days 14 | unit_count: 7 15 | 16 | 17 | -------------------------------------------------------------------------------- /modules/profiles/templates/logstash/filter/apache: -------------------------------------------------------------------------------- 1 | # From modules/profiles/templates/logstash/filter/apache 2 | filter { 3 | # This says, only do this parsing on events having the 'apache' tag. 4 | if [type] == "apache" { 5 | # Because there isn't a stand-alone parser for Apache logs, we have to build one 6 | # ourselves. This requires grok. 7 | # Documentation: https://www.elastic.co/guide/en/logstash/2.4/plugins-filters-grok.html 8 | grok { 9 | match => { 10 | "message" => "^%{COMBINEDAPACHELOG}" 11 | } 12 | } 13 | } 14 | # The 'COMBINEDAPACHELOG' and 'HTTPD24_ERRORLOG' are patterns that ship with logstash. 15 | # You can see their defines here: 16 | # https://github.com/logstash-plugins/logstash-patterns-core/blob/v4.0.2/patterns/grok-patterns#L96-L98 17 | if [type] == "apache-error" { 18 | grok { 19 | match => { 20 | "message" => "^%{HTTPD24_ERRORLOG}" 21 | } 22 | } 23 | } 24 | if [type] == "apache" { 25 | # I renamed my comment-file ages ago. Anything going to mt-comment is a comment-spam bot. 26 | if [request] == "/cgi-bin/mt/mt-comments.cgi" { 27 | mutate { 28 | add_field => { 29 | "is_spam" => true 30 | "blog_target" => "comment-spam" 31 | } 32 | } 33 | } 34 | # Flag traffic going to the old blog. 35 | if [request] =~ "^\/blog" { 36 | mutate { 37 | add_field => { "blog_target" => "blogger" } 38 | } 39 | # Flag traffic going to the new blog. 40 | } else if [request] =~ "^\/mt\/blog" { 41 | mutate { 42 | add_field => { "blog_target" => "movabletype" } 43 | } 44 | # Flag traffic linking directly from images. 45 | } else if [request] =~ "^\/images" { 46 | mutate { 47 | add_field => { "blog_target" => "images" } 48 | } 49 | } 50 | 51 | # Set the timestamp of the event to the one in the Apache logs. 52 | date { 53 | match => ["timestamp", "dd/MMM/YYYY:HH:mm:ss Z"] 54 | } 55 | 56 | # if we got this far, it isn't spam. 
57 | if [is_spam] != "true" { 58 | mutate { 59 | add_field => { "is_spam" => "false" } 60 | } 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /modules/profiles/templates/logstash/filter/syslog: -------------------------------------------------------------------------------- 1 | # From modules/profiles/templates/logstash/filter/syslog_file 2 | filter { 3 | if [type] =~ "^syslog" { 4 | # Syslog parsing is handled through Grok. 5 | # Documentation: https://www.elastic.co/guide/en/logstash/2.4/plugins-filters-grok.html 6 | grok { 7 | # This will create a new field called SYSLOGMESSAGE, that contains the 8 | # data part of a syslog line. 9 | # 10 | # If given a line like: 11 | # Sep 9 19:09:50 ip-192-0-2-153 dhclient: bound to 192.0.2.153 -- renewal in 1367 seconds. 12 | # 'message' will equal "Sep 9 19:09:50 ip-192-0-2-153 dhclient: bound to 192.0.2.153 -- renewal in 1367 seconds." 13 | # 'SYSLOGMESSAGE' will equal "bound to 192.0.2.153 -- renewal in 1367 seconds." 14 | # 'timestamp' will equal "Sep 9 19:09:50" 15 | # 'logsource' will equal "ip-192-0-2-153" 16 | # 'program' will equal "dhclient" 17 | # 18 | match => { 19 | "message" => "^%{SYSLOGBASE}%{SPACE}%{GREEDYDATA:SYSLOGMESSAGE}$" 20 | } 21 | } 22 | } 23 | if [type] =~ "^syslog" and "_grokparsefailure" not in [tags] { 24 | # This replaces 'message' with the contents of 'SYSLOGMESSAGE', but only if 25 | # the grok statement just above actually worked. 26 | # After this filter, the fields will be: 27 | # 'message' will equal "bound to 192.0.2.153 -- renewal in 1367 seconds." 28 | # 'timestamp' will equal "Sep 9 19:09:50" 29 | # 'logsource' will equal "ip-192-0-2-153" 30 | # 'program' will equal "dhclient" 31 | # And SYSLOGMESSAGE will not be present at all. 32 | mutate { 33 | replace => { "message" => "%{SYSLOGMESSAGE}" } 34 | remove_field => [ "SYSLOGMESSAGE" ] 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /modules/profiles/templates/logstash/input/apache: -------------------------------------------------------------------------------- 1 | # From modules/profiles/templates/logstash/input/apache 2 | input { 3 | # This uses the 'file' input to deal with Apache logs. The documentation is 4 | # here: https://www.elastic.co/guide/en/logstash/2.4/plugins-inputs-file.html 5 | # Parsing of these will happen in the 'filter' stage. This is where the magic 6 | # happens, turning strings into searchable fields. 7 | # 8 | # Tags are added to better allow us to filter events later in the pipeline. 9 | # 10 | file { 11 | path => [ 12 | "/var/log/apache2/access.log", 13 | "/var/log/apache2/other_vhosts_access.log", 14 | "/var/log/apache2/vhosts/*.log" 15 | ] 16 | type => "apache" 17 | } 18 | file { 19 | path => [ 20 | "/var/log/apache2/error.log" 21 | ] 22 | type => "apache-error" 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /modules/profiles/templates/logstash/input/journald: -------------------------------------------------------------------------------- 1 | # From modules/profiles/templates/logstash/input/journald 2 | input { 3 | # This is not a bundled plugin. You will have to install it yourself. 4 | # It's listed as a proof-of-concept, so be careful of quality. 
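  # It can usually be added with Logstash's plugin tool, for example:
  #   bin/logstash-plugin install logstash-input-journald
  # (The plugin name is an assumption here; verify it against the Logstash
  #  plugin registry before relying on it.)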
5 |   journald {
6 |     lowercase => true
7 |     seekto    => "head"
8 |     thisboot  => true
9 |     type      => "journald"
10 |   }
11 | }
12 | 
--------------------------------------------------------------------------------
/modules/profiles/templates/logstash/input/nasa_feeds:
--------------------------------------------------------------------------------
1 | # From modules/profiles/templates/logstash/input/nasa_feeds
2 | <%- if @twitter_consumer_key == false -%>
3 | # input {
4 | #   # To get these strings, go here: https://dev.twitter.com/apps/new
5 | #   # Then update the puppet-hiera/node-type/onebox_nasa.yaml
6 | #   # See also: https://www.elastic.co/guide/en/logstash/current/plugins-inputs-twitter.html
7 | #   twitter {
8 | #     consumer_key => 'string'
9 | #     consumer_secret => 'string'
10 | #     oauth_token => 'string'
11 | #     oauth_token_secret => 'string'
12 | #     keywords => [ 'string', 'string' ]
13 | #   }
14 | # }
15 | <%- else -%>
16 | input {
17 |   # Documentation: https://www.elastic.co/guide/en/logstash/current/plugins-inputs-twitter.html
18 |   twitter {
19 |     consumer_key => "<%= @twitter_consumer_key %>"
20 |     consumer_secret => "<%= @twitter_consumer_secret %>"
21 |     oauth_token => "<%= @twitter_oauth_token %>"
22 |     oauth_token_secret => "<%= @twitter_oauth_token_secret %>"
23 |     keywords => [ "<%= @nasa_keywords.join('", "') %>" ]
24 |     type => "twitter"
25 |   }
26 | }
27 | <%- end -%>
28 | 
--------------------------------------------------------------------------------
/modules/profiles/templates/logstash/input/redis:
--------------------------------------------------------------------------------
1 | # From modules/profiles/templates/logstash/input/redis
2 | input {
3 |   # Documented here: https://www.elastic.co/guide/en/logstash/2.4/plugins-inputs-redis.html
4 |   #
5 |   # This reads events off of the 'medium_cluster' list on the locally installed
6 |   # 'redis' server. They are placed there by shipper nodes processing jobs. In
7 |   # larger environments, the 'batch_count' attribute is used to pull more jobs off
8 |   # of the redis server and reduce load on it.
9 |   redis {
10 |     host => [ "<%= @redis_ip %>" ]
11 |     data_type => "list"
12 |     codec => "json"
13 |     key => "medium_cluster"
14 |     batch_count => 50
15 |   }
16 | }
17 | 
--------------------------------------------------------------------------------
/modules/profiles/templates/logstash/input/syslog_file:
--------------------------------------------------------------------------------
1 | # From modules/profiles/templates/logstash/input/syslog_file
2 | input {
3 |   file {
4 |     path => [
5 |       "/var/log/syslog",
6 |       "/var/log/auth.log",
7 |       "/var/log/cron.log"
8 |     ]
9 |     type => "syslog-files"
10 |   }
11 | }
12 | 
--------------------------------------------------------------------------------
/modules/profiles/templates/logstash/input/syslog_server:
--------------------------------------------------------------------------------
1 | # From modules/profiles/templates/logstash/input/syslog_server
2 | input {
3 |   syslog {
4 |     # By default, it will listen on 0.0.0.0:514
5 |     # Binding to TCP and UDP:514 requires running as root on Ubuntu.
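    # An alternative, not used in this repo, is to listen on an unprivileged
    # port (for example 5514) and have the local syslog daemon forward to it.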
6 |     # Documentation: https://www.elastic.co/guide/en/logstash/current/plugins-inputs-syslog.html
7 |     port => 514
8 |     type => "syslog-server"
9 |   }
10 | }
11 | 
--------------------------------------------------------------------------------
/modules/profiles/templates/logstash/output/escluster:
--------------------------------------------------------------------------------
1 | # From modules/profiles/templates/logstash/output/escluster
2 | output {
3 |   # Documented here: https://www.elastic.co/guide/en/logstash/current/plugins-outputs-elasticsearch.html
4 |   #
5 |   # The ElasticSearch output is rather complex and can handle many cases, as you would expect for an
6 |   # output to another Elastic product. This example simply dumps everything into indexes prefixed with
7 |   # "logstash" and rotates daily.
8 |   #
9 |   elasticsearch {
10 |     hosts => [ "<%= @escluster_ip %>" ]
11 |     index => "logstash-%{+YYYY.MM.dd}"
12 |     manage_template => true
13 |   }
14 | }
15 | 
--------------------------------------------------------------------------------
/modules/profiles/templates/logstash/output/onebox_es:
--------------------------------------------------------------------------------
1 | # From modules/profiles/templates/logstash/output/onebox_es
2 | output {
3 |   # Documented here: https://www.elastic.co/guide/en/logstash/current/plugins-outputs-elasticsearch.html
4 |   #
5 |   # The ElasticSearch output is rather complex and can handle many cases, as you would expect for an
6 |   # output to another Elastic product. This example simply dumps everything into indexes prefixed with
7 |   # "onebox" and rotates daily.
8 |   #
9 |   elasticsearch {
10 |     hosts => [ "127.0.0.1" ]
11 |     index => "onebox-%{+YYYY.MM.dd}"
12 |     template => "/etc/logstash/logstash.json"
13 |     manage_template => true
14 |     template_overwrite => true
15 |   }
16 | }
17 | 
--------------------------------------------------------------------------------
/modules/profiles/templates/logstash/output/redis:
--------------------------------------------------------------------------------
1 | # From modules/profiles/templates/logstash/output/redis
2 | output {
3 |   # Documented here: https://www.elastic.co/guide/en/logstash/2.4/plugins-outputs-redis.html
4 |   #
5 |   # Outputs to the redis-server for processing by the parser nodes.
6 |   #
7 |   # For production usage, consider setting batch, batch_events, and batch_timeout
8 |   # to take advantage of bulk updates. It adds some latency to your log pipeline,
9 |   # but it takes load off of the redis node.
10 |   redis {
11 |     host => [ "<%= @redis_ip %>" ]
12 |     data_type => "list"
13 |     codec => "json"
14 |     key => "medium_cluster"
15 |   }
16 | 
17 | }
18 | 
--------------------------------------------------------------------------------
/modules/roles/manifests/apache.pp:
--------------------------------------------------------------------------------
1 | # Sets up the logstash environment for an Apache server, outputting
2 | # to a variety of things, based on environment.
3 | class roles::apache {
4 | 
5 |   include profiles::base
6 |   include profiles::apache_stub
7 |   include profiles::kibana_network
8 | 
9 |   # Running as root to read the syslog file.
10 |   # However, if you add the 'logstash' user to the 'adm' group,
11 |   # you can read these files normally. Exercise for the reader.
12 |   class { 'profiles::logstash':
13 |     run_as_root => true,
14 |   }
15 | 
16 |   include profiles::logstash::input_syslog_file
17 |   include profiles::logstash::input_apache
18 | 
19 |   # Change our behavior based on environment.
20 | # Small: Parse locally, send to ElasticSearch. 21 | # Medium: Input locally, send to Redis, parse later. 22 | case $::env_type { 23 | 'small': { 24 | include profiles::logstash::filter_syslog 25 | include profiles::logstash::filter_apache 26 | include profiles::logstash::output_escluster 27 | } 28 | 'medium': { include profiles::logstash::output_redis } 29 | default: { include profiles::logstash::output_escluster } 30 | } 31 | 32 | } 33 | -------------------------------------------------------------------------------- /modules/roles/manifests/escluster.pp: -------------------------------------------------------------------------------- 1 | # Turns the box into an all-in-one ElasticSearch box. 2 | class roles::escluster { 3 | 4 | include profiles::base 5 | include profiles::escluster 6 | 7 | # Running as root to read the syslog file. 8 | # However, if you add the 'logstash' user to the 'adm' group, 9 | # you can read these files normally. Exercise for the reader. 10 | class { 'profiles::logstash': 11 | run_as_root => true, 12 | } 13 | 14 | include profiles::logstash::input_syslog_file 15 | include profiles::logstash::filter_syslog 16 | # Since this IS the ES box, output to itself. 17 | include profiles::logstash::output_escluster 18 | 19 | } 20 | -------------------------------------------------------------------------------- /modules/roles/manifests/init.pp: -------------------------------------------------------------------------------- 1 | # This is the base of the 'roles' class. 2 | class roles {} 3 | -------------------------------------------------------------------------------- /modules/roles/manifests/mdcluster.pp: -------------------------------------------------------------------------------- 1 | # Creates a combined elasticsearch/redis cluster. 2 | # Don't do this in prod! 3 | class roles::mdcluster { 4 | 5 | include profiles::base 6 | include profiles::escluster 7 | include profiles::logredis 8 | 9 | # Running as root to read the syslog file. 10 | # However, if you add the 'logstash' user to the 'adm' group, 11 | # you can read these files normally. Exercise for the reader. 12 | class { 'profiles::logstash': 13 | run_as_root => true, 14 | } 15 | 16 | # Read from the redis list, as this is a parser node. 17 | include profiles::logstash::input_redis 18 | 19 | # Fetch the local syslog, since we do that. 20 | include profiles::logstash::input_syslog_file 21 | 22 | # Include appropriate filters for all that we do. 23 | include profiles::logstash::filter_syslog 24 | include profiles::logstash::filter_apache 25 | # Since this IS the ES box, output to itself. 26 | include profiles::logstash::output_escluster 27 | 28 | 29 | } 30 | -------------------------------------------------------------------------------- /modules/roles/manifests/onebox_nasa.pp: -------------------------------------------------------------------------------- 1 | # This role constructs an example LogStash box that pulls information from 2 | # various NASA twitter feeds. It contains: 3 | # - LogStash 4 | # - ElasticSearch 5 | # - Kibana 6 | # 7 | class roles::onebox_nasa { 8 | 9 | include profiles::base 10 | 11 | #### Set up the local elasticsearch 12 | class { 'profiles::onebox_es': 13 | instance_name => 'onebox_nasa' 14 | } 15 | 16 | include profiles::logstash 17 | include profiles::logstash::output_onebox 18 | include profiles::logstash::input_nasa_feeds 19 | 20 | # Next, set up Kibana. 
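  # (The require on the ES instance service below keeps Kibana from being
  #  configured before ElasticSearch is up.)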
21 | 
22 |   class { 'profiles::kibana_local':
23 |     require => Service['elasticsearch-instance-onebox_nasa']
24 |   }
25 | 
26 | }
27 | 
--------------------------------------------------------------------------------
/modules/roles/manifests/onebox_syslog.pp:
--------------------------------------------------------------------------------
1 | # This role constructs an example LogStash box that sets up a syslog-plugin
2 | # based logstash service. It contains:
3 | # - LogStash
4 | # - ElasticSearch
5 | # - Kibana
6 | #
7 | class roles::onebox_syslog {
8 | 
9 |   include profiles::base
10 | 
11 |   #### Set up the local elasticsearch
12 |   class { 'profiles::onebox_es':
13 |     instance_name => 'onebox_syslog'
14 |   }
15 | 
16 |   # We need Java; this gets it.
17 |   ensure_packages ( 'openjdk-8-jre-headless', { require => Exec['apt_update'] } )
18 | 
19 |   ## Next, set up logstash.
20 |   # Unlike onebox_nasa, we need to run as root in order to bind
21 |   # UDP/514, so we're setting run_as_root = true.
22 |   class { 'profiles::logstash':
23 |     run_as_root => true,
24 |   }
25 | 
26 |   include profiles::logstash::output_onebox
27 |   include profiles::logstash::input_syslog_server
28 |   include profiles::logstash::input_syslog_file
29 |   include profiles::logstash::filter_syslog
30 | 
31 |   # Next, set up Kibana.
32 | 
33 |   class { 'profiles::kibana_local': }
34 | 
35 |   # Next, get Curator set up
36 | 
37 |   include profiles::curator
38 | 
39 |   # Remove onebox indexes older than a week.
40 |   profiles::curator_job { 'onebox_delete':
41 |     jobfile     => template('profiles/curator/onebox_delete.yml'),
42 |     cron_hour   => '3',
43 |     cron_minute => '5',
44 |   }
45 | 
46 | }
47 | 
--------------------------------------------------------------------------------
/prep_environment:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # This script preps the repo for use as a teaching tool.
4 | 
5 | echo "Checking out submodules"
6 | git submodule init
7 | git submodule update
8 | if [[ ! -d ../puppet-hiera && -d ./puppet-hiera ]]; then
9 |   echo "Setting up puppet-hiera directory."
10 |   cp -a puppet-hiera ../
11 | elif [ ! -f ../puppet-hiera/common.yaml ]; then
12 |   echo "It seems ../puppet-hiera exists, but doesn't have the right files."
13 |   echo "It should contain the contents of ./puppet-hiera."
14 | fi
15 | 
--------------------------------------------------------------------------------
/puppet-hiera/common.yaml:
--------------------------------------------------------------------------------
1 | logstash:
2 |   twitter:
3 |     follows:
4 |       - evil
5 | 
--------------------------------------------------------------------------------
/puppet-hiera/env-type/medium.yaml:
--------------------------------------------------------------------------------
1 | escluster_ip: '192.168.99.10'
2 | redis_ip: '192.168.99.10'
3 | 
--------------------------------------------------------------------------------
/puppet-hiera/env-type/small.yaml:
--------------------------------------------------------------------------------
1 | escluster_ip: '192.168.99.10'
2 | 
--------------------------------------------------------------------------------
/puppet-hiera/hiera.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | version: 5
3 | defaults:
4 |   datadir: /etc/puppet-hiera/
5 |   data_hash: yaml_data
6 | hierarchy:
7 |   - name: "Yaml lookup hierarchy"
8 |     paths:
9 |       - "node-type/%{node_type}.yaml"
10 |       - "env-type/%{env_type}.yaml"
11 |       - common
12 | 
--------------------------------------------------------------------------------
/puppet-hiera/node-type/onebox_nasa.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | # To make the twitter demo work, you will need your own twitter API
3 | # credentials: https://apps.twitter.com/app/new
4 | logstash::twitter:
5 |   consumer_key: false
6 |   consumer_secret: false
7 |   oauth_token: false
8 |   oauth_token_secret: false
9 |   keywords:
10 |     - nasa
11 |     - LISA16
12 | 
--------------------------------------------------------------------------------
/vup:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | if [ -f /etc/lsb-release ]; then
4 |   . /etc/lsb-release
5 |   UBU_RELEASE=$DISTRIB_CODENAME
6 | else
7 |   UBU_RELEASE='trusty'
8 | fi
9 | 
10 | function puppet_up {
11 |   echo "Setting up Puppetlabs repo."
12 |   echo "# Puppetlabs products
13 | deb http://apt.puppetlabs.com ${UBU_RELEASE} PC1
14 | " > /etc/apt/sources.list.d/puppetlabs.list
15 | 
16 |   echo "Pinning puppet-agent to puppetlabs"
17 |   echo "Package: puppet-agent
18 | Pin: origin apt.puppetlabs.com
19 | Pin-Priority: 1000
20 | " > /etc/apt/preferences.d/puppetlabs.pref
21 | 
22 |   curl -s http://apt.puppetlabs.com/pubkey.gpg | apt-key add -
23 |   apt-get update
24 |   apt-get remove puppet-common -y
25 |   apt-get purge puppet-common -y
26 |   apt-get install puppet-agent=1.10.3-1${UBU_RELEASE} -y
27 |   ln -s /opt/puppetlabs/bin/facter /usr/local/bin/facter
28 |   ln -s /opt/puppetlabs/bin/puppet /usr/local/bin/puppet
29 | }
30 | 
31 | echo "Checking whether Puppet has already been upgraded..."
32 | if [ -f /root/puppet_upgraded ]; then
33 |   echo "Puppet already upgraded, skipping to next provisioner."
34 |   exit 0
35 | else
36 |   echo "Disabling Chef client..."
37 |   service chef-client stop
38 |   update-rc.d chef-client disable
39 |   echo "Running puppet..."
40 |   puppet_up
41 |   touch /root/puppet_upgraded
42 | fi
43 | 
--------------------------------------------------------------------------------