├── .gitignore ├── .gitmodules ├── CHANGELOG.md ├── LICENSE ├── README.md ├── Vagrantfile ├── environments └── production │ └── .gitkeep ├── examples ├── complex_logstash.md ├── curator_cleanup.md ├── curator_snapshot_complex.md ├── field_cleanup.md ├── filebeat.md ├── grokking_rules.md ├── grokking_syslog.md ├── large_distributed_source.md ├── large_routed_router.md ├── multiple_filter.md ├── multiple_filter_lines.md ├── output_hipchat.md ├── threading_example_parallel.md ├── threading_example_singlethread.md └── winlogbeat.md ├── hiera.yaml ├── manifests └── site.pp ├── modules ├── profiles │ ├── files │ │ ├── apache │ │ │ └── sysadmin1138-net.basis │ │ ├── base │ │ │ └── hcl.vim │ │ └── logstash │ │ │ └── templates │ │ │ └── onebox.json │ ├── manifests │ │ ├── apache_stub.pp │ │ ├── base.pp │ │ ├── curator.pp │ │ ├── curator_job.pp │ │ ├── elastic_key.pp │ │ ├── escluster.pp │ │ ├── init.pp │ │ ├── kibana_local.pp │ │ ├── kibana_network.pp │ │ ├── logredis.pp │ │ ├── logstash.pp │ │ ├── logstash │ │ │ ├── filter_apache.pp │ │ │ ├── filter_syslog.pp │ │ │ ├── input_apache.pp │ │ │ ├── input_nasa_feeds.pp │ │ │ ├── input_redis.pp │ │ │ ├── input_syslog_file.pp │ │ │ ├── input_syslog_server.pp │ │ │ ├── output_escluster.pp │ │ │ ├── output_onebox.pp │ │ │ └── output_redis.pp │ │ └── onebox_es.pp │ └── templates │ │ ├── base │ │ └── bash_aliases │ │ ├── curator │ │ ├── config.yml │ │ └── onebox_delete.yml │ │ └── logstash │ │ ├── filter │ │ ├── apache │ │ └── syslog │ │ ├── input │ │ ├── apache │ │ ├── journald │ │ ├── nasa_feeds │ │ ├── redis │ │ ├── syslog_file │ │ └── syslog_server │ │ └── output │ │ ├── escluster │ │ ├── onebox_es │ │ └── redis └── roles │ └── manifests │ ├── apache.pp │ ├── escluster.pp │ ├── init.pp │ ├── mdcluster.pp │ ├── onebox_nasa.pp │ └── onebox_syslog.pp ├── prep_environment ├── puppet-hiera ├── common.yaml ├── env-type │ ├── medium.yaml │ └── small.yaml ├── hiera.yaml └── node-type │ └── onebox_nasa.yaml └── vup /.gitignore: -------------------------------------------------------------------------------- 1 | .vagrant 2 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "modules/elasticsearch"] 2 | path = modules/elasticsearch 3 | url = https://github.com/elastic/puppet-elasticsearch.git 4 | branch = tag/5.4.3 5 | [submodule "modules/stdlib"] 6 | path = modules/stdlib 7 | url = https://github.com/puppetlabs/puppetlabs-stdlib.git 8 | branch = releases/tag/4.19.0 9 | [submodule "modules/apt"] 10 | path = modules/apt 11 | url = https://github.com/puppetlabs/puppetlabs-apt.git 12 | branch = release/tag/4.1.0 13 | [submodule "modules/yum"] 14 | path = modules/yum 15 | url = https://github.com/CERIT-SC/puppet-yum.git 16 | branch = releases/tag/0.9.8 17 | [submodule "modules/datacat"] 18 | path = modules/datacat 19 | url = https://github.com/richardc/puppet-datacat.git 20 | branch = 5a6114cd41823160bdb01dad6eb0e2af85a8fa69 21 | [submodule "modules/logstash"] 22 | path = modules/logstash 23 | url = https://github.com/elastic/puppet-logstash.git 24 | branch = releases/tag/5.2.1 25 | [submodule "modules/kibana4"] 26 | path = modules/kibana4 27 | url = https://github.com/lesaux/puppet-kibana4.git 28 | branch = v1.0.17 29 | [submodule "modules/kibana"] 30 | path = modules/kibana 31 | url = https://github.com/elastic/puppet-kibana.git 32 | branch = releases/tag/5.1.0 33 | [submodule "modules/apache"] 34 | path = modules/apache 35 | 
url = https://github.com/puppetlabs/puppetlabs-apache.git 36 | branch = 1.10.0 37 | [submodule "modules/redis"] 38 | path = modules/redis 39 | url = https://github.com/arioch/puppet-redis.git 40 | branch = 1.2.3 41 | [submodule "modules/inifile"] 42 | path = modules/inifile 43 | url = https://github.com/puppetlabs/puppetlabs-inifile.git 44 | branch = 1.6.0 45 | [submodule "modules/concat"] 46 | path = modules/concat 47 | url = https://github.com/puppetlabs/puppetlabs-concat.git 48 | branch = 4.0.1 49 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## v0.2.1 [2017-09-02] 4 | 5 | #### Release Notes 6 | This is mostly a documentation update. No significant technical changes. 7 | 8 | * Update examples under `examples/` 9 | 10 | ## v0.2.0 [2017-06-18] 11 | 12 | #### Release Notes 13 | This updates the existing repository to support the following: 14 | 15 | * Puppet 4 for provisioning 16 | * Updated Vagrant provisioning to reflect the Puppet 4 changes 17 | * Update the base box used from Ubuntu 14.04 to Ubuntu 16.10 18 | * Update the ElasticSearch, LogStash, and Kibana versions to 2.4 to 5.4 19 | 20 | ## v0.1.0 [2016-10-16] 21 | 22 | #### Release Notes 23 | Initial release, shipped with the LISA 2016 tutorial USB key. 24 | 25 | This release contains definitions for the following Vagrant boxes: 26 | 27 | * `onebox_nasa`: An all-in-one box to demo the twitter logstash integration, with Kibana. 28 | * `onebox_syslog`: An all-in-one box to demo a syslog-server, with curator and Kibana. 29 | * `small_escluster`: An all-in-one ElasticSearch cluster-box, with minimal on-box Logstash. 30 | * `small_apache`: An emulated Apache server, with logstash, that ships data to the `small_escluster` box. With Kibana. 31 | * `medium_mdcluster`: A combined ElasticSearch cluster-box with, redis, and complex Logstash parsing rules. 32 | * `medium_apache`: Like `small_apache`, but only ships events to the redis server on `medium_mdcluster`. Hosts Kibana. 33 | 34 | At this time, this repo supports the following versions of Elastic products: 35 | 36 | * **Logstash**: 2.4 37 | * **ElasticSearch**: 2.4 38 | * **Kibana**: 4.6 39 | * **Curator**: 4.1.2 40 | 41 | ElasticStack 5.0 is in beta, and not yet supported by all of the puppet components. 42 | As a result, 5 is not yet implemented here. By the time of the LISA conference, 43 | this support may have been added in. 44 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (C) 2016 hellosign.com 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Logstash Fundamentals 2 | This repo is intended to be a learning tool for [Logstash by Elastic.co](https://www.elastic.co/guide/en/logstash/current/index.html). 3 | The examples here should get you familiar with the basic structure of Logstash, 4 | and on the way to a working proof-of-concept. 5 | 6 | This uses [Puppet 3.8](https://docs.puppet.com/puppet/3.8/reference/) for provisioning 7 | the vagrant boxes. The intent is to also show how Logstash could be managed through 8 | a configuration management product, and to break down the installation components. 9 | By using modules from Puppet Forge, this allows quick setup. 10 | 11 | ## Requirements 12 | 13 | * Vagrant version 1.7 or newer. 14 | * Virtual Box, or VMware Fusion. 15 | * At least 10GB of free disk-space for boxes. 16 | * Internet connection capable of downloading ~300MB (Java and Logstash/ElasticSearch packages) each vagrant run without you losing interest. 17 | 18 | ## Setup 19 | All of the demos use an Ubuntu Trusty (14.04) box. You will need Linux skills 20 | to move around the filesystem and examine files. 21 | 22 | ### Linux and Mac 23 | 1. Clone this repo. 24 | 1. While in the repo, run `./prep_environment` 25 | * This will checkout the submodules and copy the Hiera details to the parent directory, where Vagrant will use them. 26 | * This was done to allow you to make your own changes without worrying about committing secrets. 27 | 28 | ### Windows 29 | 1. Clone this repo. 30 | 1. While in the repo, run `git submodule init`, or equivalent. 31 | 1. Then run, `git submodule update`, or equivalent. 32 | 1. Copy the `puppet-hiera` directory to the parent directory of this repo. 33 | * This allows Vagrant to use it for its work, and to allow you to make your own changes without worrying about committing secrets. 34 | 35 | ## License 36 | ``` 37 | The MIT License (MIT) 38 | 39 | Copyright (C) 2016 hellosign.com 40 | 41 | Permission is hereby granted, free of charge, to any person obtaining a copy 42 | of this software and associated documentation files (the "Software"), to deal 43 | in the Software without restriction, including without limitation the rights 44 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 45 | copies of the Software, and to permit persons to whom the Software is 46 | furnished to do so, subject to the following conditions: 47 | 48 | The above copyright notice and this permission notice shall be included in all 49 | copies or substantial portions of the Software. 50 | 51 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 52 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 53 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 54 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 55 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 56 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 57 | SOFTWARE. 58 | ``` 59 | 60 | -------------------------------------------------------------------------------- /Vagrantfile: -------------------------------------------------------------------------------- 1 | # vi: set syntax=ruby 2 | Vagrant.require_version ">= 1.7.0" 3 | 4 | # Create some happy little VMs to demo on. 5 | 6 | small_environment = { 7 | :escluster => '192.168.99.10', 8 | :apache => '192.168.99.20' 9 | } 10 | 11 | medium_environment = { 12 | :mdcluster => '192.168.99.10', 13 | :apache => '192.168.99.20' 14 | } 15 | 16 | Vagrant.configure("2") do |config| 17 | 18 | # Define the base box we want to play with, and some always-on-everything 19 | # items. 20 | config.vm.box = 'bento/ubuntu-16.10' 21 | config.vm.provision "shell", :path => "vup" 22 | # Define the Hiera directory 23 | # Use this for your own. 24 | config.vm.synced_folder "../puppet-hiera", "/etc/puppet-hiera" 25 | # Use this for the repo's version of hiera 26 | # config.vm.synced_folder "hiera/" "/etc/puppet-hiera" 27 | 28 | puppet_common = proc do |puppet| 29 | puppet.manifests_path = "manifests" 30 | puppet.manifest_file = "site.pp" 31 | puppet.module_path = "modules" 32 | puppet.environment_path = "environments" 33 | puppet.environment = "production" 34 | puppet.hiera_config_path = "hiera.yaml" 35 | puppet.working_directory = "/tmp/vagrant-puppet" 36 | end 37 | 38 | config.vm.define :onebox_nasa do |onebox_nasa| 39 | onebox_nasa.vm.hostname = 'oneboxnasa' 40 | onebox_nasa.vm.network "private_network", ip: "192.168.99.20" 41 | onebox_nasa.vm.provision :puppet do |puppet| 42 | puppet_common.call(puppet) 43 | puppet.facter = { 44 | "node_type" => 'onebox_nasa', 45 | "hostname" => 'onebox_nasa', 46 | "env_type" => 'onebox' 47 | } 48 | end 49 | onebox_nasa.vm.provider :virtualbox do |vb| 50 | vb.memory = '1256' 51 | vb.cpus = 2 52 | vb.customize ["modifyvm", :id, "--nictype1", "virtio"] 53 | end 54 | onebox_nasa.vm.provider :vmware_fusion do |vb| 55 | vb.vmx["memsize"] = '1256' 56 | vb.vmx["numvcpus"] = 2 57 | end 58 | end 59 | 60 | config.vm.define :onebox_syslog do |onebox_syslog| 61 | onebox_syslog.vm.hostname = 'oneboxsyslog' 62 | onebox_syslog.vm.network "private_network", ip: "192.168.99.20" 63 | onebox_syslog.vm.provision :puppet do |puppet| 64 | puppet_common.call(puppet) 65 | puppet.facter = { 66 | "node_type" => 'onebox_syslog', 67 | "hostname" => 'onebox_syslog', 68 | "env_type" => 'onebox' 69 | } 70 | end 71 | onebox_syslog.vm.provider :virtualbox do |vb| 72 | vb.memory = '1256' 73 | vb.cpus = 2 74 | vb.customize ["modifyvm", :id, "--nictype1", "virtio"] 75 | end 76 | onebox_syslog.vm.provider :vmware_fusion do |vb| 77 | vb.vmx["memsize"] = '1256' 78 | vb.vmx["numvcpus"] = 2 79 | end 80 | end 81 | 82 | # This iterator builds the Vagrant definitions for all of the small_environment 83 | # machines, defined at the top. 
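  # With the hash above, this defines "small_escluster" (192.168.99.10) and
  # "small_apache" (192.168.99.20), each provisioned with the shared puppet_common
  # settings plus per-node "node_type" and "env_type" facter facts.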
84 | small_environment.keys.each do |node_name| 85 | config.vm.define "small_#{node_name}" do |node| 86 | node.vm.hostname = "#{node_name}" 87 | node.vm.network :private_network, ip: small_environment[node_name] 88 | node.vm.provision :puppet do |puppet| 89 | puppet_common.call(puppet) 90 | puppet.facter = { 91 | "node_type" => "#{node_name}", 92 | "env_type" => "small" 93 | } 94 | end 95 | node.vm.provider :virtualbox do |vb| 96 | vb.memory = '1256' 97 | vb.cpus = '2' 98 | vb.customize ["modifyvm", :id, "--nictype1", "virtio"] 99 | end 100 | node.vm.provider :vmware_fusion do |vb| 101 | vb.vmx["memsize"] = '1024' 102 | vb.vmx["numcpus"] = 2 103 | end 104 | end 105 | end 106 | 107 | # This iterator builds the Vagrant definitions for all of the medium_environment 108 | # machines, defined at the top. 109 | medium_environment.keys.each do |node_name| 110 | config.vm.define "medium_#{node_name}" do |node| 111 | node.vm.hostname = "#{node_name}" 112 | node.vm.network :private_network, ip: medium_environment[node_name] 113 | node.vm.provision :puppet do |puppet| 114 | puppet_common.call(puppet) 115 | puppet.facter = { 116 | "node_type" => "#{node_name}", 117 | "env_type" => "medium" 118 | } 119 | end 120 | node.vm.provider :virtualbox do |vb| 121 | vb.memory = '1256' 122 | vb.cpus = '2' 123 | vb.customize ["modifyvm", :id, "--nictype1", "virtio"] 124 | end 125 | node.vm.provider :vmware_fusion do |vb| 126 | vb.vmx["memsize"] = '1024' 127 | vb.vmx["numcpus"] = 2 128 | end 129 | end 130 | end 131 | 132 | end 133 | -------------------------------------------------------------------------------- /environments/production/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellosign/logstash-fundamentals/d99189b93d4c57312eaaeb8150435a2eefd41a27/environments/production/.gitkeep -------------------------------------------------------------------------------- /examples/complex_logstash.md: -------------------------------------------------------------------------------- 1 | # Example Complex LogStash File 2 | In this example, we're loading data from N sources, applying several filters, 3 | and outputting it to two different sources. This example includes some organic 4 | cruft! Like a real, live config-file would. 5 | 6 | 7 | ```ruby 8 | # Pull in syslog data 9 | input { 10 | file { 11 | path => [ 12 | "/var/log/syslog", 13 | "/var/log/auth.log" 14 | ] 15 | type => "syslog" 16 | } 17 | } 18 | 19 | # Pull in application-log data. They emit data in JSON form. 20 | input { 21 | file { 22 | path => [ 23 | "/var/log/app/worker_info.log", 24 | "/var/log/app/broker_info.log", 25 | "/var/log/app/supervisor.log" 26 | ] 27 | exclude => "*.gz" 28 | type => "applog" 29 | codec => "json" 30 | } 31 | } 32 | 33 | # Set up a couple of UDP listeners for network-based logging. 34 | # Perhaps we're experimenting with not logging to files! 35 | input { 36 | udp { 37 | port => "8192" 38 | host => "localhost" 39 | type => "applog" 40 | codec => "json" 41 | } 42 | udp { 43 | port => "8193" 44 | host => "localhost" 45 | type => "controllog" 46 | codec => "json" 47 | } 48 | } 49 | 50 | filter { 51 | # The broad filter on Syslog. 52 | if [type] == "syslog" { 53 | grok { 54 | match => { 55 | "message" => "%{SYSLOGBASE}%{SPACE}%{GREEDYDATA:SYSLOGMESSAGE}" 56 | } 57 | } 58 | 59 | # Turn the log timestamp into a true event timestamp. 
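    # (The date filter writes the parsed value into @timestamp by default, so the
    # event is indexed by when it was logged rather than when Logstash read it.)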
60 | date { 61 | match => [ "timestamp", "MMM d HH:mm:ss", "MMM dd HH:mm:ss" ] 62 | } 63 | } 64 | 65 | # App-logs are already formatted thanks to JSON, so much less grokking. 66 | # But we still have to do a few things. 67 | 68 | # Parse the timestamp in the network inputs. 69 | if [type] == "applog" OR [type] == "controllog" { 70 | date { 71 | match => [ "info.timestamp", "ISO8601" ] 72 | } 73 | } 74 | 75 | # Drop the debug lines in the info feeds. 76 | if [type] == "applog" AND [info][message] =~ "^DEBUG:" { 77 | drop {} 78 | } 79 | 80 | # Parse the metrics data encoded in a field. 81 | if [type] == "applog" AND [info][message] =~ "^metrics: " { 82 | grok { 83 | match => { 84 | "info.message" => "^metrics: %{GREEDYDATA:metrics_raw}$" 85 | } 86 | tag => [ "metrics" ] 87 | } 88 | } 89 | 90 | # Parse that key-value field we just found. And drop the 'raw' field. 91 | if "metrics" in [tags] { 92 | kv { 93 | source => "metrics_raw" 94 | target => "metrics" 95 | remove_field => "metrics_raw" 96 | } 97 | } 98 | 99 | if ([type] == "applog" OR [type] == "controllog") AND [supervisor][event_type] == "auth" { 100 | mutate { 101 | add_tag => [ "audit" ] 102 | } 103 | } else { 104 | mutate { 105 | add_tag => [ "logline" ] 106 | } 107 | } 108 | } 109 | 110 | # Finally, the outputs 111 | output { 112 | 113 | if "logline" in [tags] { 114 | elasticsearch { 115 | hosts => [ 116 | "localhost", 117 | "logelastic.prod.internal" 118 | ] 119 | template_name => "logstash" 120 | index => "logstash-{+YYYY.MM.dd}" 121 | } 122 | } else if "audit" in [tags] { 123 | elasticsearch { 124 | hosts => [ 125 | "localhost", 126 | "logelastic.prod.internal" 127 | ] 128 | template_name => "audit" 129 | index => "audit-{+xxxx.ww}" 130 | } 131 | } 132 | 133 | if "metrics" in [tags] { 134 | influxdb { 135 | host => "influx.prod.internal" 136 | db => "logstash" 137 | measurement => "appstats" 138 | # This next bit only works because it is already a hash. 139 | data_points => "%{metrics}" 140 | } 141 | } 142 | } 143 | 144 | ``` 145 | -------------------------------------------------------------------------------- /examples/curator_cleanup.md: -------------------------------------------------------------------------------- 1 | # Curator Cleanup 2 | In this example, old indices and snapshots are cleaned up from the ElasticSearch cluster. 3 | 4 | ```yaml 5 | actions: 6 | 1: 7 | action: delete_indices 8 | description: "Removes logstash indices older than 28 days." 9 | filters: 10 | - filtertype: pattern 11 | kind: prefix 12 | value: logstash- 13 | - filtertype: age 14 | source: name 15 | timestring: '%Y.%m.%d' 16 | direction: older 17 | unit: days 18 | unit_count: 28 19 | 2: 20 | action: delete_snapshot 21 | description: "Remove logstash backups older than 6 months" 22 | options: 23 | repository: logstash_backup 24 | filters: 25 | - filtertype: pattern 26 | kind: prefix 27 | value: 'logstash-' 28 | - filtertype: age 29 | source: creation_date 30 | direction: older 31 | unit: months 32 | unit_count: 6 33 | ``` 34 | 35 | -------------------------------------------------------------------------------- /examples/curator_snapshot_complex.md: -------------------------------------------------------------------------------- 1 | # Two Curator examples. A snapshot-yesterday example, and a snapshot-hourly. 2 | 3 | This uses a trick of filtering to figure out what 'yesterday' is, and snapshot 4 | that. Makes sure you're snapshotting a quiet index! 
5 | ```yaml 6 | actions: 7 | 1: 8 | action: snapshot 9 | description: Snapshot the yesterday index for onebox. 10 | options: 11 | repository: logstash_backup 12 | name: onebox-%Y.%m.%d 13 | filters: 14 | - filtertype: pattern 15 | kind: prefix 16 | value: 'onebox-' 17 | - filtertype: age 18 | source: name 19 | timestring: %Y.%m.%d 20 | direction: older 21 | unit: days 22 | unit_count: 1 23 | - filtertype: count 24 | count: 1 25 | reverse: True 26 | ``` 27 | This works by filtering on the pattern (`onebox-`) to get just the indices we 28 | care about. Then by filtering on the age of the index, to get the list of indices 29 | older than a day. Finally, we pull exactly one index out, which is the newest 30 | index in that list of old indexes. 31 | 32 | This next example, an hourly snapshot is taken of the 'audit' index, and 33 | a second job to delete the old ones. To make it interesting, the 'audit' index 34 | rotates weekly, not daily. 35 | 36 | ```yaml 37 | actions: 38 | 1: 39 | action: snapshot 40 | description: "Hourly snapshot of the audit index" 41 | options: 42 | repository: logstash_backup 43 | name: hraudit-%Y%m%d%H 44 | filters: 45 | - filtertype: pattern 46 | kind: timestring 47 | value: '%G.%V' 48 | - filtertype: pattern 49 | kind: prefix 50 | value: 'audit-' 51 | 2: 52 | action: delete_snapshot 53 | description: "Remove old hourly snapshots of the audit index" 54 | options: 55 | repository: logstash_backup 56 | filters: 57 | - filtertype: pattern 58 | kind: prefix 59 | value: 'hraudit-' 60 | - filtertype: age 61 | source: creation_date 62 | direction: older 63 | unit: hours 64 | unit_count: 26 65 | ``` 66 | Then another one to snapshot it after the next week has started. As these have 67 | a seven year retention period (ick), there is no snapshot-removal step. 68 | 69 | ```yaml 70 | actions: 71 | 1: 72 | action: snapshot 73 | description: "Snapshot the last-week index for audit" 74 | options: 75 | repository: logstash_backup 76 | name: audit-%G.%V 77 | filters: 78 | - filtertype: pattern 79 | kind: prefix 80 | value: 'audit-' 81 | - filtertype: age 82 | source: name 83 | timestring: %G.%V 84 | direction: older 85 | unit: weeks 86 | unit_count: 1 87 | - filtertype: count 88 | count: 1 89 | reverse: True 90 | 91 | ``` 92 | These would be launched through cron. The executions would look something like: 93 | 94 | ```shell 95 | /usr/local/bin/curator --config /etc/curator/curator.yml /etc/curator/snap_audit-hourly.yml 96 | /usr/local/bin/curator --config /etc/curator/curator.yml /etc/curator/snap_audit-weekly.yml 97 | ``` 98 | 99 | ## Restoring those snapshots 100 | You can restore with curator as well! Here is an example of restoring from 101 | the most recent hourly backup of that audit index. It's smart enough to 102 | know that you want the 'most recent' unless told otherwise. 103 | 104 | ```yaml 105 | actions: 106 | 1: 107 | action: restore 108 | description: "Restore the most recent 'audit' snapshot." 109 | options: 110 | repository: logstash-backup 111 | partial: False 112 | filters: 113 | - filtertype: pattern 114 | kind: prefix 115 | value: 'hraudit-' 116 | - filtertype: state 117 | state: SUCCESS 118 | ``` 119 | You can set this yaml file somewhere for use in your disaster-recovery 120 | runbooks. 
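For reference, the `curator.yml` passed via `--config` in the cron commands above holds
only client-connection and logging settings; the actions live in the per-job files. A
minimal sketch (the host, port, and log level here are placeholders, not values from
this repo):

```yaml
client:
  hosts:
    - 127.0.0.1
  port: 9200
  timeout: 30
logging:
  loglevel: INFO
```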
121 | -------------------------------------------------------------------------------- /examples/field_cleanup.md: -------------------------------------------------------------------------------- 1 | # Field Cleanup Example 2 | In this example, we show a filter-block that casts various fields to specific 3 | data-types. This is done to ensure type-conversions are handled correctly, and 4 | to ensure that the generated index can be loaded after a future ElasticSearch 5 | 2.x upgrade. 6 | 7 | ```ruby 8 | filter { 9 | 10 | mutate { 11 | convert => { 12 | "priority" => "string", 13 | "value" => "float" 14 | "response_code" => "long" 15 | } 16 | } 17 | 18 | if "metric" in [tags] { 19 | mutate { 20 | convert => { "metric_value" => "float" } 21 | } 22 | } 23 | 24 | if [type] == "cheese_api" { 25 | mutate { 26 | convert => { 27 | "status_code" => "long", 28 | "runtime" => "float" 29 | } 30 | remove_field => [ "subtype" ] 31 | } 32 | } 33 | 34 | } 35 | ``` 36 | 37 | Put this type of block at the end of your filter-chains, the last step before 38 | the pipeline enters the `output {}` stage. This is a cleanup step. For larger 39 | environments, you'll still need this even after getting to ES 2.x, simply to 40 | catch problems earlier. 41 | -------------------------------------------------------------------------------- /examples/filebeat.md: -------------------------------------------------------------------------------- 1 | # FileBeat Examples 2 | [FileBeat is part of the Beats framework, designed to replace the `file` input 3 | on logstash](https://www.elastic.co/guide/en/beats/filebeat/current/index.html). 4 | Here are a couple of examples of FileBeat configurations. 5 | 6 | ```yaml 7 | filebeat: 8 | prospectors: 9 | - 10 | paths: 11 | - "/var/log/syslog" 12 | - "/var/log/auth.log" 13 | input_type: log 14 | document_type: syslog 15 | - 16 | paths: 17 | - "/var/log/apache2/*.log" 18 | input_type: log 19 | document_type: apache 20 | output: 21 | logstash: 22 | hosts: [ "prodstash.prod.internal:5044" ] 23 | 24 | ``` 25 | This configuration monitors two system logfiles, setting their LogStash type to 26 | be `syslog`, and monitors apache logs, setting their LogStash type to `apache`. 27 | Completed events are then sent to a LogStash instance running the `beats` input 28 | on port 5044. 29 | 30 | ```yaml 31 | filebeat: 32 | prospectors: 33 | - 34 | paths: 35 | - "/var/log/syslog" 36 | - "/var/log/auth.log" 37 | input_type: log 38 | document_type: syslog 39 | - 40 | paths: 41 | - "/var/log/app/api_callback*" 42 | input_type: log 43 | document_type: applog 44 | exclude_files: [ '\.gz$' ] 45 | exclude_lines: [ '^DEBUG:.*' ] 46 | fields: 47 | application: "myapp" 48 | app_component: "callbacks" 49 | - 50 | paths: 51 | - "/var/log/app/workers/*.log" 52 | input_type: log 53 | document_type: applog 54 | exclude_files: [ '\.gz$' ] 55 | exclude_lines: [ '^DEBUG:.*' ] 56 | fields: 57 | application: "myapp" 58 | app_component: "workers" 59 | output: 60 | redis: 61 | host: "logredis.prod.internal:6379" 62 | index: "filebeat_prod" 63 | ``` 64 | This more complex example pulls some system log information, like the above 65 | example, but also pulls in some application-specific logs. It then uses FileBeat 66 | filters to configure it to reject `DEBUG` loglines, and not parse logfiles that 67 | have been gzipped. It then adds appropriate fields to the events. This uses the 68 | redis output, dumping events into the `filebeat_prod` key. 
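On the consuming side, a Logstash `redis` input reads those events back out of the list.
A minimal sketch, assuming the same host and the `filebeat_prod` key as above (the `type`
value is only an illustrative choice):

```ruby
input {
  redis {
    host      => "logredis.prod.internal"
    port      => 6379
    data_type => "list"          # FileBeat pushes each event onto a Redis list
    key       => "filebeat_prod"
    codec     => "json"          # events arrive as JSON documents
    type      => "filebeat"
  }
}
```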
69 | -------------------------------------------------------------------------------- /examples/grokking_rules.md: -------------------------------------------------------------------------------- 1 | # Grok Performance Rules 2 | 3 | 1. Parse failures are very expensive. Structure your filters to avoid them. 4 | 1. Do your broad captures early in your filter sections. 5 | 1. Refine broadly captured fields in later grok filters. 6 | 1. Anchor your regexes. [This reduces substring searching.](https://www.elastic.co/blog/do-you-grok-grok) 7 | 1. Only use `%{GREEDYDATA}` at the end of a capture. Reduces back-tracking. 8 | 1. Use an internal-log format standard. Greatly eases grok construction. 9 | 1. Convert your plain language `Created account #{x} in zone #{y} with email #{z}` log-statements to something machine parseable. 10 | 11 | ## Avoid the dictionary anti-pattern: 12 | 13 | Don't do this: 14 | 15 | ```ruby 16 | filter { 17 | if [type] == "applog" 18 | grok { 19 | match => { 20 | "message" => [ 21 | "%{SYSLOGTIMESTAMP} \[%{WORD:component}\] %{WORD:acct_action} account %{BASE16NUM:acct_num} in zone %{BASE16NUM:zone_id} with email %{EMAILADDRESS:email_address}", 22 | "%{SYSLOGTIMESTAMP} \[%{WORD:component}\] Account %{BASE16NUM:acct_num} %{WORD:acct_action} from zone %{BASE16NUM:zone_id}", 23 | "%{SYSLOGTIMESTAMP} \[%{WORD:component}\] %{WORD:acct_action} zone %{BASE16NUM:zone_id} account %{BASE16NUM:acct_num} for %{GREEDYDATA:suspension_reason}", 24 | "%{SYSLOGTIMESTAMP} \[%{WORD:component}\] %{WORD:zone_action} new zone: %{BASE16NUM:zone_id}", 25 | "%{SYSLOGTIMESTAMP} \[%{WORD:component}\] Zone %{BASE16NUM:zone_id} %{WORD:zone_action}", 26 | "%{SYSLOGTIMESTAMP} \[%{WORD:component}\] %{GREEDYDATA:app_logline}" 27 | ] 28 | } 29 | } 30 | } 31 | ``` 32 | The temptation with grok matches is to treat it like a dictionary. Since matches 33 | are run in order, start with the most specific filters and get broader, finishing 34 | with a catch-all statement to sweep up the remainers. *This will destroy your performance*. 35 | Remember, each grok-miss is expensive. Constructing it like this ensures that most 36 | log-lines will get missed several times before getting matched. 37 | 38 | This is terse, and shows your intent. However, it's *really bad*. 39 | 40 | The above can be made to perform much better without modifying the log-format. 41 | At the very least, move the prefix in each capture to it's own expression, and 42 | grok on the remaining ones in a later dictionary: 43 | 44 | ```ruby 45 | filter { 46 | if [type] == "applog" 47 | grok { 48 | match => { 49 | "message" => [ 50 | "^%{SYSLOGTIMESTAMP} \[%WORD:component}\] %{GREEDYDATA:app_logline}$" 51 | ] 52 | } 53 | } 54 | if [component] == "account" { 55 | grok { 56 | match => { 57 | "app_logline" => [ 58 | "^%{WORD:acct_action} account %{BASE16NUM:acct_num} in zone %{BASE16NUM:zone_id} with email %{EMAILADDRESS:email_address}$", 59 | "^%{SYSLOGTIMESTAMP} \[%{WORD:component}\] Account %{BASE16NUM:acct_num} %{WORD:acct_action} from zone %{BASE16NUM:zone_id}$", 60 | "^%{WORD:acct_action} zone %{BASE16NUM:zone_id} account %{BASE16NUM:acct_num} for %{GREEDYDATA:suspension_reason}$" 61 | ] 62 | } 63 | } 64 | } 65 | if [component] == "zone" { 66 | grok { 67 | match => { 68 | "app_logline" => [ 69 | "^%{WORD:zone_action} new zone: %{BASE16NUM:zone_id}$", 70 | "^Zone %{BASE16NUM:zone_id} %{WORD:zone_action}$" 71 | ] 72 | } 73 | } 74 | } 75 | } 76 | } 77 | ``` 78 | 79 | This is much longer, but it will perform *much* better. 
While it still uses 80 | dictionaries, the grok expressions are now anchored (see the `^` and `$` 81 | characters) which will improve performance. Also, we use conditional statements 82 | to avoid grok-parsing lines against patterns we already know won't match. 83 | 84 | If we move towards grok-ready logging statements, we can make these: 85 | ``` 86 | Created account #{x} in zone #{y} with email #{z} 87 | Account #{x} deleted from zone #{y} 88 | Suspended zone #{y} account #{x} for #{s} 89 | Created new zone #{y} 90 | Zone #{y} deleted 91 | ``` 92 | Into these easier to parse versions: 93 | ``` 94 | [account] Created account #{x} in zone #{y} with email #{z} 95 | [account] Deleted account #{x} in zone #{y} 96 | [account] Suspended account #{x} in zone #{y} for #{s} 97 | [zone] Created #{y} 98 | [zone] Deleted #{y} 99 | ``` 100 | Which means we can do away with dictionaries entirely: 101 | 102 | ```ruby 103 | filter { 104 | if [type] == "applog" 105 | grok { 106 | match => { 107 | "message" => [ 108 | "^%{SYSLOGTIMESTAMP} \[%WORD:component}\] %{GREEDYDATA:app_logline}$" 109 | ] 110 | } 111 | } 112 | if [component] == "account" { 113 | grok { 114 | match => { 115 | "app_logline" => [ 116 | "^{WORD:acct_action} %{BASE16NUM:acct_num} in zone %{BASE16NUM:zone_id}( %{GREEDYDATA:acct_extra})$" 117 | ] 118 | } 119 | } 120 | } else if [component] == "zone" { 121 | grok { 122 | match => { 123 | "app_logline" => [ 124 | "^%{WORD:zone_action} %{BASE16NUM:zone_id}$" 125 | ] 126 | } 127 | } 128 | } 129 | if [acct_action] == "Created" { 130 | grok { 131 | match => { 132 | "acct_extra" => [ 133 | "with email address %{EMAILADDRESS:email_address}$" 134 | ] 135 | } 136 | } 137 | mutate { 138 | remove_field => [ "acct_extra" ] 139 | } 140 | } else if [acct_action] == "Suspended" { 141 | grok { 142 | match => { 143 | "acct_extra" => [ 144 | "for %{GREEDYDATA:suspension_reason}$" 145 | ] 146 | } 147 | } 148 | mutate { 149 | remove_field => [ "acct_extra" ] 150 | } 151 | } 152 | } 153 | } 154 | ``` 155 | This version avoids dictionaries all together, and uses conditionals to ensure 156 | that each grok-expression is only matched against a string that is highly 157 | likely to match. 158 | 159 | ## Skipping to the end to see what the closing paragraph is 160 | 161 | 1. Use regex-anchors. `^` and `$` will give you the biggest bang for your performance optimization time. 162 | 2. Tiering your matches lets you group filter-statements on a component in ways that are easier for an engineering group to maintain. 163 | -------------------------------------------------------------------------------- /examples/grokking_syslog.md: -------------------------------------------------------------------------------- 1 | # Grokking Syslog 2 | In this example, we progressively build some syslog grokking. We are looking 3 | for output from backup scripts. The output of the scripts is well known, which 4 | allows us to build some simple grok expressions and give our events rich data 5 | to work with. 6 | 7 | It all begins with syslog parsing. This is taken from [one of the filters we use in vagrant builds](modules/profiles/templates/logstash/filter/syslog_file) 8 | 9 | ```ruby 10 | filter { 11 | if [type] == "syslog-file" { 12 | # Syslog parsing is handled through Grok. 13 | # Documentation: https://www.elastic.co/guide/en/logstash/2.4/plugins-filters-grok.html 14 | grok { 15 | # This will create a new field called SYSLOGMESSAGE, that contains the 16 | # data part of a syslog line. 
17 | # 18 | # If given a line like: 19 | # Sep 9 19:09:50 ip-192-0-2-153 dhclient: bound to 192.0.2.153 -- renewal in 1367 seconds. 20 | # SYSLOGMESSAGE will equal "bound to 192.0.2.153 -- renewal in 1367 seconds." 21 | # 22 | match => { 23 | "message" => "%{SYSLOGBASE}%{SPACE}%{GREEDYDATA:SYSLOGMESSAGE}" 24 | } 25 | } 26 | } 27 | } 28 | ``` 29 | This will give us a variety of fields to work with, such as: 30 | 31 | * `program`: The program that issued the log-line. 32 | * `pid`: The PID of the program. 33 | * `logsource`: The machine that recorded the message. 34 | * `SYSLOGMESSAGE`: The message-part of the syslog line 35 | 36 | If we are given some log-lines such as these: 37 | 38 | ``` 39 | May 19 19:22:06 ip-172-16-2-4 pii-repo-backup[4982]: ALARM Unable to isolate framulator, backup not taken. 40 | May 20 07:01:02 ip-172-16-2-4 pii-repo-backup[5122]: OK Hourly backup success. 41 | ``` 42 | 43 | We can construct patterns to match these. We already know that `SYSLOGMESSAGE` 44 | will be set to `ALARM Unable to isolate framulator, backup not taken.` So let's 45 | construct a pattern to extract the meaningful information. 46 | 47 | A good tool for figuring out how to grok this is [grokdebug.herokuapp.com](http://grokdebug.herokuapp.com/). 48 | It allows you to paste in a log-line, and progressively build your expression. 49 | Remember, you can name your fields by using either `%{PATTERN:field_name}` or 50 | `(?regex)`. Use the former if you're using a built in pattern, the 51 | latter, if you're building your own regex. 52 | 53 | A simple capture for these events could be this: 54 | ```ruby 55 | %{WORD:backup_state} %{GREEDYDATA:backup_message} 56 | ``` 57 | It's not all that efficient, but it gets the job done. However, the internal 58 | standard for backup-output has only a few states defined. A more targeted capture 59 | would look like this: 60 | ```ruby 61 | ^(?OK|WARN|ALARM|CRIT) %{GREEDYDATA:backup_message}$ 62 | ``` 63 | We can now start building our Grok expression. 64 | 65 | For best efficiency, we need to place this Grok expression *after* the above expression. 66 | This allows us to filter on a specific field and reduce the per-cycle computational 67 | overhead. Since we know all of our backup scripts end with "-backup": 68 | ```ruby 69 | if [program] =~ "-backup$" { 70 | grok { 71 | match => { 72 | "SYSLOGMESSAGE" => "^(?OK|WARN|ALARM|CRIT) %{GREEDYDATA:backup_message}$" 73 | "program" => "^%{DATA:backup_name}-backup$" 74 | } 75 | add_tag => [ "backup_output" ] 76 | } 77 | } 78 | ``` 79 | The conditional looks for strings ending with `-backup`, and then applies the 80 | grok expression to it. We use two matches; one on `SYSLOGMESSAGE` to pull out the 81 | `backup_state` and `backup_message` fields, and a second on the `program` field 82 | to pull out the `backup_name` field. Finally, we tag the event with `backup_output` 83 | for use later on and to ease finding the event in reporting. 84 | 85 | * `backup_name`: pii-repo 86 | * `backup_state`: ALARM 87 | * `backup_message`: Unable to isolate framulator, backup not taken. 88 | * `tags`: [ "backup_output" ] 89 | * `type`: syslog 90 | 91 | --- 92 | 93 | With fields defined in this way, we can use them for outputs: 94 | ```ruby 95 | output { 96 | if "backup_output" in [tags] AND [backup_state] != "OK" { 97 | pagerduty { 98 | service_key => "secrets" 99 | event_type => "trigger" 100 | incident_key => "logstash/%{backup_name}/%{backup_state}" 101 | description => "Backup failure on %{backup_name}. RPO is not being met." 
102 | details => "%{backup_state}: %{backup_message}" 103 | } 104 | } 105 | } 106 | ``` 107 | Which will issue a PagerDuty incident in the event of a failed backup. The 108 | fields we populated in the grok expression are used to provide information in the 109 | incident. This usage would be much harder if we had to extract the text we wanted 110 | from within large fields. 111 | -------------------------------------------------------------------------------- /examples/large_distributed_source.md: -------------------------------------------------------------------------------- 1 | # Large Distributed Source 2 | This example configuration shows what a logstash configuration on a log-producing 3 | node might look like in a large, distributed environment. 4 | 5 | ```ruby 6 | input { 7 | file { 8 | paths => [ "/var/log/syslog" ] 9 | type => "syslog" 10 | } 11 | file { 12 | paths => [ 13 | "/var/log/auth.log", 14 | "/var/log/audit/audit.log" 15 | ] 16 | type => "audit" 17 | } 18 | } 19 | 20 | input { 21 | file { 22 | paths => [ "/var/log/product-a/*.log" ] 23 | type => "product-a" 24 | } 25 | file { 26 | paths => [ "/var/log/product-q/*.log" ] 27 | type => "product-q" 28 | } 29 | } 30 | 31 | output { 32 | if [type] == "audit" { 33 | redis { 34 | host => [ "audit.security.internal" ] 35 | data_type => "list" 36 | key => "audit_log" 37 | } 38 | } else { 39 | kafka { 40 | broker_list => "keeper1.devops.internal:9092,keeper2.devops.internal:9092" 41 | topic_id => "%{type}" 42 | } 43 | } 44 | } 45 | ``` 46 | 47 | -------------------------------------------------------------------------------- /examples/large_routed_router.md: -------------------------------------------------------------------------------- 1 | # Large Routed - Router 2 | This example configuration shows what the logstash routing-tier could look 3 | like in an environment that has a routing tier. 4 | 5 | ```ruby 6 | input { 7 | kafka { 8 | zk_connect => "keeper1.devops.internal:9092,keeper2.devops.internal:9092" 9 | topic_id => "logstash_ingest" 10 | } 11 | } 12 | 13 | filter { 14 | if [type] == "syslog" and ( [source] == "/var/log/auth.log" or [source] == "/var/log/audit/audit.log" ) { 15 | mutate { 16 | add_tag => [ "audit" ] 17 | } 18 | } 19 | 20 | if [type] == "audit" { 21 | mutate { 22 | add_tag => [ "audit" ] 23 | } 24 | } 25 | } 26 | 27 | output { 28 | if "audit" in [tags] { 29 | redis { 30 | host => [ "audit.security.internal" ] 31 | data_type => "list" 32 | key => "audit_log" 33 | } 34 | } else { 35 | kafka { 36 | broker_list => "keeper1.devops.internal:9092,keeper2.devops.internal:9092" 37 | topic_id => "%{type}" 38 | } 39 | } 40 | } 41 | 42 | ``` 43 | -------------------------------------------------------------------------------- /examples/multiple_filter.md: -------------------------------------------------------------------------------- 1 | # A Filter Example with Multiple Filter Blocks. 2 | In this example, we have two `filter { }` blocks, which manipulate similar fields. 
3 | 4 | ```ruby 5 | # Parse an authentication header and get details 6 | filter { 7 | if [message] =~ "Authentication_request" { 8 | grok { 9 | match => { 10 | message => "Authentication_request: %{GREEDYDATA:auth_message}$" 11 | } 12 | } 13 | add_field => { 14 | "sub_type" => "authentication" 15 | } 16 | } 17 | } 18 | 19 | filter { 20 | if [sub_type] == "authentication" { 21 | grok { 22 | match => { 23 | auth_message => "%{WORD:auth_type} / %{WORD:auth_user} / %{WORD:application}" 24 | } 25 | } 26 | } 27 | } 28 | 29 | ``` 30 | How does the order of declaration change the order of logic? 31 | -------------------------------------------------------------------------------- /examples/multiple_filter_lines.md: -------------------------------------------------------------------------------- 1 | # A Filter Example with Single Filter Block. 2 | In this example, we have one `filter { }` block, which manipulate similar fields. 3 | 4 | ```ruby 5 | # Parse an authentication header and get details 6 | filter { 7 | if [message] =~ "Authentication_request: " { 8 | grok { 9 | match => { 10 | message => "Authentication_request: %{GREEDYDATA:auth_message}" 11 | } 12 | } 13 | add_field => { 14 | "sub_type" => "authentication" 15 | } 16 | } 17 | 18 | # Parse messages like "auth_type=saml auth_user=hildegard@example.com application=testapp" 19 | if [sub_type] == "authentication" { 20 | kv { 21 | source => "auth_message" 22 | } 23 | } 24 | } 25 | 26 | ``` 27 | How does the order of declaration change the order of logic? 28 | -------------------------------------------------------------------------------- /examples/output_hipchat.md: -------------------------------------------------------------------------------- 1 | # Example Output for HipChat 2 | Sends a notice to HipChat when a type of queue gets clogged. 3 | 4 | ```ruby 5 | output { 6 | if [queue_size] > 30 AND [queue_name] =~ "system_*" { 7 | hipchat { 8 | room_id => "12932" 9 | token => "secrets" 10 | from => "%{queue_name}" 11 | format => "This queue has %{queue_size} jobs in it. It probably needs a good kicking." 12 | color => "yellow" 13 | trigger_notify => true 14 | } 15 | } 16 | } 17 | 18 | ``` 19 | 20 | This one sends a notice when a backup is finished. 21 | ```ruby 22 | output { 23 | if [backup_status] == "finished" { 24 | hipchat { 25 | room_id => "12932" 26 | token => "secrets" 27 | from => "backup_events" 28 | format => "The backup job %{backup_job_id} on %{backup_node} has finished with %{backup_size/1024}GB in the VTL." 29 | color => "green" 30 | } 31 | } 32 | } 33 | ``` 34 | -------------------------------------------------------------------------------- /examples/threading_example_parallel.md: -------------------------------------------------------------------------------- 1 | # Threading Example: Parallelized 2 | In this example logstash config, care has been taken to ensure that 3 | multiple threads can be used for maximum throughput. Compare it to 4 | [the singlethreaded version](examples/threading_example_singlethread.md). 
5 | 6 | ```ruby 7 | input { 8 | file { 9 | path => [ '/var/log/applogs/*.log' ] 10 | tags => [ 'applogs' ] 11 | } 12 | } 13 | 14 | input { 15 | file { 16 | path => [ '/var/log/debuglogs/*.log' ] 17 | tags => [ 'applogs', 'debuglogs' ] 18 | } 19 | } 20 | 21 | input { 22 | file { 23 | path => [ '/var/log/syslog' ] 24 | tags => [ 'syslog' ] 25 | } 26 | } 27 | 28 | input { 29 | file { 30 | path => [ '/var/log/apache2/*.log' ] 31 | tags => [ 'apache' ] 32 | } 33 | } 34 | 35 | output { 36 | elasticsearch { 37 | hosts => [ 'elastic.prod.internal' ] 38 | } 39 | } 40 | 41 | output{ 42 | # use IAM credentials to bypass credential-in-cleartext problem. 43 | s3 { 44 | bucket => "mycorp_logging_bucket" 45 | size_file => 1024 46 | time_file => 10 47 | } 48 | } 49 | 50 | ``` 51 | 52 | ## Questions: 53 | 54 | * How does performance differ between logstash 1.5 and 2.2 and higher? 55 | * Does the lack of a filter-stage affect performance? 56 | -------------------------------------------------------------------------------- /examples/threading_example_singlethread.md: -------------------------------------------------------------------------------- 1 | # Threading Example: Single threaded 2 | In this example logstash config, little care has been taken for the threading 3 | model. Compare it to 4 | [the parallelized version](examples/threading_example_parallel.md). 5 | 6 | ```ruby 7 | input { 8 | file { 9 | path => [ 10 | '/var/log/applogs/*.log', 11 | '/var/log/debuglogs/*.log', 12 | '/var/log/syslog', 13 | '/var/log/apache2/*.log' 14 | ] 15 | } 16 | } 17 | 18 | filter { 19 | if [path] =~ '/var/log/applogs' { 20 | mutate { 21 | add_tag => [ 'applogs' ] 22 | } 23 | } else if [path] =~ '/var/log/debuglogs' { 24 | mutate { 25 | add_tag => [ 'applogs' 'debuglogs' ] 26 | } 27 | } else if [path] =~ '/var/log/apache' { 28 | mutate { 29 | add_tag => [ 'apache' ] 30 | } 31 | } else if [path] =~ '/var/log/syslog' { 32 | mutate { 33 | add_tag => [ 'syslog' ] 34 | } 35 | } 36 | } 37 | 38 | output { 39 | elasticsearch { 40 | hosts => [ 'elastic.prod.internal' ] 41 | } 42 | 43 | # use IAM credentials to bypass credential-in-cleartext problem. 44 | s3 { 45 | bucket => "mycorp_logging_bucket" 46 | size_file => 1024 47 | time_file => 10 48 | } 49 | } 50 | 51 | ``` -------------------------------------------------------------------------------- /examples/winlogbeat.md: -------------------------------------------------------------------------------- 1 | # WinLogBeat Examples 2 | [WinLogBeat is part of the Beats framework, designed to replace the `eventlog` input 3 | on logstash](https://www.elastic.co/guide/en/beats/winlogbeat/current/_overview.html). 4 | Here are a couple of examples of FileBeat configurations. 5 | 6 | ```yaml 7 | winlogbeat: 8 | registry_file: C:/ProgramData/winlogbeat/.winlogbeat.yml 9 | 10 | event_logs: 11 | - name: ForwardedEvents 12 | 13 | 14 | output: 15 | logstash: 16 | hosts: [ "prodstash.prod.internal:5044" ] 17 | 18 | ``` 19 | This configuration monitors the 'ForwardedEvents' event-log, outputting to a 20 | LogStash instance running the `beats` input on port 5044. The ForwardedEvents 21 | event-log is where a system configured to [forward events](https://msdn.microsoft.com/en-us/library/bb870973(v=vs.85).aspx) 22 | deposits its logs. This can be useful if you are not allowed to install 23 | WinLogBeat on your Domain Controllers. 
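A second, sketched example for hosts where you can install WinLogBeat directly. The
event-log names are the standard Windows ones, but the Redis host and key are
placeholders, following the same layout as the FileBeat Redis example in this repo:

```yaml
winlogbeat:
  registry_file: C:/ProgramData/winlogbeat/.winlogbeat.yml

  event_logs:
    - name: Security
    - name: Application
    - name: System

output:
  redis:
    host: "logredis.prod.internal:6379"
    index: "winlogbeat_prod"

```
This monitors the three standard event-logs and ships the events to a Redis list, where
a downstream Logstash `redis` input can pick them up.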
24 | 25 | -------------------------------------------------------------------------------- /hiera.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | version: 5 3 | defaults: 4 | datadir: /etc/puppet-hiera/ 5 | data_hash: yaml_data 6 | hierarchy: 7 | - name: "Yaml lookup hierarchy" 8 | paths: 9 | - "node-type/%{node_type}.yaml" 10 | - "env-type/%{env_type}.yaml" 11 | - common 12 | -------------------------------------------------------------------------------- /manifests/site.pp: -------------------------------------------------------------------------------- 1 | # The base defines for the various node-types. 2 | 3 | node default { 4 | 5 | case $::node_type { 6 | 'onebox_nasa': { include roles::onebox_nasa } 7 | 'onebox_syslog': { include roles::onebox_syslog } 8 | 'apache': { include roles::apache } 9 | 'escluster': { include roles::escluster } 10 | 'mdcluster': { include roles::mdcluster } 11 | } 12 | 13 | } 14 | -------------------------------------------------------------------------------- /modules/profiles/files/apache/sysadmin1138-net.basis: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hellosign/logstash-fundamentals/d99189b93d4c57312eaaeb8150435a2eefd41a27/modules/profiles/files/apache/sysadmin1138-net.basis -------------------------------------------------------------------------------- /modules/profiles/files/base/hcl.vim: -------------------------------------------------------------------------------- 1 | 2 | if exists("b:current_syntax") 3 | finish 4 | endif 5 | 6 | syn match hclEqual '=' 7 | syn match hclSimpleString '"[^\"]*"' 8 | syn region hclComment display oneline start='\%\(^\|\s\)#' end='$' 9 | syn region hclInterpolation display oneline start='(' end=')' contains=hclInterpolation,hclSimpleString 10 | syn region hclSmartString display oneline start='"' end='"\s*$' contains=hclInterpolation 11 | 12 | syn keyword hclRootKeywords variable provider resource nextgroup=hclString,hclString skipwhite 13 | syn keyword hclRootKeywords default nextgroup=hclEquals skipwhite 14 | 15 | 16 | syn keyword hclAwsResourcesKeywords availability_zones desired_capacity force_delete health_check_grace_period health_check_type launch_configuration load_balancers max_size min_size name vpc_zone_identifier nextgroup=hclEquals,hclString skipwhite 17 | syn keyword hclAwsResourcesKeywords allocated_storage availability_zone backup_retention_period backup_window db_subnet_group_name engine engine_version final_snapshot_identifier identifier instance_class iops maintenance_window multi_az name password port publicly_accessible security_group_names skip_final_snapshot username vpc_security_group_ids nextgroup=hclEquals,hclString skipwhite 18 | syn keyword hclAwsResourcesKeywords cidr description ingress name security_group_id security_group_name security_group_owner_id source_security_group_id nextgroup=hclEquals,hclString skipwhite 19 | syn keyword hclAwsResourcesKeywords description name subnet_ids nextgroup=hclEquals,hclString skipwhite 20 | syn keyword hclAwsResourcesKeywords instance vpc nextgroup=hclEquals,hclString skipwhite 21 | syn keyword hclAwsResourcesKeywords availability_zones health_check healthy_threshold instance_port instance_protocol instances internal interval lb_port lb_protocol listener name security_groups ssl_certificate_id subnets target timeout unhealthy_threshold nextgroup=hclEquals,hclString skipwhite 22 | syn keyword hclAwsResourcesKeywords ami 
associate_public_ip_address availability_zone ebs_optimized iam_instance_profile instance_type key_name private_ip security_groups source_dest_check subnet_id tags user_data nextgroup=hclEquals,hclString skipwhite 23 | syn keyword hclAwsResourcesKeywords vpc_id nextgroup=hclEquals,hclString skipwhite 24 | syn keyword hclAwsResourcesKeywords iam_instance_profile image_id instance_type key_name name name_prefix security_groups user_data nextgroup=hclEquals,hclString skipwhite 25 | syn keyword hclAwsResourcesKeywords name records ttl type zone_id nextgroup=hclEquals,hclString skipwhite 26 | syn keyword hclAwsResourcesKeywords name nextgroup=hclEquals,hclString skipwhite 27 | syn keyword hclAwsResourcesKeywords route_table_id subnet_id nextgroup=hclEquals,hclString skipwhite 28 | syn keyword hclAwsResourcesKeywords cidr_block gateway_id instance_id route vpc_id nextgroup=hclEquals,hclString skipwhite 29 | syn keyword hclAwsResourcesKeywords acl bucket nextgroup=hclEquals,hclString skipwhite 30 | syn keyword hclAwsResourcesKeywords cidr_blocks description from_port ingress name owner_id protocol security_groups self tags to_port vpc_id nextgroup=hclEquals,hclString skipwhite 31 | syn keyword hclAwsResourcesKeywords availability_zone- cidr_block map_public_ip_on_launch vpc_id nextgroup=hclEquals,hclString skipwhite 32 | syn keyword hclAwsResourcesKeywords cidr_block enable_dns_hostnames enable_dns_support tags nextgroup=hclEquals,hclString skipwhite 33 | 34 | 35 | hi def link hclComment Comment 36 | hi def link hclEqual Operator 37 | hi def link hclRootKeywords Statement 38 | hi def link hclAwsResourcesKeywords Type 39 | hi def link hclSmartString String 40 | hi def link hclInterpolation String 41 | hi def link hclSimpleString PreProc 42 | 43 | let b:current_syntax = "hcl" 44 | -------------------------------------------------------------------------------- /modules/profiles/files/logstash/templates/onebox.json: -------------------------------------------------------------------------------- 1 | { 2 | "template" : "onebox-*", 3 | "settings" : { 4 | "index" : { 5 | "refresh_interval" : "5s" 6 | } 7 | }, 8 | "mappings" : { 9 | "_default_" : { 10 | "dynamic_templates" : [ { 11 | "message_field" : { 12 | "mapping" : { 13 | "index" : "analyzed", 14 | "omit_norms" : true, 15 | "fielddata" : { 16 | "format" : "disabled" 17 | }, 18 | "type" : "string" 19 | }, 20 | "match_mapping_type" : "string", 21 | "match" : "message" 22 | } 23 | }, { 24 | "string_fields" : { 25 | "mapping" : { 26 | "index" : "analyzed", 27 | "omit_norms" : true, 28 | "fielddata" : { 29 | "format" : "disabled" 30 | }, 31 | "type" : "string", 32 | "fields" : { 33 | "raw" : { 34 | "index" : "not_analyzed", 35 | "ignore_above" : 256, 36 | "type" : "string" 37 | } 38 | } 39 | }, 40 | "match_mapping_type" : "string", 41 | "match" : "*" 42 | } 43 | } ], 44 | "properties" : { 45 | "@timestamp" : { 46 | "type" : "date" 47 | }, 48 | "geoip" : { 49 | "dynamic" : true, 50 | "properties" : { 51 | "location" : { 52 | "type" : "geo_point" 53 | }, 54 | "longitude" : { 55 | "type" : "float" 56 | }, 57 | "latitude" : { 58 | "type" : "float" 59 | }, 60 | "ip" : { 61 | "type" : "ip" 62 | } 63 | } 64 | }, 65 | "user_mentions" : { 66 | "properties" : { 67 | "id" : { 68 | "type": "integer" 69 | } 70 | } 71 | }, 72 | "@version" : { 73 | "index" : "not_analyzed", 74 | "type" : "string" 75 | } 76 | }, 77 | "_all" : { 78 | "enabled" : true, 79 | "omit_norms" : true 80 | } 81 | } 82 | }, 83 | "aliases" : { } 84 | } 85 | 86 | 
-------------------------------------------------------------------------------- /modules/profiles/manifests/apache_stub.pp: -------------------------------------------------------------------------------- 1 | # Creates a stub of the apache log-files, without actually installing apache. 2 | class profiles::apache_stub { 3 | 4 | file { 5 | '/var/log/apache2': 6 | ensure => directory, 7 | owner => 'root', 8 | group => 'root', 9 | mode => '0755'; 10 | '/var/log/apache2/vhosts': 11 | ensure => directory, 12 | owner => 'root', 13 | group => 'root', 14 | mode => '0755'; 15 | '/var/log/apache2/vhosts/sysadmin1138-net.log': 16 | ensure => present, 17 | owner => 'root', 18 | group => 'root', 19 | mode => '0644', 20 | before => Service['logstash']; 21 | '/var/log/apache2/vhosts/sysadmin1138-net.basis': 22 | ensure => present, 23 | owner => 'root', 24 | group => 'root', 25 | mode => '0644', 26 | source => 'puppet:///modules/profiles/apache/sysadmin1138-net.basis', 27 | require => Package['logstash'], 28 | before => Service['logstash']; 29 | } 30 | 31 | # This sneakiness is to ensure this log-file is parsed by logstash. 32 | # Don't do this in prod, kids. 33 | exec { 'Wait 30s for logstash to launch': 34 | command => 'sleep 1', 35 | path => [ '/usr/bin', '/bin', '/usr/sbin' ], 36 | notify => Exec['dumplogs'], 37 | require => [ Service['logstash'], File['/var/log/apache2/vhosts/sysadmin1138-net.basis'] ], 38 | } 39 | 40 | exec { 'dumplogs': 41 | command => "sleep 30; bzcat /var/log/apache2/vhosts/sysadmin1138-net.basis >> /var/log/apache2/vhosts/sysadmin1138-net.log", 42 | path => [ '/usr/bin', '/bin', '/usr/sbin' ], 43 | refreshonly => true, 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /modules/profiles/manifests/base.pp: -------------------------------------------------------------------------------- 1 | # Provides the base modifications that need to happen on all nodes. 2 | class profiles::base { 3 | 4 | include apt 5 | 6 | case $virtual { 7 | 'xenu': { 8 | $vm_user = 'ubuntu' 9 | } 10 | [ 'virtualbox', 'kvm' ]: { 11 | $vm_user = 'vagrant' 12 | } 13 | default: { 14 | $vm_user = 'vagrant' 15 | } 16 | } 17 | 18 | file { [ "/home/${vm_user}/.bash_aliases", 19 | '/root/.bash_aliases' ]: 20 | owner => $vm_user, 21 | group => $vm_user, 22 | mode => '0640', 23 | content => template('profiles/base/bash_aliases'), 24 | } 25 | 26 | # Provide vim syntax hilighting for classes. 
27 | file { '/usr/share/vim/vim74/syntax/hcl.vim': 28 | owner => 'root', 29 | group => 'root', 30 | mode => '0644', 31 | source => 'puppet:///modules/profiles/base/hcl.vim', 32 | } 33 | 34 | service { [ 'puppet-agent', 'pxp-agent', 'mcollective' ]: 35 | enable => false, 36 | ensure => stopped 37 | } 38 | 39 | } 40 | -------------------------------------------------------------------------------- /modules/profiles/manifests/curator.pp: -------------------------------------------------------------------------------- 1 | # Ensure curator is installed 2 | class profiles::curator ( 3 | $elastic_host = '127.0.0.1' 4 | ) { 5 | 6 | apt::source { 'curator': 7 | ensure => present, 8 | location => "http://packages.elastic.co/curator/4/debian", 9 | release => 'stable', 10 | repos => 'main', 11 | include => { 12 | 'source' => false 13 | }, 14 | require => Apt::Key['elastic'], 15 | notify => Exec['apt_update'] 16 | } 17 | 18 | ensure_packages ( 'elasticsearch-curator', { require => Exec['apt_update'] } ) 19 | 20 | file { 21 | '/etc/curator': 22 | ensure => directory, 23 | owner => 'root', 24 | group => 'root', 25 | mode => '0750'; 26 | '/etc/curator/curator.yml': 27 | ensure => file, 28 | owner => 'root', 29 | group => 'root', 30 | mode => '0640', 31 | content => template('profiles/curator/config.yml'); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /modules/profiles/manifests/curator_job.pp: -------------------------------------------------------------------------------- 1 | # Sets up a curator job 2 | define profiles::curator_job ( 3 | $jobfile, 4 | $cron_weekday = '*', 5 | $cron_hour = 1, 6 | $cron_minute = 10, 7 | ) { 8 | 9 | $clean_name = shell_escape($name) 10 | 11 | file { "/etc/curator/${clean_name}.yml": 12 | ensure => present, 13 | owner => 'root', 14 | group => 'root', 15 | mode => '0640', 16 | content => $jobfile, 17 | } 18 | 19 | cron { $name: 20 | command => "/usr/local/bin/curator --config /etc/curator/curator.yml /etc/curator/${clean_name}.yml", 21 | user => 'root', 22 | weekday => $cron_weekday, 23 | hour => $cron_hour, 24 | minute => $cron_minute, 25 | } 26 | 27 | 28 | } 29 | -------------------------------------------------------------------------------- /modules/profiles/manifests/elastic_key.pp: -------------------------------------------------------------------------------- 1 | # Installs the elasticsearch GPG key for apt, to avoid dependency cycles. 2 | class profiles::elastic_key { 3 | 4 | apt::key { 'elastic': 5 | id => '46095ACC8548582C1A2699A9D27D666CD88E42B4', 6 | source => 'http://packages.elasticsearch.org/GPG-KEY-elasticsearch' 7 | } 8 | 9 | apt::source { 'elasticsearch': 10 | ensure => present, 11 | location => "https://artifacts.elastic.co/packages/5.x/apt", 12 | release => 'stable', 13 | repos => 'main', 14 | include => { 15 | 'source' => false 16 | }, 17 | require => Apt::Key['elastic'], 18 | notify => Exec['apt_update'] 19 | } 20 | 21 | } 22 | -------------------------------------------------------------------------------- /modules/profiles/manifests/escluster.pp: -------------------------------------------------------------------------------- 1 | # Installs a network facing elasticsearch that does everything 2 | class profiles::escluster ( 3 | $instance_name = logstash 4 | ) { 5 | 6 | include profiles::elastic_key 7 | 8 | # We need java, this gets it. 
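  # (Elasticsearch 5.x requires Java 8, hence openjdk-8-jre-headless.)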
9 | ensure_packages ( 'openjdk-8-jre-headless', { require => Exec['apt_update'] } ) 10 | 11 | # Get the 'half of RAM' number, to be used as the heap-size for ElasticSearch. 12 | # Why half? The other half is used for block-cache. 13 | # $heap_size = inline_template("<%= (@memorysize_mb.to_f / 2).to_i %>") 14 | $heap_size = $memory['system']['available_bytes'] / 2 15 | 16 | # This construct is needed to tell elasticsearch "Bind where we can see you". 17 | # This can't be 0.0.0.0 because this address is advertised. 18 | $es_config = { 19 | 'network' => { 20 | 'host' => $networking['interfaces']['enp0s8']['ip'] 21 | } 22 | } 23 | 24 | # Installs the elasticsearch base install, but not an instance. 25 | class { 'elasticsearch': 26 | # version => '5.4.1', 27 | manage_repo => false, 28 | repo_version => '5.x', 29 | api_protocol => 'http', 30 | config => $es_config, 31 | jvm_options => [ 32 | "-Xms${heap_size}", 33 | "-Xmx${heap_size}" 34 | ], 35 | require => Exec['apt_update'] 36 | } 37 | 38 | # Installs a specific instance. This puppet module allows installing multiple 39 | # ES instances on the same host. 'service elasticsearch-logstash stop' will 40 | # stop it. 41 | elasticsearch::instance { $instance_name: } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /modules/profiles/manifests/init.pp: -------------------------------------------------------------------------------- 1 | # Roots the profiles class 2 | class profiles {} 3 | -------------------------------------------------------------------------------- /modules/profiles/manifests/kibana_local.pp: -------------------------------------------------------------------------------- 1 | # Installs Kibana, in local mode with no proxies. 2 | class profiles::kibana_local { 3 | 4 | class { '::kibana': 5 | ensure => '5.4.1', 6 | config => { 7 | 'server.port' => 3010, 8 | 'server.host' => $networking['interfaces']['enp0s8']['ip'], 9 | 'elasticsearch.url' => 'http://localhost:9200', 10 | } 11 | } 12 | 13 | } 14 | -------------------------------------------------------------------------------- /modules/profiles/manifests/kibana_network.pp: -------------------------------------------------------------------------------- 1 | # Installs Kibana, in network mode with no proxies. 2 | class profiles::kibana_network { 3 | 4 | $elasticsearch_ip = lookup('escluster_ip', { default_value => 'localhost' } ) 5 | 6 | class { '::kibana': 7 | manage_repo => true, 8 | config => { 9 | 'server.port' => 3010, 10 | 'server.host' => $networking['interfaces']['enp0s8']['ip'], 11 | 'elasticsearch.url' => "http://${elasticsearch_ip}:9200" 12 | } 13 | } 14 | 15 | } 16 | -------------------------------------------------------------------------------- /modules/profiles/manifests/logredis.pp: -------------------------------------------------------------------------------- 1 | # Creates a redis server for use with logstash. 2 | class profiles::logredis { 3 | 4 | apt::ppa { 'ppa:chris-lea/redis-server': } 5 | 6 | class { 'redis' : 7 | bind => '0.0.0.0', 8 | service_ensure => 'running', 9 | package_name => 'redis-server', 10 | require => Exec['apt_update'] 11 | } 12 | 13 | # Part of hotrodding Redis is to set vm.overcommit_memory to 1. 
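  # (Without it, Redis background saves can fail under memory pressure, because
  #  the kernel may refuse to over-commit memory for the fork() that BGSAVE uses.)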
14 |   file_line { 'redis_overcommit':
15 |     path   => '/etc/sysctl.conf',
16 |     line   => 'vm.overcommit_memory = 1',
17 |     notify => Exec['vm_overcommit'],
18 |   }
19 | 
20 |   exec { 'vm_overcommit':
21 |     command     => 'sysctl vm.overcommit_memory=1',
22 |     path        => ['/sbin', '/usr/sbin', '/bin', '/usr/bin'],
23 |     refreshonly => true
24 |   }
25 | 
26 |   # The next step is to set transparent_hugepage to never, as this has a severe
27 |   # impact on performance when redis is loaded.
28 |   ini_subsetting { 'set-grub-hugepages':
29 |     ensure     => present,
30 |     path       => '/etc/default/grub',
31 |     setting    => 'GRUB_CMDLINE_LINUX',
32 |     subsetting => 'transparent_hugepage',
33 |     value      => "=never",
34 |     notify     => Exec['update-grub-hugepages'],
35 |   }
36 | 
37 |   exec { 'update-grub-hugepages':
38 |     command     => '/usr/sbin/update-grub',
39 |     refreshonly => true,
40 |     notify      => Exec['onetime-hugepages'],
41 |   }
42 | 
43 |   exec { 'onetime-hugepages':
44 |     command     => 'echo never > /sys/kernel/mm/transparent_hugepage/enabled',
45 |     path        => ['/sbin', '/usr/sbin', '/bin', '/usr/bin'],
46 |     refreshonly => true,
47 |   }
48 | 
49 | }
50 | 
--------------------------------------------------------------------------------
/modules/profiles/manifests/logstash.pp:
--------------------------------------------------------------------------------
1 | # Installs the base logstash. Config-items will be handled in other profiles.
2 | # Example: logstash::configfile { 'get_tweetstream': }
3 | class profiles::logstash (
4 |   $run_as_root = false,
5 |   $workers     = false,
6 |   $ls_heap     = '256m'
7 | ) {
8 | 
9 |   include profiles::elastic_key
10 | 
11 |   ensure_packages ( 'openjdk-8-jre-headless', { require => Exec['apt_update'] } )
12 | 
13 |   if $run_as_root {
14 |     $ls_user  = 'root'
15 |     $ls_group = 'root'
16 |   } else {
17 |     $ls_user  = 'logstash'
18 |     $ls_group = 'logstash'
19 |   }
20 | 
21 |   if $workers {
22 |     $ls_opts = "-w ${workers}"
23 |   } else {
24 |     $ls_opts = '-w 1'
25 |   }
26 | 
27 |   $config_hash = {
28 |     'LS_USER'      => $ls_user,
29 |     'LS_GROUP'     => $ls_group,
30 |     'LS_OPTS'      => $ls_opts,
31 |     'LS_HEAP_SIZE' => $ls_heap
32 |   }
33 | 
34 |   class { '::logstash':
35 |     manage_repo     => false,
36 |     # version         => '1:5.4.1-1',
37 |     startup_options => $config_hash,
38 |     require         => Exec['apt_update']
39 |   }
40 | 
41 | }
42 | 
--------------------------------------------------------------------------------
/modules/profiles/manifests/logstash/filter_apache.pp:
--------------------------------------------------------------------------------
1 | # Deploys the apache-log parsing filter for logstash
2 | class profiles::logstash::filter_apache {
3 | 
4 |   logstash::configfile { '40-filter_apache':
5 |     content => template('profiles/logstash/filter/apache'),
6 |   }
7 | 
8 | }
9 | 
--------------------------------------------------------------------------------
/modules/profiles/manifests/logstash/filter_syslog.pp:
--------------------------------------------------------------------------------
1 | # Deploys the logstash filter for parsing syslog-file entries
2 | class profiles::logstash::filter_syslog {
3 | 
4 |   logstash::configfile { '40-filter_syslog':
5 |     content => template('profiles/logstash/filter/syslog'),
6 |   }
7 | 
8 | }
9 | 
--------------------------------------------------------------------------------
/modules/profiles/manifests/logstash/input_apache.pp:
--------------------------------------------------------------------------------
1 | # Deploys the apache file-fetcher for Logstash
2 | class profiles::logstash::input_apache {
3 | 
4 |   logstash::configfile { '30-input_apache':
5 |     content => template('profiles/logstash/input/apache'),
6 |   }
7 | 
8 | }
9 | 
--------------------------------------------------------------------------------
/modules/profiles/manifests/logstash/input_nasa_feeds.pp:
--------------------------------------------------------------------------------
1 | # This uses the twitter input to gather a bunch of NASA twitter feeds.
2 | class profiles::logstash::input_nasa_feeds {
3 | 
4 |   $logstash_twitter           = lookup( 'logstash::twitter', { merge => deep, default_value => {} } )
5 |   $twitter_consumer_key       = $logstash_twitter['consumer_key']
6 |   $twitter_consumer_secret    = $logstash_twitter['consumer_secret']
7 |   $twitter_oauth_token        = $logstash_twitter['oauth_token']
8 |   $twitter_oauth_token_secret = $logstash_twitter['oauth_token_secret']
9 |   $nasa_keywords              = $logstash_twitter['keywords']
10 | 
11 |   logstash::configfile { '30-input_nasa_feeds':
12 |     content => template('profiles/logstash/input/nasa_feeds'),
13 |   }
14 | 
15 | }
16 | 
--------------------------------------------------------------------------------
/modules/profiles/manifests/logstash/input_redis.pp:
--------------------------------------------------------------------------------
1 | # An input that listens to a redis-list. Used for parsing-nodes.
2 | class profiles::logstash::input_redis {
3 | 
4 |   $redis_ip = lookup('redis_ip', { default_value => '127.0.0.1' } )
5 | 
6 |   logstash::configfile { '30-input_redis':
7 |     content => template('profiles/logstash/input/redis'),
8 |   }
9 | 
10 | }
11 | 
--------------------------------------------------------------------------------
/modules/profiles/manifests/logstash/input_syslog_file.pp:
--------------------------------------------------------------------------------
1 | # Deploys the Logstash input for pulling syslog file data.
2 | class profiles::logstash::input_syslog_file {
3 | 
4 |   logstash::configfile { '30-input_syslog_file':
5 |     content => template('profiles/logstash/input/syslog_file'),
6 |   }
7 | 
8 | }
9 | 
--------------------------------------------------------------------------------
/modules/profiles/manifests/logstash/input_syslog_server.pp:
--------------------------------------------------------------------------------
1 | # Runs the 'syslog' input for logstash
2 | class profiles::logstash::input_syslog_server {
3 | 
4 |   logstash::configfile { '30-input_syslog_server':
5 |     content => template('profiles/logstash/input/syslog_server'),
6 |   }
7 | 
8 | }
9 | 
--------------------------------------------------------------------------------
/modules/profiles/manifests/logstash/output_escluster.pp:
--------------------------------------------------------------------------------
1 | # Outputs logstash events to a given IP address, instead of locally.
2 | class profiles::logstash::output_escluster {
3 | 
4 |   $escluster_ip = lookup('escluster_ip', { default_value => '127.0.0.1' } )
5 | 
6 |   logstash::configfile { '50-output_direct_es':
7 |     content => template('profiles/logstash/output/escluster'),
8 |   }
9 | 
10 | }
11 | 
--------------------------------------------------------------------------------
/modules/profiles/manifests/logstash/output_onebox.pp:
--------------------------------------------------------------------------------
1 | # Outputs to the local ES repo for onebox installs.
2 | class profiles::logstash::output_onebox {
3 | 
4 |   # Because onebox uses an index named something other than 'logstash', we
5 |   # have to import our own template.
This copies the template-file to the 6 | # local file-system, and the configfile fragment will use that to 7 | # update the ES Cluster templates. New indices will get those mappings. 8 | 9 | file { '/etc/logstash/logstash.json': 10 | owner => 'logstash', 11 | group => 'logstash', 12 | source => 'puppet:///modules/profiles/logstash/templates/onebox.json', 13 | require => Logstash::Configfile['50-output_onebox_es'], 14 | notify => Service['logstash'], 15 | } 16 | 17 | logstash::configfile { '50-output_onebox_es': 18 | content => template('profiles/logstash/output/onebox_es'), 19 | } 20 | 21 | } 22 | -------------------------------------------------------------------------------- /modules/profiles/manifests/logstash/output_redis.pp: -------------------------------------------------------------------------------- 1 | # Outputs to a redis server for queuing 2 | class profiles::logstash::output_redis { 3 | 4 | $redis_ip = lookup('redis_ip', { default_value => '127.0.0.1' } ) 5 | 6 | logstash::configfile { '50-output_redis': 7 | content => template('profiles/logstash/output/redis'), 8 | } 9 | 10 | } 11 | -------------------------------------------------------------------------------- /modules/profiles/manifests/onebox_es.pp: -------------------------------------------------------------------------------- 1 | # Installs the onebox elasticsearch server and instance. 2 | class profiles::onebox_es ( 3 | $instance_name = 'logstash' 4 | ) { 5 | 6 | include profiles::elastic_key 7 | 8 | ensure_packages ( 'openjdk-8-jre-headless', { 9 | require => Exec['apt_update'], 10 | } ) 11 | 12 | # Installs the elasticsearch base install, but not an instance. 13 | class { 'elasticsearch': 14 | version => '5.4.1', 15 | manage_repo => false, 16 | repo_version => '5.x', 17 | api_host => 'localhost', 18 | api_protocol => 'http', 19 | jvm_options => [ 20 | '-Xms512m', 21 | '-Xmx512m' 22 | ], 23 | require => Exec['apt_update'], 24 | } 25 | 26 | # Installs a specific instance. This puppet module allows installing multiple 27 | # ES instances on the same host. 'service elasticsearch-onebox_nasa stop' will 28 | # stop it. 29 | elasticsearch::instance { $instance_name: } 30 | 31 | # Needed later, ensures the given instance is up and running before it 32 | # passes. 
33 | # es_instance_conn_validator { $instance_name: 34 | # server => 'localhost', 35 | # port => '9200', 36 | # require => [ Elasticsearch::Instance[$instance_name] ], 37 | # } 38 | 39 | } 40 | -------------------------------------------------------------------------------- /modules/profiles/templates/base/bash_aliases: -------------------------------------------------------------------------------- 1 | PS1='(<%= @node_type %>) \u@\H:\w\$ ' 2 | 3 | -------------------------------------------------------------------------------- /modules/profiles/templates/curator/config.yml: -------------------------------------------------------------------------------- 1 | client: 2 | hosts: 3 | - <%= @elastic_host %> 4 | port: 9200 5 | use_ssl: False 6 | timeout: 30 7 | master_only: False 8 | 9 | logging: 10 | loglevel: INFO 11 | logfile: /var/log/curator.log 12 | logformat: json 13 | -------------------------------------------------------------------------------- /modules/profiles/templates/curator/onebox_delete.yml: -------------------------------------------------------------------------------- 1 | actions: 2 | 1: 3 | action: delete_indices 4 | description: Remove old indices 5 | filters: 6 | - filtertype: pattern 7 | kind: prefix 8 | value: onebox- 9 | - filtertype: age 10 | source: name 11 | direction: older 12 | timestring: '%Y.%m.%d' 13 | unit: days 14 | unit_count: 7 15 | 16 | 17 | -------------------------------------------------------------------------------- /modules/profiles/templates/logstash/filter/apache: -------------------------------------------------------------------------------- 1 | # From modules/profiles/templates/logstash/filter/apache 2 | filter { 3 | # This says, only do this parsing on events having the 'apache' tag. 4 | if [type] == "apache" { 5 | # Because there isn't a stand-alone parser for Apache logs, we have to build one 6 | # ourselves. This requires grok. 7 | # Documentation: https://www.elastic.co/guide/en/logstash/2.4/plugins-filters-grok.html 8 | grok { 9 | match => { 10 | "message" => "^%{COMBINEDAPACHELOG}" 11 | } 12 | } 13 | } 14 | # The 'COMBINEDAPACHELOG' and 'HTTPD24_ERRORLOG' are patterns that ship with logstash. 15 | # You can see their defines here: 16 | # https://github.com/logstash-plugins/logstash-patterns-core/blob/v4.0.2/patterns/grok-patterns#L96-L98 17 | if [type] == "apache-error" { 18 | grok { 19 | match => { 20 | "message" => "^%{HTTPD24_ERRORLOG}" 21 | } 22 | } 23 | } 24 | if [type] == "apache" { 25 | # I renamed my comment-file ages ago. Anything going to mt-comment is a comment-spam bot. 26 | if [request] == "/cgi-bin/mt/mt-comments.cgi" { 27 | mutate { 28 | add_field => { 29 | "is_spam" => true 30 | "blog_target" => "comment-spam" 31 | } 32 | } 33 | } 34 | # Flag traffic going to the old blog. 35 | if [request] =~ "^\/blog" { 36 | mutate { 37 | add_field => { "blog_target" => "blogger" } 38 | } 39 | # Flag traffic going to the new blog. 40 | } else if [request] =~ "^\/mt\/blog" { 41 | mutate { 42 | add_field => { "blog_target" => "movabletype" } 43 | } 44 | # Flag traffic linking directly from images. 45 | } else if [request] =~ "^\/images" { 46 | mutate { 47 | add_field => { "blog_target" => "images" } 48 | } 49 | } 50 | 51 | # Set the timestamp of the event to the one in the Apache logs. 52 | date { 53 | match => ["timestamp", "dd/MMM/YYYY:HH:mm:ss Z"] 54 | } 55 | 56 | # if we got this far, it isn't spam. 
57 | if [is_spam] != "true" { 58 | mutate { 59 | add_field => { "is_spam" => "false" } 60 | } 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /modules/profiles/templates/logstash/filter/syslog: -------------------------------------------------------------------------------- 1 | # From modules/profiles/templates/logstash/filter/syslog_file 2 | filter { 3 | if [type] =~ "^syslog" { 4 | # Syslog parsing is handled through Grok. 5 | # Documentation: https://www.elastic.co/guide/en/logstash/2.4/plugins-filters-grok.html 6 | grok { 7 | # This will create a new field called SYSLOGMESSAGE, that contains the 8 | # data part of a syslog line. 9 | # 10 | # If given a line like: 11 | # Sep 9 19:09:50 ip-192-0-2-153 dhclient: bound to 192.0.2.153 -- renewal in 1367 seconds. 12 | # 'message' will equal "Sep 9 19:09:50 ip-192-0-2-153 dhclient: bound to 192.0.2.153 -- renewal in 1367 seconds." 13 | # 'SYSLOGMESSAGE' will equal "bound to 192.0.2.153 -- renewal in 1367 seconds." 14 | # 'timestamp' will equal "Sep 9 19:09:50" 15 | # 'logsource' will equal "ip-192-0-2-153" 16 | # 'program' will equal "dhclient" 17 | # 18 | match => { 19 | "message" => "^%{SYSLOGBASE}%{SPACE}%{GREEDYDATA:SYSLOGMESSAGE}$" 20 | } 21 | } 22 | } 23 | if [type] =~ "^syslog" and "_grokparsefailure" not in [tags] { 24 | # This replaces 'message' with the contents of 'SYSLOGMESSAGE', but only if 25 | # the grok statement just above actually worked. 26 | # After this filter, the fields will be: 27 | # 'message' will equal "bound to 192.0.2.153 -- renewal in 1367 seconds." 28 | # 'timestamp' will equal "Sep 9 19:09:50" 29 | # 'logsource' will equal "ip-192-0-2-153" 30 | # 'program' will equal "dhclient" 31 | # And SYSLOGMESSAGE will not be present at all. 32 | mutate { 33 | replace => { "message" => "%{SYSLOGMESSAGE}" } 34 | remove_field => [ "SYSLOGMESSAGE" ] 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /modules/profiles/templates/logstash/input/apache: -------------------------------------------------------------------------------- 1 | # From modules/profiles/templates/logstash/input/apache 2 | input { 3 | # This uses the 'file' input to deal with Apache logs. The documentation is 4 | # here: https://www.elastic.co/guide/en/logstash/2.4/plugins-inputs-file.html 5 | # Parsing of these will happen in the 'filter' stage. This is where the magic 6 | # happens, turning strings into searchable fields. 7 | # 8 | # Tags are added to better allow us to filter events later in the pipeline. 9 | # 10 | file { 11 | path => [ 12 | "/var/log/apache2/access.log", 13 | "/var/log/apache2/other_vhosts_access.log", 14 | "/var/log/apache2/vhosts/*.log" 15 | ] 16 | type => "apache" 17 | } 18 | file { 19 | path => [ 20 | "/var/log/apache2/error.log" 21 | ] 22 | type => "apache-error" 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /modules/profiles/templates/logstash/input/journald: -------------------------------------------------------------------------------- 1 | # From modules/profiles/templates/logstash/input/journald 2 | input { 3 | # This is not a bundled plugin. You will have to install it yourself. 4 | # It's listed as a proof-of-concept, so be careful of quality. 
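  # It can usually be added with Logstash's plugin tool, for example:
  #   bin/logstash-plugin install logstash-input-journald
  # (The plugin name is an assumption here; verify it against the Logstash
  #  plugin registry before relying on it.)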
5 |   journald {
6 |     lowercase => true
7 |     seekto    => "head"
8 |     thisboot  => true
9 |     type      => "journald"
10 |   }
11 | }
12 | 
--------------------------------------------------------------------------------
/modules/profiles/templates/logstash/input/nasa_feeds:
--------------------------------------------------------------------------------
1 | # From modules/profiles/templates/logstash/input/nasa_feeds
2 | <%- if @twitter_consumer_key == false -%>
3 | # input {
4 | #   # To get these strings, go here: https://dev.twitter.com/apps/new
5 | #   # Then update the puppet-hiera/node-type/onebox_nasa.yaml
6 | #   # See also: https://www.elastic.co/guide/en/logstash/current/plugins-inputs-twitter.html
7 | #   twitter {
8 | #     consumer_key => 'string'
9 | #     consumer_secret => 'string'
10 | #     oauth_token => 'string'
11 | #     oauth_token_secret => 'string'
12 | #     keywords => [ 'string', 'string' ]
13 | #   }
14 | # }
15 | <%- else -%>
16 | input {
17 |   # Documentation: https://www.elastic.co/guide/en/logstash/current/plugins-inputs-twitter.html
18 |   twitter {
19 |     consumer_key => "<%= @twitter_consumer_key %>"
20 |     consumer_secret => "<%= @twitter_consumer_secret %>"
21 |     oauth_token => "<%= @twitter_oauth_token %>"
22 |     oauth_token_secret => "<%= @twitter_oauth_token_secret %>"
23 |     keywords => [ "<%= @nasa_keywords.join('", "') %>" ]
24 |     type => "twitter"
25 |   }
26 | }
27 | <%- end -%>
28 | 
--------------------------------------------------------------------------------
/modules/profiles/templates/logstash/input/redis:
--------------------------------------------------------------------------------
1 | # From modules/profiles/templates/logstash/input/redis
2 | input {
3 |   # Documented here: https://www.elastic.co/guide/en/logstash/2.4/plugins-inputs-redis.html
4 |   #
5 |   # This reads events off of the 'medium_cluster' list on the locally installed
6 |   # 'redis' server. They are placed there by shipper nodes processing jobs. In
7 |   # larger environments, the 'batch_count' attribute is used to pull more jobs off
8 |   # of the redis server and reduce load on it.
9 |   redis {
10 |     host => [ "<%= @redis_ip %>" ]
11 |     data_type => "list"
12 |     codec => "json"
13 |     key => "medium_cluster"
14 |     batch_count => 50
15 |   }
16 | }
17 | 
--------------------------------------------------------------------------------
/modules/profiles/templates/logstash/input/syslog_file:
--------------------------------------------------------------------------------
1 | # From modules/profiles/templates/logstash/input/syslog_file
2 | input {
3 |   file {
4 |     path => [
5 |       "/var/log/syslog",
6 |       "/var/log/auth.log",
7 |       "/var/log/cron.log"
8 |     ]
9 |     type => "syslog-files"
10 |   }
11 | }
12 | 
--------------------------------------------------------------------------------
/modules/profiles/templates/logstash/input/syslog_server:
--------------------------------------------------------------------------------
1 | # From modules/profiles/templates/logstash/input/syslog_server
2 | input {
3 |   syslog {
4 |     # By default, it will listen on 0.0.0.0:514
5 |     # Binding to TCP and UDP:514 requires running as root on Ubuntu.
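    # An alternative, not used in this repo, is to listen on an unprivileged
    # port (for example 5514) and have the local syslog daemon forward to it.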
6 |     # Documentation: https://www.elastic.co/guide/en/logstash/current/plugins-inputs-syslog.html
7 |     port => 514
8 |     type => "syslog-server"
9 |   }
10 | }
11 | 
--------------------------------------------------------------------------------
/modules/profiles/templates/logstash/output/escluster:
--------------------------------------------------------------------------------
1 | # From modules/profiles/templates/logstash/output/escluster
2 | output {
3 |   # Documented here: https://www.elastic.co/guide/en/logstash/current/plugins-outputs-elasticsearch.html
4 |   #
5 |   # The ElasticSearch output is rather complex and can handle many cases, as you would expect for an
6 |   # output to another Elastic product. This example simply dumps everything into indexes prefixed with
7 |   # "logstash" and rotates daily.
8 |   #
9 |   elasticsearch {
10 |     hosts => [ "<%= @escluster_ip %>" ]
11 |     index => "logstash-%{+YYYY.MM.dd}"
12 |     manage_template => true
13 |   }
14 | }
15 | 
--------------------------------------------------------------------------------
/modules/profiles/templates/logstash/output/onebox_es:
--------------------------------------------------------------------------------
1 | # From modules/profiles/templates/logstash/output/onebox_es
2 | output {
3 |   # Documented here: https://www.elastic.co/guide/en/logstash/current/plugins-outputs-elasticsearch.html
4 |   #
5 |   # The ElasticSearch output is rather complex and can handle many cases, as you would expect for an
6 |   # output to another Elastic product. This example simply dumps everything into indexes prefixed with
7 |   # "onebox" and rotates daily.
8 |   #
9 |   elasticsearch {
10 |     hosts => [ "127.0.0.1" ]
11 |     index => "onebox-%{+YYYY.MM.dd}"
12 |     template => "/etc/logstash/logstash.json"
13 |     manage_template => true
14 |     template_overwrite => true
15 |   }
16 | }
17 | 
--------------------------------------------------------------------------------
/modules/profiles/templates/logstash/output/redis:
--------------------------------------------------------------------------------
1 | # From modules/profiles/templates/logstash/output/redis
2 | output {
3 |   # Documented here: https://www.elastic.co/guide/en/logstash/2.4/plugins-outputs-redis.html
4 |   #
5 |   # Outputs to the redis-server for processing by the parser nodes.
6 |   #
7 |   # For production usage, consider setting batch, batch_events, and batch_timeout
8 |   # to take advantage of bulk updates. It adds some latency to your log pipeline,
9 |   # but it takes load off of the redis node.
10 |   redis {
11 |     host => [ "<%= @redis_ip %>" ]
12 |     data_type => "list"
13 |     codec => "json"
14 |     key => "medium_cluster"
15 |   }
16 | 
17 | }
18 | 
--------------------------------------------------------------------------------
/modules/roles/manifests/apache.pp:
--------------------------------------------------------------------------------
1 | # Sets up the logstash environment for an Apache server, outputting
2 | # to a variety of things, based on environment.
3 | class roles::apache {
4 | 
5 |   include profiles::base
6 |   include profiles::apache_stub
7 |   include profiles::kibana_network
8 | 
9 |   # Running as root to read the syslog file.
10 |   # However, if you add the 'logstash' user to the 'adm' group,
11 |   # you can read these files normally. Exercise for the reader.
12 |   class { 'profiles::logstash':
13 |     run_as_root => true,
14 |   }
15 | 
16 |   include profiles::logstash::input_syslog_file
17 |   include profiles::logstash::input_apache
18 | 
19 |   # Change our behavior based on environment.
20 | # Small: Parse locally, send to ElasticSearch. 21 | # Medium: Input locally, send to Redis, parse later. 22 | case $::env_type { 23 | 'small': { 24 | include profiles::logstash::filter_syslog 25 | include profiles::logstash::filter_apache 26 | include profiles::logstash::output_escluster 27 | } 28 | 'medium': { include profiles::logstash::output_redis } 29 | default: { include profiles::logstash::output_escluster } 30 | } 31 | 32 | } 33 | -------------------------------------------------------------------------------- /modules/roles/manifests/escluster.pp: -------------------------------------------------------------------------------- 1 | # Turns the box into an all-in-one ElasticSearch box. 2 | class roles::escluster { 3 | 4 | include profiles::base 5 | include profiles::escluster 6 | 7 | # Running as root to read the syslog file. 8 | # However, if you add the 'logstash' user to the 'adm' group, 9 | # you can read these files normally. Exercise for the reader. 10 | class { 'profiles::logstash': 11 | run_as_root => true, 12 | } 13 | 14 | include profiles::logstash::input_syslog_file 15 | include profiles::logstash::filter_syslog 16 | # Since this IS the ES box, output to itself. 17 | include profiles::logstash::output_escluster 18 | 19 | } 20 | -------------------------------------------------------------------------------- /modules/roles/manifests/init.pp: -------------------------------------------------------------------------------- 1 | # This is the base of the 'roles' class. 2 | class roles {} 3 | -------------------------------------------------------------------------------- /modules/roles/manifests/mdcluster.pp: -------------------------------------------------------------------------------- 1 | # Creates a combined elasticsearch/redis cluster. 2 | # Don't do this in prod! 3 | class roles::mdcluster { 4 | 5 | include profiles::base 6 | include profiles::escluster 7 | include profiles::logredis 8 | 9 | # Running as root to read the syslog file. 10 | # However, if you add the 'logstash' user to the 'adm' group, 11 | # you can read these files normally. Exercise for the reader. 12 | class { 'profiles::logstash': 13 | run_as_root => true, 14 | } 15 | 16 | # Read from the redis list, as this is a parser node. 17 | include profiles::logstash::input_redis 18 | 19 | # Fetch the local syslog, since we do that. 20 | include profiles::logstash::input_syslog_file 21 | 22 | # Include appropriate filters for all that we do. 23 | include profiles::logstash::filter_syslog 24 | include profiles::logstash::filter_apache 25 | # Since this IS the ES box, output to itself. 26 | include profiles::logstash::output_escluster 27 | 28 | 29 | } 30 | -------------------------------------------------------------------------------- /modules/roles/manifests/onebox_nasa.pp: -------------------------------------------------------------------------------- 1 | # This role constructs an example LogStash box that pulls information from 2 | # various NASA twitter feeds. It contains: 3 | # - LogStash 4 | # - ElasticSearch 5 | # - Kibana 6 | # 7 | class roles::onebox_nasa { 8 | 9 | include profiles::base 10 | 11 | #### Set up the local elasticsearch 12 | class { 'profiles::onebox_es': 13 | instance_name => 'onebox_nasa' 14 | } 15 | 16 | include profiles::logstash 17 | include profiles::logstash::output_onebox 18 | include profiles::logstash::input_nasa_feeds 19 | 20 | # Next, set up Kibana. 
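  # (The require on the ES instance service below keeps Kibana from being
  #  configured before ElasticSearch is up.)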
21 | 
22 |   class { 'profiles::kibana_local':
23 |     require => Service['elasticsearch-instance-onebox_nasa']
24 |   }
25 | 
26 | }
27 | 
--------------------------------------------------------------------------------
/modules/roles/manifests/onebox_syslog.pp:
--------------------------------------------------------------------------------
1 | # This role constructs an example LogStash box that sets up a syslog-plugin
2 | # based logstash service. It contains:
3 | # - LogStash
4 | # - ElasticSearch
5 | # - Kibana
6 | #
7 | class roles::onebox_syslog {
8 | 
9 |   include profiles::base
10 | 
11 |   #### Set up the local elasticsearch
12 |   class { 'profiles::onebox_es':
13 |     instance_name => 'onebox_syslog'
14 |   }
15 | 
16 |   # We need Java; this gets it.
17 |   ensure_packages ( 'openjdk-8-jre-headless', { require => Exec['apt_update'] } )
18 | 
19 |   ## Next, set up logstash.
20 |   # Unlike onebox_nasa, we need to run as root in order to bind
21 |   # UDP/514, so we're setting run_as_root = true.
22 |   class { 'profiles::logstash':
23 |     run_as_root => true,
24 |   }
25 | 
26 |   include profiles::logstash::output_onebox
27 |   include profiles::logstash::input_syslog_server
28 |   include profiles::logstash::input_syslog_file
29 |   include profiles::logstash::filter_syslog
30 | 
31 |   # Next, set up Kibana.
32 | 
33 |   class { 'profiles::kibana_local': }
34 | 
35 |   # Next, get Curator set up
36 | 
37 |   include profiles::curator
38 | 
39 |   # Remove onebox indexes older than a week.
40 |   profiles::curator_job { 'onebox_delete':
41 |     jobfile     => template('profiles/curator/onebox_delete.yml'),
42 |     cron_hour   => '3',
43 |     cron_minute => '5',
44 |   }
45 | 
46 | }
47 | 
--------------------------------------------------------------------------------
/prep_environment:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # This script preps the repo for use as a teaching tool.
4 | 
5 | echo "Checking out submodules"
6 | git submodule init
7 | git submodule update
8 | if [[ ! -d ../puppet-hiera && -d ./puppet-hiera ]]; then
9 |   echo "Setting up puppet-hiera directory."
10 |   cp -a puppet-hiera ../
11 | elif [ ! -f ../puppet-hiera/common.yaml ]; then
12 |   echo "It seems ../puppet-hiera exists, but doesn't have the right files."
13 |   echo "It should contain the contents of ./puppet-hiera."
14 | fi
15 | 
--------------------------------------------------------------------------------
/puppet-hiera/common.yaml:
--------------------------------------------------------------------------------
1 | logstash:
2 |   twitter:
3 |     follows:
4 |       - evil
5 | 
--------------------------------------------------------------------------------
/puppet-hiera/env-type/medium.yaml:
--------------------------------------------------------------------------------
1 | escluster_ip: '192.168.99.10'
2 | redis_ip: '192.168.99.10'
3 | 
--------------------------------------------------------------------------------
/puppet-hiera/env-type/small.yaml:
--------------------------------------------------------------------------------
1 | escluster_ip: '192.168.99.10'
2 | 
--------------------------------------------------------------------------------
/puppet-hiera/hiera.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | version: 5
3 | defaults:
4 |   datadir: /etc/puppet-hiera/
5 |   data_hash: yaml_data
6 | hierarchy:
7 |   - name: "Yaml lookup hierarchy"
8 |     paths:
9 |       - "node-type/%{node_type}.yaml"
10 |       - "env-type/%{env_type}.yaml"
11 |       - common
12 | 
--------------------------------------------------------------------------------
/puppet-hiera/node-type/onebox_nasa.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | # To make the twitter demo work, you will need your own twitter API
3 | # credentials: https://apps.twitter.com/app/new
4 | logstash::twitter:
5 |   consumer_key: false
6 |   consumer_secret: false
7 |   oauth_token: false
8 |   oauth_token_secret: false
9 |   keywords:
10 |     - nasa
11 |     - LISA16
12 | 
--------------------------------------------------------------------------------
/vup:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | if [ -f /etc/lsb-release ]; then
4 |   . /etc/lsb-release
5 |   UBU_RELEASE=$DISTRIB_CODENAME
6 | else
7 |   UBU_RELEASE='trusty'
8 | fi
9 | 
10 | function puppet_up {
11 |   echo "Setting up Puppetlabs repo."
12 |   echo "# Puppetlabs products
13 | deb http://apt.puppetlabs.com ${UBU_RELEASE} PC1
14 | " > /etc/apt/sources.list.d/puppetlabs.list
15 | 
16 |   echo "Pinning puppet-agent to puppetlabs"
17 |   echo "Package: puppet-agent
18 | Pin: origin apt.puppetlabs.com
19 | Pin-Priority: 1000
20 | " > /etc/apt/preferences.d/puppetlabs.pref
21 | 
22 |   curl -s http://apt.puppetlabs.com/pubkey.gpg | apt-key add -
23 |   apt-get update
24 |   apt-get remove puppet-common -y
25 |   apt-get purge puppet-common -y
26 |   apt-get install puppet-agent=1.10.3-1${UBU_RELEASE} -y
27 |   ln -s /opt/puppetlabs/bin/facter /usr/local/bin/facter
28 |   ln -s /opt/puppetlabs/bin/puppet /usr/local/bin/puppet
29 | }
30 | 
31 | echo "Checking whether Puppet has already been upgraded..."
32 | if [ -f /root/puppet_upgraded ]; then
33 |   echo "Puppet already upgraded, skipping to next provisioner."
34 |   exit 0
35 | else
36 |   echo "Disabling Chef client..."
37 |   service chef-client stop
38 |   update-rc.d chef-client disable
39 |   echo "Running puppet..."
40 |   puppet_up
41 |   touch /root/puppet_upgraded
42 | fi
43 | 
--------------------------------------------------------------------------------