├── .gemrc
├── Makefile
├── Dockerfile
├── README.md
└── fluentd.conf

/.gemrc:
--------------------------------------------------------------------------------
gem: --no-document
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
release:
	docker build -t atlassianlabs/fluentd:$(tag) .
	docker push atlassianlabs/fluentd:$(tag)
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
FROM ruby:2.2.5-slim

MAINTAINER Micros

COPY .gemrc /root/

# Temporarily pin google-protobuf to 3.0.0.alpha.4.0:
# there are some concerns about the performance of the latest version.
RUN apt-get update -y && apt-get install -y \
    build-essential \
    zlib1g-dev \
    libjemalloc1 && \
    gem install fluentd:0.12.23 && \
    gem install google-protobuf -v 3.0.0.alpha.4.0 --pre && \
    fluent-gem install \
    fluent-plugin-ec2-metadata:0.0.9 \
    fluent-plugin-hostname:0.0.2 \
    fluent-plugin-retag:0.0.1 \
    fluent-plugin-kinesis:1.0.1 \
    fluent-plugin-elasticsearch:1.4.0 \
    fluent-plugin-record-modifier:0.4.1 \
    fluent-plugin-multi-format-parser:0.0.2 \
    fluent-plugin-kinesis-aggregation:0.2.2 \
    fluent-plugin-concat:0.4.0 \
    fluent-plugin-parser:0.6.1 \
    fluent-plugin-statsd-event:0.1.1 && \
    apt-get purge -y build-essential && \
    apt-get autoremove -y && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

RUN mkdir -p /var/log/fluent

# Ports: monitor, forward, debug
EXPOSE 24220 24224 24230

ENV LD_PRELOAD "/usr/lib/x86_64-linux-gnu/libjemalloc.so.1"
CMD ["fluentd", "-c", "/etc/fluent/fluentd.conf"]
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Overview

Fluentd Docker image with Ruby 2.2.x and support for both Elasticsearch
and Kinesis.

# Configuration

Create a `fluentd.conf` file following fluentd's configuration file
format. Add any inputs you wish, then add the Kinesis output
(see the example file in this repository).

## Installed plugins

The following plugins are installed in the docker image:

1. fluent-plugin-ec2-metadata
1. fluent-plugin-hostname
1. fluent-plugin-retag
1. fluent-plugin-kinesis
1. fluent-plugin-kinesis-aggregation
1. fluent-plugin-elasticsearch
1. fluent-plugin-record-modifier
1. fluent-plugin-multi-format-parser
1. fluent-plugin-concat
1. fluent-plugin-parser
1. fluent-plugin-statsd-event
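To confirm which plugin versions ended up in a given image, you can list
the installed gems inside the container (a quick sketch; substitute the
image tag you are actually using):

```
docker run --rm atlassianlabs/fluentd:0.4.0 fluent-gem list | grep fluent-plugin
```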
Here is an example configuration to use them. Note that this uses
the old 'match/retag' approach; ideally you should use filter plugins
like record-modifier (see fluentd.conf), but unfortunately there
is no filter equivalent for ec2_metadata yet. (The match tag patterns
below are illustrative; adjust them to your own tags.)

```
<match your_awesome_logs>
  type hostname
  key_name ec2_hostname
  add_prefix hostname
</match>

<match hostname.your_awesome_logs>
  type ec2_metadata
  output_tag ec2.${tag}
  <record>
    ec2_instance_id   ${instance_id}
    ec2_instance_type ${instance_type}
    ec2_az            ${availability_zone}
    service_id        my-service-uuid
    env               my-environment
  </record>
</match>

<match ec2.hostname.your_awesome_logs>
  type retag
  remove_prefix ec2.hostname
</match>
```

# Run

Mount the volumes where your logs live (if needed) and the directory
containing the fluentd configuration file. Pass AWS credentials via
environment variables:

    docker run --ulimit nofile=65536:65536 -p 24224 -p 24220 -p 24230 \
        -v /var/log:/fluentd/log -v `pwd`:/etc/fluent \
        atlassianlabs/fluentd:0.4.0

# Release

First register a Docker Hub account and ask one of the existing members
to add you to the atlassianlabs team. Then run the following command to
release a new version:

```
make release tag=<version>
```
--------------------------------------------------------------------------------
/fluentd.conf:
--------------------------------------------------------------------------------
# example fluentd configuration file

# Don't ship fluentd's own log to Kinesis. Fun blow-ups can occur
# (like it attempting to emit errors about too-long messages, which fail
# with the entire too-long message in them...).
# Note that even though we set type null, the fluentd log still goes
# to stdout.
<match fluent.**>
  type null
</match>

<source>
  type forward
</source>

<source>
  type monitor_agent
</source>

<source>
  type debug_agent
</source>

# Tail some arbitrary file.
<source>
  type tail
  path /fluentd/log/file.log
  pos_file /fluentd/log/file.log.pos
  tag your_awesome_logs

  # An example of parsing JSON loglines, with a fallback
  # if the JSON cannot be parsed.
  format multi_format
  <pattern>
    format json
    time_key time
  </pattern>
  <pattern>
    format none
  </pattern>
</source>

<filter your_awesome_logs>
  type record_transformer
  <record>
    # LaaS requirements
    serviceId   <your-service-id>
    environment <your-environment>
  </record>
</filter>

<match your_awesome_logs>
  type kinesis

  region us-west-1

  # Either use role_arn, or keys (prefer role if possible)
  # role_arn <your-role-arn>
  aws_key_id <your-aws-key-id>
  aws_sec_key <your-aws-secret-key>

  stream_name logs
  # Restricts logs to a single shard, but helps make sure
  # loglines appear in order. If you are shipping a _lot_ of
  # logs, you may want to use 'random_partition_key true' instead,
  # and go multi-thread/process.
  partition_key_expr record['serviceId'] + '-' + record['environment']

  # Don't break on non-ascii/multi-byte chars (i.e. utf-8).
  use_yajl true

  # This is actually the default config, but it seems better to
  # make it explicit.
  time_key time
  include_time_key true
  tag_key tag
  include_tag_key true

  # Note that retries are handled by the kinesis plugin so that
  # it can handle partial record failures (i.e. when not all records fail).
  # If it's still failing after its internal retries are exhausted, it
  # throws away the chunk _without_ raising an error, so fluentd
  # proper will not retry. Unfortunately, there's no way to specify
  # an equivalent of max_retry_wait (it keeps exponentially backing off),
  # so if we set the retries too high it can take a long time to
  # come back after an outage.
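  # For reference, the arithmetic behind the rough figures quoted below
  # (a sketch -- this assumes one 2^n * 0.5s backoff per call, with n
  # running from 1 to 13, and ignores the duration of the calls themselves):
  #   total backoff = sum(n=1..13) 2^n * 0.5 = 0.5 * (2^14 - 2)
  #                 ~= 8190s, i.e. a bit over 2 hours
  #   longest single backoff = 2^13 * 0.5 = 4096s ~= 68 minutes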
  # These 12 retries will go for approx 2 hours, based on this delay:
  # 2^n * 0.5
  # This means that, once the stream becomes unthrottled/responsive
  # again, it can take us up to ~1 hour to try again.
  retries_on_putrecords 13 # implies 13 calls - i.e. 12 retries

  # If we fail talking to Kinesis completely though (i.e. some
  # API exception), we do want fluentd to retry... though there
  # is the danger of resubmitting log records, since the
  # whole chunk will be retried.
  retry_wait 1s
  max_retry_wait 300s # ~5 minutes
  retry_limit 300 # ~24 hours max time to retry

  # No memory buffer, because this is a 1gb buffer and fluentd recommends
  # not using memory (file is actually faster, apparently). A file buffer
  # also survives if fluentd is forced to restart.
  buffer_type file
  buffer_path /fluentd/log/fluentd.*.buffer
  # If the box is going down, we're probably not going to see our disk again,
  # so flush if we can.
  flush_at_shutdown true
  # We use 1gb at most here (worst case scenario: 111 chunks * 9m ~= 1gb).
  buffer_queue_limit 111
  buffer_chunk_limit 9m # the maximum kinesis PutRecords request is 5mb anyway
  # Ship reasonably frequently so people see logs ASAP.
  flush_interval 5s
</match>
--------------------------------------------------------------------------------