├── .gitignore
├── .travis.yml
├── CHANGELOG
├── CONTRIBUTING.md
├── DESIGN
├── Gemfile
├── Gemfile.lock
├── LICENSE
├── Makefile
├── PROTOCOL.md
├── README.md
├── config.go
├── config_test.go
├── emitter.go
├── event.go
├── filecompare.go
├── filecompare_windows.go
├── fileinfo_darwin.go
├── fileinfo_linux.go
├── fileinfo_openbsd.go
├── fileinfo_windows.go
├── filestate_darwin.go
├── filestate_linux.go
├── filestate_openbsd.go
├── filestate_windows.go
├── harvester.go
├── logstash-forwarder.conf.example
├── logstash-forwarder.go
├── pkg
│   ├── centos
│   │   ├── after-install.sh
│   │   ├── before-install.sh
│   │   └── before-remove.sh
│   └── ubuntu
│       ├── after-install.sh
│       ├── before-install.sh
│       └── before-remove.sh
├── prospector.go
├── publisher1.go
├── publisher1_test.go
├── registrar.go
├── registrar_other.go
├── registrar_windows.go
├── spec
│   ├── acceptance
│   │   └── packaging_spec.rb
│   ├── lumberjack_spec.rb
│   └── spec_helper.rb
├── spooler.go
├── syslog.go
├── syslog_windows.go
└── version.go

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.o
build
bin
*.tar.gz
*.rpm
*.deb
*.bz2
*.gz
src/github.com/alecthomas/gozmq
vendor/jemalloc/*/
vendor/libuuid/*/
vendor/lz4/lz4-r74/
vendor/openssl/*/
vendor/zeromq/*/
vendor/libsodium/*/
vendor/zlib/*/
nacl.public
nacl.secret
.logstash-forwarder
.lumberjack.new
.rbx
logstash-forwarder
*.DS_Store
*.idea
*.iml
*.crt
*.key
*.json
*.lock

--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
language: ruby
rvm:
  - 1.9.3
install: bundle install
script: "make VENDOR='zeromq jemalloc zlib' && bundle exec rspec spec/lumberjack_spec.rb"

--------------------------------------------------------------------------------
/CHANGELOG:
--------------------------------------------------------------------------------
0.4.0
  = Security:
  - Requires that the server support TLS 1.0 or higher (#402). This resolves a number of
    security concerns, including POODLE. The POODLE concern was reported
    and validated by Tray Torrance, Marc Chadwick, and David Arena. Additionally,
    the PCI SSC announced that SSLv3 was not acceptable anymore.

  = General:
  - Tested successfully against Logstash 1.4.2 and Logstash 1.5.
  - rpm and deb packaging supported, with init scripts
  - Comments ('#') are supported in the config file now! (#154, Michael Pearson)
  - The argument to -config can be a directory. If so, all files in that directory
    are loaded and merged into a single config. (#154, Michael Pearson)
  - Add -quiet flag to omit any notification/informational log messages from lsf.
  - Open files are now closed after they are idle for some time period. This
    time period is called "dead time" and it is configurable per file section.
    (Jason Woods)
  - Fix bug where EOF on an unfinished line would cause a partial event to be emitted.
    (#164, tzahari)
  - SSL certs with a CN or IP SAN are now required for server verification (#205, alex)
  - Fix a bug causing the registrar to lose track of files (#198, Jason Woods)
  - Supports sha256 and sha512 certificate signatures (#188, Philip Hofstetter)
  - OpenBSD supported.
  - Stronger test suite (alex, Joubin Houshyar, Michael Pearson, Jordan Sissel,
    Pier-Hugues Pellerin, Pere Urbon-Bayes)
  - Lots of small bugfixes and general love invested by Joubin Houshyar and Jason
    Woods.

Versions 0.3.1 and older did not have curated changelogs

--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
# Contributing to logstash-forwarder

All contributions are welcome: ideas, patches, documentation, bug reports,
complaints, etc!

Programming is not a required skill, and there are many ways to help out!
It is more important to us that you are able to contribute.

That said, here are some basic guidelines, which you are free to ignore :)

## Want to learn?

Want to lurk about and see what others are doing with logstash and lumberjack?

* The IRC channel (#logstash on irc.freenode.org) is a good place for this
* The [mailing list](http://groups.google.com/group/logstash-users) is also
  great for learning from others.

## Got Questions?

Have a problem or a question?

* You can email the [mailing list](http://groups.google.com/group/logstash-users)
* Alternatively, you are welcome to join the IRC channel #logstash on
  irc.freenode.org and ask for help there!

## Have an Idea or Feature Request?

* File a ticket on [github](https://github.com/elasticsearch/logstash-forwarder), or email the
  [mailing list](http://groups.google.com/group/logstash-users), or email
  me personally (jls@semicomplete.com) if that is more comfortable.

## Something Not Working? Found a Bug?

If you think you found a bug, it probably is a bug.

* File it on [github](https://github.com/elasticsearch/logstash-forwarder)
* or the [mailing list](http://groups.google.com/group/logstash-users).

# Contributing Documentation and Code Changes

If you have a bugfix or new feature that you would like to contribute to
logstash, and you think it will take more than a few minutes to produce the fix
(i.e., write code), it is worth discussing the change with the logstash users and
developers first! You can reach us via
[github](https://github.com/elasticsearch/logstash-forwarder), the [mailing
list](http://groups.google.com/group/logstash-users), or via IRC (#logstash on
freenode IRC).

## Code and Docs Contribution Steps

1. Test your changes! Run the test suite (`go test` and `rspec`); see the
   example run below.
2. Please make sure you have signed our [Contributor License
   Agreement](http://www.elasticsearch.org/contributor-agreement/). We are not
   asking you to assign copyright to us, but to give us the right to distribute
   your code without restriction. We ask this of all contributors in order to
   assure our users of the origin and continuing existence of the code. You
   only need to sign the CLA once.
3. Send a pull request! Push your changes to your fork of the repository and
   [submit a pull
   request](https://help.github.com/articles/using-pull-requests). In the pull
   request, describe what your changes do and mention any open issues related
   to the pull request.
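For reference, a local test run mirrors what `.travis.yml` does; assuming Go
and Bundler are installed, it looks something like:

```
go test
bundle install
bundle exec rspec spec/lumberjack_spec.rb
```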
--------------------------------------------------------------------------------
/DESIGN:
--------------------------------------------------------------------------------
Same basic functional design as the original lumberjack:
  Harvester reads events from files
  Event:
    Byte offset of start of event (number)
    Line number of event (number)
    File origin of event (string)
    Message (string)

Work model:
  Harvester(s)
    -> Enveloper (flush when full or after N idle seconds)
    -> Compressor (compresses whole envelopes)
    -> Encryptor (encrypts compressed envelopes)
    -> Emitter (ships over the wire)

Sending an envelope of an encrypted, compressed batch of messages allows
me freedom to pick any message-oriented protocol. The previous implementation
of lumberjack required channel encryption (with TLS), which limited the
kinds of transport tools that could be used.

Previously, compression was done on envelopes, but TLS was used to communicate
securely.

Messaging model w/ ZMQ:
* REQREP message model
  REQREP has high latency (lock-step request-response) but since
  I'm sending multiple events at once, I believe that latency is
  unimportant.

Messaging model w/ Redis:
* RPUSH + LPOP
* PUBLISH + SUBSCRIBE

Types of events:
  File Event - represents an event read from a file
  - file origin of event
  - byte offset of event
  - line number of event
  - event message (the contents)
  Compressed Envelope
  - number of items
  - type of item
  - compressed payload
  Encrypted Envelope
  - cipher
  - payload

--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
source 'https://rubygems.org'

gem "rspec"
gem "insist"
gem "stud"
gem "fpm"
gem "pleaserun"
gem "jls-lumberjack", :github => "elastic/ruby-lumberjack"

gem "jruby-openssl", :platform => :jruby

--------------------------------------------------------------------------------
/Gemfile.lock:
--------------------------------------------------------------------------------
GIT
  remote: git://github.com/ph/lumberjack.git
  revision: cc20667d6140ccd349e8954ba17f9aae03a7cab4
  specs:
    jls-lumberjack (0.0.22)

GEM
  remote: https://rubygems.org/
  specs:
    arr-pm (0.0.10)
      cabin (> 0)
    backports (3.6.4)
    cabin (0.7.1)
    childprocess (0.5.6)
      ffi (~> 1.0, >= 1.0.11)
    clamp (0.6.5)
    diff-lcs (1.2.5)
    ffi (1.9.8-java)
    fpm (1.3.3)
      arr-pm (~> 0.0.9)
      backports (>= 2.6.2)
      cabin (>= 0.6.0)
      childprocess
      clamp (~> 0.6)
      ffi
      json (>= 1.7.7)
    insist (1.0.0)
    jruby-openssl (0.9.7-java)
    json (1.8.2-java)
    mustache (0.99.8)
    pleaserun (0.0.16)
      cabin (> 0)
      clamp
      insist
      mustache (= 0.99.8)
      stud
    rspec (3.2.0)
      rspec-core (~> 3.2.0)
      rspec-expectations (~> 3.2.0)
      rspec-mocks (~> 3.2.0)
    rspec-core (3.2.3)
      rspec-support (~> 3.2.0)
    rspec-expectations (3.2.1)
      diff-lcs (>= 1.2.0, < 2.0)
      rspec-support (~> 3.2.0)
    rspec-mocks (3.2.1)
      diff-lcs (>= 1.2.0, < 2.0)
      rspec-support (~> 3.2.0)
    rspec-support (3.2.2)
    stud (0.0.19)

PLATFORMS
  java

DEPENDENCIES
  fpm
  insist
  jls-lumberjack!
  jruby-openssl
  pleaserun
  rspec
  stud

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
Copyright 2012–2015 Jordan Sissel and contributors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
.PHONY: default
default: compile

OBJECTS=logstash-forwarder

.PHONY: compile
compile: $(OBJECTS)

logstash-forwarder:
	go build -o $@

.PHONY: clean
clean:
	-rm $(OBJECTS)
	-rm -rf build

.PHONY: generate-init-script
generate-init-script:
	pleaserun --install --no-install-actions --install-prefix ./build \
		--chdir /var/lib/logstash-forwarder \
		--sysv-log-path /var/log/logstash-forwarder/ \
		--overwrite -p sysv -v lsb-3.1 $(PREFIX)/bin/logstash-forwarder -config /etc/logstash-forwarder.conf

build/empty: | build
	mkdir $@

.PHONY: rpm deb
rpm: AFTER_INSTALL=pkg/centos/after-install.sh
rpm: BEFORE_INSTALL=pkg/centos/before-install.sh
rpm: BEFORE_REMOVE=pkg/centos/before-remove.sh
deb: AFTER_INSTALL=pkg/ubuntu/after-install.sh
deb: BEFORE_INSTALL=pkg/ubuntu/before-install.sh
deb: BEFORE_REMOVE=pkg/ubuntu/before-remove.sh
rpm deb: PREFIX=/opt/logstash-forwarder
rpm deb: VERSION=$(shell ./logstash-forwarder -version)
rpm deb: compile generate-init-script build/empty
	fpm -f -s dir -t $@ -n logstash-forwarder -v $(VERSION) \
		--architecture native \
		--replaces lumberjack \
		--description "a log shipping tool" \
		--url "https://github.com/elasticsearch/logstash-forwarder" \
		--after-install $(AFTER_INSTALL) \
		--before-install $(BEFORE_INSTALL) \
		--before-remove $(BEFORE_REMOVE) \
		--config-files /etc/logstash-forwarder.conf \
		./logstash-forwarder=$(PREFIX)/bin/ \
		./logstash-forwarder.conf.example=/etc/logstash-forwarder.conf \
		./build/etc=/ \
		./build/empty/=/var/lib/logstash-forwarder/ \
		./build/empty/=/var/log/logstash-forwarder/

--------------------------------------------------------------------------------
/PROTOCOL.md:
--------------------------------------------------------------------------------
# The Lumberjack Protocol

# DISCLAIMER

The lumberjack protocol is actively in development at Elastic.

However, this document (the protocol documentation) has fallen out of date with
respect to the actual implementation of the Elastic Beats project and the
Logstash Beats input. It may be inaccurate in places, and we have not yet
documented the changes between the v1 and v2 protocols. This document is
therefore deprecated and should not be used as a reference.
# END DISCLAIMER

The needs that led to this protocol are:

* Encryption and authentication, to protect log data in transit
* Compression should be used to reduce bandwidth
* Round-trip latency should not damage throughput
* Application-level message acknowledgement

## Implementation Considerations

# Lumberjack Protocol v1

## Behavior

Sequence and ack behavior (including sliding window, etc.) is similar to TCP,
but instead of bytes, messages are the base unit.

A writer with a window size of 50 events can send up to 50 unacked events
before blocking. A reader can acknowledge the 'last event' received to
support bulk acknowledgements.

Reliable, ordered byte transport is ensured by using TCP (or TLS on top), and
this protocol aims to provide reliable, application-level, message transport.

## Encryption and Authentication

Currently this is to be handled by TLS.

## Wire Format

### Layering

This entire protocol is built to be layered on top of TCP or TLS.

### Framing

     0                   1                   2                   3
     0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
    +---------------+---------------+-------------------------------+
    |   version(1)  |   frame type  |     payload ...               |
    +---------------------------------------------------------------+
    |   payload continued...                                        |
    +---------------------------------------------------------------+

### 'data' frame type

* SENT FROM WRITER ONLY
* frame type value: ASCII 'D' aka byte value 0x44

data is a map of string:string pairs. This is analogous to a Hash in Ruby, a
JSON map, etc, but only strings are supported at this time.

Payload:

* 32bit unsigned sequence number
* 32bit 'pair' count (how many key/value sequences follow)
* 32bit unsigned key length followed by that many bytes for the key
* 32bit unsigned value length followed by that many bytes for the value
* repeat key/value 'count' times.

Note: all numeric values are in network (big-endian) byte order.

Sequence number roll-over: If you receive a sequence number less than the
previous value, this signals that the sequence number has rolled over.

### 'ack' frame type

* SENT FROM READER ONLY
* frame type value: ASCII 'A' aka byte value 0x41

Payload:

* 32bit unsigned sequence number.

Bulk acks are supported. If you receive data frames in sequence order
1,2,3,4,5,6, you can send an ack for '6' and the writer will take this to
mean you are acknowledging all data frames before and including '6'.

### 'window size' frame type

* SENT FROM WRITER ONLY
* frame type value: ASCII 'W' aka byte value 0x57

Payload:

* 32bit unsigned window size value in units of whole data frames.

This frame is used to tell the reader the maximum number of unacknowledged
data frames the writer will send before blocking for acks.

### 'compressed' frame type

* SENT FROM WRITER ONLY
* frame type value: ASCII 'C' aka byte value 0x43

Payload:

* 32bit unsigned payload length
* 'length' bytes of zlib compressed 'data' frames.

This frame type allows you to compress many frames into a single compressed
envelope and is useful for efficiently compressing many small data frames.
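As a non-normative illustration, here is a sketch in Go of serializing a
single 'data' frame as specified above. The version byte follows the "1D"
notation used in the example that follows; the helper name and the key/value
content are made up for this sketch:

```go
package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

// writeDataFrame appends one v1 'data' frame to buf: version '1',
// frame type 'D', a sequence number, a pair count, then
// length-prefixed keys and values. All integers are big-endian.
func writeDataFrame(buf *bytes.Buffer, seq uint32, kv map[string]string) {
	buf.WriteByte('1') // protocol version
	buf.WriteByte('D') // 'data' frame type
	binary.Write(buf, binary.BigEndian, seq)
	binary.Write(buf, binary.BigEndian, uint32(len(kv)))
	for k, v := range kv {
		binary.Write(buf, binary.BigEndian, uint32(len(k)))
		buf.WriteString(k)
		binary.Write(buf, binary.BigEndian, uint32(len(v)))
		buf.WriteString(v)
	}
}

func main() {
	var buf bytes.Buffer
	writeDataFrame(&buf, 1, map[string]string{"line": "hello world"})
	fmt.Printf("% x\n", buf.Bytes())
}
```

Several such frames, concatenated and run through zlib, would form the payload
of a 'compressed' frame.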
The compressed payload MUST contain full frames only, not partial frames.
The uncompressed payload MUST be a valid frame stream by itself. As an example,
you could have 3 data frames compressed into a single 'compressed' frame type:
1D{k,v}{k,v}1D{k,v}{k,v}1D{k,v}{k,v} - when uncompressed, you should process
the uncompressed payload as you would when reading uncompressed frames from
the network.

TODO(sissel): This model is likely suboptimal; whole-stream compression
(z_stream in zlib; Zlib::ZStream in Ruby) might be preferable.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# logstash-forwarder

# THIS PROJECT IS REPLACED BY [FILEBEAT](https://github.com/elastic/beats/tree/master/filebeat)

The [filebeat](https://github.com/elastic/beats/tree/master/filebeat) project replaces logstash-forwarder. Please use that instead.

No further development will occur on this project. Major bug fixes or security fixes may be worked on through 2016, at which point this repository and its project will be abandoned. The replacement is [filebeat](https://github.com/elastic/beats/tree/master/filebeat), which receives new features and fixes frequently. :)

Happy logging!

---

♫ I'm a lumberjack and I'm ok! I sleep when idle, then I ship logs all day! I parse your logs, I eat the JVM agent for lunch! ♫

(This project was recently renamed from 'lumberjack' to 'logstash-forwarder' to
make its intended use clear. The 'lumberjack' name now remains as the network
protocol, and 'logstash-forwarder' is the name of the program. It's still the
same lovely log forwarding program you love.)

## Questions and support

If you have questions and cannot find answers, please join the #logstash IRC
channel on freenode IRC or ask on the logstash-users@googlegroups.com mailing
list.

## What is this?

A tool to collect logs locally in preparation for processing elsewhere!

### Resource Usage Concerns

Perceived problems: Some users view logstash releases as "large" or have a generalized fear of Java.

Actual problems: Logstash, for right now, runs with a footprint that is not
friendly to underprovisioned systems such as EC2 micro instances; on other
systems it is fine. This project will exist until that is resolved.

### Transport Problems

Few log transport mechanisms provide security, low latency, and reliability.

The lumberjack protocol used by this project exists to provide a network
protocol for transmission that is secure, low latency, low resource usage, and
reliable.

## Configuring

logstash-forwarder is configured with a JSON file you specify with the -config flag:

`logstash-forwarder -config yourstuff.json`

Here's a sample, with comments in-line to describe the settings. Comments are
invalid in JSON, but logstash-forwarder will strip them out for you if they're
the only thing on the line:

    {
      # The network section covers network configuration :)
      "network": {
        # A list of downstream servers listening for our messages.
        # logstash-forwarder will pick one at random and only switch if
        # the selected one appears to be dead or unresponsive
        "servers": [ "localhost:5043" ],

        # The path to your client ssl certificate (optional)
        "ssl certificate": "./logstash-forwarder.crt",
        # The path to your client ssl key (optional)
        "ssl key": "./logstash-forwarder.key",

        # The path to your trusted ssl CA file. This is used
        # to authenticate your downstream server.
        "ssl ca": "./logstash-forwarder.crt",

        # Network timeout in seconds. This is most important for
        # logstash-forwarder determining whether to stop waiting for an
        # acknowledgement from the downstream server. If a timeout is reached,
        # logstash-forwarder will assume the connection or server is bad and
        # will connect to a server chosen at random from the servers list.
        "timeout": 15
      },

      # The list of file configurations
      "files": [
        # An array of hashes. Each hash tells what paths to watch and
        # what fields to annotate on events from those paths.
        {
          "paths": [
            # single paths are fine
            "/var/log/messages",
            # globs are fine too, they will be periodically evaluated
            # to see if any new files match the wildcard.
            "/var/log/*.log"
          ],

          # A dictionary of fields to annotate on each event.
          "fields": { "type": "syslog" }
        }, {
          # A path of "-" means stdin.
          "paths": [ "-" ],
          "fields": { "type": "stdin" }
        }, {
          "paths": [
            "/var/log/apache/httpd-*.log"
          ],
          "fields": { "type": "apache" }
        }
      ]
    }

Any part of the config can use environment variables as `$VAR` or `${VAR}`. They will be evaluated before the JSON is parsed, allowing you to pass in any structure.

You can also read an entire directory of JSON configs by specifying a directory instead of a file with the `-config` option.

# IMPORTANT TLS/SSL CERTIFICATE NOTES

This program will reject SSL/TLS certificates which have a subject which does not match the `servers` value, for any given connection. For example, if you have `"servers": [ "foobar:12345" ]` then the 'foobar' server MUST use a certificate with a subject or subject-alternative that includes `CN=foobar`. Wildcards are also supported, for things like `CN=*.example.com`. If you use an IP address, such as `"servers": [ "1.2.3.4:12345" ]`, your ssl certificate MUST use an IP SAN with the value "1.2.3.4". If you do not, the TLS handshake will FAIL and the lumberjack connection will close due to trust problems.

Creating a correct SSL/TLS infrastructure is outside the scope of this document.

As a very poor example (largely due to unpredictability in your system's defaults for openssl), you can try the following command as an example for creating a self-signed certificate/key pair for use with a server named "logstash.example.com":

```
openssl req -x509 -batch -nodes -newkey rsa:2048 -keyout lumberjack.key -out lumberjack.crt -subj /CN=logstash.example.com
```

The above example will create an SSL cert for the host 'logstash.example.com'. You cannot use `/CN=1.2.3.4` to create an SSL certificate for an IP address. In order to do a certificate with an IP address, you must create a certificate with an "IP Subject Alternative", often called an "IP SAN". Creating a certificate with an IP SAN is difficult and annoying, so I highly recommend you use hostnames only.
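If you really must, one approach that may work (a sketch only; openssl
behavior varies by version, and "1.2.3.4" is a placeholder) is to pass an
extension config to openssl:

```
cat > ipsan.cnf <<'EOF'
[req]
distinguished_name = dn
x509_extensions = ext
[dn]
[ext]
subjectAltName = IP:1.2.3.4
EOF
openssl req -x509 -batch -nodes -newkey rsa:2048 \
  -keyout lumberjack.key -out lumberjack.crt \
  -subj /CN=1.2.3.4 -days 365 -config ipsan.cnf
```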
If you have no DNS available to you, it is still often easier to set hostnames in /etc/hosts than it is to create a certificate with an IP SAN.

logstash-forwarder needs the `.crt` file, and logstash will need both the `.key` and `.crt` files.

Again, creating a correct SSL/TLS certificate authority, or generally doing certificate management, is outside the scope of this document.

If you see an error like this:

```
x509: cannot validate certificate for 1.2.3.4 because it doesn't contain any IP SANs
```

it means you are telling logstash-forwarder to connect to a host by IP address,
and therefore you must include an IP SAN in your certificate. Generating an SSL
certificate with an IP SAN is quite annoying, so I *HIGHLY* recommend you use
DNS names and set the CN in your cert to your DNS name.

### Goals

* Minimize resource usage where possible (CPU, memory, network).
* Secure transmission of logs.
* Configurable event data.
* Easy to deploy with minimal moving parts.
* Simple inputs only:
  * Follows files and respects rename/truncation conditions.
  * Accepts `STDIN`, useful for things like `varnishlog | logstash-forwarder...`.

## Building it

1. Install [go](http://golang.org/doc/install)

2. Compile logstash-forwarder

   Note: Do not use gccgo for this project. If you don't know what that means,
   you're probably OK to ignore this.

        git clone git://github.com/elasticsearch/logstash-forwarder.git
        cd logstash-forwarder
        go build -o logstash-forwarder

gccgo note: Using gccgo is not recommended because it produces a binary with a
runtime dependency on libgo. With the normal go compiler, this dependency
doesn't exist and, as a result, the binary is easier to deploy. You can check
whether you are using gccgo by running `go version`: if it outputs something
like `go version xgcc`, you are probably using gccgo, and I recommend you
don't. You can also check the resulting binary by running
`ldd ./logstash-forwarder` and seeing if `libgo` appears in the output; if it
appears, then you are using gccgo, and I recommend you don't.

## Packaging it (optional)

You can make native packages of logstash-forwarder.

To do this, a recent version of Ruby is required: at least version 2.0.0.
If you are using your OS distribution's version of Ruby, especially on
Red Hat- or Debian-derived systems (Ubuntu, CentOS, etc.), you will need to
install Ruby and whatever the "ruby development" package is called for your
system. On Red Hat systems, you probably want `yum install ruby-devel`. On
Debian systems, you probably want `apt-get install ruby-dev`.

Prerequisite steps to prepare Ruby to build your packages are:

```
gem install bundler
bundle install
```

The `bundle install` will install any Ruby library dependencies that are used
in building packages.

Now build an rpm:

    make rpm

Or:

    make deb

## Installing it (via packages only)

If you don't use the rpm or deb make targets as above, you can skip this section.

Packages install to `/opt/logstash-forwarder`.

There are no run-time dependencies.
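Installing the result is then just your platform's package tool; for example
(the exact package file names depend on the version you built):

```
sudo rpm -ivh logstash-forwarder-*.rpm    # Red Hat / CentOS
sudo dpkg -i logstash-forwarder_*.deb     # Debian / Ubuntu
```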
## Running it

Generally:

    logstash-forwarder -config logstash-forwarder.conf

See `logstash-forwarder -help` for all the flags. The `-config` option is required and logstash-forwarder will not run without it.

The config file is documented further up in this file.

Note also that logstash-forwarder logs informational messages to stdout as it runs; pass the `-quiet` flag if you want only errors logged.

Fatal errors are always sent to stderr, regardless of the `-quiet` command-line option, and the process exits with a non-zero status.

### Key points

* You'll need an SSL CA to verify the server (host) with.
* You can specify custom fields for each set of paths in the config file. Any
  number of these may be specified. I use them to set fields like `type` and
  other custom attributes relevant to each log.

### Generating an ssl certificate

Logstash supports all certificates, including self-signed certificates. To generate a certificate, you can run the following command:

    $ openssl req -x509 -batch -nodes -newkey rsa:2048 -keyout logstash-forwarder.key -out logstash-forwarder.crt -days 365

This will generate a key at `logstash-forwarder.key` and a certificate valid for one year at `logstash-forwarder.crt`. Both the host running logstash-forwarder and the logstash instances receiving logs will require these files on disk to verify the authenticity of messages.

Recommended file locations:

- certificates: `/etc/pki/tls/certs/logstash-forwarder/`
- keys: `/etc/pki/tls/private/logstash-forwarder/`

## Use with logstash

In logstash, you'll want to use the [lumberjack](http://logstash.net/docs/latest/inputs/lumberjack) input, something like:

    input {
      lumberjack {
        # The port to listen on
        port => 12345

        # The paths to your ssl cert and key
        ssl_certificate => "path/to/ssl.crt"
        ssl_key => "path/to/ssl.key"

        # Set this to whatever you want.
        type => "somelogs"
      }
    }

## Implementation details

The information below is valid as of 2012/09/19.

### Minimize resource usage

* Sets small resource limits (memory, open files) on start up based on the
  number of files being watched.
* CPU: sleeps when there is nothing to do.
* Network/CPU: sleeps if there is a network failure.
* Network: uses zlib for compression.

### Secure transmission

* Uses OpenSSL to verify the server certificates (so you know who you
  are sending to).
* Uses OpenSSL to transport logs.

### Configurable event data

* The protocol supports sending a `string:string` map.

## License

See LICENSE file.
--------------------------------------------------------------------------------
/config.go:
--------------------------------------------------------------------------------
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"os"
	"path"
	"regexp"
	"time"
)

const configFileSizeLimit = 10 << 20

var defaultConfig = &struct {
	netTimeout   int64
	fileDeadtime string
}{
	netTimeout:   15,
	fileDeadtime: "24h",
}

type Config struct {
	Network NetworkConfig `json:"network"`
	Files   []FileConfig  `json:"files"`
}

type NetworkConfig struct {
	Servers        []string `json:"servers"`
	SSLCertificate string   `json:"ssl certificate"`
	SSLKey         string   `json:"ssl key"`
	SSLCA          string   `json:"ssl ca"`
	Timeout        int64    `json:"timeout"`
	timeout        time.Duration
}

type FileConfig struct {
	Paths    []string          `json:"paths"`
	Fields   map[string]string `json:"fields"`
	DeadTime string            `json:"dead time"`
	deadtime time.Duration
}

func DiscoverConfigs(file_or_directory string) (files []string, err error) {
	fi, err := os.Stat(file_or_directory)
	if err != nil {
		return nil, err
	}
	files = make([]string, 0)
	if fi.IsDir() {
		entries, err := ioutil.ReadDir(file_or_directory)
		if err != nil {
			return nil, err
		}
		for _, filename := range entries {
			files = append(files, path.Join(file_or_directory, filename.Name()))
		}
	} else {
		files = append(files, file_or_directory)
	}
	return files, nil
}

// Append values to the 'to' config from the 'from' config, erroring
// if a value would be overwritten by the merge.
func MergeConfig(to *Config, from Config) (err error) {
	to.Network.Servers = append(to.Network.Servers, from.Network.Servers...)
	to.Files = append(to.Files, from.Files...)

	// TODO: Is there a better way to do this in Go?
	if from.Network.SSLCertificate != "" {
		if to.Network.SSLCertificate != "" {
			return fmt.Errorf("SSLCertificate already defined as '%s' in previous config file", to.Network.SSLCertificate)
		}
		to.Network.SSLCertificate = from.Network.SSLCertificate
	}
	if from.Network.SSLKey != "" {
		if to.Network.SSLKey != "" {
			return fmt.Errorf("SSLKey already defined as '%s' in previous config file", to.Network.SSLKey)
		}
		to.Network.SSLKey = from.Network.SSLKey
	}
	if from.Network.SSLCA != "" {
		if to.Network.SSLCA != "" {
			return fmt.Errorf("SSLCA already defined as '%s' in previous config file", to.Network.SSLCA)
		}
		to.Network.SSLCA = from.Network.SSLCA
	}
	if from.Network.Timeout != 0 {
		if to.Network.Timeout != 0 {
			return fmt.Errorf("Timeout already defined as '%d' in previous config file", to.Network.Timeout)
		}
		to.Network.Timeout = from.Network.Timeout
	}
	return nil
}

func LoadConfig(path string) (config Config, err error) {
	config_file, err := os.Open(path)
	if err != nil {
		emit("Failed to open config file '%s': %s\n", path, err)
		return
	}
	defer config_file.Close()

	fi, _ := config_file.Stat()
	if size := fi.Size(); size > (configFileSizeLimit) {
		emit("config file (%q) size exceeds reasonable limit (%d) - aborting", path, size)
		return // REVU: shouldn't this return an error, then?
	}

	if fi.Size() == 0 {
		emit("config file (%q) is empty, skipping", path)
		return
	}

	buffer := make([]byte, fi.Size())
	_, err = config_file.Read(buffer)
	if err != nil {
		emit("Failed to read config file '%s': %s\n", path, err)
		return
	}
	emit("%s\n", buffer)

	buffer, err = StripComments(buffer)
	if err != nil {
		emit("Failed to strip comments from json: %s\n", err)
		return
	}

	buffer = []byte(os.ExpandEnv(string(buffer)))

	err = json.Unmarshal(buffer, &config)
	if err != nil {
		emit("Failed unmarshalling json: %s\n", err)
		return
	}

	for k := range config.Files {
		if config.Files[k].DeadTime == "" {
			config.Files[k].DeadTime = defaultConfig.fileDeadtime
		}
		config.Files[k].deadtime, err = time.ParseDuration(config.Files[k].DeadTime)
		if err != nil {
			emit("Failed to parse dead time duration '%s'. Error was: %s\n", config.Files[k].DeadTime, err)
			return
		}
	}

	return
}

func FinalizeConfig(config *Config) {
	if config.Network.Timeout == 0 {
		config.Network.Timeout = defaultConfig.netTimeout
	}

	config.Network.timeout = time.Duration(config.Network.Timeout) * time.Second
}

func StripComments(data []byte) ([]byte, error) {
	data = bytes.Replace(data, []byte("\r"), []byte(""), -1) // strip all CRs (Windows line endings)
	lines := bytes.Split(data, []byte("\n"))
	filtered := make([][]byte, 0)

	for _, line := range lines {
		match, err := regexp.Match(`^\s*#`, line)
		if err != nil {
			return nil, err
		}
		if !match {
			filtered = append(filtered, line)
		}
	}

	return bytes.Join(filtered, []byte("\n")), nil
}

--------------------------------------------------------------------------------
/config_test.go:
--------------------------------------------------------------------------------
package main

import (
	"io/ioutil"
	"os"
	"path"
	"reflect"
	"testing"
	"time"
)

// -------------------------------------------------------------------
// test support funcs
// -------------------------------------------------------------------
func chkerr(t *testing.T, err error) {
	if err != nil {
		t.Errorf("Error encountered: %s", err)
	}
}

func makeTempDir(t *testing.T) string {
	tmpdir, err := ioutil.TempDir("", "logstash-config-test")
	chkerr(t, err)
	return tmpdir
}

func rmTempDir(tmpdir string) {
	_ = os.RemoveAll(tmpdir)
}

// -------------------------------------------------------------------
// Tests
// -------------------------------------------------------------------
func TestDiscoverConfigs(t *testing.T) {
	tmpdir := makeTempDir(t)
	defer rmTempDir(tmpdir)
	tmpfile1 := path.Join(tmpdir, "myfile1")
	tmpfile2 := path.Join(tmpdir, "myfile2")
	err := ioutil.WriteFile(tmpfile1, make([]byte, 0), 0644)
	chkerr(t, err)
	err = ioutil.WriteFile(tmpfile2, make([]byte, 0), 0644)
	chkerr(t, err)

	configs, err := DiscoverConfigs(tmpdir)
	chkerr(t, err)

	expected := []string{tmpfile1, tmpfile2}
	if !reflect.DeepEqual(configs, expected) {
		t.Fatalf("Expected to find %v, got %v instead", expected, configs)
	}

	configs, err = DiscoverConfigs(tmpfile1)
	chkerr(t, err)

	expected = []string{tmpfile1}
	if !reflect.DeepEqual(configs, expected) {
		t.Fatalf("Expected to find %v, got %v instead", expected, configs)
	}
}

func TestLoadEmptyConfig(t *testing.T) {
	tmpdir := makeTempDir(t)
	defer rmTempDir(tmpdir)

	configFile := path.Join(tmpdir, "myconfig")
	err := ioutil.WriteFile(configFile, []byte(""), 0644)
	chkerr(t, err)

	config, err := LoadConfig(configFile)
	if err != nil {
		t.Fatalf("Error loading config file: %s", err)
	}

	if !reflect.DeepEqual(config, Config{}) {
		t.Fatalf("Expected empty Config, got \n\n%v\n\n from LoadConfig", config)
	}
}

func TestLoadConfigAndStripComments(t *testing.T) {
	configJson := `
# A comment at the beginning of the line
{
  # A comment after some spaces
  "network": {
    "servers": [ "localhost:5043" ],
    "ssl certificate": "./logstash-forwarder.crt",
    "ssl key": "./logstash-forwarder.key",
    "ssl ca": "./logstash-forwarder.ca",
    "timeout": 20
  },
  # A comment in the middle of the JSON
  "files": [
    {
      "paths": [
        "/var/log/*.log",
        "/var/log/messages"
      ],
      "fields": { "type": "syslog" },
      "dead time": "6h"
    }, {
      "paths": [ "/var/log/apache2/access.log" ],
      "fields": { "type": "apache" }
    }
  ]
}`

	tmpdir := makeTempDir(t)
	defer rmTempDir(tmpdir)

	configFile := path.Join(tmpdir, "myconfig")
	err := ioutil.WriteFile(configFile, []byte(configJson), 0644)
	chkerr(t, err)

	config, err := LoadConfig(configFile)
	if err != nil {
		t.Fatalf("Error loading config file: %s", err)
	}

	defaultDeadTime, _ := time.ParseDuration(defaultConfig.fileDeadtime)
	expected := Config{
		Network: NetworkConfig{
			Servers:        []string{"localhost:5043"},
			SSLCertificate: "./logstash-forwarder.crt",
			SSLKey:         "./logstash-forwarder.key",
			SSLCA:          "./logstash-forwarder.ca",
			Timeout:        20,
		},
		Files: []FileConfig{{
			Paths:    []string{"/var/log/*.log", "/var/log/messages"},
			Fields:   map[string]string{"type": "syslog"},
			DeadTime: "6h",
			deadtime: 21600000000000,
		}, {
			Paths:    []string{"/var/log/apache2/access.log"},
			Fields:   map[string]string{"type": "apache"},
			DeadTime: defaultConfig.fileDeadtime,
			deadtime: defaultDeadTime,
		}},
	}

	if !reflect.DeepEqual(config, expected) {
		t.Fatalf("Expected\n%v\n\ngot\n\n%v\n\nfrom LoadConfig", expected, config)
	}
}

func TestFinalizeConfig(t *testing.T) {
	config := Config{}

	FinalizeConfig(&config)
	if config.Network.Timeout != defaultConfig.netTimeout {
		t.Fatalf("Expected FinalizeConfig to default timeout to %d, got %d instead", defaultConfig.netTimeout, config.Network.Timeout)
	}

	config.Network.Timeout = 40
	expected := time.Duration(40) * time.Second
	FinalizeConfig(&config)
	if config.Network.timeout != expected {
		t.Fatalf("Expected FinalizeConfig to set the timeout duration to %v, got %v instead", expected, config.Network.timeout)
	}
}

func TestMergeConfig(t *testing.T) {
	configA := Config{
		Network: NetworkConfig{
			Servers:        []string{"localhost:5043"},
			SSLCertificate: "./logstash-forwarder.crt",
			SSLKey:         "./logstash-forwarder.key",
		},
		Files: []FileConfig{{
			Paths: []string{"/var/log/messagesA"},
		}},
	}

	configB := Config{
		Network: NetworkConfig{
			Servers: []string{"otherhost:5043"},
[]string{"otherhost:5043"}, 176 | SSLCA: "./logstash-forwarder.crt", 177 | Timeout: 20, 178 | }, 179 | Files: []FileConfig{{ 180 | Paths: []string{"/var/log/messagesB"}, 181 | }}, 182 | } 183 | 184 | expected := Config{ 185 | Network: NetworkConfig{ 186 | Servers: []string{"localhost:5043", "otherhost:5043"}, 187 | SSLCertificate: "./logstash-forwarder.crt", 188 | SSLKey: "./logstash-forwarder.key", 189 | SSLCA: "./logstash-forwarder.crt", 190 | Timeout: 20, 191 | }, 192 | Files: []FileConfig{{ 193 | Paths: []string{"/var/log/messagesA"}, 194 | }, { 195 | Paths: []string{"/var/log/messagesB"}, 196 | }}, 197 | } 198 | 199 | err := MergeConfig(&configA, configB) 200 | chkerr(t, err) 201 | 202 | if !reflect.DeepEqual(configA, expected) { 203 | t.Fatalf("Expected merged config to be %v, got %v instead", expected, configA) 204 | } 205 | 206 | err = MergeConfig(&configA, configB) 207 | if err == nil { 208 | t.Fatalf("Expected a double merge attempt to give us an error, it didn't") 209 | } 210 | } 211 | -------------------------------------------------------------------------------- /emitter.go: -------------------------------------------------------------------------------- 1 | package main 2 | -------------------------------------------------------------------------------- /event.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "os" 4 | 5 | type FileEvent struct { 6 | Source *string `json:"source,omitempty"` 7 | Offset int64 `json:"offset,omitempty"` 8 | Line uint64 `json:"line,omitempty"` 9 | Text *string `json:"text,omitempty"` 10 | Fields *map[string]string 11 | 12 | fileinfo *os.FileInfo 13 | } 14 | -------------------------------------------------------------------------------- /filecompare.go: -------------------------------------------------------------------------------- 1 | // +build !windows 2 | 3 | package main 4 | 5 | import ( 6 | "os" 7 | "syscall" 8 | ) 9 | 10 | func is_file_same(path string, info os.FileInfo, state *FileState) bool { 11 | fstat := info.Sys().(*syscall.Stat_t) 12 | return (fstat.Ino == state.Inode && fstat.Dev == state.Device) 13 | } 14 | 15 | func is_fileinfo_same(a os.FileInfo, b os.FileInfo) bool { 16 | af := a.Sys().(*syscall.Stat_t) 17 | bf := b.Sys().(*syscall.Stat_t) 18 | return (af.Dev == bf.Dev && af.Ino == bf.Ino) 19 | } 20 | 21 | func is_file_renamed(file string, info os.FileInfo, fileinfo map[string]ProspectorInfo, missingfiles map[string]os.FileInfo) string { 22 | // NOTE(driskell): What about using golang's func os.SameFile(fi1, fi2 FileInfo) bool instead? 23 | stat := info.Sys().(*syscall.Stat_t) 24 | 25 | for kf, ki := range fileinfo { 26 | if kf == file { 27 | continue 28 | } 29 | ks := ki.fileinfo.Sys().(*syscall.Stat_t) 30 | if stat.Dev == ks.Dev && stat.Ino == ks.Ino { 31 | return kf 32 | } 33 | } 34 | 35 | // Now check the missingfiles 36 | for kf, ki := range missingfiles { 37 | ks := ki.Sys().(*syscall.Stat_t) 38 | if stat.Dev == ks.Dev && stat.Ino == ks.Ino { 39 | return kf 40 | } 41 | } 42 | return "" 43 | } 44 | 45 | func is_file_renamed_resumelist(file string, info os.FileInfo, initial map[string]*FileState) string { 46 | // NOTE(driskell): What about using golang's func os.SameFile(fi1, fi2 FileInfo) bool instead? 
	stat := info.Sys().(*syscall.Stat_t)

	for kf, ki := range initial {
		if kf == file {
			continue
		}
		if stat.Dev == ki.Device && stat.Ino == ki.Inode {
			return kf
		}
	}

	return ""
}

--------------------------------------------------------------------------------
/filecompare_windows.go:
--------------------------------------------------------------------------------
package main

import (
	"os"
)

func is_file_same(path string, info os.FileInfo, state *FileState) bool {
	// Do we have any other way to validate a file is the same file
	// under windows?
	return path == *state.Source
}

func is_fileinfo_same(a os.FileInfo, b os.FileInfo) bool {
	// Anything meaningful to compare on file infos?
	return true
}

func is_file_renamed(file string, info os.FileInfo, fileinfo map[string]ProspectorInfo, missingfiles map[string]os.FileInfo) string {
	// Can we detect if a file was renamed on Windows?
	// NOTE(driskell): What about using golang's func os.SameFile(fi1, fi2 FileInfo) bool?
	return ""
}

func is_file_renamed_resumelist(file string, info os.FileInfo, initial map[string]*FileState) string {
	// Can we detect if a file was renamed on Windows?
	// NOTE(driskell): What about using golang's func os.SameFile(fi1, fi2 FileInfo) bool?
	return ""
}

--------------------------------------------------------------------------------
/fileinfo_darwin.go:
--------------------------------------------------------------------------------
package main

import (
	"os"
	"syscall"
)

func file_ids(info *os.FileInfo) (uint64, int32) {
	fstat := (*info).Sys().(*syscall.Stat_t)
	return fstat.Ino, fstat.Dev
}

--------------------------------------------------------------------------------
/fileinfo_linux.go:
--------------------------------------------------------------------------------
package main

import (
	"os"
	"syscall"
)

func file_ids(info *os.FileInfo) (uint64, uint64) {
	fstat := (*info).Sys().(*syscall.Stat_t)
	return fstat.Ino, fstat.Dev
}

--------------------------------------------------------------------------------
/fileinfo_openbsd.go:
--------------------------------------------------------------------------------
package main

import (
	"os"
	"syscall"
)

func file_ids(info *os.FileInfo) (uint64, int32) {
	fstat := (*info).Sys().(*syscall.Stat_t)
	return fstat.Ino, fstat.Dev
}

--------------------------------------------------------------------------------
/fileinfo_windows.go:
--------------------------------------------------------------------------------
package main

import (
	"os"
)

func file_ids(info *os.FileInfo) (uint64, uint64) {
	// No dev and inode numbers on windows, right?
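	// NOTE: NTFS does have a stable file identity (the volume serial number
	// plus the 64-bit file index from GetFileInformationByHandle's
	// BY_HANDLE_FILE_INFORMATION), but os.FileInfo does not expose it here,
	// so we fall back to zero values.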
	return 0, 0
}

--------------------------------------------------------------------------------
/filestate_darwin.go:
--------------------------------------------------------------------------------
package main

type FileState struct {
	Source *string `json:"source,omitempty"`
	Offset int64   `json:"offset,omitempty"`
	Inode  uint64  `json:"inode,omitempty"`
	Device int32   `json:"device,omitempty"`
}

--------------------------------------------------------------------------------
/filestate_linux.go:
--------------------------------------------------------------------------------
package main

type FileState struct {
	Source *string `json:"source,omitempty"`
	Offset int64   `json:"offset,omitempty"`
	Inode  uint64  `json:"inode,omitempty"`
	Device uint64  `json:"device,omitempty"`
}

--------------------------------------------------------------------------------
/filestate_openbsd.go:
--------------------------------------------------------------------------------
package main

type FileState struct {
	Source *string `json:"source,omitempty"`
	Offset int64   `json:"offset,omitempty"`
	Inode  uint64  `json:"inode,omitempty"`
	Device int32   `json:"device,omitempty"`
}

--------------------------------------------------------------------------------
/filestate_windows.go:
--------------------------------------------------------------------------------
package main

type FileState struct {
	Source *string `json:"source,omitempty"`
	Offset int64   `json:"offset,omitempty"`
	Inode  uint64  `json:"inode,omitempty"`
	Device uint64  `json:"device,omitempty"`
}

--------------------------------------------------------------------------------
/harvester.go:
--------------------------------------------------------------------------------
package main

import (
	"bufio"
	"bytes"
	"fmt"
	"io"
	"os" // for File and friends
	"time"
)

type Harvester struct {
	Path       string /* the file path to harvest */
	FileConfig FileConfig
	Offset     int64
	FinishChan chan int64

	file *os.File /* the file being watched */
}

func (h *Harvester) Harvest(output chan *FileEvent) {
	h.open()
	info, e := h.file.Stat()
	if e != nil {
		panic(fmt.Sprintf("Harvest: unexpected error: %s", e.Error()))
	}
	defer h.file.Close()

	// On completion, push offset so we can continue where we left off if we relaunch on the same file
	defer func() { h.FinishChan <- h.Offset }()

	var line uint64 = 0 // Ask registrar about the line number

	// get current offset in file
	offset, _ := h.file.Seek(0, os.SEEK_CUR)

	if h.Offset > 0 {
		emit("harvest: %q position:%d (offset snapshot:%d)\n", h.Path, h.Offset, offset)
	} else if options.tailOnRotate {
		emit("harvest: (tailing) %q (offset snapshot:%d)\n", h.Path, offset)
	} else {
		emit("harvest: %q (offset snapshot:%d)\n", h.Path, offset)
	}

	h.Offset = offset

	reader := bufio.NewReaderSize(h.file, options.harvesterBufferSize) // 16kb buffer by default
	buffer := new(bytes.Buffer)

	var read_timeout = 10 * time.Second
	last_read_time := time.Now()
	for {
		text, bytesread, err := h.readline(reader, buffer, read_timeout)

		if err != nil {
			if err == io.EOF {
				// timed out waiting for data, got eof.
				// Check to see if the file was truncated
				info, _ := h.file.Stat()
				if info.Size() < h.Offset {
					emit("File truncated, seeking to beginning: %s\n", h.Path)
					h.file.Seek(0, os.SEEK_SET)
					h.Offset = 0
				} else if age := time.Since(last_read_time); age > h.FileConfig.deadtime {
					// if last_read_time was more than dead time, this file is probably
					// dead. Stop watching it.
					emit("Stopping harvest of %s; last change was %v ago\n", h.Path, age)
					return
				}
				continue
			} else {
				emit("Unexpected state reading from %s; error: %s\n", h.Path, err)
				return
			}
		}
		last_read_time = time.Now()

		line++
		event := &FileEvent{
			Source:   &h.Path,
			Offset:   h.Offset,
			Line:     line,
			Text:     text,
			Fields:   &h.FileConfig.Fields,
			fileinfo: &info,
		}
		h.Offset += int64(bytesread)

		output <- event // ship the new event downstream
	} /* forever */
}

func (h *Harvester) open() *os.File {
	// Special handling that "-" means to read from standard input
	if h.Path == "-" {
		h.file = os.Stdin
		return h.file
	}

	for {
		var err error
		h.file, err = os.Open(h.Path)

		if err != nil {
			// retry on failure.
			emit("Failed opening %s: %s\n", h.Path, err)
			time.Sleep(5 * time.Second)
		} else {
			break
		}
	}

	// Check we are not following a rabbit hole (symlinks, etc.)
	mustBeRegularFile(h.file) // panics

	if h.Offset > 0 {
		h.file.Seek(h.Offset, os.SEEK_SET)
	} else if options.tailOnRotate {
		h.file.Seek(0, os.SEEK_END)
	} else {
		h.file.Seek(0, os.SEEK_SET)
	}

	return h.file
}

func (h *Harvester) readline(reader *bufio.Reader, buffer *bytes.Buffer, eof_timeout time.Duration) (*string, int, error) {
	var is_partial bool = true
	var newline_length int = 1
	start_time := time.Now()

	for {
		segment, err := reader.ReadBytes('\n')

		if len(segment) > 0 {
			if segment[len(segment)-1] == '\n' {
				// Found a complete line
				is_partial = false

				// Check if a CR is also present
				if len(segment) > 1 && segment[len(segment)-2] == '\r' {
					newline_length++
				}
			}

			// TODO(sissel): if buffer exceeds a certain length, maybe report an error condition? chop it?
			buffer.Write(segment)
		}

		if err != nil {
			if err == io.EOF && is_partial {
				time.Sleep(1 * time.Second) // TODO(sissel): Implement backoff

				// Give up waiting for data after a certain amount of time.
				// If we time out, return the error (eof)
				if time.Since(start_time) > eof_timeout {
					return nil, 0, err
				}
				continue
			} else {
				emit("error: Harvester.readline: %s", err.Error())
				return nil, 0, err // TODO(sissel): don't do this?
			}
		}

		// If we got a full line, return the whole line without the EOL chars (CRLF or LF)
		if !is_partial {
			// Get the str length with the EOL chars (LF or CRLF)
			bufferSize := buffer.Len()
			str := new(string)
			*str = buffer.String()[:bufferSize-newline_length]
			// Reset the buffer for the next line
			buffer.Reset()
			return str, bufferSize, nil
		}
	} /* forever read chunks */

	return nil, 0, nil
}

// panics
func mustBeRegularFile(f *os.File) {
	if f == nil {
		panic(fmt.Errorf("Harvester: BUG: f arg is nil"))
	}
	info, e := f.Stat()
	if e != nil {
		panic(fmt.Errorf("Harvester: FAULT: stat error: %s", e.Error()))
	}

	if !info.Mode().IsRegular() {
		panic(fmt.Errorf("Harvester: not a regular file: mode=%v name=%q", info.Mode(), info.Name()))
	}
}

--------------------------------------------------------------------------------
/logstash-forwarder.conf.example:
--------------------------------------------------------------------------------
{
  # The network section covers network configuration :)
  "network": {
    # A list of downstream servers listening for our messages.
    # logstash-forwarder will pick one at random and only switch if
    # the selected one appears to be dead or unresponsive
    #"servers": [ "localhost:5043" ],

    # The path to your client ssl certificate (optional)
    #"ssl certificate": "./logstash-forwarder.crt",
    # The path to your client ssl key (optional)
    #"ssl key": "./logstash-forwarder.key",

    # The path to your trusted ssl CA file. This is used
    # to authenticate your downstream server.
    #"ssl ca": "./logstash-forwarder.crt",

    # Network timeout in seconds. This is most important for
    # logstash-forwarder determining whether to stop waiting for an
    # acknowledgement from the downstream server. If a timeout is reached,
    # logstash-forwarder will assume the connection or server is bad and
    # will connect to a server chosen at random from the servers list.
    #"timeout": 15
  },

  # The list of file configurations
  "files": [
    # An array of hashes. Each hash tells what paths to watch and
    # what fields to annotate on events from those paths.
    #{
      #"paths": [
        # single paths are fine
        #"/var/log/messages",
        # globs are fine too, they will be periodically evaluated
        # to see if any new files match the wildcard.
        #"/var/log/*.log"
      #],

      # A dictionary of fields to annotate on each event.
      #"fields": { "type": "syslog" }
    #}, {
      # A path of "-" means stdin.
43 | #"paths": [ "-" ], 44 | #"fields": { "type": "stdin" } 45 | #}, { 46 | #"paths": [ 47 | #"/var/log/apache/httpd-*.log" 48 | #], 49 | #"fields": { "type": "apache" } 50 | #} 51 | ] 52 | } 53 | -------------------------------------------------------------------------------- /logstash-forwarder.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "flag" 6 | "log" 7 | "os" 8 | "runtime/pprof" 9 | "time" 10 | "fmt" 11 | ) 12 | 13 | var exitStat = struct { 14 | ok, usageError, faulted int 15 | }{ 16 | ok: 0, 17 | usageError: 1, 18 | faulted: 2, 19 | } 20 | 21 | var options = &struct { 22 | configArg string 23 | spoolSize uint64 24 | harvesterBufferSize int 25 | cpuProfileFile string 26 | idleTimeout time.Duration 27 | useSyslog bool 28 | tailOnRotate bool 29 | quiet bool 30 | version bool 31 | }{ 32 | spoolSize: 1024, 33 | harvesterBufferSize: 16 << 10, 34 | idleTimeout: time.Second * 5, 35 | } 36 | 37 | func emitOptions() { 38 | emit("\t--- options -------\n") 39 | emit("\tconfig-arg: %s\n", options.configArg) 40 | emit("\tidle-timeout: %v\n", options.idleTimeout) 41 | emit("\tspool-size: %d\n", options.spoolSize) 42 | emit("\tharvester-buff-size: %d\n", options.harvesterBufferSize) 43 | emit("\t--- flags ---------\n") 44 | emit("\ttail (on-rotation): %t\n", options.tailOnRotate) 45 | emit("\tlog-to-syslog: %t\n", options.useSyslog) 46 | emit("\tquiet: %t\n", options.quiet) 47 | if runProfiler() { 48 | emit("\t--- profile run ---\n") 49 | emit("\tcpu-profile-file: %s\n", options.cpuProfileFile) 50 | } 51 | 52 | } 53 | 54 | // exits with stat existStat.usageError if required options are not provided 55 | func assertRequiredOptions() { 56 | if options.configArg == "" { 57 | exit(exitStat.usageError, "fatal: config file must be defined") 58 | } 59 | } 60 | 61 | const logflags = log.Ldate | log.Ltime | log.Lmicroseconds 62 | 63 | func init() { 64 | flag.StringVar(&options.configArg, "config", options.configArg, "path to logstash-forwarder configuration file or directory") 65 | 66 | flag.StringVar(&options.cpuProfileFile, "cpuprofile", options.cpuProfileFile, "path to cpu profile output - note: exits on profile end.") 67 | 68 | flag.Uint64Var(&options.spoolSize, "spool-size", options.spoolSize, "event count spool threshold - forces network flush") 69 | flag.Uint64Var(&options.spoolSize, "sv", options.spoolSize, "event count spool threshold - forces network flush") 70 | 71 | flag.IntVar(&options.harvesterBufferSize, "harvest-buffer-size", options.harvesterBufferSize, "harvester reader buffer size") 72 | flag.IntVar(&options.harvesterBufferSize, "hb", options.harvesterBufferSize, "harvester reader buffer size") 73 | 74 | flag.BoolVar(&options.useSyslog, "log-to-syslog", options.useSyslog, "log to syslog instead of stdout") // deprecate this 75 | flag.BoolVar(&options.useSyslog, "syslog", options.useSyslog, "log to syslog instead of stdout") 76 | 77 | flag.BoolVar(&options.tailOnRotate, "tail", options.tailOnRotate, "always tail on log rotation -note: may skip entries ") 78 | flag.BoolVar(&options.tailOnRotate, "t", options.tailOnRotate, "always tail on log rotation -note: may skip entries ") 79 | 80 | flag.BoolVar(&options.quiet, "quiet", options.quiet, "operate in quiet mode - only emit errors to log") 81 | flag.BoolVar(&options.version, "version", options.version, "output the version of this program") 82 | } 83 | 84 | func init() { 85 | log.SetFlags(logflags) 86 | } 87 | 88 | func main() { 89 | defer 
func() { 90 | p := recover() 91 | if p == nil { 92 | return 93 | } 94 | fault("recovered panic: %v", p) 95 | }() 96 | 97 | flag.Parse() 98 | 99 | if options.version { 100 | fmt.Println(Version) 101 | return 102 | } 103 | 104 | if options.useSyslog { 105 | configureSyslog() 106 | } 107 | 108 | assertRequiredOptions() 109 | emitOptions() 110 | 111 | if runProfiler() { 112 | f, err := os.Create(options.cpuProfileFile) 113 | if err != nil { 114 | log.Fatal(err) 115 | } 116 | pprof.StartCPUProfile(f) 117 | emit("Profiling enabled. I will collect profiling information and then exit in 60 seconds.") 118 | go func() { 119 | time.Sleep(60 * time.Second) 120 | pprof.StopCPUProfile() 121 | panic("60 seconds of profiling complete. Shutting down.") 122 | }() 123 | } 124 | 125 | config_files, err := DiscoverConfigs(options.configArg) 126 | if err != nil { 127 | fault("Could not use -config of '%s': %s", options.configArg, err) 128 | } 129 | 130 | var config Config 131 | 132 | for _, filename := range config_files { 133 | additional_config, err := LoadConfig(filename) 134 | if err == nil { 135 | err = MergeConfig(&config, additional_config) 136 | } 137 | if err != nil { 138 | fault("Could not load config file %s: %s", filename, err) 139 | } 140 | } 141 | FinalizeConfig(&config) 142 | 143 | event_chan := make(chan *FileEvent, 16) 144 | publisher_chan := make(chan []*FileEvent, 1) 145 | registrar_chan := make(chan []*FileEvent, 1) 146 | 147 | if len(config.Files) == 0 { 148 | log.Fatalf("No paths given. What files do you want me to watch?\n") 149 | } 150 | 151 | // The basic model of execution: 152 | // - prospector: finds files in paths/globs to harvest, starts harvesters 153 | // - harvester: reads a file, sends events to the spooler 154 | // - spooler: buffers events until ready to flush to the publisher 155 | // - publisher: writes to the network, notifies registrar 156 | // - registrar: records positions of files read 157 | // Finally, prospector uses the registrar information, on restart, to 158 | // determine where in each file to restart a harvester.
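// In channel terms: event_chan carries events from harvesters to the spooler, publisher_chan carries spooled batches to the publisher, and registrar_chan carries acknowledged batches to the registrar.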
159 | 160 | restart := &ProspectorResume{} 161 | restart.persist = make(chan *FileState) 162 | 163 | // Load the previous log file locations now, for use in prospector 164 | restart.files = make(map[string]*FileState) 165 | if existing, e := os.Open(".logstash-forwarder"); e == nil { 166 | defer existing.Close() 167 | wd := "" 168 | if wd, e = os.Getwd(); e != nil { 169 | emit("WARNING: os.Getwd returned unexpected error %s -- ignoring\n", e.Error()) 170 | } 171 | emit("Loading registrar data from %s/.logstash-forwarder\n", wd) 172 | 173 | decoder := json.NewDecoder(existing) 174 | decoder.Decode(&restart.files) // best effort: on decode error we simply start with no saved state 175 | } 176 | 177 | pendingProspectorCnt := 0 178 | 179 | // Prospect the globs/paths given on the command line and launch harvesters 180 | for _, fileconfig := range config.Files { 181 | prospector := &Prospector{FileConfig: fileconfig} 182 | go prospector.Prospect(restart, event_chan) 183 | pendingProspectorCnt++ 184 | } 185 | 186 | // Now determine which states we need to persist by pulling the events from the prospectors 187 | // When we hit a nil source, a prospector has finished, so we decrement the expected count 188 | emit("Waiting for %d prospectors to initialise\n", pendingProspectorCnt) 189 | persist := make(map[string]*FileState) 190 | 191 | for event := range restart.persist { 192 | if event.Source == nil { 193 | pendingProspectorCnt-- 194 | if pendingProspectorCnt == 0 { 195 | break 196 | } 197 | continue 198 | } 199 | persist[*event.Source] = event 200 | emit("Registrar will re-save state for %s\n", *event.Source) 201 | } 202 | 203 | emit("All prospectors initialised with %d states to persist\n", len(persist)) 204 | 205 | // Harvesters dump events into the spooler. 206 | go Spool(event_chan, publisher_chan, options.spoolSize, options.idleTimeout) 207 | 208 | go Publishv1(publisher_chan, registrar_chan, &config.Network) 209 | 210 | // registrar records last acknowledged positions in all files. 211 | Registrar(persist, registrar_chan) 212 | } 213 | 214 | // REVU: yes, this is a temp hack. 215 | func emit(msgfmt string, args ...interface{}) { 216 | if options.quiet { 217 | return 218 | } 219 | log.Printf(msgfmt, args...) 220 | } 221 | 222 | func fault(msgfmt string, args ...interface{}) { 223 | exit(exitStat.faulted, msgfmt, args...) 224 | } 225 | 226 | func exit(stat int, msgfmt string, args ...interface{}) { 227 | log.Printf(msgfmt, args...) 228 | os.Exit(stat) 229 | } 230 | 231 | func runProfiler() bool { 232 | return options.cpuProfileFile != "" 233 | } 234 | -------------------------------------------------------------------------------- /pkg/centos/after-install.sh: -------------------------------------------------------------------------------- 1 | /sbin/chkconfig --add logstash-forwarder 2 | 3 | chown -R logstash-forwarder:logstash-forwarder /opt/logstash-forwarder 4 | chown logstash-forwarder /var/log/logstash-forwarder 5 | chown logstash-forwarder:logstash-forwarder /var/lib/logstash-forwarder 6 | 7 | echo "Logs for logstash-forwarder will be in /var/log/logstash-forwarder/" 8 | -------------------------------------------------------------------------------- /pkg/centos/before-install.sh: -------------------------------------------------------------------------------- 1 | # create logstash-forwarder group 2 | if ! getent group logstash-forwarder >/dev/null; then 3 | groupadd -r logstash-forwarder 4 | fi 5 | 6 | # create logstash-forwarder user 7 | if ! 
getent passwd logstash-forwarder >/dev/null; then 8 | useradd -r -g logstash-forwarder -d /opt/logstash-forwarder \ 9 | -s /sbin/nologin -c "logstash-forwarder" logstash-forwarder 10 | fi 11 | -------------------------------------------------------------------------------- /pkg/centos/before-remove.sh: -------------------------------------------------------------------------------- 1 | if [ $1 -eq 0 ]; then 2 | /sbin/service logstash-forwarder stop >/dev/null 2>&1 || true 3 | /sbin/chkconfig --del logstash-forwarder 4 | if getent passwd logstash-forwarder >/dev/null ; then 5 | userdel logstash-forwarder 6 | fi 7 | 8 | if getent group logstash-forwarder > /dev/null ; then 9 | groupdel logstash-forwarder 10 | fi 11 | fi 12 | -------------------------------------------------------------------------------- /pkg/ubuntu/after-install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | chown -R logstash-forwarder:logstash-forwarder /opt/logstash-forwarder 4 | chown logstash-forwarder /var/log/logstash-forwarder 5 | chown logstash-forwarder:logstash-forwarder /var/lib/logstash-forwarder 6 | update-rc.d logstash-forwarder defaults 7 | 8 | echo "Logs for logstash-forwarder will be in /var/log/logstash-forwarder/" 9 | -------------------------------------------------------------------------------- /pkg/ubuntu/before-install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # create logstash-forwarder group 4 | if ! getent group logstash-forwarder >/dev/null; then 5 | groupadd -r logstash-forwarder 6 | fi 7 | 8 | # create logstash-forwarder user 9 | if ! getent passwd logstash-forwarder >/dev/null; then 10 | useradd -M -r -g logstash-forwarder -d /var/lib/logstash-forwarder \ 11 | -s /usr/sbin/nologin -c "logstash-forwarder Service User" logstash-forwarder 12 | fi 13 | -------------------------------------------------------------------------------- /pkg/ubuntu/before-remove.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | if [ $1 = "remove" ]; then 4 | service logstash-forwarder stop >/dev/null 2>&1 || true 5 | update-rc.d -f logstash-forwarder remove 6 | 7 | if getent passwd logstash-forwarder >/dev/null ; then 8 | userdel logstash-forwarder 9 | fi 10 | 11 | if getent group logstash-forwarder >/dev/null ; then 12 | groupdel logstash-forwarder 13 | fi 14 | fi 15 | -------------------------------------------------------------------------------- /prospector.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "os" 5 | "path/filepath" 6 | "time" 7 | ) 8 | 9 | type ProspectorResume struct { 10 | files map[string]*FileState 11 | persist chan *FileState 12 | } 13 | 14 | type ProspectorInfo struct { 15 | fileinfo os.FileInfo /* the file info */ 16 | harvester chan int64 /* the harvester sends its final offset on this channel when it closes */ 17 | last_seen uint32 /* iteration number of the last scan in which we saw this file */ 18 | } 19 | 20 | type Prospector struct { 21 | FileConfig FileConfig 22 | prospectorinfo map[string]ProspectorInfo 23 | iteration uint32 24 | lastscan time.Time 25 | } 26 | 27 | func (p *Prospector) Prospect(resume *ProspectorResume, output chan *FileEvent) { 28 | p.prospectorinfo = make(map[string]ProspectorInfo) 29 | 30 | // Handle any "-" (stdin) paths 31 | for i, path := range p.FileConfig.Paths { 32 | if path == "-" { 33 | // Offset and Initial 
never get used when path is "-" 34 | harvester := Harvester{Path: path, FileConfig: p.FileConfig} 35 | go harvester.Harvest(output) 36 | 37 | // Remove it from the file list 38 | p.FileConfig.Paths = append(p.FileConfig.Paths[:i], p.FileConfig.Paths[i+1:]...) 39 | } 40 | } 41 | 42 | // Seed last scan time 43 | p.lastscan = time.Now() 44 | 45 | // Now let's do one quick scan to pick up new files 46 | for _, path := range p.FileConfig.Paths { 47 | p.scan(path, output, resume) 48 | } 49 | 50 | // This signals we finished considering the previous state 51 | event := &FileState{ 52 | Source: nil, 53 | } 54 | resume.persist <- event 55 | 56 | for { 57 | newlastscan := time.Now() 58 | 59 | for _, path := range p.FileConfig.Paths { 60 | // Scan with a nil resume so new files always start at the beginning 61 | p.scan(path, output, nil) 62 | } 63 | 64 | p.lastscan = newlastscan 65 | 66 | // Defer next scan for a bit. 67 | time.Sleep(10 * time.Second) // Make this tunable 68 | 69 | // Clear out files that disappeared and that we've stopped harvesting 70 | for file, lastinfo := range p.prospectorinfo { 71 | if len(lastinfo.harvester) != 0 && lastinfo.last_seen < p.iteration { 72 | delete(p.prospectorinfo, file) 73 | } 74 | } 75 | 76 | p.iteration++ // Overflow is allowed 77 | } 78 | } /* Prospect */ 79 | 80 | func (p *Prospector) scan(path string, output chan *FileEvent, resume *ProspectorResume) { 81 | 82 | // Evaluate the path as a wildcards/shell glob 83 | matches, err := filepath.Glob(path) 84 | if err != nil { 85 | emit("glob(%s) failed: %v\n", path, err) 86 | return 87 | } 88 | 89 | // Keep the old inode/dev reference if we see a renamed file, in case it was also renamed before this scan 90 | missinginfo := make(map[string]os.FileInfo) 91 | 92 | // Check any matched files to see if we need to start a harvester 93 | for _, file := range matches { 94 | // Stat the file, following any symlinks. 95 | fileinfo, err := os.Stat(file) 96 | // a file can vanish between glob and stat; skip it if so 97 | if err != nil { 98 | emit("stat(%s) failed: %s\n", file, err) 99 | continue 100 | } 101 | 102 | if fileinfo.IsDir() { 103 | emit("Skipping directory: %s\n", file) 104 | continue 105 | } 106 | 107 | // Check the current info against p.prospectorinfo[file] 108 | lastinfo, is_known := p.prospectorinfo[file] 109 | newinfo := lastinfo 110 | 111 | // Conditions for starting a new harvester: 112 | // - the file path hasn't been seen before (this branch) 113 | // - the file's inode or device changed (the else branch below) 114 | if !is_known { 115 | // Create a new prospector info with the stat info for comparison 116 | newinfo = ProspectorInfo{fileinfo: fileinfo, harvester: make(chan int64, 1), last_seen: p.iteration} 117 | 118 | // Check for dead time, but only if the file modification time is before the last scan started 119 | // This ensures we don't skip genuine creations with dead times less than 10s 120 | if fileinfo.ModTime().Before(p.lastscan) && time.Since(fileinfo.ModTime()) > p.FileConfig.deadtime { 121 | var offset int64 = 0 122 | var is_resuming bool = false 123 | 124 | if resume != nil { 125 | // Call the calculator - it will process resume state if there is one 126 | offset, is_resuming = p.calculate_resume(file, fileinfo, resume) 127 | } 128 | 129 | // Are we resuming a dead file? 
We have to resume even if dead so we catch any old updates to the file 130 | // This is safe: once the harvester hits EOF and its timeout, it will stop harvesting 131 | // Once we detect changes again we can start another harvester - this keeps the number of goroutines to a minimum 132 | if is_resuming { 133 | emit("Resuming harvester on a previously harvested file: %s\n", file) 134 | harvester := &Harvester{Path: file, FileConfig: p.FileConfig, Offset: offset, FinishChan: newinfo.harvester} 135 | go harvester.Harvest(output) 136 | } else { 137 | // Old file: skip it, but push the file size as the offset so we start from the end if this file changes and needs picking up 138 | emit("Skipping file (older than dead time of %v): %s\n", p.FileConfig.deadtime, file) 139 | newinfo.harvester <- fileinfo.Size() 140 | } 141 | } else if previous := is_file_renamed(file, fileinfo, p.prospectorinfo, missinginfo); previous != "" { 142 | // This file was simply renamed (known inode+dev) - link the same harvester channel as the old file 143 | emit("File rename was detected: %s -> %s\n", previous, file) 144 | 145 | newinfo.harvester = p.prospectorinfo[previous].harvester 146 | } else { 147 | var offset int64 = 0 148 | var is_resuming bool = false 149 | 150 | if resume != nil { 151 | // Call the calculator - it will process resume state if there is one 152 | offset, is_resuming = p.calculate_resume(file, fileinfo, resume) 153 | } 154 | 155 | // Are we resuming a file or is this a completely new file? 156 | if is_resuming { 157 | emit("Resuming harvester on a previously harvested file: %s\n", file) 158 | } else { 159 | emit("Launching harvester on new file: %s\n", file) 160 | } 161 | 162 | // Launch the harvester 163 | harvester := &Harvester{Path: file, FileConfig: p.FileConfig, Offset: offset, FinishChan: newinfo.harvester} 164 | go harvester.Harvest(output) 165 | } 166 | } else { 167 | // Update the fileinfo information used for future comparisons, and the last_seen counter 168 | newinfo.fileinfo = fileinfo 169 | newinfo.last_seen = p.iteration 170 | 171 | if !is_fileinfo_same(lastinfo.fileinfo, fileinfo) { 172 | if previous := is_file_renamed(file, fileinfo, p.prospectorinfo, missinginfo); previous != "" { 173 | // This file was renamed from another file we know - link the same harvester channel as the old file 174 | emit("File rename was detected: %s -> %s\n", previous, file) 175 | emit("Launching harvester on renamed file: %s\n", file) 176 | 177 | newinfo.harvester = p.prospectorinfo[previous].harvester 178 | } else { 179 | // File is not the same file we saw previously, it must have rotated and is a new file 180 | emit("Launching harvester on rotated file: %s\n", file) 181 | 182 | // Forget about the previous harvester and let it continue on the old file - so start a new channel to use with the new harvester 183 | newinfo.harvester = make(chan int64, 1) 184 | 185 | // Start a harvester on the path 186 | harvester := &Harvester{Path: file, FileConfig: p.FileConfig, FinishChan: newinfo.harvester} 187 | go harvester.Harvest(output) 188 | } 189 | 190 | // Keep the old file in missinginfo so we don't rescan it if it was renamed and we've not yet reached the new filename 191 | // We only need to keep it for the remainder of this iteration; after that we can assume it was deleted and forget about it 192 | missinginfo[file] = lastinfo.fileinfo 193 | } else if len(newinfo.harvester) != 0 && lastinfo.fileinfo.ModTime() != fileinfo.ModTime() { 194 | // Resume harvesting of an old file we've stopped harvesting from 195
| emit("Resuming harvester on an old file that was just modified: %s\n", file) 196 | 197 | // Start a harvester on the path; an old file was just modified and it doesn't have a harvester 198 | // The offset to continue from will be stored in the harvester channel - so take that to use and also clear the channel 199 | harvester := &Harvester{Path: file, FileConfig: p.FileConfig, Offset: <-newinfo.harvester, FinishChan: newinfo.harvester} 200 | go harvester.Harvest(output) 201 | } 202 | } 203 | 204 | // Track the stat data for this file for later comparison to check for 205 | // rotation/etc 206 | p.prospectorinfo[file] = newinfo 207 | } // for each file matched by the glob 208 | } 209 | 210 | func (p *Prospector) calculate_resume(file string, fileinfo os.FileInfo, resume *ProspectorResume) (int64, bool) { 211 | last_state, is_found := resume.files[file] 212 | 213 | if is_found && is_file_same(file, fileinfo, last_state) { 214 | // We're resuming - throw the last state back downstream so we resave it 215 | // And return the offset - also force harvest in case the file is old and we're about to skip it 216 | resume.persist <- last_state 217 | return last_state.Offset, true 218 | } 219 | 220 | if previous := is_file_renamed_resumelist(file, fileinfo, resume.files); previous != "" { 221 | // File has rotated between shutdown and startup 222 | // We return last state downstream, with a modified event source with the new file name 223 | // And return the offset - also force harvest in case the file is old and we're about to skip it 224 | emit("Detected rename of a previously harvested file: %s -> %s\n", previous, file) 225 | last_state := resume.files[previous] 226 | last_state.Source = &file 227 | resume.persist <- last_state 228 | return last_state.Offset, true 229 | } 230 | 231 | if is_found { 232 | emit("Not resuming rotated file: %s\n", file) 233 | } 234 | 235 | // New file so just start from an automatic position 236 | return 0, false 237 | } 238 | -------------------------------------------------------------------------------- /publisher1.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "compress/zlib" 6 | "crypto/tls" 7 | "crypto/x509" 8 | "encoding/binary" 9 | "encoding/pem" 10 | "fmt" 11 | "io" 12 | "io/ioutil" 13 | "math/rand" 14 | "net" 15 | "os" 16 | "regexp" 17 | "strconv" 18 | "time" 19 | ) 20 | 21 | // Support for newer SSL signature algorithms 22 | import _ "crypto/sha256" 23 | import _ "crypto/sha512" 24 | 25 | var hostname string 26 | var hostport_re, _ = regexp.Compile("^(.+):([0-9]+)$") 27 | 28 | func init() { 29 | hostname, _ = os.Hostname() 30 | rand.Seed(time.Now().UnixNano()) 31 | } 32 | 33 | func Publishv1(input chan []*FileEvent, 34 | registrar chan []*FileEvent, 35 | config *NetworkConfig) { 36 | var buffer bytes.Buffer 37 | var socket *tls.Conn 38 | var sequence uint32 39 | var err error 40 | 41 | socket = connect(config) 42 | defer socket.Close() 43 | 44 | for events := range input { 45 | buffer.Truncate(0) 46 | compressor, _ := zlib.NewWriterLevel(&buffer, 3) 47 | 48 | for _, event := range events { 49 | sequence += 1 50 | writeDataFrame(event, sequence, compressor) 51 | } 52 | compressor.Flush() 53 | compressor.Close() 54 | 55 | compressed_payload := buffer.Bytes() 56 | 57 | // Send buffer until we're successful... 58 | oops := func(err error) { 59 | // TODO(sissel): Track how frequently we timeout and reconnect. 
If we're 60 | // timing out too frequently, there's really no point in timing out since 61 | // basically everything is slow or down. We'll want to ratchet up the 62 | // timeout value slowly until things improve, then ratchet it down once 63 | // things seem healthy. 64 | emit("Socket error, will reconnect: %s\n", err) 65 | time.Sleep(1 * time.Second) 66 | socket.Close() 67 | socket = connect(config) 68 | } 69 | 70 | SendPayload: 71 | for { 72 | // Abort if our whole request takes longer than the configured 73 | // network timeout. 74 | socket.SetDeadline(time.Now().Add(config.timeout)) 75 | 76 | // Set the window size to the length of this payload in events. 77 | _, err = socket.Write([]byte("1W")) 78 | if err != nil { 79 | oops(err) 80 | continue 81 | } 82 | err = binary.Write(socket, binary.BigEndian, uint32(len(events))) 83 | if err != nil { 84 | oops(err) 85 | continue 86 | } 87 | 88 | // Write compressed frame 89 | _, err = socket.Write([]byte("1C")) 90 | if err != nil { 91 | oops(err) 92 | continue 93 | } 94 | err = binary.Write(socket, binary.BigEndian, uint32(len(compressed_payload))) 95 | if err != nil { 96 | oops(err) 97 | continue 98 | } 99 | _, err = socket.Write(compressed_payload) 100 | if err != nil { 101 | oops(err) 102 | continue 103 | } 104 | 105 | // read ack 106 | response := make([]byte, 6) 107 | ackbytes := 0 108 | for ackbytes != 6 { 109 | n, err := socket.Read(response[ackbytes:]) 110 | if err != nil { 111 | emit("Read error looking for ack: %s\n", err) 112 | socket.Close() 113 | socket = connect(config) 114 | continue SendPayload // retry sending on new connection 115 | } else { 116 | ackbytes += n 117 | } 118 | } 119 | 120 | // TODO(sissel): verify ack 121 | // Success, stop trying to send the payload. 122 | break 123 | } 124 | 125 | // Tell the registrar that we've successfully sent these events 126 | registrar <- events 127 | } /* for each event payload */ 128 | } // Publish 129 | 130 | func connect(config *NetworkConfig) (socket *tls.Conn) { 131 | var tlsconfig tls.Config 132 | tlsconfig.MinVersion = tls.VersionTLS10 133 | 134 | if len(config.SSLCertificate) > 0 && len(config.SSLKey) > 0 { 135 | emit("Loading client ssl certificate: %s and %s\n", 136 | config.SSLCertificate, config.SSLKey) 137 | cert, err := tls.LoadX509KeyPair(config.SSLCertificate, config.SSLKey) 138 | if err != nil { 139 | fault("Failed loading client ssl certificate: %s\n", err) 140 | } 141 | tlsconfig.Certificates = []tls.Certificate{cert} 142 | } 143 | 144 | if len(config.SSLCA) > 0 { 145 | emit("Setting trusted CA from file: %s\n", config.SSLCA) 146 | tlsconfig.RootCAs = x509.NewCertPool() 147 | 148 | pemdata, err := ioutil.ReadFile(config.SSLCA) 149 | if err != nil { 150 | fault("Failure reading CA certificate: %s\n", err) 151 | } 152 | 153 | block, _ := pem.Decode(pemdata) 154 | if block == nil { 155 | fault("Failed to decode PEM data, is %s a valid cert?\n", config.SSLCA) 156 | } 157 | if block.Type != "CERTIFICATE" { 158 | fault("This is not a certificate file: %s\n", config.SSLCA) 159 | } 160 | 161 | cert, err := x509.ParseCertificate(block.Bytes) 162 | if err != nil { 163 | fault("Failed to parse a certificate: %s\n", config.SSLCA) 164 | } 165 | tlsconfig.RootCAs.AddCert(cert) 166 | } 167 | 168 | for { 169 | // Pick a random server from the list. 
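// A new server (and, below, a new resolved address) is chosen on every attempt, so repeated failures rotate through the configured servers rather than retrying a single dead one.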
170 | hostport := config.Servers[rand.Int()%len(config.Servers)] 171 | submatch := hostport_re.FindSubmatch([]byte(hostport)) 172 | if submatch == nil { 173 | fault("Invalid host:port given: %s", hostport) 174 | } 175 | host := string(submatch[1]) 176 | port := string(submatch[2]) 177 | addresses, err := net.LookupHost(host) 178 | 179 | if err != nil { 180 | emit("DNS lookup failure \"%s\": %s\n", host, err) 181 | time.Sleep(1 * time.Second) 182 | continue 183 | } 184 | 185 | address := addresses[rand.Int()%len(addresses)] 186 | var addressport string 187 | 188 | ip := net.ParseIP(address) 189 | if ip.To4() != nil { 190 | addressport = fmt.Sprintf("%s:%s", address, port) 191 | } else { 192 | addressport = fmt.Sprintf("[%s]:%s", address, port) 193 | } 194 | 195 | emit("Connecting to %s (%s)\n", addressport, host) 196 | 197 | tcpsocket, err := net.DialTimeout("tcp", addressport, config.timeout) 198 | if err != nil { 199 | emit("Failure connecting to %s: %s\n", address, err) 200 | time.Sleep(1 * time.Second) 201 | continue 202 | } 203 | 204 | tlsconfig.ServerName = host 205 | 206 | socket = tls.Client(tcpsocket, &tlsconfig) 207 | socket.SetDeadline(time.Now().Add(config.timeout)) 208 | err = socket.Handshake() 209 | if err != nil { 210 | emit("Failed TLS handshake with %s: %s\n", address, err) 211 | time.Sleep(1 * time.Second) 212 | socket.Close() 213 | continue 214 | } 215 | 216 | emit("Connected to %s\n", address) 217 | 218 | // connected, let's rock and roll. 219 | return 220 | } 221 | return // unreachable: the loop above only exits via return 222 | } 223 | 224 | func writeDataFrame(event *FileEvent, sequence uint32, output io.Writer) { 225 | //emit("event: %s\n", *event.Text) 226 | // header, "1D" 227 | output.Write([]byte("1D")) 228 | // sequence number 229 | binary.Write(output, binary.BigEndian, uint32(sequence)) 230 | // 'pair' count 231 | binary.Write(output, binary.BigEndian, uint32(len(*event.Fields)+4)) 232 | 233 | writeKV("file", *event.Source, output) 234 | writeKV("host", hostname, output) 235 | writeKV("offset", strconv.FormatInt(event.Offset, 10), output) 236 | writeKV("line", *event.Text, output) 237 | for k, v := range *event.Fields { 238 | writeKV(k, v, output) 239 | } 240 | } 241 | 242 | func writeKV(key string, value string, output io.Writer) { 243 | //emit("kv: %d/%s %d/%s\n", len(key), key, len(value), value) 244 | binary.Write(output, binary.BigEndian, uint32(len(key))) 245 | output.Write([]byte(key)) 246 | binary.Write(output, binary.BigEndian, uint32(len(value))) 247 | output.Write([]byte(value)) 248 | } 249 | -------------------------------------------------------------------------------- /publisher1_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "crypto/rand" 5 | "crypto/rsa" 6 | "crypto/tls" 7 | "crypto/x509" 8 | "crypto/x509/pkix" 9 | "encoding/pem" 10 | "errors" 11 | "io/ioutil" 12 | "log" 13 | "math/big" 14 | "net" 15 | "os" 16 | "sync" 17 | "testing" 18 | "time" 19 | ) 20 | 21 | const strict bool = true 22 | 23 | const insecure bool = false 24 | 25 | const caKey = `-----BEGIN RSA PRIVATE KEY----- 26 | MIICXQIBAAKBgQDXQK+POFCtDlYgc2nQTnZ+WfaPQg1ms9JjomZ9vZXpqH9JaxBj 27 | jWKTyg7k6GOXpNbaET76nlWLtAKKofesOGwadil/HtyEwXxvXJY/UTdkqtwCFjPS 28 | 5XR5fNTkVK2sLw23Z67TIBApIGR42a7WhaP/BFv6zc/wpxDnokpLtJkvlQIDAQAB 29 | AoGAA6paUvoGhavk05CjkKSFaAloJXBegg012/R8AlAkKWJxKHLmSIuzzgh20HcU 30 | mxR3hCcfB22Cz2o1UN8JNKmRTaoMrPHf4gv0MIlcEBumxh8nyFiBocXimqJKHWHY 31 | 
PMWzOoyhgBIXPoAIkmo9Ft41LidJ3FBl0z74muGcYsdu4FECQQD1nwfXHBP5jE2X 32 | vVc5SupIxIgoK9reCGB2CyYdQtkdRPTO7bSLwTTqFlzjLYNaM3xZhG6Qh/tHIrE0 33 | 95MuumIDAkEA4Fkl8yVj+Pkx7gAGcEQoRwupk6gE/FM0WTJrpSc9+thNLk5DYCod 34 | qwxmju8ttfr6wrIE1vDfK6njVo1a+RqAhwJABNmFABxP0KeSiKJ2bG0sPw+SWKi1 35 | A5lKvknuELnXK5rG8qcC35eLAew7HUkyxL8rf2D8BeKJdZgbw533y/5mHQJBAJXL 36 | MEmOl5evWyUcIzBmcbYuFUWfk+Sd8X/06GbXMs0AC1h1rQrSVAjXOMsK66xsLW49 37 | ynlxTrEqt74cl7dneJUCQQDFwBdpxWZtEeIx0uliUZNoAUX5D8qA4/BuHgstXREw 38 | 5rWQly3kCyFmocbv7WggnNnmgIk3V2P9Vj3n8ZFLCYOJ 39 | -----END RSA PRIVATE KEY-----` 40 | 41 | const caCert = `-----BEGIN CERTIFICATE----- 42 | MIICRTCCAa4CCQC/GQitAOqHJTANBgkqhkiG9w0BAQUFADBnMQswCQYDVQQGEwJB 43 | VTERMA8GA1UECBMIVmljdG9yaWExEjAQBgNVBAcTCU1lbGJvdXJuZTEWMBQGA1UE 44 | ChMNRWxhc3RpY1NlYXJjaDEZMBcGA1UEAxMQY2EubG9nc3Rhc2gudGVzdDAeFw0x 45 | NDA3MDQwMTIwMjNaFw0yNDA3MDEwMTIwMjNaMGcxCzAJBgNVBAYTAkFVMREwDwYD 46 | VQQIEwhWaWN0b3JpYTESMBAGA1UEBxMJTWVsYm91cm5lMRYwFAYDVQQKEw1FbGFz 47 | dGljU2VhcmNoMRkwFwYDVQQDExBjYS5sb2dzdGFzaC50ZXN0MIGfMA0GCSqGSIb3 48 | DQEBAQUAA4GNADCBiQKBgQDXQK+POFCtDlYgc2nQTnZ+WfaPQg1ms9JjomZ9vZXp 49 | qH9JaxBjjWKTyg7k6GOXpNbaET76nlWLtAKKofesOGwadil/HtyEwXxvXJY/UTdk 50 | qtwCFjPS5XR5fNTkVK2sLw23Z67TIBApIGR42a7WhaP/BFv6zc/wpxDnokpLtJkv 51 | lQIDAQABMA0GCSqGSIb3DQEBBQUAA4GBAFzkH8T+dU40g330QnDp2qO0XTfhNOsC 52 | fjUOGYo7F6eqfBcQColcE+BLKc1aKEAAEvzokQi72L7xuOenJUzpGaIJXGkmGZsV 53 | 2OIO5Zf4ChZTMuut9yPjer9sTt0pZUNsOSg6o7hBeXlCMEvoM/31ag2sxZaOKA/Z 54 | p/X0O4Qz0RTF 55 | -----END CERTIFICATE-----` 56 | 57 | var listening sync.WaitGroup 58 | 59 | func init() { log.SetFlags(0) } 60 | 61 | func makeCert(host string) tls.Certificate { 62 | ca, err := tls.X509KeyPair([]byte(caCert), []byte(caKey)) 63 | caCert, err := x509.ParseCertificate(ca.Certificate[0]) 64 | if err != nil { 65 | panic(err) 66 | } 67 | tpl := x509.Certificate{ 68 | SerialNumber: new(big.Int).SetInt64(0), 69 | Subject: pkix.Name{CommonName: host}, 70 | NotBefore: time.Now().AddDate(-1, 0, 0).UTC(), 71 | NotAfter: time.Now().AddDate(1, 0, 0).UTC(), 72 | KeyUsage: x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature, 73 | BasicConstraintsValid: true, 74 | SubjectKeyId: []byte{1, 2, 3, 4}, 75 | Version: 2, 76 | } 77 | if ip := net.ParseIP(host); ip != nil { 78 | tpl.IPAddresses = []net.IP{ip} 79 | } 80 | 81 | key, err := rsa.GenerateKey(rand.Reader, 1024) 82 | if err != nil { 83 | panic(err) 84 | } 85 | der, err := x509.CreateCertificate(rand.Reader, &tpl, caCert, &key.PublicKey, ca.PrivateKey) 86 | if err != nil { 87 | panic(err) 88 | } 89 | bcrt := &pem.Block{Type: "CERTIFICATE", Bytes: der} 90 | bkey := &pem.Block{Type: "PRIVATE KEY", Bytes: x509.MarshalPKCS1PrivateKey(key)} 91 | 92 | v, err := tls.X509KeyPair(pem.EncodeToMemory(bcrt), pem.EncodeToMemory(bkey)) 93 | if err != nil { 94 | panic(err) 95 | } 96 | return v 97 | } 98 | 99 | func listenWithCert(hostname string, address string) { 100 | 101 | listening.Add(1) 102 | go func() { 103 | log.Println("DEBUG - start mock server ..") 104 | // Establish a dummy TLS server 105 | var serverConfig tls.Config 106 | kp := makeCert(hostname) 107 | 108 | serverConfig.Certificates = []tls.Certificate{kp} 109 | 110 | listener, err := tls.Listen("tcp", address, &serverConfig) 111 | if err != nil { 112 | panic(err) 113 | } 114 | // Listen and handshake for a single connection 115 | defer listener.Close() 116 | listening.Done() 117 | 118 | conn, err := listener.Accept() 119 | if err != nil { 120 | panic(err) 121 | } 122 | defer conn.Close() 123 | tlsconn, ok := conn.(*tls.Conn) 124 | if 
!ok { 125 | panic("conn should be of type *tls.Conn") 126 | } 127 | if err := tlsconn.Handshake(); err != nil { 128 | return 129 | } 130 | }() 131 | listening.Wait() 132 | } 133 | 134 | func tryConnect(addr string, strict bool) (errchan chan error) { 135 | errchan = make(chan error) 136 | go func() { 137 | 138 | caCertFile, err := ioutil.TempFile("", "logstash-forwarder-cacert") 139 | if err != nil { 140 | panic(err) 141 | } 142 | defer func() { os.Remove(caCertFile.Name()) }() 143 | ioutil.WriteFile(caCertFile.Name(), []byte(caCert), os.ModeTemporary) 144 | 145 | // this can be messy because of localhost resolving to ipv6 addresses 146 | // but there's no easy way to disable v6 resolution here 147 | const wait = 5 148 | const retryLimit = 3 149 | tryAttempt := 0 150 | exinfo := "" 151 | config := &NetworkConfig{ 152 | SSLCA: caCertFile.Name(), 153 | Servers: []string{addr}, 154 | Timeout: wait, 155 | timeout: time.Second * wait, 156 | } 157 | 158 | var socket *tls.Conn 159 | for socket == nil && tryAttempt < retryLimit { 160 | select { 161 | case socket = <-doConnect(config): 162 | case <-time.After(time.Second * wait): 163 | log.Printf("INFO: Connect timeout: attempt: %d\n", tryAttempt) 164 | tryAttempt++ 165 | } 166 | } 167 | if socket == nil { 168 | errchan <- errors.New("Client connect failed. " + exinfo) 169 | return 170 | } 171 | defer socket.Close() 172 | log.Printf("INFO: Connected to %s\n", socket.RemoteAddr()) 173 | 174 | if !socket.ConnectionState().HandshakeComplete { 175 | errchan <- errors.New("handshake should be complete") 176 | return 177 | } 178 | errchan <- nil 179 | }() 180 | return errchan 181 | } 182 | 183 | func doConnect(config *NetworkConfig) <-chan *tls.Conn { 184 | sockchan := make(chan *tls.Conn) 185 | go func() { 186 | sockchan <- connect(config) 187 | }() 188 | return sockchan 189 | } 190 | 191 | // ---------------------------------------------------------------------- 192 | // Strict 193 | // ---------------------------------------------------------------------- 194 | 195 | // CA certificate is CN=ca.logstash.test in test/ca.crt, test/ca.key 196 | // Server certificate is CN=localhost, signed by above CA, in test/server.crt, test/server.key 197 | 198 | func TestStrictConnectValidCertificate(t *testing.T) { 199 | log.Println("\n-- TestStrictConnectValidCertificate -- ") 200 | 201 | listenWithCert("localhost", "0.0.0.0:19876") 202 | if err := <-tryConnect("localhost:19876", strict); err != nil { 203 | t.Fatal("Should have succeeded", err) 204 | } 205 | } 206 | func TestStrictConnectMismatchedCN(t *testing.T) { 207 | log.Println("\n-- TestStrictConnectMismatchedCN -- ") 208 | 209 | listenWithCert("localalt", "0.0.0.0:19876") 210 | if err := <-tryConnect("localhost:19876", strict); err == nil { 211 | t.Fatal("Should have failed but didn't!") 212 | } 213 | } 214 | 215 | func TestStrictConnectToIpWithoutSAN(t *testing.T) { 216 | log.Println("\n-- TestStrictConnectToIpWithoutSAN -- ") 217 | 218 | listenWithCert("localhost", "0.0.0.0:19876") 219 | if err := <-tryConnect("127.0.0.1:19876", strict); err == nil { 220 | t.Fatal("Should have failed but didn't!") 221 | } 222 | } 223 | 224 | func TestStrictConnectToIpWithSAN(t *testing.T) { 225 | log.Println("\n-- TestStrictConnectToIpWithSAN -- ") 226 | 227 | listenWithCert("127.0.0.1", "0.0.0.0:19876") 228 | if err := <-tryConnect("127.0.0.1:19876", strict); err != nil { 229 | t.Fatal("Should not have failed", err) 230 | } 231 | } 232 | -------------------------------------------------------------------------------- 
/registrar.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "os" 6 | ) 7 | 8 | func Registrar(state map[string]*FileState, input chan []*FileEvent) { 9 | for events := range input { 10 | emit("Registrar: processing %d events\n", len(events)) 11 | // Take the last event found for each file source 12 | for _, event := range events { 13 | // skip stdin 14 | if *event.Source == "-" { 15 | continue 16 | } 17 | 18 | ino, dev := file_ids(event.fileinfo) 19 | state[*event.Source] = &FileState{ 20 | Source: event.Source, 21 | // take the offset + length of the line + newline char and 22 | // save it as the new starting offset. 23 | // This is a problem if the EOL is CRLF: on restart we read the LF again and generate an event with an empty line 24 | Offset: event.Offset + int64(len(*event.Text)) + 1, // REVU: this is begging for BUGs 25 | Inode: ino, 26 | Device: dev, 27 | } 28 | //log.Printf("State %s: %d\n", *event.Source, event.Offset) 29 | } 30 | 31 | if e := writeRegistry(state, ".logstash-forwarder"); e != nil { 32 | // REVU: but we should panic, or something, right? 33 | emit("WARNING: (continuing) update of registry returned error: %s", e) 34 | } 35 | } 36 | } 37 | 38 | func writeRegistry(state map[string]*FileState, path string) error { 39 | tempfile := path + ".new" 40 | file, e := os.Create(tempfile) 41 | if e != nil { 42 | emit("Failed to create tempfile (%s) for writing: %s\n", tempfile, e) 43 | return e 44 | } 45 | defer file.Close() 46 | 47 | encoder := json.NewEncoder(file) 48 | if e := encoder.Encode(state); e != nil { return e } 49 | 50 | return onRegistryWrite(path, tempfile) 51 | } 52 | -------------------------------------------------------------------------------- /registrar_other.go: -------------------------------------------------------------------------------- 1 | // +build !windows 2 | 3 | package main 4 | 5 | import ( 6 | "os" 7 | ) 8 | 9 | func onRegistryWrite(path, tempfile string) error { 10 | if e := os.Rename(tempfile, path); e != nil { 11 | emit("registry rotate: rename of %s to %s - %s\n", tempfile, path, e) 12 | return e 13 | } 14 | return nil 15 | } 16 | -------------------------------------------------------------------------------- /registrar_windows.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "os" 5 | ) 6 | 7 | func onRegistryWrite(path, tempfile string) error { 8 | old := path + ".old" 9 | var e error 10 | 11 | if e = os.Rename(path, old); e != nil { 12 | emit("registry rotate: rename of %s to %s - %s\n", path, old, e) 13 | return e 14 | } 15 | 16 | if e = os.Rename(tempfile, path); e != nil { 17 | emit("registry rotate: rename of %s to %s - %s\n", tempfile, path, e) 18 | return e 19 | } 20 | return nil 21 | } 22 | -------------------------------------------------------------------------------- /spec/acceptance/packaging_spec.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | describe "packaging" do 3 | let(:redirect) { ENV["DEBUG"] ? 
"" : "> /dev/null 2>&1" } 4 | let(:version) { `./logstash-forwarder -version`.chomp } 5 | before do 6 | if !File.exist?("logstash-forwarder") 7 | system("make logstash-forwarder") 8 | end 9 | end 10 | 11 | describe "make rpm" do 12 | let(:architecture) { RbConfig::CONFIG["host_cpu"] } 13 | it "should build an rpm" do 14 | system("make rpm #{redirect}") 15 | expect($?).to be_success 16 | expect(File).to be_exist("logstash-forwarder-#{version}-1.#{architecture}.rpm") 17 | end 18 | end 19 | 20 | describe "make deb" do 21 | let(:architecture) do 22 | a = RbConfig::CONFIG["host_cpu"] 23 | case a 24 | when "x86_64"; "amd64" # why? Because computers. 25 | else a 26 | end 27 | end 28 | it "should build a deb" do 29 | system("make deb #{redirect}") 30 | expect($?).to be_success 31 | expect(File).to be_exist("logstash-forwarder_#{version}_#{architecture}.deb") 32 | end 33 | end 34 | end 35 | 36 | -------------------------------------------------------------------------------- /spec/lumberjack_spec.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # encoding: utf-8 3 | # 4 | require "json" 5 | require "lumberjack/server" 6 | require "stud/try" 7 | require "stud/temporary" 8 | 9 | shared_examples_for "logstash-forwarder" do 10 | # TODO(sissel): Refactor this to use factory pattern instead of so many 'let' statements. 11 | let(:workdir) { Stud::Temporary.directory } 12 | let(:ssl_certificate) { File.join(workdir, "certificate.pem") } 13 | let(:ssl_key) { File.join(workdir, "certificate.key") } 14 | let(:config_file) { File.join(workdir, "config.json") } 15 | let(:input_file) { File.join(workdir, "input.log") } 16 | 17 | let(:random_field) { (rand(30)+1).times.map { (rand(26) + 97).chr }.join } 18 | let(:random_value) { (rand(30)+1).times.map { (rand(26) + 97).chr }.join } 19 | let(:port) { rand(50000) + 1024 } 20 | 21 | let(:server) do 22 | Lumberjack::Server.new(:ssl_certificate => ssl_certificate, :ssl_key => ssl_key, :port => port) 23 | end 24 | 25 | let(:logstash_forwarder_config) do 26 | <<-CONFIG 27 | { 28 | "network": { 29 | "servers": [ "localhost:#{port}" ], 30 | "ssl ca": "#{ssl_certificate}" 31 | }, 32 | "files": [ 33 | { 34 | "paths": [ "#{input_file}" ], 35 | "fields": { #{random_field.to_json}: #{random_value.to_json} } 36 | } 37 | ] 38 | } 39 | CONFIG 40 | end 41 | 42 | after do 43 | [ssl_certificate, ssl_key, config_file].each do |path| 44 | File.unlink(path) if File.exists?(path) 45 | end 46 | Process::kill("KILL", lsf.pid) 47 | #Calling this method raises a SystemCallError if there are no child processes. 48 | Process::wait(lsf.pid) rescue '' 49 | end 50 | 51 | before do 52 | system("openssl req -x509 -batch -nodes -newkey rsa:2048 -keyout #{ssl_key} -out #{ssl_certificate} -subj /CN=localhost #{redirect}") 53 | expect($?).to(be_success) 54 | File.write(config_file, logstash_forwarder_config) 55 | lsf 56 | 57 | # Make sure lsf hasn't crashed 58 | 5.times do 59 | # Sending signal 0 will throw exception if the process is dead. 60 | Process.kill(0, lsf.pid) 61 | sleep(rand * 0.1) 62 | end 63 | end # before each 64 | 65 | 66 | it "should follow a file and emit lines as events" do 67 | # TODO(sissel): Refactor this once we figure out a good way to do 68 | # multi-component integration tests and property tests. 
69 | fd = File.new(input_file, "wb") 70 | lines = [ "Hello world", "Fancy Pants", "Some Unicode Emoji: 👍 💗 " ] 71 | lines.each { |l| fd.write(l + "\n") } 72 | fd.flush 73 | fd.close 74 | 75 | # TODO(sissel): Make sure this doesn't take forever, do a timeout. 76 | count = 0 77 | events = [] 78 | connection = server.accept 79 | connection.run do |event| 80 | events << event 81 | connection.close if events.length == lines.length 82 | end 83 | 84 | expect(events.count).to(eq(lines.length)) 85 | lines.zip(events).each do |line, event| 86 | # TODO(sissel): Resolve the need for this hack. 87 | event["line"].force_encoding("UTF-8") 88 | expect(event["line"]).to(eq(line)) 89 | expect(event[random_field]).to(eq(random_value)) 90 | end 91 | end 92 | end 93 | 94 | describe "operating" do 95 | let(:redirect) { ENV["DEBUG"] ? "" : "> /dev/null 2>&1" } 96 | context "when compiled from source" do 97 | let(:lsf) do 98 | # Start the process and return the IO handle (lsf.pid gives its pid) 99 | IO.popen(["./logstash-forwarder", "-config", config_file, "-quiet"]) 100 | end 101 | let(:host) { "localhost" } 102 | it_behaves_like "logstash-forwarder" 103 | end 104 | 105 | if false 106 | context "when installed from a deb", :deb => true do 107 | let(:deb) { Dir.glob(File.join(File.dirname(__FILE__), "..", "*.deb")).first } 108 | let(:container_name) { "lsf-spec-#{$$}" } 109 | let(:lsf) do 110 | args = ["docker", "run", "--name", container_name, "-v", "#{workdir}:#{workdir}", "-i", "ubuntu:14.04", "/bin/bash"] 111 | IO.popen(args, "wb") 112 | end 113 | 114 | # Have to try repeatedly here because the network configuration of a docker container isn't available immediately. 115 | let(:host) do 116 | lsf 117 | ip = nil 118 | 10.times do 119 | ip = JSON.parse(`docker inspect #{container_name}`)[0]["NetworkSettings"]["Gateway"] rescue nil 120 | break unless ip.nil? || ip.empty? 121 | sleep 0.01 122 | end 123 | raise "Something is wrong with docker" if ip.nil? 
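# Log the discovered gateway address before returning it.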
124 | p :ip => ip 125 | ip 126 | end 127 | 128 | it_behaves_like "logstash-forwarder" do 129 | before do 130 | if !File.exist?("logstash-forwarder") 131 | system("make logstash-forwarder #{redirect}") 132 | expect($?).to(be_success) 133 | end 134 | system("make deb #{redirect}") 135 | expect($?).to(be_success) 136 | expect(File).to(be_exist(deb)) 137 | 138 | FileUtils.cp(deb, workdir) 139 | lsf.write("dpkg -i #{workdir}/#{File.basename(deb)}\n") 140 | system("docker inspect #{container_name}") 141 | 142 | # Put a custom config for testing 143 | lsf.write("sed -e 's/localhost:/#{ip}:/' #{config_file} > /etc/logstash-forwarder.conf\n") 144 | 145 | # Start lsf 146 | lsf.write("/etc/init.d/logstash-forwarder start\n") 147 | 148 | # Watch the logs 149 | lsf.write("tail -F /var/log/logstash-forwarder.{err,log}\n") 150 | end 151 | 152 | after do 153 | system("docker", "kill", container_name) 154 | end 155 | end 156 | end 157 | end # if false 158 | end 159 | -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | require 'rspec' 3 | require 'rspec/mocks' 4 | $: << File.realpath(File.join(File.dirname(__FILE__), "..", "lib")) 5 | -------------------------------------------------------------------------------- /spooler.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "time" 5 | ) 6 | 7 | func Spool(input chan *FileEvent, 8 | output chan []*FileEvent, 9 | max_size uint64, 10 | idle_timeout time.Duration) { 11 | // heartbeat periodically. If the last flush was longer than 12 | // 'idle_timeout' time ago, then we'll force a flush to prevent us from 13 | // holding on to spooled events for too long. 14 | 15 | ticker := time.NewTicker(idle_timeout / 2) 16 | 17 | // slice for spooling into 18 | // TODO(sissel): use container.Ring? 19 | spool := make([]*FileEvent, max_size) 20 | 21 | // Current write position in the spool 22 | var spool_i int = 0 23 | 24 | next_flush_time := time.Now().Add(idle_timeout) 25 | for { 26 | select { 27 | case event := <-input: 28 | //append(spool, event) 29 | spool[spool_i] = event 30 | spool_i++ 31 | 32 | // Flush if full 33 | if spool_i == cap(spool) { 34 | //spoolcopy := make([]*FileEvent, max_size) 35 | var spoolcopy []*FileEvent 36 | //fmt.Println(spool[0]) 37 | spoolcopy = append(spoolcopy, spool[:]...) 38 | output <- spoolcopy 39 | next_flush_time = time.Now().Add(idle_timeout) 40 | 41 | spool_i = 0 42 | } 43 | case <-ticker.C: 44 | //fmt.Println("tick") 45 | if now := time.Now(); now.After(next_flush_time) { 46 | // if current time is after the next_flush_time, flush! 47 | //fmt.Printf("timeout: %d exceeded by %d\n", idle_timeout, 48 | //now.Sub(next_flush_time)) 49 | 50 | // Flush what we have, if anything 51 | if spool_i > 0 { 52 | var spoolcopy []*FileEvent 53 | spoolcopy = append(spoolcopy, spool[0:spool_i]...) 54 | output <- spoolcopy 55 | next_flush_time = now.Add(idle_timeout) 56 | spool_i = 0 57 | } 58 | } /* if 'now' is after 'next_flush_time' */ 59 | /* case ... 
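note: both flush paths copy the spool into a fresh slice before sending, so the spool buffer can be reused immediately 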
*/ 60 | } /* select */ 61 | } /* for */ 62 | } /* spool */ 63 | -------------------------------------------------------------------------------- /syslog.go: -------------------------------------------------------------------------------- 1 | // +build !windows 2 | 3 | package main 4 | 5 | import ( 6 | "log" 7 | "log/syslog" 8 | ) 9 | 10 | func configureSyslog() { 11 | writer, err := syslog.New(syslog.LOG_INFO|syslog.LOG_DAEMON, "logstash-forwarder") 12 | if err != nil { 13 | log.Fatalf("Failed to open syslog: %s\n", err) 14 | return 15 | } 16 | log.SetOutput(writer) 17 | } 18 | -------------------------------------------------------------------------------- /syslog_windows.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "log" 4 | 5 | func configureSyslog() { 6 | log.Printf("Logging to syslog not supported on this platform\n") 7 | } 8 | -------------------------------------------------------------------------------- /version.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | 4 | var Version string = "0.4.0" 5 | --------------------------------------------------------------------------------