├── .ruby-version ├── Gemfile ├── Gemfile.lock ├── Procfile ├── README.md ├── app.json ├── app.rb ├── config.ru ├── config ├── newrelic.yml └── puma.rb ├── queue_io.rb └── writer ├── base.rb ├── local.rb └── s3.rb /.ruby-version: -------------------------------------------------------------------------------- 1 | 2.7.3 2 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | ruby '2.7.3' 4 | 5 | gem 'puma', '>= 3.12.3' 6 | gem 'rack', '>= 2.0.8' 7 | gem 'aws-sdk', '~> 1.4' 8 | gem 'heroku-log-parser', git: 'https://github.com/rwdaigle/heroku-log-parser.git' 9 | gem 'nokogiri', '>= 1.10.8' 10 | 11 | gem 'newrelic_rpm' 12 | -------------------------------------------------------------------------------- /Gemfile.lock: -------------------------------------------------------------------------------- 1 | GIT 2 | remote: https://github.com/rwdaigle/heroku-log-parser.git 3 | revision: 33e46fdee18ffc5cd21a5190008152b02d5f7103 4 | specs: 5 | heroku-log-parser (0.4.0) 6 | 7 | GEM 8 | remote: https://rubygems.org/ 9 | specs: 10 | aws-sdk (1.67.0) 11 | aws-sdk-v1 (= 1.67.0) 12 | aws-sdk-v1 (1.67.0) 13 | json (~> 1.4) 14 | nokogiri (~> 1) 15 | json (1.8.6) 16 | mini_portile2 (2.4.0) 17 | newrelic_rpm (3.15.2.317) 18 | nio4r (2.5.2) 19 | nokogiri (1.10.8) 20 | mini_portile2 (~> 2.4.0) 21 | puma (4.3.5) 22 | nio4r (~> 2.0) 23 | rack (2.2.3) 24 | 25 | PLATFORMS 26 | ruby 27 | 28 | DEPENDENCIES 29 | aws-sdk (~> 1.4) 30 | heroku-log-parser! 31 | newrelic_rpm 32 | nokogiri (>= 1.10.8) 33 | puma (>= 3.12.3) 34 | rack (>= 2.0.8) 35 | 36 | RUBY VERSION 37 | ruby 2.7.3p183 38 | 39 | BUNDLED WITH 40 | 2.1.4 41 | -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | web: bundle exec puma -C config/puma.rb 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Heroku Log S3 2 | 3 | [![Deploy](https://www.herokucdn.com/deploy/button.svg)](https://heroku.com/deploy) 4 | 5 | ## Configure 6 | 7 | Setup the following `ENV` (aka `heroku config:set`) 8 | 9 | - `FILTER_PREFIX` this is the prefix string to look out for. every other log lines are ignored 10 | - `S3_KEY`, `S3_SECRET`, `S3_BUCKET` necessary ACL to your s3 bucket 11 | - `AWS_REGION` the AWS region your S3 bucket is in 12 | - `DURATION` (default `60`) seconds to buffer until we close the `IO` to `AWS::S3::S3Object#write` 13 | - `STRFTIME` (default `%Y%m/%d/%H/%M%S.:thread_id.log`) format of your s3 `object_id` 14 | - `:thread_id` will be replaced by a unique number to prevent overwriting of the same file between reboots, in case the timestamp overlaps 15 | - `HTTP_USER`, `HTTP_PASSWORD` (default no password protection) credentials for HTTP Basic Authentication 16 | - `WRITER_LIB` (default `./writer/s3.rb`) defines the ruby script to load `Writer` class 17 | 18 | ## Using 19 | 20 | In your heroku app, add this drain (changing `HTTP_USER`, `HTTP_PASSWORD` and `DRAIN_APP_NAME` to appropriate values) 21 | 22 | ``` 23 | heroku drains:add https://HTTP_USER:HTTP_PASSWORD@DRAIN_APP_NAME.herokuapp.com/ 24 | ``` 25 | 26 | or if you have no password protection 27 | 28 | ``` 29 | heroku drains:add https://DRAIN_APP_NAME.herokuapp.com/ 30 | ``` 31 | 32 | # Credits 33 | 34 | - https://github.com/rwdaigle/heroku-log-parser 35 | - https://github.com/rwdaigle/heroku-log-store 36 | 37 | # Alternatives 38 | 39 | - https://logbox.io a logs drain that forwards Heroku messages for a long‑term archival to AWS S3, Glacier or CloudWatch. 40 | - _{insert suggestions here}_ 41 | -------------------------------------------------------------------------------- /app.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Heroku Log S3", 3 | "description": "log drain from heroku to s3", 4 | "repository": "https://github.com/choonkeat/heroku-log-s3", 5 | "keywords": ["ruby", "rack", "log", "drain", "s3", "upload"], 6 | "env": { 7 | "AWS_REGION": "", 8 | "S3_KEY": "", 9 | "S3_SECRET": "", 10 | "S3_BUCKET": "", 11 | "DURATION": { 12 | "description": "Seconds to buffer until we write to S3", 13 | "value": "60" 14 | }, 15 | "STRFTIME": { 16 | "description": "Format of your s3 `object_id` (file path); `:thread_id` will be replaced by a unique number to prevent overwriting of the same file between reboots, in case the timestamp overlaps", 17 | "value": "%Y%m/%d/%H/%M%S.:thread_id.log" 18 | }, 19 | "HTTP_USER": { 20 | "value": "logger", 21 | "required": false 22 | }, 23 | "HTTP_PASSWORD": { 24 | "description": "Password to prevent public from using your log drain", 25 | "generator": "secret", 26 | "required": false 27 | }, 28 | "FILTER_PREFIX": { 29 | "description": "Lines with this prefix will be saved to S3; non-matching lines are ignored", 30 | "value": "", 31 | "required": false 32 | } 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /app.rb: -------------------------------------------------------------------------------- 1 | require 'logger' 2 | require 'heroku-log-parser' 3 | require_relative './queue_io.rb' 4 | require_relative ENV.fetch("WRITER_LIB", "./writer/s3.rb") # provider of `Writer < WriterBase` singleton 5 | 6 | class App 7 | 8 | PREFIX = ENV.fetch("FILTER_PREFIX", "") 9 | PREFIX_LENGTH = PREFIX.length 10 | LOG_REQUEST_URI = ENV['LOG_REQUEST_URI'] 11 | 12 | def initialize 13 | @logger = Logger.new(STDOUT) 14 | @logger.formatter = proc do |severity, datetime, progname, msg| 15 | "[app #{$$} #{Thread.current.object_id}] #{msg}\n" 16 | end 17 | @logger.info "initialized" 18 | end 19 | 20 | def call(env) 21 | lines = if LOG_REQUEST_URI 22 | [{ msg: env['REQUEST_URI'], ts: '' }] 23 | else 24 | HerokuLogParser.parse(env['rack.input'].read).collect { |m| { msg: m[:message], ts: m[:emitted_at].strftime('%Y-%m-%dT%H:%M:%S.%L%z') } } 25 | end 26 | 27 | lines.each do |line| 28 | msg = line[:msg] 29 | next unless msg.start_with?(PREFIX) 30 | Writer.instance.write([line[:ts], msg[PREFIX_LENGTH..-1]].join(' ').strip) # WRITER_LIB 31 | end 32 | 33 | rescue Exception 34 | @logger.error $! 35 | @logger.error $@ 36 | 37 | ensure 38 | return [200, { 'Content-Length' => '0' }, []] 39 | end 40 | 41 | end 42 | -------------------------------------------------------------------------------- /config.ru: -------------------------------------------------------------------------------- 1 | if ENV['NEW_RELIC_LICENSE_KEY'] 2 | require 'newrelic_rpm' 3 | NewRelic::Agent.manual_start 4 | end 5 | 6 | require_relative './app.rb' 7 | 8 | $stdout.sync = true 9 | 10 | if ENV['HTTP_USER'].to_s == '' && ENV['HTTP_PASSWORD'].to_s == '' 11 | # skip 12 | else 13 | use Rack::Auth::Basic, "Restricted Area" do |username, password| 14 | [username, password] == [ENV['HTTP_USER'], ENV['HTTP_PASSWORD']] 15 | end 16 | end 17 | 18 | run App.new 19 | -------------------------------------------------------------------------------- /config/newrelic.yml: -------------------------------------------------------------------------------- 1 | # 2 | # This file configures the New Relic Agent. New Relic monitors Ruby, Java, 3 | # .NET, PHP, Python and Node applications with deep visibility and low 4 | # overhead. For more information, visit www.newrelic.com. 5 | # 6 | # Generated August 11, 2016 7 | # 8 | # This configuration file is custom generated for app54970161@heroku.com 9 | # 10 | # For full documentation of agent configuration options, please refer to 11 | # https://docs.newrelic.com/docs/agents/ruby-agent/installation-configuration/ruby-agent-configuration 12 | 13 | common: &default_settings 14 | # Required license key associated with your New Relic account. 15 | license_key: '<%= ENV["NEW_RELIC_LICENSE_KEY"] %>' 16 | 17 | # Your application name. Renaming here affects where data displays in New 18 | # Relic. For more details, see https://docs.newrelic.com/docs/apm/new-relic-apm/maintenance/renaming-applications 19 | app_name: "<%= ENV['S3_BUCKET'] %> Log Drain" 20 | 21 | # To disable the agent regardless of other settings, uncomment the following: 22 | # agent_enabled: false 23 | 24 | # Logging level for log/newrelic_agent.log 25 | log_level: info 26 | 27 | 28 | # Environment-specific settings are in this section. 29 | # RAILS_ENV or RACK_ENV (as appropriate) is used to determine the environment. 30 | # If your application has other named environments, configure them here. 31 | development: 32 | <<: *default_settings 33 | app_name: "<%= ENV['S3_BUCKET'] %> Log Drain (Development)" 34 | 35 | # NOTE: There is substantial overhead when running in developer mode. 36 | # Do not use for production or load testing. 37 | developer_mode: true 38 | 39 | test: 40 | <<: *default_settings 41 | # It doesn't make sense to report to New Relic from automated test runs. 42 | monitor_mode: false 43 | 44 | staging: 45 | <<: *default_settings 46 | app_name: "<%= ENV['S3_BUCKET'] %> Log Drain (Staging)" 47 | 48 | production: 49 | <<: *default_settings 50 | -------------------------------------------------------------------------------- /config/puma.rb: -------------------------------------------------------------------------------- 1 | workers_count = Integer(ENV.fetch("WEB_CONCURRENCY", 1)) 2 | workers(workers_count) 3 | 4 | threads_count = Integer(ENV.fetch("WEB_MAX_THREADS", 25)) 5 | threads(threads_count, threads_count) 6 | 7 | port ENV.fetch("PORT", 9292) 8 | environment ENV.fetch("RACK_ENV", "development") 9 | -------------------------------------------------------------------------------- /queue_io.rb: -------------------------------------------------------------------------------- 1 | require 'logger' 2 | 3 | class QueueIO 4 | def initialize(duration = Integer(ENV.fetch('DURATION', 60))) 5 | @duration = duration 6 | @pending = Queue.new 7 | @queue = Queue.new 8 | @start = Time.now.to_i 9 | end 10 | 11 | def write(data) 12 | @queue.push(data) 13 | end 14 | 15 | def read(bytes) 16 | return if @closed 17 | @pending.pop(true) # non-blocking, best effort 18 | 19 | rescue Exception 20 | @pending.push @queue.shift 21 | now = Time.now.to_i 22 | if (@start + @duration) < now 23 | @start = now 24 | return # make `eof?` return true 25 | else 26 | return @pending.shift 27 | end 28 | end 29 | 30 | def eof? 31 | !@pending.empty? 32 | end 33 | 34 | def close 35 | # make `closed?` return true 36 | @closed = true 37 | 38 | # short circuit `:read` 39 | @duration = 0 40 | @queue.push "" 41 | @queue.push "" 42 | end 43 | 44 | def closed? 45 | @closed 46 | end 47 | end 48 | -------------------------------------------------------------------------------- /writer/base.rb: -------------------------------------------------------------------------------- 1 | require 'singleton' 2 | require 'logger' 3 | require_relative '../queue_io.rb' 4 | 5 | class WriterBase 6 | include Singleton 7 | 8 | def initialize 9 | @logger = Logger.new(STDOUT) 10 | @logger.formatter = proc do |severity, datetime, progname, msg| 11 | "[upload #{$$} #{Thread.current.object_id}] #{msg}\n" 12 | end 13 | @io = QueueIO.new 14 | @logger.info "initialized" 15 | self.start 16 | end 17 | 18 | def write(line) 19 | @io.write(line + "\n") 20 | end 21 | 22 | def generate_filepath 23 | Time.now.utc.strftime(ENV.fetch('STRFTIME', '%Y%m/%d/%H/%M%S.:thread_id.log').gsub(":thread_id", Thread.current.object_id.to_s)) 24 | end 25 | 26 | def stream_to(filepath) 27 | raise NotImplementedError.new("stream_to(filepath)") 28 | end 29 | 30 | def start 31 | thread = Thread.new do 32 | @logger.info "begin thread" 33 | stream_to(generate_filepath) until @io.closed? 34 | @logger.info "end thread" 35 | end 36 | 37 | at_exit do 38 | @logger.info "shutdown!" 39 | @io.close 40 | thread.join 41 | end 42 | end 43 | end 44 | -------------------------------------------------------------------------------- /writer/local.rb: -------------------------------------------------------------------------------- 1 | require 'fileutils' 2 | require_relative './base.rb' 3 | 4 | class Writer < WriterBase 5 | 6 | def stream_to(filepath) 7 | @logger.info "begin #{filepath}" 8 | file = "output/#{filepath}" 9 | FileUtils.mkdir_p File.dirname(file) 10 | open(file, "w") do |f| 11 | while data = @io.read(4068) 12 | f.write(data) 13 | end 14 | end 15 | @logger.info "end #{filepath}" 16 | end 17 | 18 | end 19 | -------------------------------------------------------------------------------- /writer/s3.rb: -------------------------------------------------------------------------------- 1 | require 'aws' 2 | require 'logger' 3 | require_relative './base.rb' 4 | 5 | class Writer < WriterBase 6 | 7 | S3_BUCKET_OBJECTS = AWS::S3.new({ 8 | access_key_id: ENV.fetch('S3_KEY'), 9 | secret_access_key: ENV.fetch('S3_SECRET'), 10 | }).buckets[ENV.fetch('S3_BUCKET')].objects 11 | 12 | def stream_to(filepath) 13 | @logger.info "begin #{filepath}" 14 | S3_BUCKET_OBJECTS[filepath].write( 15 | @io, 16 | estimated_content_length: 1 # low-ball estimate; so we can close buffer by returning nil 17 | ) 18 | @logger.info "end #{filepath}" 19 | end 20 | 21 | end 22 | --------------------------------------------------------------------------------