├── Rakefile ├── spec └── inputs │ └── test_input.rb ├── .gitignore ├── Gemfile ├── LICENSE ├── logstash-input-rethinkdb.gemspec ├── README.md └── lib └── logstash └── inputs └── rethinkdb.rb /Rakefile: -------------------------------------------------------------------------------- 1 | require "logstash/devutils/rake" 2 | -------------------------------------------------------------------------------- /spec/inputs/test_input.rb: -------------------------------------------------------------------------------- 1 | require "logstash/devutils/rspec/spec_helper" -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | Gemfile.lock 3 | Gemfile.bak 4 | .bundle 5 | vendor 6 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | gem "eventmachine" 3 | gem "rethinkdb" 4 | gemspec 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012-2015 Elasticsearch 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | -------------------------------------------------------------------------------- /logstash-input-rethinkdb.gemspec: -------------------------------------------------------------------------------- 1 | Gem::Specification.new do |s| 2 | s.name = 'logstash-input-rethinkdb' 3 | s.version = '0.2.3' 4 | s.licenses = ['Apache License (2.0)'] 5 | s.summary = "Streams changes from RethinkDB tables into LogStash." 6 | s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program" 7 | s.authors = ["RethinkDB"] 8 | s.email = 'josh@rethinkdb.com' 9 | s.homepage = "http://www.rethinkdb.com/docs/logstash-plugins" 10 | s.require_paths = ["lib"] 11 | 12 | # Files 13 | s.files = `git ls-files`.split($\) 14 | # Tests 15 | s.test_files = s.files.grep(%r{^(test|spec|features)/}) 16 | 17 | # Special flag to let us know this is actually a logstash plugin 18 | s.metadata = { "logstash_plugin" => "true", "logstash_group" => "input" } 19 | 20 | # Gem dependencies 21 | s.add_runtime_dependency "logstash-core", '< 3.0.0', '>= 2.0.0.beta2' 22 | s.add_runtime_dependency 'logstash-codec-plain' 23 | s.add_runtime_dependency 'rethinkdb', '>=2.2.0' 24 | s.add_runtime_dependency 'eventmachine', '~>1.0', '>=1.0.7' 25 | s.add_development_dependency 'logstash-devutils' 26 | end 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RethinkDB Logstash input plugin 2 | 3 | This is a plugin for [Logstash](https://github.com/elasticsearch/logstash). 4 | 5 | **Currently this is BETA software. It contains some known limitations (see below).** 6 | 7 | ## Using the plugin 8 | 9 | You'll need to use this with Logstash 1.5 or higher (note that version 0.2.3 of the gem declares a dependency on logstash-core >= 2.0.0.beta2 and < 3.0.0), which you can [download here](https://www.elastic.co/downloads/logstash).
10 | 11 | - Install the plugin from the Logstash home directory 12 | ```sh 13 | $ bin/plugin install logstash-input-rethinkdb 14 | ``` 15 | 16 | - Now you can test the plugin using the stdout output: 17 | 18 | ```sh 19 | $ bin/logstash -e ' 20 | input {rethinkdb 21 | {host => "localhost" 22 | port => 28015 23 | auth_key => "" 24 | watch_dbs => ["db1", "db2"] 25 | watch_tables => ["test.foo", "db2.baz"] 26 | backfill => true 27 | }} 28 | output {stdout {codec => json_lines}}' 29 | ``` 30 | 31 | This will immediately watch the tables `test.foo` and `db2.baz`, and it will also watch the databases `db1` and `db2` for new or dropped tables and watch or unwatch those tables appropriately. Since `backfill` is `true`, it will automatically send events for the documents that already exist in those tables during initialization. 32 | 33 | ### Format of the events: 34 | 35 | The events are encoded with the "json_lines" codec, which emits compact JSON documents, one event per line. 36 | 37 | Fields: 38 | 39 | - **db**: the database that emitted the event 40 | - **table**: the table that emitted the event 41 | - **old_val**: the old value of the document (see [changefeeds](http://rethinkdb.com/docs/changefeeds/ruby/)) 42 | - **new_val**: the new value of the document 43 | - **@timestamp**: timestamp added by logstash (so may not correspond to the rethinkdb server time the change was emitted) 44 | - **@version**: version number added by logstash (always 1) 45 | 46 | ## Known limitations 47 | 48 | There are two limitations that should be known by anyone using this in production systems: 49 | 50 | 1. Until RethinkDB supports [resuming changefeeds](https://github.com/rethinkdb/rethinkdb/issues/3471), this plugin cannot guarantee that no changes are missed if a connection to the database is dropped.
# encoding: utf-8
#
# NOTE: in the collapsed dump this file was recovered from, the final lines of
# README.md shared a physical line with the start of this file. They are
# preserved here, as comments, so that no documentation is lost:
#
#   Again, once that functionality is implemented, this plugin will be
#   modified to provide reliable "at least once" semantics for changes
#   (meaning once it reconnects, it can catch back up and send any changes
#   that it missed).
#   2. Documents that are deleted in RethinkDB while the LogStash plugin is
#      disconnected will not be synchronized. This is true even if
#      `backfill` is enabled. This limitation is a consequence of LogStash
#      operating on a document-by-document basis.
#
#   ## License
#
#   Apache 2.0
#
# ---- /lib/logstash/inputs/rethinkdb.rb ----

require "logstash/inputs/base"
require "logstash/namespace"
require "eventmachine"
require "rethinkdb"

# Logstash input that opens RethinkDB changefeeds inside an EventMachine loop
# and turns every initial value / change into a LogStash::Event with the
# fields "db", "table", "old_val" and "new_val".
#
# Two registries track the open feeds:
#   @table_feeds  {db => {table => QueryHandle}}
#   @db_feeds     {db => QueryHandle}
# The handler classes defined at the bottom of this file (TableHandler and
# DBHandler) call back into the public register_* / unregister_* / send /
# update_db_tables methods, so those must stay public.
class LogStash::Inputs::RethinkDB < LogStash::Inputs::Base
  config_name "rethinkdb"
  default :codec, "json_lines"
  attr_accessor :logger

  include RethinkDB::Shortcuts

  # Hostname of RethinkDB server
  config :host, :validate => :string, :default => "localhost"
  # Driver connection port of RethinkDB server
  config :port, :validate => :number, :default => 28015
  # Auth key of RethinkDB server. Leave it empty (the default) to connect
  # with :user / :password instead.
  config :auth_key, :validate => :string, :default => ""
  # Passed as the :squash option of every changefeed opened by this plugin.
  # Defaults to true; the string 'false' is treated as false (see #register).
  config :squash, :default => true
  # Which tables to watch for changes, as "db.table". A name without a dot
  # is looked up in the "test" database (see #run).
  config :watch_tables, :validate => :array, :default => []
  # Which databases to watch for changes. Tables added or removed from
  # these databases will be watched or unwatched accordingly
  config :watch_dbs, :validate => :array, :default => []
  # Whether to backfill documents from the dbs and tables when
  # (re)connecting to RethinkDB. This ensures all documents in the
  # RethinkDB tables will be sent over logstash, but it may cause a
  # lot of traffic with very large tables and/or unstable connections.
  config :backfill, :default => true
  # SSL support: when set, passed to the driver as :ssl => {:ca_certs => ...}
  config :ca_certs, :default => nil
  # Credentials as of RethinkDB v2.3.x
  config :user, :validate => :string, :default => "admin"
  config :password, :validate => :string, :default => ""

  # Part of the logstash input interface.
  # Initialises the feed registries and normalises the boolean-ish options.
  def register
    # {db => {table => QueryHandle}}
    @table_feeds = Hash.new { |hsh, key| hsh[key] = {} }
    # {db => QueryHandle}
    @db_feeds = {}
    @queue = nil
    # These options are declared without :validate, so a config file may
    # hand us the string 'false'; treat that as false.
    @backfill = @backfill && @backfill != 'false'
    @squash = @squash && @squash != 'false'
  end

  # Part of the logstash input interface.
  # Connects to RethinkDB, then starts the EventMachine loop and opens one
  # feed per watched database and per watched table. EM.run only returns
  # once the reactor is stopped (see #teardown).
  #
  # queue - the object events are appended to with `<<`.
  def run(queue)
    @queue = queue
    @conn = r.connect(connection_options)

    EM.run do
      @logger.log "Eventmachine loop started"
      @watch_dbs.uniq.each do |db|
        create_db_feed(db, DBHandler.new(db, self))
      end
      @watch_tables.uniq.each do |db_table|
        db, table = db_table.split '.'
        # A bare "table" (no "db." prefix) lives in the "test" database.
        db, table = "test", db if table.nil?
        update_db_tables(nil, {'db' => db, 'name' => table})
      end
    end
  end

  # Builds an event from a change and pushes it onto the queue.
  #
  # NOTE: this shadows Object#send on plugin instances; the handlers below
  # call it by this name, so it cannot be renamed here. Use __send__ for
  # dynamic dispatch on this class.
  def send(db, table, old_val, new_val)
    event = LogStash::Event.new(
      "db" => db,
      "table" => table,
      "old_val" => old_val,
      "new_val" => new_val
    )
    decorate(event)
    @queue << event
  end

  # Reacts to a change in a database's table list.
  #
  # old_val - {'db' => ..., 'name' => ...} of a table to stop watching, or nil.
  # new_val - {'db' => ..., 'name' => ...} of a table to start watching, or nil.
  def update_db_tables(old_val, new_val)
    unless new_val.nil?
      handler = TableHandler.new(new_val['db'], new_val['name'], self)
      create_table_feed(new_val['db'], new_val['name'], handler)
    end
    unless old_val.nil?
      unregister_table(old_val['db'], old_val['name'], nil)
    end
  end

  # Adds a table feed to the registry. If the table already has a feed, the
  # new handle is a duplicate and is closed instead.
  def register_table(db, table, qhandle)
    if @table_feeds.has_key?(db) && @table_feeds[db].has_key?(table)
      qhandle.close
    else
      @logger.log("Watching table #{db}.#{table}")
      @table_feeds[db][table] = qhandle
    end
  end

  # Removes a table from the registry and closes its feed.
  #
  # qhandle - nil to remove whatever feed is registered, or a specific handle,
  #           in which case nothing happens unless it is the registered one
  #           (this is what keeps a closing duplicate feed from evicting the
  #           original).
  def unregister_table(db, table, qhandle)
    return unless @table_feeds.has_key?(db) && @table_feeds[db].has_key?(table)
    return unless qhandle.nil? || @table_feeds[db][table].equal?(qhandle)

    @logger.log("Unregistering table #{db}.#{table}")
    # Delete before closing so a re-entrant call finds nothing to do.
    @table_feeds[db].delete(table).close
  end

  # Adds a db to the registry to watch it for updates to which tables
  # are listed in it. A db that is already registered is left untouched.
  def register_db(db, qhandle)
    unless @db_feeds.has_key? db
      @db_feeds[db] = qhandle
      @logger.log "Feed for db '#{db}' registered"
    end
  end

  # Removes a db from the registry and closes its feed along with all of its
  # table feeds.
  #
  # Unlike the previous version, the db feed is released even when the db has
  # no table feeds registered, and a db with no registered feed no longer
  # raises NoMethodError on nil.
  def unregister_db(db)
    return unless @table_feeds.has_key?(db) || @db_feeds.has_key?(db)

    @logger.log("Unregistering feed for db '#{db}'")
    if @table_feeds.has_key?(db)
      # Iterate over a snapshot of the keys: unregister_table mutates the hash.
      @table_feeds[db].keys.each do |table|
        unregister_table(db, table, nil)
      end
      @table_feeds.delete(db)
    end
    # Delete before closing so a re-entrant call finds nothing to do.
    db_feed = @db_feeds.delete(db)
    db_feed.close if db_feed
  end

  # Opens a changefeed on rethinkdb.table_status and attaches `handler` to it.
  #
  # The filter and pluck are after .changes due to bug #5241. When
  # that's solved they can be moved before .changes and can be
  # simplified since they won't have to operate over both new_val
  # and old_val
  #
  # NOTE(review): inside the filter `row` is a change document, so
  # `row['status']` looks like it may have been meant as
  # `row['new_val']['status']` -- confirm against a live server before
  # changing the query.
  def create_db_feed(db, handler)
    r.db('rethinkdb').
      table('table_status').
      changes(:include_initial => @backfill,
              :squash => @squash,
              :include_states => true).
      filter{|row| r([row['old_val']['db'], row['new_val']['db']]).contains(db).
             and(row['status']['all_replicas_ready'])}.
      pluck({:new_val => ['db', 'name'], :old_val => ['db', 'name']}).
      em_run(@conn, handler)
  end

  # Opens a changefeed on db.table and attaches `handler` to it. Times and
  # binary values are requested in 'raw' format.
  def create_table_feed(db, table, handler)
    options = {
      :time_format => 'raw',
      :binary_format => 'raw',
    }
    r.db(db).
      table(table).
      changes(:include_initial => @backfill,
              :squash => @squash,
              :include_states => true).
      em_run(@conn, options, handler)
  end

  # Goes through all existing handles and closes them, then clears
  # out the registry and closes the connection to RethinkDB.
  #
  # Safe to call when #run never got as far as connecting or starting the
  # reactor.
  #
  # NOTE(review): the gemspec targets logstash-core >= 2.0.0.beta2, where the
  # shutdown hooks are, I believe, `stop`/`close` rather than `teardown` --
  # confirm this method is still invoked by the pipeline.
  def teardown
    @table_feeds.values.each do |tables|
      tables.values.each { |qhandle| qhandle.close }
    end
    @db_feeds.values.each { |qhandle| qhandle.close }
    @table_feeds.clear
    @db_feeds.clear
    @conn.close if @conn
    EM.stop if EM.reactor_running?
    @queue = nil
  end

  private

  # Options hash for r.connect. An empty (or missing) auth key selects
  # user/password authentication; otherwise the auth key is used.
  def connection_options
    options = {
      :host => @host,
      :port => @port,
      :ssl => (@ca_certs ? { :ca_certs => @ca_certs } : nil)
    }
    if @auth_key.to_s.empty?
      options[:user] = @user
      options[:password] = @password
    else
      options[:auth_key] = @auth_key
    end
    options
  end

end

# This handles feeds listening for changes to documents in a table.
# Every callback forwards to the owning plugin.
class TableHandler < RethinkDB::Handler
  attr_accessor :db
  attr_accessor :table

  # db, table - names of the watched table.
  # plugin    - the LogStash::Inputs::RethinkDB instance to report to.
  def initialize(db, table, plugin)
    super()
    @db = db
    @table = table
    @plugin = plugin
  end

  # An initial (backfilled) document is emitted as a change with no old value.
  def on_initial_val(val)
    @plugin.send(@db, @table, nil, val)
  end

  def on_change(old_val, new_val)
    @plugin.send(@db, @table, old_val, new_val)
  end

  def on_open(qhandle)
    @plugin.register_table(@db, @table, qhandle)
  end

  def on_close(qhandle)
    @plugin.unregister_table(@db, @table, qhandle)
  end

  def on_error(err, qhandle)
    @plugin.logger.error(err.to_s)
    @plugin.unregister_table(@db, @table, qhandle)
  end

  # Logs a per-change error as a warning. The previous version referenced an
  # undefined local `err` here and raised NameError instead of logging.
  def on_change_error(err_str)
    @plugin.logger.warn(err_str.to_s)
  end
end

# Handler for changes to the tables in a database.
# Every callback forwards to the owning plugin.
class DBHandler < RethinkDB::Handler

  attr_accessor :db

  # db     - name of the watched database.
  # plugin - the LogStash::Inputs::RethinkDB instance to report to.
  def initialize(db, plugin)
    super()
    @db = db
    @plugin = plugin
  end

  def on_open(qhandle)
    @plugin.register_db(@db, qhandle)
  end

  def on_close(qhandle)
    @plugin.unregister_db(@db)
  end

  def on_error(err, qhandle)
    @plugin.logger.error(err.to_s)
    @plugin.unregister_db(@db)
  end

  def on_change_error(err_str)
    @plugin.logger.warn(err_str)
  end

  # A table that already exists is reported as "added" (no old value).
  def on_initial_val(val)
    @plugin.update_db_tables(nil, val)
  end

  def on_change(old_val, new_val)
    @plugin.update_db_tables(old_val, new_val)
  end
end