├── .gitignore ├── .rspec ├── lib └── active_record │ ├── sql_analyzer │ ├── version.rb │ ├── redactor.rb │ ├── monkeypatches │ │ ├── tagger.rb │ │ └── query.rb │ ├── logger.rb │ ├── compact_logger.rb │ ├── backtrace_filter.rb │ ├── analyzer.rb │ ├── redacted_logger.rb │ ├── background_processor.rb │ ├── cli.rb │ ├── cli_processor.rb │ └── configuration.rb │ └── sql_analyzer.rb ├── CHANGES.md ├── .rubocop.yml ├── spec ├── support │ ├── stub_logger.rb │ ├── stub_rails.rb │ ├── wait_for_pop.rb │ └── db_connection.rb ├── active_record │ └── sql_analyzer │ │ ├── backtrace_filter_spec.rb │ │ ├── background_processor_spec.rb │ │ ├── analyzer_spec.rb │ │ ├── cli_spec.rb │ │ ├── cli_processor_spec.rb │ │ ├── redacted_logger_spec.rb │ │ └── end_to_end_spec.rb └── spec_helper.rb ├── .travis.yml ├── Gemfile ├── Rakefile ├── bin └── ar-log-analyzer ├── CONTRIBUTING.md ├── active_record-sql_analyzer.gemspec ├── README.md └── LICENSE.md /.gitignore: -------------------------------------------------------------------------------- 1 | active_record-sql_analyzer-*.gem 2 | Gemfile.lock 3 | -------------------------------------------------------------------------------- /.rspec: -------------------------------------------------------------------------------- 1 | --color 2 | --format=documentation 3 | --require spec_helper 4 | -------------------------------------------------------------------------------- /lib/active_record/sql_analyzer/version.rb: -------------------------------------------------------------------------------- 1 | module ActiveRecord 2 | module SqlAnalyzer 3 | VERSION = '0.3.0' 4 | end 5 | end 6 | -------------------------------------------------------------------------------- /lib/active_record/sql_analyzer/redactor.rb: -------------------------------------------------------------------------------- 1 | module ActiveRecord 2 | module SqlAnalyzer 3 | Redactor = Struct.new(:search, :replace) 4 | end 5 | end 6 | 
-------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 0.3.0 / 2018-08-31 4 | Breaking Changes 5 | - Change hash method: use `json.hash` instead of `MD5.hexdigest(json)`. 6 | 7 | Internal: 8 | - Use `print` instead of `puts`. 9 | -------------------------------------------------------------------------------- /.rubocop.yml: -------------------------------------------------------------------------------- 1 | AllCops: 2 | DisplayCopNames: true 3 | TargetRubyVersion: 2.3 4 | Exclude: 5 | - 'bin/*' 6 | - 'spec/support/**/*' 7 | 8 | Metrics: 9 | Enabled: false 10 | 11 | Style: 12 | Enabled: false 13 | -------------------------------------------------------------------------------- /spec/support/stub_logger.rb: -------------------------------------------------------------------------------- 1 | class Rails 2 | def self.root 3 | @root ||= Pathname.new(File.expand_path('../../', __FILE__)).freeze 4 | end 5 | 6 | def self.logger 7 | @logger ||= Logger.new(STDOUT) 8 | end 9 | end 10 | -------------------------------------------------------------------------------- /spec/support/stub_rails.rb: -------------------------------------------------------------------------------- 1 | class Rails 2 | def self.root 3 | @root ||= Pathname.new(File.expand_path('../../', __FILE__)).freeze 4 | end 5 | 6 | def self.logger 7 | @logger ||= Logger.new(STDOUT) 8 | end 9 | end 10 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | bundler_args: --without development 2 | language: ruby 3 | rvm: 4 | - 2.2 5 | - 2.3.3 6 | - 2.4.0-preview3 7 | - ruby-head 8 | matrix: 9 | allow_failures: 10 | - rvm: 2.4.0-preview3 11 | - rvm: ruby-head 12 | fast_finish: true 13 | sudo: false 14 | 
-------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source "https://rubygems.org" 2 | 3 | gem "pry-byebug" 4 | gem "rake" 5 | 6 | group :test do 7 | gem "mysql2" 8 | gem "rspec", "~> 3.4" 9 | gem "rubocop", "0.58.2" 10 | gem "sql-parser", git: "https://github.com/nerdrew/sql-parser.git" 11 | gem "timecop", "~> 0.8" 12 | end 13 | 14 | gemspec 15 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require 'bundler/gem_tasks' 2 | 3 | begin 4 | require "rubocop/rake_task" 5 | require "rspec/core/rake_task" 6 | 7 | RuboCop::RakeTask.new 8 | RSpec::Core::RakeTask.new(:spec) 9 | 10 | task default: [:rubocop, :spec] 11 | rescue LoadError 12 | warn "rubocop, rspec only available in development" 13 | end 14 | -------------------------------------------------------------------------------- /bin/ar-log-analyzer: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | require "optparse" 3 | require "json" 4 | require_relative "../lib/active_record/sql_analyzer" 5 | require_relative "../lib/active_record/sql_analyzer/cli" 6 | require_relative "../lib/active_record/sql_analyzer/cli_processor" 7 | 8 | cli = ActiveRecord::SqlAnalyzer::CLI.new 9 | cli.parse_options(ARGV) 10 | cli.run 11 | -------------------------------------------------------------------------------- /spec/support/wait_for_pop.rb: -------------------------------------------------------------------------------- 1 | module WaitForPop 2 | # This is trying to work around race conditions in a semi-sane manner 3 | def wait_for_pop 4 | queue = ActiveRecord::SqlAnalyzer.background_processor.instance_variable_get(:@queue) 5 | 6 | 4.times do 7 | return sleep 0.1 if queue.empty? 
module ActiveRecord
  module SqlAnalyzer
    module Monkeypatches
      # Adds `with_tag` to the class it is prepended onto and publishes the
      # chosen tag in `Thread.current[:_ar_analyzer_tag]` while the queries of
      # that object are executing.
      #
      # NOTE(review): the thread-local is presumably read by the Query
      # monkeypatch when it records a call -- confirm against query.rb.
      module Tagger
        def initialize(*)
          super
          @_ar_analyzer_tag = nil
        end

        # Remembers +name+ as the tag for this object.
        #
        # @param name [Object] tag to expose while queries run
        # @return [self] so the call can be chained
        def with_tag(name)
          @_ar_analyzer_tag = name
          self
        end

        # Runs the wrapped `exec_queries` with the tag visible on the current
        # thread. A tag that is already set by an enclosing call wins (same as
        # before), and whatever value was present on entry is put back on
        # exit. The previous implementation reset the thread-local to nil,
        # which dropped the enclosing tag for the rest of the outer call.
        def exec_queries
          # Untagged objects never touched the thread-local; keep it that way.
          return super unless @_ar_analyzer_tag

          previous_tag = Thread.current[:_ar_analyzer_tag]
          Thread.current[:_ar_analyzer_tag] = previous_tag || @_ar_analyzer_tag

          begin
            super
          ensure
            Thread.current[:_ar_analyzer_tag] = previous_tag
          end
        end
      end
    end
  end
end
directly to disk 15 | def log(event) 16 | log_file.puts(event.to_json) 17 | end 18 | 19 | # Further redact or remove any other information from an event 20 | def filter_event(event) 21 | event 22 | end 23 | 24 | def close 25 | @log_file.close rescue nil 26 | end 27 | end 28 | end 29 | end 30 | -------------------------------------------------------------------------------- /lib/active_record/sql_analyzer/compact_logger.rb: -------------------------------------------------------------------------------- 1 | require "digest" 2 | 3 | module ActiveRecord 4 | module SqlAnalyzer 5 | class CompactLogger < Logger 6 | attr_reader :logged_shas, :definition_log_file 7 | 8 | def initialize(*) 9 | super 10 | 11 | @logged_shas = Set.new 12 | @definition_log_file = File.open("#{log_root}/#{log_prefix}_definitions.log", "a+") 13 | end 14 | 15 | def log(event) 16 | json = event.to_json 17 | sha = json.hash 18 | unless logged_shas.include?(sha) 19 | definition_log_file.print("#{sha}|#{json}\n") 20 | logged_shas << sha 21 | end 22 | 23 | log_file.print("#{Time.now.to_i}|#{sha}\n") 24 | end 25 | 26 | def close 27 | @definition_log_file.close rescue nil 28 | super 29 | end 30 | end 31 | end 32 | end 33 | -------------------------------------------------------------------------------- /spec/active_record/sql_analyzer/backtrace_filter_spec.rb: -------------------------------------------------------------------------------- 1 | RSpec.describe ActiveRecord::SqlAnalyzer::BacktraceFilter do 2 | before do 3 | ActiveRecord::SqlAnalyzer.configure { |_c| } 4 | end 5 | 6 | it "filters non-app paths" do 7 | lines = ActiveRecord::SqlAnalyzer.config[:backtrace_filter_proc].call( 8 | [ 9 | "foo/bar:1 in 'method'", 10 | "#{Gem.path.first}:4231 in 'method'", 11 | "foo/bar:2 in 'method'", 12 | "#{File.realpath(Gem.path.first)}:9531 in 'method'", 13 | "foo/bar:3 in 'method'", 14 | "(eval):1234 in 'method'", 15 | "foo/bar:4 in 'method'" 16 | ] 17 | ) 18 | 19 | expect(lines).to eq( 20 | [ 21 | "foo/bar:1 
in 'method'", 22 | "foo/bar:2 in 'method'", 23 | "foo/bar:3 in 'method'", 24 | "foo/bar:4 in 'method'" 25 | ] 26 | ) 27 | end 28 | end 29 | -------------------------------------------------------------------------------- /active_record-sql_analyzer.gemspec: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | require File.expand_path('../lib/active_record/sql_analyzer/version', __FILE__) 4 | Gem::Specification.new do |s| 5 | s.authors = ['Zachary Anker', 'Gabriel Gilder'] 6 | s.email = %w(zanker@squareup.com gabriel@squareup.com) 7 | s.description = 'ActiveRecord query logger and analyzer' 8 | s.summary = 'Logs a subset of ActiveRecord queries and dumps them for analyses.' 9 | s.homepage = 'https://github.com/square/active_record-sql_analyzer' 10 | 11 | s.license = 'Apache License 2.0' 12 | s.files = `git ls-files`.split("\n") 13 | s.executables = s.files.grep(%r{^bin/}).map { |f| File.basename(f) } 14 | s.test_files = s.files.grep(%r{^(test|spec|features)/}) 15 | s.name = 'active_record-sql_analyzer' 16 | s.require_paths = ['lib'] 17 | s.version = ActiveRecord::SqlAnalyzer::VERSION 18 | 19 | s.add_dependency 'activerecord' 20 | 21 | s.add_development_dependency 'bundler', '~> 1.0' 22 | end 23 | -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | require "rspec" 2 | require "active_record" 3 | require "timecop" 4 | require "tmpdir" 5 | require "support/db_connection" 6 | require "support/stub_rails" 7 | require "support/wait_for_pop" 8 | require "active_record/sql_analyzer.rb" 9 | 10 | DBConnection.setup_db 11 | ActiveRecord::SqlAnalyzer.install! 12 | 13 | RSpec.configure do |c| 14 | c.disable_monkey_patching! 
15 | 16 | c.before :each do 17 | DBConnection.reset 18 | end 19 | 20 | c.after :each do 21 | # Try and reset our state to something a bit fresher 22 | if ActiveRecord::SqlAnalyzer.config 23 | thread = ActiveRecord::SqlAnalyzer.background_processor.instance_variable_get(:@thread) 24 | thread.terminate if thread 25 | 26 | ActiveRecord::SqlAnalyzer.config[:analyzers].each do |analyzer| 27 | analyzer[:logger_instance].close 28 | end 29 | 30 | ActiveRecord::SqlAnalyzer.instance_variable_set(:@background_processor, nil) 31 | ActiveRecord::SqlAnalyzer.instance_variable_set(:@config, nil) 32 | end 33 | end 34 | end 35 | -------------------------------------------------------------------------------- /spec/active_record/sql_analyzer/background_processor_spec.rb: -------------------------------------------------------------------------------- 1 | RSpec.describe ActiveRecord::SqlAnalyzer::BackgroundProcessor do 2 | include WaitForPop 3 | 4 | let(:instance) { described_class.new } 5 | 6 | let(:event) { { calls: [{ caller: "CALLER", sql: "SQL" }], logger: logger } } 7 | 8 | let(:logger) do 9 | Class.new do 10 | def self.events 11 | @events ||= [] 12 | end 13 | 14 | def self.filter_event(*) 15 | end 16 | 17 | def self.log(event) 18 | events << event 19 | sleep 2 20 | end 21 | end 22 | end 23 | 24 | before do 25 | ActiveRecord::SqlAnalyzer.configure do |c| 26 | c.backtrace_filter_proc(Proc.new { |lines| "BFP #{lines}" }) 27 | c.complex_sql_redactor_proc(Proc.new { |sql| "CSRP #{sql}" }) 28 | end 29 | end 30 | 31 | it "processes in the background" do 32 | instance << event 33 | wait_for_pop 34 | 35 | expect(logger.events).to eq( 36 | [ 37 | calls: [{ 38 | caller: "BFP CALLER", 39 | sql: "CSRP SQL" 40 | }] 41 | ] 42 | ) 43 | end 44 | end 45 | -------------------------------------------------------------------------------- /lib/active_record/sql_analyzer/backtrace_filter.rb: -------------------------------------------------------------------------------- 1 | require "pathname" 2 | 3 
module ActiveRecord
  module SqlAnalyzer
    # Collects the settings of one analyzer: which tables to watch, which
    # logger class records the data and under which name it is stored.
    class Analyzer
      # Characters allowed in an analyzer name.
      NAME_FORMAT = /\A[a-z0-9A-Z_]+\z/

      attr_reader :options

      def initialize
        @options = {}
      end

      # Reads a stored option.
      #
      # @param key [Symbol] e.g. :table_regex, :logger, :name, :logger_instance
      # @return [Object, nil]
      def [](key)
        @options[key]
      end

      # Tables to watch for this analyzer.
      #
      # The names are interpolated into the regex verbatim, so regex
      # metacharacters in a name are interpreted, and the match is not
      # anchored at the end of the table name.
      #
      # @param names [Array<String>]
      # @raise [ArgumentError] if +names+ is not an Array
      # @return [Regexp] the regex stored under :table_regex
      def tables(names)
        unless names.is_a?(Array)
          raise ArgumentError, "Names of tables must be an array"
        end

        @options[:table_regex] = /\A\s*((SELECT|DELETE).*(FROM|JOIN)|(INSERT\s+INTO|UPDATE))\s+`?(#{names.join('|')})`?/i
      end

      # Logger class to use for recording data
      def logger(klass)
        @options[:logger] = klass
      end

      # How to tag the data. The name also becomes the log file prefix (see
      # #setup), hence the restricted character set.
      #
      # The previous check combined its two conditions with `&&`, so a String
      # was accepted whatever it contained. Symbols with valid characters were
      # accepted before and still are.
      #
      # @param name [String, Symbol]
      # @raise [ArgumentError] if +name+ is neither a String nor a Symbol, or
      #   contains characters outside [a-z0-9A-Z_]
      def name(name)
        textual = name.is_a?(String) || name.is_a?(Symbol)

        unless textual && name.to_s =~ NAME_FORMAT
          raise ArgumentError, "Name for this analyzer can only contain [a-z0-9A-Z_] characters"
        end

        @options[:name] = name
      end

      # Instantiates the logger once (RedactedLogger unless #logger chose
      # another class) with the configured root path and this analyzer's name.
      def setup
        @options[:logger_instance] ||= (@options[:logger] || RedactedLogger).new(
          SqlAnalyzer.config[:logger_root_path],
          @options[:name]
        )
      end
    end
  end
end
require_relative './sql_analyzer/compact_logger' 6 | require_relative './sql_analyzer/redacted_logger' 7 | require_relative './sql_analyzer/background_processor' 8 | require_relative './sql_analyzer/configuration' 9 | require_relative './sql_analyzer/redactor' 10 | require_relative './sql_analyzer/backtrace_filter' 11 | require_relative './sql_analyzer/version' 12 | 13 | module ActiveRecord 14 | module SqlAnalyzer 15 | def self.configure 16 | @config ||= Configuration.new 17 | yield @config 18 | @config 19 | end 20 | 21 | def self.config 22 | @config 23 | end 24 | 25 | def self.background_processor 26 | @background_processor ||= BackgroundProcessor.new 27 | end 28 | 29 | def self.install! 30 | return if @installed 31 | @installed = true 32 | 33 | # Install our patch that logs SQL queries 34 | ActiveRecord::ConnectionAdapters::Mysql2Adapter.prepend(Monkeypatches::Query) 35 | 36 | # Install our patch that enables a `with_tag` method on AR calls 37 | ActiveRecord::Relation.prepend(Monkeypatches::Tagger) 38 | end 39 | end 40 | end 41 | -------------------------------------------------------------------------------- /lib/active_record/sql_analyzer/redacted_logger.rb: -------------------------------------------------------------------------------- 1 | module ActiveRecord 2 | module SqlAnalyzer 3 | class RedactedLogger < CompactLogger 4 | def filter_event(event) 5 | # Determine if we're doing extended tracing or only the first 6 | calls = event.delete(:calls).map do |call| 7 | { sql: filter_sql(call[:sql]), caller: filter_caller(call[:caller]) } 8 | end 9 | 10 | # De-duplicate redacted calls to avoid many transactions with looping "N+1" queries. 11 | calls.uniq! 
require "set"

module ActiveRecord
  module SqlAnalyzer
    # Moves event post-processing (backtrace filtering, SQL redaction and
    # logging) onto a single background thread fed through a Queue.
    class BackgroundProcessor
      def initialize
        @queue = Queue.new
      end

      # Enqueues +event+, starting the worker thread first if none is alive.
      #
      # @param event [Hash] must contain :calls (array of hashes with :caller
      #   and :sql) and :logger (responds to filter_event and log)
      def <<(event)
        processor_thread
        @queue << event
      end

      private

      MUTEX = Mutex.new

      # Blocks until an event is available, rewrites its calls through the
      # configured procs and hands it to the event's logger.
      def process_queue
        event = @queue.pop

        event[:calls] = event[:calls].map do |call|
          {
            caller: SqlAnalyzer.config[:backtrace_filter_proc].call(call[:caller]),
            # dup so the redactor cannot mutate the string owned by the producer
            sql: SqlAnalyzer.config[:sql_redactor_complex_proc].call(call[:sql].dup)
          }
        end

        logger = event.delete(:logger)
        logger.filter_event(event)
        logger.log(event)
      end

      # Lazily starts the worker thread.
      def processor_thread
        # Avoid grabbing a mutex unless we really need to
        return if @thread && @thread.alive?

        MUTEX.synchronize do
          # Double check to avoid a race condition
          return if @thread && @thread.alive?

          @thread = Thread.new do
            Rails.logger.info "[SQL-Analyzer] Starting background query thread id #{Thread.current.object_id} in pid #{Process.pid}"

            loop do
              begin
                process_queue
              rescue => ex
                # Rescue per event: previously the rescue wrapped the whole
                # loop, so one bad event ended the thread and everything
                # already queued waited until the next `<<` restarted it.
                Rails.logger.warn "[SQL-Analyzer] Exception in thread #{Thread.current.object_id}: #{ex.class}, #{ex.message}"
                Rails.logger.warn "[SQL-Analyzer] #{ex.backtrace.join(", ")}"
              end
            end
          end
        end
      end
    end
  end
end
require "optparse"

module ActiveRecord
  module SqlAnalyzer
    # Command-line front end: parses the options, locates the definition and
    # usage logs in the log directory and drives a CLIProcessor over them.
    class CLI
      attr_reader :options

      def initialize
        @options = {
          concurrency: 6
        }
      end

      # @return [CLIProcessor] memoized, built with the concurrency that is
      #   configured at the time of the first call
      def processor
        @processor ||= ActiveRecord::SqlAnalyzer::CLIProcessor.new(options[:concurrency])
      end

      # Processes every `<prefix>_definitions.log*` file and its matching
      # `<prefix>.log*` usage files, then dumps the result to the dest dir.
      #
      # @raise [ArgumentError] if --log-dir or --dest-dir was not given, or
      #   the log directory holds no definition logs
      def run
        log_dir = fetch_directory(:log_dir, "--log-dir")
        dest_dir = fetch_directory(:dest_dir, "--dest-dir")

        definition_logs = Dir["#{log_dir}/*_definitions.log*"].map do |path|
          [File.basename(path).gsub(/_definitions\.log.*/, ""), path]
        end

        if definition_logs.empty?
          raise ArgumentError, "Cannot find any log files in '#{log_dir}'"
        end

        # Process the definition logs
        processor.run_definition(definition_logs)

        # Process the usage logs; only prefixes that have definitions matter
        prefixes = definition_logs.map(&:first).uniq.join(",")
        usage_logs = Dir["#{log_dir}/{#{prefixes}}.log*"].map do |path|
          [File.basename(path).split(".", 2).first, path]
        end

        processor.run_usage(usage_logs)

        processor.dump(dest_dir)
      end

      # Parses +args+ into #options.
      #
      # All three value options now declare a mandatory argument. Before,
      # the directory options were optional-argument switches (a bare
      # `--log-dir` handed nil to Dir.exist?), and `--concurrency` declared
      # no argument at all.
      #
      # @param args [Array<String>] typically ARGV
      # @raise [ArgumentError] for a missing directory or concurrency <= 0
      # @raise [OptionParser::ParseError] for malformed options
      # @return [Array<String>] the non-option arguments
      def parse_options(args)
        opts = OptionParser.new

        opts.on("--log-dir DIR", String, "Directory that logs are in") do |val|
          unless Dir.exist?(val)
            raise ArgumentError, "log directory '#{val}' does not exist"
          end

          options[:log_dir] = val
        end

        opts.on("--dest-dir DIR", String, "Directory to dump logs to") do |val|
          unless Dir.exist?(val)
            raise ArgumentError, "dest directory '#{val}' does not exist"
          end

          options[:dest_dir] = val
        end

        opts.on("-c", "--concurrency N", Integer, "How many threads to use for processing log files") do |val|
          if val <= 0
            raise ArgumentError, "Concurrency must be >0"
          end

          options[:concurrency] = val
        end

        opts.on_tail("-h", "--help") do
          puts opts
          exit
        end

        opts.parse(args)
      end

      private

      # Returns the directory stored under +key+ or fails early; without this
      # a missing option would glob "/" or dump into "/".
      def fetch_directory(key, flag)
        options[key] || raise(ArgumentError, "#{flag} is required")
      end
    end
  end
end
tmp_dir, "--dest-dir", tmp_dir]) 12 | cli 13 | end 14 | 15 | before do 16 | %w(foo bar).each do |prefix| 17 | 3.times do |i| 18 | FileUtils.touch("#{tmp_dir}/#{prefix}.log.#{i}") 19 | FileUtils.touch("#{tmp_dir}/#{prefix}_definitions.log.#{i}") 20 | end 21 | end 22 | end 23 | 24 | # I'm sorry 25 | it "parses logs and starts the processor" do 26 | expect(instance.processor).to receive(:run_definition) do |paths| 27 | expect(paths.length).to eq(6) 28 | 29 | prefixes = paths[0, 3].map(&:first).uniq 30 | logs = paths[0, 3].map(&:last) 31 | expect(prefixes.length).to eq(1) 32 | expect(logs).to include(/#{prefixes.first}_definitions\.log\.0/) 33 | expect(logs).to include(/#{prefixes.first}_definitions\.log\.1/) 34 | expect(logs).to include(/#{prefixes.first}_definitions\.log\.2/) 35 | 36 | prefixes = paths[3, 6].map(&:first).uniq 37 | logs = paths[3, 6].map(&:last) 38 | expect(prefixes.length).to eq(1) 39 | expect(logs).to include(/#{prefixes.first}_definitions\.log\.0/) 40 | expect(logs).to include(/#{prefixes.first}_definitions\.log\.1/) 41 | expect(logs).to include(/#{prefixes.first}_definitions\.log\.2/) 42 | end 43 | 44 | expect(instance.processor).to receive(:run_usage) do |paths| 45 | expect(paths.length).to eq(6) 46 | 47 | prefixes = paths[0, 3].map(&:first).uniq 48 | logs = paths[0, 3].map(&:last) 49 | expect(prefixes.length).to eq(1) 50 | expect(logs).to include(/#{prefixes.first}\.log\.0/) 51 | expect(logs).to include(/#{prefixes.first}\.log\.1/) 52 | expect(logs).to include(/#{prefixes.first}\.log\.2/) 53 | 54 | prefixes = paths[3, 6].map(&:first).uniq 55 | logs = paths[3, 6].map(&:last) 56 | expect(prefixes.length).to eq(1) 57 | expect(logs).to include(/#{prefixes.first}\.log\.0/) 58 | expect(logs).to include(/#{prefixes.first}\.log\.1/) 59 | expect(logs).to include(/#{prefixes.first}\.log\.2/) 60 | end 61 | 62 | expect(instance.processor).to receive(:dump).with(tmp_dir) 63 | 64 | instance.run 65 | end 66 | end 67 | 
require "json"

module ActiveRecord
  module SqlAnalyzer
    # Aggregates compact log files: definition logs ("sha|json") describe the
    # distinct events, usage logs ("timestamp|sha") say when each one ran.
    # Files are read by a pool of threads and merged into #definitions,
    # keyed by log prefix and then by sha.
    class CLIProcessor
      attr_reader :concurrency, :definitions

      # @param concurrency [Integer] number of reader threads per run
      def initialize(concurrency)
        @concurrency = concurrency
        @definitions = {}
      end

      # Drains +queue+ of [prefix, path] pairs, yielding the per-prefix
      # accumulator hash and each stripped line of the file.
      #
      # The queue is popped without blocking. The previous `empty?` check
      # followed by a blocking `pop` let a second worker take the last item
      # in between, leaving this worker blocked forever.
      #
      # Any error is printed with its backtrace and re-raised.
      #
      # @param queue [Queue] of [prefix, path] pairs
      # @yieldparam data [Hash] accumulator for the file's prefix
      # @yieldparam line [String] one stripped line
      # @return [Hash] prefix => accumulator
      def self.process_queue(queue)
        local_data = {}

        while (job = next_job(queue))
          prefix, path = job
          bucket = (local_data[prefix] ||= {})

          File.foreach(path) { |line| yield bucket, line.strip }
        end

        local_data
      rescue => ex
        puts "#{ex.class}: #{ex.message}"
        puts ex.backtrace
        raise
      end

      # Next queued item, or nil once the queue is empty.
      def self.next_job(queue)
        queue.pop(true)
      rescue ThreadError
        nil
      end
      private_class_method :next_job

      # Loads the definition logs into #definitions.
      #
      # @param logs [Array<Array(String, String)>] [prefix, path] pairs
      def run_definition(logs)
        results = process_in_threads(logs) do |local_definitions, line|
          next if line.empty?

          sha, event = line.split("|", 2)
          local_definitions[sha] = JSON.parse(event)
        end

        # Merge everything
        results.each do |result|
          result.each do |prefix, data|
            definitions[prefix] ||= {}
            definitions[prefix].merge!(data)
          end
        end
      end

      # Adds "count" and "last_called" (UTC Time) to the loaded definitions.
      # Shas without a definition are reported on stdout and skipped.
      #
      # @param logs [Array<Array(String, String)>] [prefix, path] pairs
      def run_usage(logs)
        results = process_in_threads(logs) do |local_usage, line|
          next if line.empty?

          last_called, sha = line.split("|", 2)
          last_called = Time.at(last_called.to_i).utc

          usage = (local_usage[sha] ||= { "count" => 0 })
          usage["count"] += 1

          if !usage["last_called"] || usage["last_called"] < last_called
            usage["last_called"] = last_called
          end
        end

        # Merge everything
        results.each do |result|
          result.each do |prefix, data|
            definitions[prefix] ||= {}

            data.each do |sha, usage|
              definition = definitions[prefix][sha]
              unless definition
                puts "Undefined event '#{sha}'"
                next
              end

              definition["count"] ||= 0
              definition["count"] += usage["count"]

              if !definition["last_called"] || definition["last_called"] < usage["last_called"]
                definition["last_called"] = usage["last_called"]
              end
            end
          end
        end
      end

      # Writes one `<prefix>_<YYYY-MM-DD>.log` JSON file per prefix.
      #
      # @param dest_dir [String] existing directory to write into
      def dump(dest_dir)
        definitions.each do |prefix, data|
          path = "#{dest_dir}/#{prefix}_#{Time.now.strftime("%Y-%m-%d")}.log"
          puts "Writing logs to '#{path}' (#{data.length} queries)"

          File.open(path, "w+") do |io|
            io.write(data.to_json)
          end
        end
      end

      private

      # Queues +logs+, lets `concurrency` threads drain them with
      # +line_handler+ and returns each thread's local result. Thread#value
      # joins the thread and re-raises whatever it raised.
      def process_in_threads(logs, &line_handler)
        queue = Queue.new
        logs.each { |log| queue << log }

        workers = Array.new(concurrency) do
          Thread.new(queue) do |t_queue|
            CLIProcessor.process_queue(t_queue, &line_handler)
          end
        end

        workers.map(&:value)
      end
    end
  end
end
| ) 15 | 16 | lines.each do |event| 17 | started_at += 3600 18 | 19 | Timecop.freeze(started_at) do 20 | logger.log(event) 21 | end 22 | end 23 | 24 | logger.close 25 | end 26 | end 27 | 28 | let(:instance) { described_class.new(2) } 29 | 30 | before do 31 | write_logs(:foo, 32 | { sql: "F-SQL1", caller: "CALLER1", tag: true }, 33 | { sql: "F-SQL2", caller: "CALLER2", tag: true }, 34 | { sql: "F-SQL1", caller: "CALLER1", tag: true }, 35 | { sql: "F-SQL3", caller: "CALLER3", tag: true }, 36 | { sql: "F-SQL2", caller: "CALLER2", tag: true }) 37 | 38 | write_logs(:bar, 39 | { sql: "B-SQL1", caller: "CALLER1" }, 40 | { sql: "B-SQL2", caller: "CALLER2" }, 41 | { sql: "B-SQL3", caller: "CALLER3" }, 42 | { sql: "B-SQL2", caller: "CALLER2" }, 43 | { sql: "B-SQL1", caller: "CALLER1" }) 44 | end 45 | 46 | subject(:process) do 47 | instance.run_definition( 48 | Dir["#{tmp_dir}/*_*_definitions.log"].map do |path| 49 | [File.basename(path).split("_", 2).first, path] 50 | end 51 | ) 52 | 53 | instance.run_usage( 54 | Dir["#{tmp_dir}/*_*.log"].map do |path| 55 | next if path =~ /definitions\.log$/ 56 | [File.basename(path).split("_", 2).first, path] 57 | end.compact 58 | ) 59 | 60 | instance.definitions 61 | end 62 | 63 | it "processes logs" do 64 | logs = process 65 | 66 | expect(logs["foo"].length).to eq(3) 67 | 68 | queries = logs["foo"].values.sort_by { |row| row["sql"] } 69 | 70 | expect(queries[0]["sql"]).to eq("F-SQL1") 71 | expect(queries[0]["caller"]).to eq("CALLER1") 72 | expect(queries[0]["count"]).to eq(2) 73 | expect(queries[0]["last_called"]).to eq(Time.utc(2015, 1, 1, 3)) 74 | expect(queries[0]["tag"]).to eq(true) 75 | 76 | expect(queries[1]["sql"]).to eq("F-SQL2") 77 | expect(queries[1]["caller"]).to eq("CALLER2") 78 | expect(queries[1]["count"]).to eq(2) 79 | expect(queries[1]["last_called"]).to eq(Time.utc(2015, 1, 1, 5)) 80 | expect(queries[1]["tag"]).to eq(true) 81 | 82 | expect(queries[2]["sql"]).to eq("F-SQL3") 83 | expect(queries[2]["caller"]).to 
eq("CALLER3") 84 | expect(queries[2]["count"]).to eq(1) 85 | expect(queries[2]["last_called"]).to eq(Time.utc(2015, 1, 1, 4)) 86 | expect(queries[2]["tag"]).to eq(true) 87 | 88 | expect(logs["bar"].length).to eq(3) 89 | 90 | queries = logs["bar"].values.sort_by { |row| row["sql"] } 91 | 92 | expect(queries[0]["sql"]).to eq("B-SQL1") 93 | expect(queries[0]["caller"]).to eq("CALLER1") 94 | expect(queries[0]["count"]).to eq(2) 95 | expect(queries[0]["last_called"]).to eq(Time.utc(2015, 1, 1, 5)) 96 | expect(queries[0]["tag"]).to eq(nil) 97 | 98 | expect(queries[1]["sql"]).to eq("B-SQL2") 99 | expect(queries[1]["caller"]).to eq("CALLER2") 100 | expect(queries[1]["count"]).to eq(2) 101 | expect(queries[1]["last_called"]).to eq(Time.utc(2015, 1, 1, 4)) 102 | expect(queries[1]["tag"]).to eq(nil) 103 | 104 | expect(queries[2]["sql"]).to eq("B-SQL3") 105 | expect(queries[2]["caller"]).to eq("CALLER3") 106 | expect(queries[2]["count"]).to eq(1) 107 | expect(queries[2]["last_called"]).to eq(Time.utc(2015, 1, 1, 3)) 108 | expect(queries[2]["tag"]).to eq(nil) 109 | end 110 | 111 | it "dumps to disk" do 112 | process 113 | instance.dump(tmp_dir) 114 | 115 | paths = Dir["#{tmp_dir}/{foo,bar}_#{Time.now.strftime("%Y-%m-%d")}.log"] 116 | expect(paths.length).to eq(2) 117 | end 118 | end 119 | -------------------------------------------------------------------------------- /lib/active_record/sql_analyzer/monkeypatches/query.rb: -------------------------------------------------------------------------------- 1 | require 'securerandom' 2 | 3 | module ActiveRecord 4 | module SqlAnalyzer 5 | module Monkeypatches 6 | module Query 7 | QueryAnalyzerCall = Struct.new(:sql, :caller) 8 | 9 | def execute(sql, *args) 10 | return super unless SqlAnalyzer.config 11 | safe_sql = nil 12 | query_analyzer_call = nil 13 | 14 | # Record "full" transactions (see below for more information about "full") 15 | if @_query_analyzer_private_in_transaction 16 | if @_query_analyzer_private_record_transaction 17 
| safe_sql ||= sql.encode(Encoding::UTF_8, invalid: :replace, undef: :replace) 18 | query_analyzer_call ||= QueryAnalyzerCall.new(safe_sql, caller) 19 | @_query_analyzer_private_transaction_queue << query_analyzer_call 20 | end 21 | end 22 | 23 | # Record interesting queries 24 | SqlAnalyzer.config[:analyzers].each do |analyzer| 25 | if SqlAnalyzer.config[:should_log_sample_proc].call(analyzer[:name]) 26 | # This is here rather than above intentionally. 27 | # We assume we're not going to be analyzing 100% of queries and want to only re-encode 28 | # when it's actually relevant. 29 | safe_sql ||= sql.encode(Encoding::UTF_8, invalid: :replace, undef: :replace) 30 | query_analyzer_call ||= QueryAnalyzerCall.new(safe_sql, caller) 31 | 32 | if safe_sql =~ analyzer[:table_regex] 33 | SqlAnalyzer.background_processor << 34 | _query_analyzer_private_query_stanza([query_analyzer_call], analyzer) 35 | end 36 | end 37 | end 38 | 39 | super 40 | end 41 | 42 | def begin_db_transaction 43 | @_query_analyzer_private_in_transaction = true 44 | 45 | record_transaction = SqlAnalyzer.config[:analyzers].any? do |analyzer| 46 | SqlAnalyzer.config[:should_log_sample_proc].call(analyzer[:name]) 47 | end 48 | if record_transaction 49 | @_query_analyzer_private_transaction_queue ||= [] 50 | @_query_analyzer_private_record_transaction = true 51 | else 52 | @_query_analyzer_private_record_transaction = nil 53 | end 54 | super 55 | end 56 | 57 | def commit_db_transaction 58 | _query_analyzer_private_drain_transaction_queue("COMMIT") 59 | super 60 | ensure 61 | @_query_analyzer_private_in_transaction = false 62 | end 63 | 64 | def exec_rollback_db_transaction 65 | _query_analyzer_private_drain_transaction_queue("ROLLBACK") 66 | super 67 | ensure 68 | @_query_analyzer_private_in_transaction = false 69 | end 70 | 71 | # "private" methods for this monkeypatch 72 | 73 | # Drain the transaction query queue. Log the current transaction out to any logger that samples it. 
74 | def _query_analyzer_private_drain_transaction_queue(last_query) 75 | return unless @_query_analyzer_private_record_transaction 76 | 77 | reencoded_calls = nil 78 | 79 | SqlAnalyzer.config[:analyzers].each do |analyzer| 80 | reencoded_calls ||= @_query_analyzer_private_transaction_queue << QueryAnalyzerCall.new(last_query, caller) 81 | 82 | has_matching_calls = reencoded_calls.any? { |call| call.sql =~ analyzer[:table_regex] } 83 | 84 | # Record "full" transactions 85 | # Record all INSERT, UPDATE, and DELETE 86 | # Record all queries that match the analyzer's table_regex 87 | if has_matching_calls 88 | matching_calls = reencoded_calls.select do |call| 89 | (call.sql =~ /^(BEGIN|COMMIT|ROLLBACK|UPDATE|INSERT|DELETE)/) || (call.sql =~ analyzer[:table_regex]) 90 | end 91 | 92 | SqlAnalyzer.background_processor << _query_analyzer_private_query_stanza(matching_calls, analyzer) 93 | end 94 | end 95 | 96 | @_query_analyzer_private_transaction_queue.clear 97 | @_query_analyzer_private_record_transaction = nil 98 | end 99 | 100 | # Helper method to construct the event for a query or transaction. 
101 | # safe_sql [string]: SQL statement or combined SQL statement for transaction 102 | # calls: A list of QueryAnalyzerCall objects to be turned into call hashes 103 | def _query_analyzer_private_query_stanza(calls, analyzer) 104 | { 105 | # Calls are of the format {sql: String, caller: String} 106 | calls: calls.map(&:to_h), 107 | logger: analyzer[:logger_instance], 108 | tag: Thread.current[:_ar_analyzer_tag], 109 | request_path: Thread.current[:_ar_analyzer_request_path], 110 | } 111 | end 112 | end 113 | end 114 | end 115 | end 116 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ActiveRecord SQL Analyzer 2 | 3 | [![Build Status](https://travis-ci.org/square/active_record-sql_analyzer.svg?branch=master)](https://travis-ci.org/square/active_record-sql_analyzer) 4 | 5 | This gem provides a hook into ActiveRecord to redact, sample, and aggregate SQL queries being ran against production systems. It gives you better visibility into exactly what kind of queries are being run, and aids in planning pulling features out of your service. 6 | 7 | Currently this only supports MySQL. It should have no problems working with other SQL drivers, but we've only tested the redaction on MySQL. 8 | 9 | ## Warning! 10 | 11 | This does log raw SQL to disk (by default). While we've been using this with the regexes below for redaction + a redactor gem, you should ensure you will not inadvertently log data to disk you do not want to. You can also take a look at [Custom Loggers](#custom-loggers) to log somewhere else. 12 | 13 | ## Usage 14 | 15 | By default, you only really need to configure what you want to analyze, and the sample rate. 
To get started quickly: 16 | 17 | ```ruby 18 | ActiveRecord::SqlAnalyzer.configure do |c| 19 | c.add_analyzer do |a| 20 | a.name 'users' 21 | a.tables %w(users permissions) 22 | end 23 | 24 | c.log_sample_proc Proc.new { |name| rand(1..100) <= 25 } 25 | end 26 | ``` 27 | 28 | This will sample 25% of the queries against the `users` and `permissions` tables (including JOINs that use either) and log them to `Rails.root.join('log', 'users.log')` and `Rails.root.join('log', 'users_definitions.log')`. 29 | 30 | ## Analyzer 31 | 32 | You can use the `ar-log-analyzer` command to analyze the created log files. It will output an aggregated JSON dump with SQL, backtraces, call counts and when it was last used. 33 | 34 | ## Aggregation 35 | 36 | Queries are aggregated based on the redacted SQL string + first line of the stacktrace. You can tweak the redaction used for SQL/backtrace to improve the aggregation. 37 | 38 | If a callsite is listed as ambiguous, we automatically include a couple more lines of the stacktrace. See [Advanced Configuration](#advanced-configuration) for more information. 39 | 40 | ## Custom Loggers 41 | 42 | By default, the analysis is logged to disk. You can create your own logger class and send logs to somewhere like a SQL DB instead. Take a look at `ActiveRecord::SqlAnalyzer::RedactedLogger`, `ActiveRecord::SqlAnalyzer::CompactLogger`, and `ActiveRecord::SqlAnalyzer::Logger` as examples of how to build your own. 43 | 44 | ## Advanced Configuration 45 | 46 | ```ruby 47 | ActiveRecord::SqlAnalyzer.configure do |c| 48 | # Define a custom backtrace filter.
In this case, only filter out lines containing `/nokogiri/` 49 | c.backtrace_filter_proc Proc.new { |lines| 50 | lines.reject { |line| line =~ /\/nokogiri\// } 51 | } 52 | 53 | # Add a new analyzer, can add as many as you need 54 | c.add_analyzer( 55 | # Used for the prefix of the log files, as well as passed to `log_sample_proc` 56 | name: 'users', 57 | # SQL table names to log 58 | tables: %w(users permissions), 59 | # Logger to use (Can be changed to use your own custom one) 60 | logger: ActiveRecord::SqlAnalyzer::RedactedLogger 61 | ) 62 | 63 | # Directory to log to, by default this is `Rails.root.join('log')` 64 | c.logger_root_path '/tmp/' 65 | 66 | # Sets a new sampler, logs only 25% of queries tagged with `users` and 50% tagged with `payments` 67 | c.log_sample_proc Proc.new { |name| 68 | val = rand(1..100) 69 | 70 | if name == 'users' 71 | val <= 25 72 | elsif name == 'payments' 73 | val <= 50 74 | end 75 | } 76 | 77 | # Beyond the simple find/replace, any kind of dynamic redaction you need. 78 | # We use this with another gem that redacts common patterns like phone numbers or emails. 79 | c.complex_sql_redactor_proc Proc.new { |sql| 80 | sql.gsub!(/(.+)@(.+)\.(.+)/, "") 81 | } 82 | 83 | # Adds a new redactor that strips out all numbers 84 | c.add_sql_redactors [ 85 | [/[0-9]+/, ""] 86 | ] 87 | 88 | # Adds a redactor for backtraces to improve aggregation. 89 | # In this case, strips out line numbers in case your code changes day to day. 90 | c.add_backtrace_redactors [ 91 | [/:([0-9]+):in/, ":in"] 92 | ] 93 | 94 | # Any backtraces matching the regexes will switch to an ambiguous tracer 95 | # where we log more details. Can be handy if you have code sites that 96 | # inherit from other files or middlewares that are showing up in the caller.
97 | c.add_ambiguous_tracers [ 98 | %r{\Aapp/middleware/query_string_sanitizer\.rb:\d+:in `call'\z} 99 | ] 100 | 101 | # How many additional lines to log when calls are ambiguous 102 | c.ambiguous_backtrace_lines 5 103 | end 104 | ``` 105 | 106 | 107 | ## License 108 | 109 | Copyright 2015 Square Inc. 110 | 111 | Licensed under the Apache License, Version 2.0 (the "License"); 112 | you may not use this file except in compliance with the License. 113 | You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 114 | 115 | Unless required by applicable law or agreed to in writing, software 116 | distributed under the License is distributed on an "AS IS" BASIS, 117 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 118 | See the License for the specific language governing permissions and 119 | limitations under the License. 120 | -------------------------------------------------------------------------------- /lib/active_record/sql_analyzer/configuration.rb: -------------------------------------------------------------------------------- 1 | module ActiveRecord 2 | module SqlAnalyzer 3 | class Configuration 4 | attr_reader :options 5 | 6 | def initialize 7 | @options = {} 8 | setup_defaults 9 | end 10 | 11 | # Setup a custom proc that filters out lines before passing them to the loggers. 12 | # By default, this attempts to filter out all non-app code lines. 13 | def backtrace_filter_proc(proc) 14 | check_proc(proc, 1, "the backtrace lines") 15 | @options[:backtrace_filter_proc] = proc 16 | end 17 | 18 | # Setup a new analyzer for monitoring tables 19 | # 20 | # add_analyzer( 21 | # name: 'users', 22 | # tables: %w(users permissions), 23 | # logger: ActiveRecord::SqlAnalyzer::RedactedLogger 24 | # ) 25 | # 26 | # Will setup an analyzer that looks at the tables users and permissions 27 | # when it finds relevant data, it passes it through the `RedactedLogger` class. 
28 | # When calling the proc passed to `log_sample_proc`, it will use the name 29 | # `users` to help identify it, as well as when logging to disk. 30 | # 31 | def add_analyzer(result) 32 | analyzer = Analyzer.new 33 | analyzer.name(result[:name]) 34 | analyzer.tables(result[:tables]) 35 | analyzer.logger(result[:logger]) 36 | analyzer.setup 37 | 38 | @options[:analyzers] << analyzer 39 | analyzer 40 | end 41 | 42 | # Root path where all logs go. 43 | # Defaults to `Rails.root.join('log')` 44 | def logger_root_path(path) 45 | unless Dir.exist?(path) 46 | raise ArgumentError, "Path '#{path}' is not a directory" 47 | end 48 | 49 | @options[:logger_root_path] = path 50 | end 51 | 52 | # Set a proc that determines whether or not to log a single event. 53 | # This must be set to log anything, and controls how many SQL queries you look at. 54 | # 55 | # Proc.new { |name| true } 56 | # 57 | # Will log everything no matter what 58 | # 59 | # Proc.new do |name| 60 | # rand(1..100) <= 50 61 | # end 62 | # 63 | # Will only log 50% of queries. 64 | # 65 | # You can hook this into something like Redis to allow dynamic control of the ratio 66 | # without having to redeploy/restart your application. 67 | # 68 | def log_sample_proc(proc) 69 | check_proc(proc, 1, "the analyzer name") 70 | @options[:should_log_sample_proc] = proc 71 | end 72 | 73 | # For hooking in more complicated redactions beyond a simple find/replace. 
74 | def complex_sql_redactor_proc(proc) 75 | check_proc(proc, 1, "the SQL statement") 76 | @options[:sql_redactor_complex_proc] = proc 77 | end 78 | 79 | # Additional redactors to filter out data in SQL you don't want logged 80 | def add_sql_redactors(list) 81 | @options[:sql_redactors].concat(create_redactors(list)) 82 | end 83 | 84 | # Backtrace redactors filter out data in the backtrace 85 | # useful if you want to get rid of lines numbers 86 | def add_backtrace_redactors(list) 87 | @options[:backtrace_redactors].concat(create_redactors(list)) 88 | end 89 | 90 | # If the first line in the backtrace matches the regex given, we switch to 91 | # ambiguous tracing mode for that call where we log more of the backtrace. 92 | # 93 | # As an example, if you find you're only getting middleware, you could use: 94 | # 95 | # %r{\Aapp/middleware/query_string_sanitizer\.rb:\d+:in `call'\z} 96 | # 97 | # Which would log up to ambiguous_backtrace_lines (default 3) total lines, 98 | # rather than the default 1. 99 | def add_ambiguous_tracers(list) 100 | list.each do |row| 101 | unless row.is_a?(Regexp) 102 | raise ArgumentError, "Tracing filters must be a Regexp to match on" 103 | end 104 | end 105 | 106 | @options[:ambiguous_tracers].concat(list) 107 | end 108 | 109 | # How many total lines to log when the caller is ambiguous 110 | def ambiguous_backtrace_lines(lines) 111 | if !lines.is_a?(Integer) 112 | raise ArgumentError, "Lines must be a Fixnum" 113 | elsif lines <= 1 114 | raise ArgumentError, "Lines cannot be <= 1" 115 | end 116 | 117 | @options[:ambiguous_backtrace_lines] = lines 118 | end 119 | 120 | # Disable transaction consolidation. With transaction consolidation enabled, the logger will log full transactions 121 | # as single statements. 
122 | def disable_consolidate_transactions 123 | @config[:consolidate_transactions] = false 124 | end 125 | 126 | def [](key) 127 | @options[key] 128 | end 129 | 130 | private 131 | 132 | def check_proc(proc, _arity, msg) 133 | if !proc.is_a?(Proc) 134 | raise ArgumentError, "You must pass a proc" 135 | elsif proc.arity != 1 136 | raise ArgumentError, "Proc must accept 1 argument for #{msg}" 137 | end 138 | end 139 | 140 | def create_redactors(list) 141 | list.map do |redact| 142 | if redact.length != 2 143 | raise ArgumentError, "Redactor row should only have two entries" 144 | elsif !redact.first.is_a?(Regexp) 145 | raise ArgumentError, "First value in pair must be a Regexp to match on" 146 | elsif !redact.last.is_a?(String) 147 | raise ArgumentError, "Last value in pair must be a String to replace with" 148 | end 149 | 150 | Redactor.new(*redact) 151 | end 152 | end 153 | 154 | def setup_defaults 155 | quoted_value_pattern = %{('([^\\\\']|\\\\.|'')*'|"([^\\\\"]|\\\\.|"")*")} 156 | @options[:sql_redactors] = [ 157 | Redactor.new(/\n/, " "), 158 | Redactor.new(/\s+/, " "), 159 | Redactor.new(/IN \([^)]+\)/i, "IN ('[REDACTED]')"), 160 | Redactor.new(/(\s|\b|`)(=|!=|>=|>|<=|<) ?(BINARY )?-?\d+(\.\d+)?/i, " = '[REDACTED]'"), 161 | Redactor.new(/(\s|\b|`)(=|!=|>=|>|<=|<) ?(BINARY )?x?#{quoted_value_pattern}/i, " = '[REDACTED]'"), 162 | Redactor.new(/VALUES \(.+\)$/i, "VALUES ('[REDACTED]')"), 163 | Redactor.new(/BETWEEN #{quoted_value_pattern} AND #{quoted_value_pattern}/i, "BETWEEN '[REDACTED]' AND '[REDACTED]'"), 164 | Redactor.new(/LIKE #{quoted_value_pattern}/i, "LIKE '[REDACTED]'"), 165 | Redactor.new(/ LIMIT \d+/i, ""), 166 | Redactor.new(/ OFFSET \d+/i, ""), 167 | Redactor.new(/INSERT INTO (`?\w+`?) 
\([^)]+\)/i, "INSERT INTO \\1 (REDACTED_COLUMNS)"), 168 | ] 169 | 170 | @options[:should_log_sample_proc] = Proc.new { |_name| false } 171 | @options[:sql_redactor_complex_proc] = Proc.new { |sql| sql } 172 | @options[:backtrace_redactors] = [] 173 | @options[:ambiguous_tracers] = [] 174 | @options[:ambiguous_backtrace_lines] = 3 175 | @options[:analyzers] = [] 176 | @options[:logger_root_path] = Rails.root.join('log') 177 | @options[:backtrace_filter_proc] = BacktraceFilter.proc 178 | @options[:consolidate_transactions] = true 179 | end 180 | end 181 | end 182 | end 183 | -------------------------------------------------------------------------------- /spec/active_record/sql_analyzer/redacted_logger_spec.rb: -------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | require "sql-parser" 3 | 4 | RSpec.describe ActiveRecord::SqlAnalyzer::RedactedLogger do 5 | let(:tmp_dir) { Dir.mktmpdir } 6 | let(:parser) { SQLParser::Parser.new } 7 | after { FileUtils.remove_entry(tmp_dir) } 8 | 9 | context "#filter_event" do 10 | let(:event) { {} } 11 | 12 | subject(:filter_event) do 13 | instance = described_class.new(tmp_dir, "foo") 14 | instance.filter_event(event) 15 | event 16 | end 17 | 18 | before do 19 | ActiveRecord::SqlAnalyzer.configure do |c| 20 | c.add_backtrace_redactors [ 21 | [/erb_erb_[0-9]+_[0-9]+/, ""] 22 | ] 23 | end 24 | 25 | # All raw SQL should be valid :) 26 | event_sql_list = event[:calls].map { |call| call[:sql] } 27 | expect { event_sql_list.map { |sql| parser.scan_str sql } }.not_to raise_exception if event_sql_list.any?(&:present?) 28 | end 29 | 30 | after do 31 | # All redacted SQL should be valid 32 | expect { parser.scan_str(filter_event[:sql]) }.not_to raise_exception if filter_event[:sql].present? 
33 | end 34 | 35 | context "ambiguous backtraces" do 36 | let(:event) do 37 | { 38 | calls: [{ 39 | caller: %w(ambiguous foo bar), 40 | sql: "", 41 | }] 42 | } 43 | end 44 | 45 | before do 46 | ActiveRecord::SqlAnalyzer.configure do |c| 47 | c.add_ambiguous_tracers [/ambiguous/] 48 | end 49 | end 50 | 51 | it "switches to ambiguous backtrace logging" do 52 | expect(filter_event[:caller]).to eq("ambiguous, foo, bar") 53 | end 54 | end 55 | 56 | context "backtrace" do 57 | let(:event) do 58 | { 59 | calls: [{ 60 | caller: %w(foo-bar-erb_erb_1_5), 61 | sql: "", 62 | }] 63 | } 64 | end 65 | 66 | it "redacts" do 67 | expect(filter_event[:caller]).to eq("foo-bar-") 68 | end 69 | end 70 | 71 | context "sql quoted" do 72 | let(:event) do 73 | { 74 | calls: [{ 75 | caller: [""], 76 | sql: "SELECT * FROM foo WHERE name = 'hello\\'s name'", 77 | }] 78 | } 79 | end 80 | 81 | it "redacts" do 82 | expect(filter_event[:sql]).to eq("SELECT * FROM foo WHERE name = '[REDACTED]'") 83 | end 84 | end 85 | 86 | context "sql quoted multiple WHERE" do 87 | let(:event) do 88 | { 89 | calls: [{ 90 | caller: [""], 91 | sql: "SELECT * FROM foo WHERE name = 'hello\\'s name' AND age = '21'", 92 | }] 93 | } 94 | end 95 | 96 | it "redacts" do 97 | expect(filter_event[:sql]).to eq("SELECT * FROM foo WHERE name = '[REDACTED]' AND age = '[REDACTED]'") 98 | end 99 | end 100 | 101 | context "sql escaped and quoted" do 102 | let(:event) do 103 | { 104 | calls: [{ 105 | caller: [""], 106 | sql: "SELECT * FROM foo WHERE name = 'hello\\\'s name'", 107 | }] 108 | } 109 | end 110 | 111 | it "redacts" do 112 | expect(filter_event[:sql]).to eq("SELECT * FROM foo WHERE name = '[REDACTED]'") 113 | end 114 | end 115 | 116 | context "sql case insensitivity" do 117 | let(:event) do 118 | { 119 | calls: [{ 120 | caller: [""], 121 | sql: "SELECT * FROM foo WHERE name lIkE 'hello'", 122 | }] 123 | } 124 | end 125 | 126 | it "redacts" do 127 | expect(filter_event[:sql]).to eq("SELECT * FROM foo WHERE name LIKE 
'[REDACTED]'") 128 | end 129 | end 130 | 131 | context "sql" do 132 | let(:event) do 133 | { 134 | calls: [{ 135 | caller: [""], 136 | sql: "SELECT * FROM foo WHERE id = 1234", 137 | }] 138 | } 139 | end 140 | 141 | it "redacts" do 142 | expect(filter_event[:sql]).to eq("SELECT * FROM foo WHERE id = '[REDACTED]'") 143 | end 144 | end 145 | 146 | context "like quoted" do 147 | let(:event) do 148 | { 149 | calls: [{ 150 | caller: [""], 151 | sql: %{SELECT * FROM foo WHERE name LIKE 'A \\'quoted\\' value.' OR name LIKE "another ""quoted"" \\"value\\""}, 152 | }] 153 | } 154 | end 155 | 156 | it "redacts" do 157 | expect(filter_event[:sql]).to eq("SELECT * FROM foo WHERE name LIKE '[REDACTED]' OR name LIKE '[REDACTED]'") 158 | end 159 | end 160 | 161 | context "like escaped and quoted" do 162 | let(:event) do 163 | { 164 | calls: [{ 165 | caller: [""], 166 | sql: "SELECT * FROM foo WHERE name LIKE 'A \\\'quoted\\\' value.'", 167 | }] 168 | } 169 | end 170 | 171 | it "redacts" do 172 | expect(filter_event[:sql]).to eq("SELECT * FROM foo WHERE name LIKE '[REDACTED]'") 173 | end 174 | end 175 | 176 | context "in quoted" do 177 | let(:event) do 178 | { 179 | calls: [{ 180 | caller: [""], 181 | sql: "SELECT * FROM foo WHERE name IN ('A ''quoted'' value.')", 182 | }] 183 | } 184 | end 185 | 186 | it "redacts" do 187 | expect(filter_event[:sql]).to eq("SELECT * FROM foo WHERE name IN ('[REDACTED]')") 188 | end 189 | end 190 | 191 | context "in escaped and quoted" do 192 | let(:event) do 193 | { 194 | calls: [{ 195 | caller: [""], 196 | sql: %{SELECT * FROM foo WHERE name IN ('A ''quoted'' value.', "another ""quoted"" \\"value\\"")}, 197 | }] 198 | } 199 | end 200 | 201 | it "redacts" do 202 | expect(filter_event[:sql]).to eq("SELECT * FROM foo WHERE name IN ('[REDACTED]')") 203 | end 204 | end 205 | 206 | context "between strings" do 207 | let(:event) do 208 | { 209 | calls: [{ 210 | caller: [""], 211 | sql: "SELECT * FROM foo WHERE name BETWEEN 'A value.' 
AND 'Another value'", 212 | }] 213 | } 214 | end 215 | 216 | it "redacts" do 217 | expect(filter_event[:sql]).to eq("SELECT * FROM foo WHERE name BETWEEN '[REDACTED]' AND '[REDACTED]'") 218 | end 219 | end 220 | 221 | context "between strings with escaped quotes" do 222 | let(:event) do 223 | { 224 | calls: [{ 225 | caller: [""], 226 | sql: "SELECT * FROM foo WHERE name BETWEEN 'A ''quoted'' value.' AND 'Another \\'value\\''", 227 | }] 228 | } 229 | end 230 | 231 | it "redacts" do 232 | expect(filter_event[:sql]).to eq("SELECT * FROM foo WHERE name BETWEEN '[REDACTED]' AND '[REDACTED]'") 233 | end 234 | end 235 | 236 | context "in with = and other where clauses" do 237 | let(:event) do 238 | { 239 | calls: [{ 240 | caller: [""], 241 | sql: "SELECT * FROM foo WHERE name IN ('value=') AND name = 'value'", 242 | }] 243 | } 244 | end 245 | 246 | it "redacts" do 247 | expect(filter_event[:sql]).to eq("SELECT * FROM foo WHERE name IN ('[REDACTED]') AND name = '[REDACTED]'") 248 | end 249 | end 250 | 251 | context "insert" do 252 | let(:event) do 253 | { 254 | calls: [{ 255 | caller: [""], 256 | sql: "INSERT INTO `boom` (`bam`, `foo`) VALUES ('howdy', 'dowdy')", 257 | }] 258 | } 259 | end 260 | 261 | it "redacts" do 262 | expect(filter_event[:sql]).to eq("INSERT INTO `boom` (REDACTED_COLUMNS) VALUES ('[REDACTED]')") 263 | end 264 | end 265 | 266 | context "empty callstack" do 267 | let(:event) do 268 | { 269 | calls: [{ 270 | caller: [], 271 | sql: "INSERT INTO `boom` (`bam`, `foo`) VALUES ('howdy', 'dowdy')", 272 | }] 273 | } 274 | end 275 | 276 | it "redacts" do 277 | expect(filter_event[:caller]).to eq("") 278 | end 279 | end 280 | end 281 | end 282 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND 
DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. 
For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. -------------------------------------------------------------------------------- /spec/active_record/sql_analyzer/end_to_end_spec.rb: --------------------------------------------------------------------------------
# End-to-end spec: configures one analyzer (:test_tag, watching `matching_table`),
# runs SQL through DBConnection, and then inspects the two log files the
# analyzer writes under a temporary directory:
#   * test_tag.log             - one line per logged event
#   * test_tag_definitions.log - one line per distinct definition, as JSON
RSpec.describe "End to End" do
  include WaitForPop

  # Fresh scratch directory per example; removed again once the example finishes.
  let(:tmp_dir) { Dir.mktmpdir }
  after { FileUtils.remove_entry(tmp_dir) }

  let(:log_path) { "#{tmp_dir}/test_tag.log" }
  # Memoized by `let`: the file is read the first time an example references it.
  let(:log_data) { File.read(log_path) }

  # Maps each identifier in the event log to the number of lines carrying it.
  # The identifier (called `sha` here) is whatever follows the first "|" on a line.
  let(:log_hash) do
    hash = {}
    log_data.split("\n").each do |line|
      sha = line.split("|", 2).last
      hash[sha] ||= 0
      hash[sha] += 1
    end
    hash
  end

  # Inverse of log_hash: occurrence count => identifier. Examples use it to pick
  # "the entry that was logged N times". If two identifiers share a count, the
  # later pair overwrites the earlier one.
  let(:log_reverse_hash) do
    Hash[log_hash.map(&:reverse)]
  end

  let(:log_def_path) { "#{tmp_dir}/test_tag_definitions.log" }
  let(:log_def_data) { File.read(log_def_path) }
  # Maps identifier => parsed JSON event from the definitions log. Each line is
  # split once on "|": the left part is the identifier, the right part is JSON.
  let(:log_def_hash) do
    hash = {}
    log_def_data.split("\n").each do |line|
      sha, event = line.split("|", 2)
      hash[sha] = JSON.parse(event)
    end

    hash
  end

  before do
    ActiveRecord::SqlAnalyzer.configure do |c|
      c.logger_root_path tmp_dir
      # Default for this spec: sample every call. Nested contexts override this.
      c.log_sample_proc(Proc.new { |_name| true })

      c.add_analyzer(
        name: :test_tag,
        tables: %w(matching_table)
      )
    end

    # NOTE(review): presumably these are File/IO handles; `sync = true` turns off
    # write buffering so the examples can read the log files back immediately.
    ActiveRecord::SqlAnalyzer.config[:analyzers].each do |analyzer|
      analyzer[:logger_instance].definition_log_file.sync = true
      analyzer[:logger_instance].log_file.sync = true
    end
  end

  # Runs one SQL statement, then calls wait_for_pop (from WaitForPop).
  # NOTE(review): presumably wait_for_pop blocks until the background processor
  # has consumed the event - confirm in spec/support/wait_for_pop.rb.
  def execute(sql)
    DBConnection.connection.execute(sql)
    wait_for_pop
  end

  # Runs the given block inside a database transaction, then calls wait_for_pop.
  def transaction
    DBConnection.connection.transaction { yield }
    wait_for_pop
  end

  it "does not log with a non-matching table" do
    execute "SELECT * FROM nonmatching_table"

    expect(log_data).to eq("")
    expect(log_def_data).to eq("")
  end

  it "logs with a matching + non-matching table in one query" do
    # The two `mt.id = ...` statements are expected to share one definition
    # (count 2); the `mt.test_string` statement gets its own (count 1).
    execute "SELECT nmt.id FROM nonmatching_table AS nmt JOIN matching_table AS mt ON mt.id = nmt.id WHERE mt.id = 1234"
    execute "SELECT nmt.id FROM nonmatching_table AS nmt JOIN matching_table AS mt ON mt.id = nmt.id WHERE mt.id = 4321"
    execute "SELECT nmt.id FROM nonmatching_table AS nmt JOIN matching_table AS mt ON mt.id = nmt.id WHERE mt.test_string = 'abc'"

    expect(log_hash.length).to eq(2)

    id_sha = log_reverse_hash[2]
    str_sha = log_reverse_hash[1]

    expect(log_def_hash.length).to eq(2)

    expect(log_def_hash[id_sha]["sql"]).to include("mt.id = '[REDACTED]'")
    expect(log_def_hash[str_sha]["sql"]).to include("mt.test_string = '[REDACTED]'")
  end

  it "logs with only a matching table in a query" do
    execute "SELECT * FROM matching_table WHERE id = 1234"
    execute "SELECT * FROM matching_table WHERE id = 4321"
    execute "SELECT * FROM matching_table WHERE test_string = 'abc'"

    expect(log_hash.length).to eq(2)

    id_sha = log_reverse_hash[2]
    str_sha = log_reverse_hash[1]

    expect(log_def_hash.length).to eq(2)

    expect(log_def_hash[id_sha]["sql"]).to include("id = '[REDACTED]'")
    expect(log_def_hash[str_sha]["sql"]).to include("test_string = '[REDACTED]'")
  end

  it "handles invalid UTF-8" do
    # "\xe5" on its own is not a valid UTF-8 sequence. Both statements are
    # expected to end up under the same definition, logged twice.
    execute "SELECT * FROM matching_table WHERE test_string = '\xe5'"
    execute "SELECT * FROM matching_table WHERE test_string = 'foobar'"

    expect(log_reverse_hash.first.first).to eq(2)

    sha = log_reverse_hash[2]
    expect(log_def_hash[sha]["sql"]).to include("test_string = '[REDACTED]'")
  end

  it "logs multiple queries in a transaction correctly" do
    # One transaction with the statements in id-then-string order...
    transaction do
      execute "SELECT * FROM matching_table WHERE id = 4321"
      execute "SELECT * FROM matching_table WHERE test_string = 'abc'"
    end

    # ...and two transactions in string-then-id order, so the two orderings can
    # be told apart by their occurrence counts (1 vs 2).
    2.times do
      transaction do
        execute "SELECT * FROM matching_table WHERE test_string = 'abc'"
        execute "SELECT * FROM matching_table WHERE id = 4321"
      end
    end

    transaction_executed_once_sha = log_reverse_hash[1]
    transaction_executed_twice_sha = log_reverse_hash[2]

    expect(log_def_hash[transaction_executed_once_sha]["sql"]).to eq(
      "BEGIN; " \
      "SELECT * FROM matching_table WHERE id = '[REDACTED]'; " \
      "SELECT * FROM matching_table WHERE test_string = '[REDACTED]'; " \
      "COMMIT;"
    )

    expect(log_def_hash[transaction_executed_twice_sha]["sql"]).to eq(
      "BEGIN; " \
      "SELECT * FROM matching_table WHERE test_string = '[REDACTED]'; " \
      "SELECT * FROM matching_table WHERE id = '[REDACTED]'; " \
      "COMMIT;"
    )
  end

  it "Logs nested transactions correctly" do
    # The inner transaction is expected to be folded into the outer one: the
    # logged definition has a single BEGIN/COMMIT pair.
    transaction do
      execute "SELECT * FROM matching_table WHERE id = 4321"
      transaction do
        execute "SELECT * FROM matching_table WHERE test_string = 'abc'"
      end
    end

    transaction_executed_once_sha = log_reverse_hash[1]
    expect(log_def_hash[transaction_executed_once_sha]["sql"]).to eq(
      "BEGIN; " \
      "SELECT * FROM matching_table WHERE id = '[REDACTED]'; " \
      "SELECT * FROM matching_table WHERE test_string = '[REDACTED]'; " \
      "COMMIT;"
    )
  end

  it "Logs transactions with inserts correctly" do
    transaction do
      execute "INSERT INTO matching_table (test_string) VALUES ('test_value')"
      execute "SELECT * FROM matching_table WHERE id = 4321"
    end

    transaction_executed_once_sha = log_reverse_hash[1]
    expect(log_def_hash[transaction_executed_once_sha]["sql"]).to eq(
      "BEGIN; " \
      "INSERT INTO matching_table (REDACTED_COLUMNS) VALUES ('[REDACTED]'); " \
      "SELECT * FROM matching_table WHERE id = '[REDACTED]'; " \
      "COMMIT;"
    )
  end

  it "Logs mixed matching-nonmatching selects correctly" do
    # The SELECT against nonmatching_table is expected to be left out of the
    # logged transaction.
    transaction do
      execute "SELECT * FROM matching_table WHERE id = 4321"
      execute "SELECT * FROM nonmatching_table WHERE id = 4321"
    end

    transaction_executed_once_sha = log_reverse_hash[1]

    expect(log_def_hash[transaction_executed_once_sha]["sql"]).to eq(
      "BEGIN; " \
      "SELECT * FROM matching_table WHERE id = '[REDACTED]'; " \
      "COMMIT;"
    )
  end

  it "Logs transaction with repeated selects correctly" do
    # The two test_string SELECTs differ only in their literal, and the expected
    # definition lists that statement once.
    transaction do
      execute "SELECT * FROM matching_table WHERE id = 4321"
      ['blah', 'bloo'].each do |s|
        execute "SELECT * FROM matching_table WHERE test_string = '#{s}'"
      end
    end

    transaction_executed_once_sha = log_reverse_hash[1]

    expect(log_def_hash[transaction_executed_once_sha]["sql"]).to eq(
      "BEGIN; " \
      "SELECT * FROM matching_table WHERE id = '[REDACTED]'; " \
      "SELECT * FROM matching_table WHERE test_string = '[REDACTED]'; " \
      "COMMIT;"
    )
  end

  it "Logs transaction with repeated inserts correctly" do
    # The identical INSERT runs twice but is expected once in the definition.
    transaction do
      execute "SELECT * FROM matching_table WHERE id = 4321"
      2.times do
        execute "INSERT INTO matching_table (test_string) VALUES ('test_value')"
      end
    end

    transaction_executed_once_sha = log_reverse_hash[1]

    expect(log_def_hash[transaction_executed_once_sha]["sql"]).to eq(
      "BEGIN; " \
      "SELECT * FROM matching_table WHERE id = '[REDACTED]'; " \
      "INSERT INTO matching_table (REDACTED_COLUMNS) VALUES ('[REDACTED]'); " \
      "COMMIT;"
    )
  end

  context "ActiveRecord generated transactions" do
    before do
      # Throwaway models for the two tables. Each has an after_commit hook that
      # calls `last` on its own class, i.e. issues a query from inside the hook.
      stub_const("Matching", Class.new(ActiveRecord::Base) do
        self.table_name = "matching_table"

        after_commit { self.class.last }
      end)

      stub_const("NonMatching", Class.new(ActiveRecord::Base) do
        self.table_name = "nonmatching_table"

        after_commit { self.class.last }
      end)
    end

    it "Logs the matching statements in the transaction and logs after_commit hooks outside the transaction" do
      Matching.transaction do
        Matching.create!
        NonMatching.create
        NonMatching.last
      end
      NonMatching.create

      # NOTE(review): the order of the expected entries follows the order in
      # which lines appear in the definitions log - confirm against the logger.
      expect(log_def_hash.map { |_hash, data| data["sql"] }).to match([
        "INSERT INTO `matching_table` VALUES ()",
        "BEGIN; INSERT INTO `matching_table` VALUES (); INSERT INTO `nonmatching_table` VALUES (); COMMIT;",
        "SELECT `matching_table`.* FROM `matching_table` ORDER BY `matching_table`.`id` DESC"
      ])
    end
  end

  it "Logs mixed matching-nonmatching with inserts correctly" do
    # Unlike the non-matching SELECT case above, the INSERT into
    # nonmatching_table is expected to stay in the logged transaction.
    transaction do
      execute "SELECT * FROM matching_table WHERE id = 4321"
      execute "INSERT INTO nonmatching_table (id) VALUES (1)"
    end

    transaction_executed_once_sha = log_reverse_hash[1]

    expect(log_def_hash[transaction_executed_once_sha]["sql"]).to eq(
      "BEGIN; " \
      "SELECT * FROM matching_table WHERE id = '[REDACTED]'; " \
      "INSERT INTO nonmatching_table (REDACTED_COLUMNS) VALUES ('[REDACTED]'); " \
      "COMMIT;"
    )
  end

  it "Does not log nonmatching-only queries" do
    transaction do
      execute "SELECT * FROM nonmatching_table WHERE id = 4321"
      execute "SELECT * FROM nonmatching_table WHERE id = 4321"
    end

    expect(log_def_hash.size).to eq(0)
  end

  it "Does not log nonmatching-only insert transactions" do
    transaction do
      execute "INSERT INTO nonmatching_table (id) VALUES (1)"
    end

    expect(log_def_hash.size).to eq(0)
  end

  context "Selectively sampling" do
    before do
      ActiveRecord::SqlAnalyzer.configure do |c|
        # Counter captured by the proc below; it starts at 0 for every example
        # because this `before` block runs again each time.
        times_called = 0
        # Return true every other call, starting with the first call
        c.log_sample_proc(Proc.new { |_name| (times_called += 1) % 2 == 1 })
      end
    end

    it "Samples some but not other selects" do
      # First statement is sampled, second is not.
      execute "SELECT * FROM matching_table WHERE id = 1"
      execute "SELECT * FROM matching_table WHERE test_string = 'abc'"

      expect(log_def_hash.size).to eq(1)
      expect(log_def_hash.map { |_hash, query| query['sql'] }).to eq([
        "SELECT * FROM matching_table WHERE id = '[REDACTED]'"
      ])
    end

    it "Samples some but not other whole transactions" do
      transaction do
        execute "SELECT * FROM matching_table WHERE id = 1"
        execute "SELECT * FROM matching_table WHERE test_string = 'abc'"
      end

      transaction do
        execute "SELECT * FROM matching_table WHERE id = 1 and test_string = 'abc'"
        execute "SELECT * FROM matching_table WHERE id > 4 and id < 8"
      end

      # NOTE(review): which entries show up here depends on how often the
      # analyzer invokes the sample proc per statement and per transaction -
      # confirm against the analyzer implementation before changing this list.
      expect(log_def_hash.map { |_hash, data| data["sql"] }).to match([
        "SELECT * FROM matching_table WHERE id = '[REDACTED]'",
        "BEGIN; "\
        "SELECT * FROM matching_table WHERE id = '[REDACTED]'; "\
        "SELECT * FROM matching_table WHERE test_string = '[REDACTED]'; "\
        "COMMIT;",
        "SELECT * FROM matching_table WHERE id = '[REDACTED]' and id = '[REDACTED]'"
      ])
    end
  end

  context "when sampling is disabled" do
    before do
      ActiveRecord::SqlAnalyzer.configure do |c|
        # Never sample: nothing should reach either log file.
        c.log_sample_proc(Proc.new { |_name| false })
      end
    end

    it "does not log" do
      execute "SELECT * FROM matching_table"

      expect(log_data).to eq("")
      expect(log_def_data).to eq("")
    end
  end
end

--------------------------------------------------------------------------------