├── log └── .keep ├── db └── backfill │ └── .keep ├── .rspec ├── Gemfile ├── bin ├── setup └── console ├── .gitignore ├── lib ├── backfiller.rb └── backfiller │ ├── tasks │ └── db.rake │ ├── cursor.rb │ ├── railtie.rb │ ├── configuration.rb │ ├── cursor │ └── postgresql.rb │ └── runner.rb ├── Rakefile ├── .travis.yml ├── .rubocop.yml ├── Changelog.md ├── spec ├── support │ └── logger_mock.rb ├── spec_helper.rb └── backfiller │ ├── cursor_spec.rb │ └── runner_spec.rb ├── backfiller.gemspec └── README.md /log/.keep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /db/backfill/.keep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.rspec: -------------------------------------------------------------------------------- 1 | --require spec_helper 2 | --format documentation 3 | --color 4 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | source 'https://rubygems.org' 4 | 5 | gemspec 6 | -------------------------------------------------------------------------------- /bin/setup: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euo pipefail 3 | IFS=$'\n\t' 4 | set -vx 5 | 6 | bundle install 7 | 8 | # Do any other automated setup that you need to do here 9 | -------------------------------------------------------------------------------- /bin/console: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | # frozen_string_literal: true 3 | 4 | require 'bundler/setup' 5 | require 'backfiller' 6 | 7 | require 'irb' 8 | IRB.start(__FILE__) 9 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.bundle/ 2 | /.yardoc 3 | /Gemfile.lock 4 | /_yardoc/ 5 | /coverage/ 6 | /doc/ 7 | /pkg/ 8 | /spec/reports/ 9 | /tmp/ 10 | log/*.log 11 | 12 | # rspec failure tracking 13 | .rspec_status 14 | -------------------------------------------------------------------------------- /lib/backfiller.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require_relative 'backfiller/configuration' 4 | require_relative 'backfiller/cursor' 5 | require_relative 'backfiller/runner' 6 | 7 | require_relative 'backfiller/railtie' if defined?(Rails::Railtie) 8 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'bundler/gem_tasks' 4 | require 'rspec/core/rake_task' 5 | require 'rubocop/rake_task' 6 | 7 | RSpec::Core::RakeTask.new(:spec) 8 | RuboCop::RakeTask.new 9 | 10 | desc 'CI build' 11 | task ci: %i[spec rubocop] 12 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: ruby 2 | rvm: 3 | - 2.7.2 4 | gemfile: 5 | - Gemfile 6 | cache: bundler 7 | before_install: 8 | - gem install bundler 9 | before_script: 10 | - psql -c 'create database test;' -U postgres 11 | script: 12 | - bundle exec rake ci 13 | services: 14 | - postgresql 15 | -------------------------------------------------------------------------------- /.rubocop.yml: -------------------------------------------------------------------------------- 1 | AllCops: 2 | NewCops: enable 3 | SuggestExtensions: false 4 | 5 | Style/Documentation: 6 | Enabled: false 7 | 8 | 
Style/AccessorGrouping: 9 | Enabled: false 10 | 11 | Metrics/BlockLength: 12 | Exclude: 13 | - 'spec/**/*_spec.rb' 14 | 15 | Metrics/MethodLength: 16 | Max: 20 17 | -------------------------------------------------------------------------------- /lib/backfiller/tasks/db.rake: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | namespace :db do 4 | desc 'Run database backfill task' 5 | task :backfill, [:name] => :environment do |_, args| 6 | raise 'Please specify backfill task name' unless args[:name] 7 | 8 | Backfiller.logger.level = :info if Backfiller.logger 9 | Backfiller.run(args[:name]) 10 | end 11 | end 12 | -------------------------------------------------------------------------------- /Changelog.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 0.2.3 4 | - Fix cursor bug in case of enabled query cache (#6) 5 | 6 | ## 0.2.2 7 | 8 | - Always release connections in runner (#5). 9 | 10 | ## 0.2.1 11 | 12 | - `cursor_threshold` is nil by default. 
13 | 14 | ## 0.2.0 15 | 16 | - Upgrade local gems 17 | - Add rubocop 18 | - Add specs 19 | - Configure travis 20 | - Add `cursor_threshold` feature 21 | 22 | ## 0.1.1 23 | 24 | - Support Rails 6.0.0 25 | -------------------------------------------------------------------------------- /lib/backfiller/cursor.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require_relative 'cursor/postgresql' 4 | 5 | module Backfiller 6 | module Cursor 7 | def self.new(connection, *args) 8 | case connection 9 | when ActiveRecord::ConnectionAdapters::PostgreSQLAdapter 10 | Backfiller::Cursor::Postgresql.new(connection, *args) 11 | else 12 | raise "Unsupported connection #{connection.inspect}" 13 | end 14 | end 15 | end 16 | end 17 | -------------------------------------------------------------------------------- /spec/support/logger_mock.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | class LoggerMock 4 | attr_reader :messages 5 | 6 | def initialize 7 | @messages = [] 8 | end 9 | 10 | %i[ 11 | debug 12 | info 13 | warn 14 | error 15 | ].each do |name| 16 | define_method(name) do |message| 17 | @messages << message 18 | end 19 | 20 | define_method(:"#{name}?") do 21 | true 22 | end 23 | end 24 | 25 | def level 26 | Logger::DEBUG 27 | end 28 | 29 | def reset 30 | @messages.clear 31 | end 32 | end 33 | -------------------------------------------------------------------------------- /lib/backfiller/railtie.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module Backfiller 4 | class Railtie < Rails::Railtie 5 | rake_tasks do 6 | load 'backfiller/tasks/db.rake' 7 | end 8 | 9 | initializer 'backfiller.configure' do 10 | Backfiller.configure do |config| 11 | config.task_directory = Rails.root.join('db', 'backfill') 12 | 13 | config.task_namespace = 'backfill' 14 
| 15 | config.batch_size = 1_000 16 | 17 | config.logger = defined?(ApplicationRecord) ? ApplicationRecord.logger : ActiveRecord::Base.logger 18 | end 19 | end 20 | 21 | config.after_initialize do 22 | task_module = Backfiller.task_namespace.classify 23 | Object.const_set(task_module, Module.new) unless Object.const_defined?(task_module) 24 | end 25 | end 26 | end 27 | -------------------------------------------------------------------------------- /lib/backfiller/configuration.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module Backfiller 4 | class << self 5 | def configure 6 | yield self 7 | end 8 | 9 | # directory for backfill ruby classes 10 | attr_accessor :task_directory 11 | 12 | # ruby module of backfill classes 13 | attr_accessor :task_namespace 14 | 15 | # Max number of records in one cursor fetch 16 | attr_accessor :batch_size 17 | 18 | # Number of processed records after which cursor will be re-opened 19 | attr_accessor :cursor_threshold 20 | 21 | # Logger 22 | attr_accessor :logger 23 | 24 | # @param task_name [String] name of backfill task file 25 | def run(task_name) 26 | Backfiller::Runner.new(task_name).run 27 | end 28 | 29 | # @param message [String] log message 30 | def log(message) 31 | return unless logger 32 | 33 | logger.info "[Backfiller] #{message}" 34 | end 35 | end 36 | end 37 | -------------------------------------------------------------------------------- /lib/backfiller/cursor/postgresql.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module Backfiller 4 | module Cursor 5 | class Postgresql 6 | attr_reader :connection 7 | 8 | def initialize(connection, name, query) 9 | @connection = connection 10 | @name = name 11 | @query = query 12 | end 13 | 14 | # Open cursor, call block and close cursor in transaction. 15 | # 16 | # @return [Object] yielded block result. 
17 | def transaction 18 | result = nil 19 | 20 | @connection.transaction do 21 | Backfiller.log 'Open cursor' 22 | open 23 | 24 | result = yield 25 | 26 | Backfiller.log 'Close cursor' 27 | close 28 | end 29 | 30 | result 31 | end 32 | 33 | def open 34 | @connection.execute "DECLARE #{@name} NO SCROLL CURSOR WITHOUT HOLD FOR #{@query}" 35 | end 36 | 37 | def fetch(count) 38 | @connection.exec_query "FETCH #{count} FROM #{@name}" 39 | end 40 | 41 | def close 42 | @connection.execute "CLOSE #{@name}" 43 | end 44 | end 45 | end 46 | end 47 | -------------------------------------------------------------------------------- /backfiller.gemspec: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | lib = File.expand_path('lib', __dir__) 4 | $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) 5 | 6 | Gem::Specification.new do |spec| 7 | spec.name = 'backfiller' 8 | spec.version = '0.2.3' 9 | spec.authors = ['Andriy Yanko'] 10 | spec.email = ['andriy.yanko@railsware.com'] 11 | 12 | spec.summary = 'Backfiller for null database columns' 13 | spec.homepage = 'https://github.com/railsware/backfiller' 14 | spec.license = 'MIT' 15 | 16 | spec.required_ruby_version = '>= 2.7.0' 17 | 18 | spec.files = `git ls-files -z`.split("\x0").reject do |f| 19 | f.match(%r{^(test|spec|features)/}) 20 | end 21 | spec.bindir = 'exe' 22 | spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) } 23 | spec.require_paths = ['lib'] 24 | 25 | spec.add_dependency 'activerecord', '>= 6.0.0' 26 | 27 | spec.add_development_dependency 'bundler', '~> 2.2.0' 28 | spec.add_development_dependency 'rake', '~> 13.0.0' 29 | spec.add_development_dependency 'rspec', '~> 3.10.0' 30 | spec.add_development_dependency 'rubocop', '~> 1.18.0' 31 | 32 | spec.add_development_dependency 'pg', '~> 1.2.0' 33 | spec.metadata['rubygems_mfa_required'] = 'true' 34 | end 35 | 
-------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'bundler/setup' 4 | 5 | Bundler.require(:default) 6 | 7 | require 'active_support' 8 | require 'active_support/core_ext' 9 | require 'active_record' 10 | 11 | require_relative 'support/logger_mock' 12 | 13 | # Configure rspec matchers 14 | RSpec::Matchers.define_negated_matcher :not_change, :change 15 | 16 | # Create logging 17 | ActiveSupport::LogSubscriber.colorize_logging = false 18 | 19 | # Initialize ActiveRecord 20 | ActiveRecord::Base.logger = LoggerMock.new 21 | ActiveRecord::Base.establish_connection( 22 | url: 'postgresql://localhost/test', 23 | pool: 5 24 | ) 25 | 26 | # Configure Backfiller 27 | Backfiller.configure do |config| 28 | config.task_directory = File.expand_path('../db/backfill', __dir__) 29 | config.task_namespace = 'backfill' 30 | config.batch_size = 4 31 | config.cursor_threshold = 10 32 | config.logger = ActiveRecord::Base.logger 33 | end 34 | 35 | RSpec.configure do |config| 36 | # Enable flags like --only-failures and --next-failure 37 | config.example_status_persistence_file_path = '.rspec_status' 38 | 39 | # Disable RSpec exposing methods globally on `Module` and `main` 40 | config.disable_monkey_patching! 
41 | 42 | config.expect_with :rspec do |c| 43 | c.syntax = :expect 44 | end 45 | 46 | config.before do 47 | ActiveRecord::Base.logger.reset 48 | end 49 | end 50 | -------------------------------------------------------------------------------- /spec/backfiller/cursor_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe Backfiller::Cursor do 4 | let(:cursor) { described_class.new(connection, 'backfill_cursor', select_sql) } 5 | let(:connection) { ActiveRecord::Base.connection } 6 | let(:select_sql) do 7 | <<~SQL 8 | SELECT * FROM ( 9 | VALUES 10 | (1, 'Alice'), 11 | (2, 'Bob'), 12 | (3, 'Carlos') 13 | ) AS t( 14 | id, name 15 | ) 16 | SQL 17 | end 18 | 19 | specify do 20 | expect(cursor).to be_instance_of(Backfiller::Cursor::Postgresql) 21 | end 22 | 23 | describe '#fetch' do 24 | shared_examples :fetches_results do 25 | subject do 26 | results = [] 27 | connection.transaction do 28 | cursor.open 29 | results << cursor.fetch(2) 30 | results << cursor.fetch(2) 31 | results << cursor.fetch(2) 32 | cursor.close 33 | end 34 | results 35 | end 36 | 37 | specify do 38 | expect(subject.size).to eq(3) 39 | 40 | expect(subject[0]).to be_instance_of(ActiveRecord::Result) 41 | expect(subject[0].length).to eq(2) 42 | expect(subject[0][0]).to eq('id' => 1, 'name' => 'Alice') 43 | expect(subject[0][1]).to eq('id' => 2, 'name' => 'Bob') 44 | 45 | expect(subject[1]).to be_instance_of(ActiveRecord::Result) 46 | expect(subject[1].length).to eq(1) 47 | expect(subject[1][0]).to eq('id' => 3, 'name' => 'Carlos') 48 | 49 | expect(subject[2]).to be_instance_of(ActiveRecord::Result) 50 | expect(subject[2].length).to eq(0) 51 | end 52 | end 53 | 54 | context 'with query cache disabled' do 55 | around do |ex| 56 | ActiveRecord::Base.uncached { ex.run } 57 | end 58 | 59 | it_behaves_like :fetches_results 60 | end 61 | 62 | context 'with query cache enabled' do 63 | around do |ex| 64 | 
ActiveRecord::Base.cache { ex.run } 65 | end 66 | 67 | it_behaves_like :fetches_results 68 | end 69 | end 70 | end 71 | -------------------------------------------------------------------------------- /lib/backfiller/runner.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module Backfiller 4 | class Runner 5 | attr_reader \ 6 | :task, 7 | :connection_pool, 8 | :batch_size, 9 | :cursor_threshold, 10 | :process_method 11 | 12 | def initialize(task_name) 13 | @task = build_task(task_name) 14 | @connection_pool = @task.respond_to?(:connection_pool) ? @task.connection_pool : default_connection_pool 15 | @batch_size = @task.respond_to?(:batch_size) ? @task.batch_size : Backfiller.batch_size 16 | @cursor_threshold = @task.respond_to?(:cursor_threshold) ? @task.cursor_threshold : Backfiller.cursor_threshold 17 | @process_method = @task.respond_to?(:process_row) ? @task.method(:process_row) : method(:process_row) 18 | end 19 | 20 | # It uses two connections from pool: 21 | # * master [M] - reads data using cursor in transaction 22 | # * worker [W] - changes data based on record read from master 23 | # 24 | # @example 25 | # [M] BEGIN 26 | # [M] DECLARE backfill_cursor NO SCROLL CURSOR WITHOUT HOLD FOR SELECT * FROM users 27 | # // Start fetch and process loop: 28 | # [M] FETCH 1000 FROM backfill_cursor 29 | # [W] UPDATE users SET full_name = '...' where id = 1 30 | # [W] ... 31 | # [W] UPDATE users SET full_name = '...' where id = 1000 32 | # [M] FETCH 1000 FROM backfill_cursor 33 | # [W] UPDATE users SET full_name = '...' where id = 1001 34 | # [W] ... 35 | # [W] UPDATE users SET full_name = '...' where id = 2000 36 | # // Records per cursor transaction threshold reached. Reopen transaction. 
37 | # [M] CLOSE backfill_cursor 38 | # [M] COMMIT 39 | # [M] BEGIN 40 | # [M] DECLARE backfill_cursor NO SCROLL CURSOR WITHOUT HOLD FOR SELECT * FROM users 41 | # [M] FETCH 1000 FROM backfill_cursor 42 | # // The end of cursor reached. Break cursor loop and exit. 43 | # [M] CLOSE backfill_cursor 44 | # [M] COMMIT 45 | def run 46 | master_connection = acquire_connection 47 | worker_connection = acquire_connection 48 | 49 | begin 50 | run_cursor_loop(master_connection) do |row| 51 | process_method.call(worker_connection, row) 52 | end 53 | ensure 54 | release_connection(master_connection) 55 | release_connection(worker_connection) 56 | end 57 | end 58 | 59 | private 60 | 61 | def build_task(task_name) 62 | Backfiller.log "Build #{task_name} task" 63 | require File.join(Backfiller.task_directory, task_name) 64 | "#{Backfiller.task_namespace}/#{task_name}".classify.constantize.new 65 | end 66 | 67 | ########################################################################### 68 | 69 | def default_connection_pool 70 | defined?(ApplicationRecord) ? 
ApplicationRecord.connection_pool : ActiveRecord::Base.connection_pool 71 | end 72 | 73 | def acquire_connection 74 | connection_pool.checkout 75 | end 76 | 77 | def release_connection(connection) 78 | connection_pool.checkin(connection) 79 | end 80 | 81 | ########################################################################### 82 | 83 | # Run loop that re-open cursor transaction on threshold 84 | def run_cursor_loop(connection, &block) 85 | Backfiller.log 'Start cursor loop' 86 | 87 | total_count = 0 88 | cursor = build_cursor(connection) 89 | 90 | loop do 91 | finished, count = cursor.transaction do 92 | run_fetch_loop(cursor, &block) 93 | end 94 | 95 | total_count += count 96 | 97 | Backfiller.log "Total processed #{total_count}" 98 | break if finished 99 | end 100 | end 101 | 102 | # @return [Array] finished_status/processed_count 103 | def run_fetch_loop(cursor, &block) 104 | Backfiller.log 'Start fetch loop' 105 | count = 0 106 | 107 | loop do 108 | result = cursor.fetch(batch_size) 109 | 110 | return [true, count] if result.empty? 111 | 112 | result.each do |row| 113 | block.call(row) 114 | count += 1 115 | end 116 | 117 | Backfiller.log "Processed #{count}" 118 | 119 | return [false, count] if cursor_threshold && count > cursor_threshold 120 | end 121 | end 122 | 123 | ########################################################################### 124 | 125 | # Build cursor object that will use master connection. 126 | def build_cursor(connection) 127 | Backfiller::Cursor.new(connection, 'backfill_cursor', task.select_sql) 128 | end 129 | 130 | # Process row using worker connection. 
131 | def process_row(connection, row) 132 | Array(task.execute_sql(connection, row)).each do |sql| 133 | connection.execute(sql) 134 | end 135 | end 136 | end 137 | end 138 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![Backfill machine](https://railsware.github.io/backfiller/assets/backfill_machine.jpg) 2 | 3 | # Backfiller [![Build Status](https://travis-ci.com/railsware/backfiller.svg?branch=master)](https://travis-ci.com/railsware/backfiller) 4 | 5 | The backfill machine for null database columns. 6 | This gem may be handy for `no-downtime` deployments, especially when you need to fill columns of a table with a huge number of records without locking the table. 7 | 8 | ## Typical no-downtime and non-locking cycle 9 | 10 | * add migration that adds new column (null: true) 11 | * deploy and run migration task 12 | * deploy code that starts filling new column in corresponding flows 13 | * add backfill task 14 | * deploy and run backfill task 15 | * [optional] add migration that invokes backfill task so as to keep all environments consistent (except production environment because we already backfilled data) 16 | * add migration that disallows null values (null: false) 17 | * deploy code that starts using new column 18 | 19 | ## Concept 20 | 21 | The idea is to prepare all data in a selection query on the database server, fetch the data using the CURSOR feature, and then build simple UPDATE queries. 22 | This way we minimize db server resource usage and lock only one record at a time (atomic update). 23 | We use two connections to database: 24 | * master - to create a cursor in a transaction and fetch data in batches. 25 | * worker - to execute small atomic update queries (no wrapper transaction) 26 | 27 | Even if the backfill process crashes, you may resolve the issue and run it again to process the remaining data. 
28 | 29 | ## Connection adapters 30 | 31 | Currently it supports the following ActiveRecord connection adapters: 32 | * PostgreSQL 33 | 34 | ## Installation 35 | 36 | Add this line to your application's Gemfile: 37 | 38 | ```ruby 39 | gem 'backfiller' 40 | ``` 41 | 42 | And then execute: 43 | 44 | $ bundle 45 | 46 | Or install it yourself as: 47 | 48 | $ gem install backfiller 49 | 50 | ## Usage 51 | 52 | Assume we want to backfill `profiles.name` column from `users.first_name`, `users.last_name` columns. 53 | 54 | Create a backfill task in `db/backfill/profile_name.rb` and define the required methods: 55 | 56 | #### Single worker execution query 57 | 58 | ```ruby 59 | class Backfill::ProfileName 60 | 61 | def select_sql 62 | <<~SQL 63 | SELECT 64 | profiles.id AS profile_id, 65 | CONCAT(users.first_name, ' ', users.last_name) AS profile_name 66 | FROM profiles 67 | INNER JOIN users ON 68 | users.id = profiles.user_id 69 | WHERE 70 | profiles.name IS NULL 71 | SQL 72 | end 73 | 74 | def execute_sql(connection, row) 75 | <<~SQL 76 | UPDATE profiles SET 77 | name = #{connection.quote(row['profile_name'])} 78 | WHERE 79 | id = #{connection.quote(row['profile_id'])} 80 | SQL 81 | end 82 | 83 | end 84 | ``` 85 | 86 | #### Multiple worker execution queries 87 | 88 | ```ruby 89 | class Backfill::ProfileName 90 | 91 | def select_sql 92 | <<~SQL 93 | SELECT 94 | profiles.id AS profile_id, 95 | CONCAT(users.first_name, ' ', users.last_name) AS profile_name 96 | FROM profiles 97 | INNER JOIN users ON 98 | users.id = profiles.user_id 99 | WHERE 100 | profiles.name IS NULL 101 | SQL 102 | end 103 | 104 | def execute_sql(connection, row) 105 | [ 106 | 'BEGIN', 107 | <<~SQL, 108 | UPDATE profiles SET 109 | name = #{connection.quote(row['profile_name'])} 110 | WHERE 111 | id = #{connection.quote(row['profile_id'])} AND 112 | (SELECT pg_try_advisory_xact_lock(12345678)) = TRUE 113 | SQL 114 | 'COMMIT' 115 | ] 116 | end 117 | 118 | end 119 | 120 | ``` 121 | 122 | #### Custom row processing 123 | 
124 | ```ruby 125 | class Backfill::ProfileName 126 | 127 | def select_sql 128 | <<~SQL 129 | SELECT 130 | profiles.id AS profile_id, 131 | CONCAT(users.first_name, ' ', users.last_name) AS profile_name 132 | FROM profiles 133 | INNER JOIN users ON 134 | users.id = profiles.user_id 135 | WHERE 136 | profiles.name IS NULL 137 | SQL 138 | end 139 | 140 | def process_row(connection, row) 141 | connection.execute 'BEGIN' 142 | if connection.select_value 'SELECT pg_try_advisory_xact_lock(12345678)' 143 | connection.execute <<~SQL 144 | INSERT INTO contacts( 145 | full_name 146 | ) 147 | VALUES( 148 | #{connection.quote(row['profile_name'])} 149 | ) 150 | SQL 151 | end 152 | connection.execute 'COMMIT' 153 | end 154 | 155 | end 156 | 157 | ``` 158 | And then just run the rake task: 159 | 160 | ```bash 161 | $ rails db:backfill[profile_name] 162 | ``` 163 | 164 | ## Configuration 165 | 166 | For a Rails application, backfiller is initialized with the following options: 167 | 168 | * task_directory: `RAILS_ROOT/db/backfill` 169 | * task_namespace: `Backfill` 170 | * batch_size: `1_000` 171 | * cursor_threshold: `nil` 172 | * connection_pool: `ApplicationRecord.connection_pool` 173 | * logger: `ApplicationRecord.logger` 174 | 175 | You may change them globally via `config/initializers/backfiller.rb`: 176 | 177 | ```ruby 178 | Backfiller.configure do |config| 179 | config.foo = bar 180 | end 181 | ``` 182 | 183 | Or specify some options in a particular backfill task: 184 | 185 | ```ruby 186 | class Backfill::Foo 187 | def batch_size 188 | 100 189 | end 190 | 191 | def cursor_threshold 192 | 100_000 193 | end 194 | end 195 | ``` 196 | 197 | ## Authors 198 | 199 | * [Andriy Yanko](http://ayanko.github.io) 200 | -------------------------------------------------------------------------------- /spec/backfiller/runner_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.describe Backfiller::Runner do 4 | let(:runner) do 5 
| described_class.new(task_name) 6 | end 7 | 8 | let(:task_path) { File.join(Backfiller.task_directory, "#{task_name}.rb") } 9 | let(:task_name) { 'dummy' } 10 | 11 | before do 12 | Object.const_set('Backfill', Module.new) 13 | ActiveRecord::Base.connection.create_table(:backfiller_records) do |t| 14 | t.column :first_name, :string 15 | t.column :last_name, :string 16 | t.column :full_name, :string 17 | end 18 | end 19 | 20 | after do 21 | $LOADED_FEATURES.delete task_path 22 | FileUtils.rm task_path 23 | ActiveRecord::Base.connection.drop_table(:backfiller_records) 24 | Object.send :remove_const, 'Backfill' 25 | end 26 | 27 | describe '#initialize' do 28 | context 'no options' do 29 | before do 30 | File.write task_path, <<~TASK 31 | class Backfill::Dummy 32 | end 33 | TASK 34 | end 35 | 36 | specify do 37 | expect(runner.task.class).to eq(Backfill::Dummy) 38 | expect(runner.batch_size).to eq(4) 39 | expect(runner.cursor_threshold).to eq(10) 40 | expect(runner.process_method).to eq(runner.method(:process_row)) 41 | end 42 | end 43 | 44 | context 'with custom options' do 45 | before do 46 | File.write task_path, <<~TASK 47 | class Backfill::Dummy 48 | def batch_size 49 | 2000 50 | end 51 | 52 | def cursor_threshold 53 | 200000 54 | end 55 | 56 | def process_row 57 | end 58 | end 59 | TASK 60 | end 61 | 62 | specify do 63 | expect(runner.task.class).to eq(Backfill::Dummy) 64 | expect(runner.batch_size).to eq(2000) 65 | expect(runner.cursor_threshold).to eq(200_000) 66 | expect(runner.process_method).to eq(runner.task.method(:process_row)) 67 | end 68 | end 69 | end 70 | 71 | describe '#run' do 72 | subject { runner.run } 73 | 74 | let(:batch_size) { 2 } 75 | let(:cursor_threshold) { 5 } 76 | 77 | before do 78 | File.write task_path, <<~TASK 79 | class Backfill::Dummy 80 | def batch_size 81 | #{batch_size} 82 | end 83 | 84 | def cursor_threshold 85 | #{cursor_threshold} 86 | end 87 | 88 | def select_sql() 89 | 'SELECT * FROM backfiller_records WHERE full_name IS 
NULL' 90 | end 91 | 92 | def execute_sql(connection, row) 93 | 'UPDATE backfiller_records SET full_name = ' + 94 | connection.quote(row['first_name'] + ' ' + row['last_name']) + 95 | ' WHERE id = ' + connection.quote(row['id']) 96 | end 97 | end 98 | TASK 99 | end 100 | 101 | specify do 102 | ActiveRecord::Base.connection.insert_fixture( 103 | [ 104 | { 105 | first_name: 'Jon', 106 | last_name: 'Snow' 107 | }, 108 | { 109 | first_name: 'Aria', 110 | last_name: 'Stark' 111 | }, 112 | { 113 | first_name: 'George', 114 | last_name: 'Martin', 115 | full_name: 'George R. R. Martin' 116 | } 117 | ], :backfiller_records 118 | ) 119 | 120 | ActiveRecord::Base.logger.reset 121 | 122 | subject 123 | 124 | messages = ActiveRecord::Base.logger.messages 125 | expect(messages[0]).to eq('[Backfiller] Build dummy task') 126 | expect(messages[1]).to eq('[Backfiller] Start cursor loop') 127 | 128 | expect(messages[2]).to eq('[Backfiller] Open cursor') 129 | expect(messages[3]).to match(/TRANSACTION \(.*\) BEGIN/) 130 | expect(messages[4]).to include( 131 | 'DECLARE backfill_cursor NO SCROLL CURSOR WITHOUT HOLD FOR ' \ 132 | 'SELECT * FROM backfiller_records WHERE full_name IS NULL' 133 | ) 134 | expect(messages[5]).to eq('[Backfiller] Start fetch loop') 135 | expect(messages[6]).to include('FETCH 2 FROM backfill_cursor') 136 | expect(messages[7]).to include( 137 | "UPDATE backfiller_records SET full_name = 'Jon Snow' WHERE id = 1" 138 | ) 139 | expect(messages[8]).to include( 140 | "UPDATE backfiller_records SET full_name = 'Aria Stark' WHERE id = 2" 141 | ) 142 | expect(messages[9]).to eq('[Backfiller] Processed 2') 143 | expect(messages[10]).to include('FETCH 2 FROM backfill_cursor') 144 | expect(messages[11]).to eq('[Backfiller] Close cursor') 145 | expect(messages[12]).to include('CLOSE backfill_cursor') 146 | expect(messages[13]).to match(/TRANSACTION \(.*\) COMMIT/) 147 | expect(messages[14]).to eq('[Backfiller] Total processed 2') 148 | 149 | expect( 150 | 
ActiveRecord::Base.connection.select_all('SELECT * FROM backfiller_records ORDER BY id').to_a 151 | ).to eq( 152 | [ 153 | { 154 | 'id' => 1, 155 | 'first_name' => 'Jon', 156 | 'last_name' => 'Snow', 157 | 'full_name' => 'Jon Snow' 158 | }, 159 | { 160 | 'id' => 2, 161 | 'first_name' => 'Aria', 162 | 'last_name' => 'Stark', 163 | 'full_name' => 'Aria Stark' 164 | }, 165 | { 166 | 'id' => 3, 167 | 'first_name' => 'George', 168 | 'last_name' => 'Martin', 169 | 'full_name' => 'George R. R. Martin' 170 | } 171 | ] 172 | ) 173 | end 174 | 175 | context 'cursor threshold' do 176 | let(:messages) do 177 | ActiveRecord::Base.logger.messages 178 | end 179 | 180 | before do 181 | ActiveRecord::Base.connection.insert_fixture( 182 | [ 183 | { first_name: 'First01', last_name: 'Last01' }, 184 | { first_name: 'First02', last_name: 'Last02' }, 185 | { first_name: 'First03', last_name: 'Last03' }, 186 | { first_name: 'First04', last_name: 'Last04' }, 187 | { first_name: 'First05', last_name: 'Last05' }, 188 | { first_name: 'First06', last_name: 'Last06' }, 189 | { first_name: 'First07', last_name: 'Last07' } 190 | ], :backfiller_records 191 | ) 192 | 193 | ActiveRecord::Base.logger.reset 194 | 195 | subject 196 | end 197 | 198 | shared_examples :single_cursor_session do 199 | specify do 200 | expect(messages.size).to eq(26) 201 | 202 | expect(messages[0]).to eq('[Backfiller] Build dummy task') 203 | expect(messages[1]).to eq('[Backfiller] Start cursor loop') 204 | 205 | expect(messages[2]).to eq('[Backfiller] Open cursor') 206 | expect(messages[3]).to match(/TRANSACTION \(.*\) BEGIN/) 207 | expect(messages[4]).to include('DECLARE backfill_cursor') 208 | expect(messages[5]).to eq('[Backfiller] Start fetch loop') 209 | expect(messages[6]).to include('FETCH 2 FROM backfill_cursor') 210 | expect(messages[7]).to include("UPDATE backfiller_records SET full_name = 'First01 Last01' WHERE id = 1") 211 | expect(messages[8]).to include("UPDATE backfiller_records SET full_name = 'First02 
Last02' WHERE id = 2") 212 | expect(messages[9]).to eq('[Backfiller] Processed 2') 213 | expect(messages[10]).to include('FETCH 2 FROM backfill_cursor') 214 | expect(messages[11]).to include("UPDATE backfiller_records SET full_name = 'First03 Last03' WHERE id = 3") 215 | expect(messages[12]).to include("UPDATE backfiller_records SET full_name = 'First04 Last04' WHERE id = 4") 216 | expect(messages[13]).to eq('[Backfiller] Processed 4') 217 | expect(messages[14]).to include('FETCH 2 FROM backfill_cursor') 218 | expect(messages[15]).to include("UPDATE backfiller_records SET full_name = 'First05 Last05' WHERE id = 5") 219 | expect(messages[16]).to include("UPDATE backfiller_records SET full_name = 'First06 Last06' WHERE id = 6") 220 | expect(messages[17]).to eq('[Backfiller] Processed 6') 221 | expect(messages[18]).to include('FETCH 2 FROM backfill_cursor') 222 | expect(messages[19]).to include("UPDATE backfiller_records SET full_name = 'First07 Last07' WHERE id = 7") 223 | expect(messages[20]).to eq('[Backfiller] Processed 7') 224 | expect(messages[21]).to include('FETCH 2 FROM backfill_cursor') 225 | expect(messages[22]).to eq('[Backfiller] Close cursor') 226 | expect(messages[23]).to include('CLOSE backfill_cursor') 227 | expect(messages[24]).to match(/TRANSACTION \(.*\) COMMIT/) 228 | expect(messages[25]).to eq('[Backfiller] Total processed 7') 229 | end 230 | end 231 | 232 | context 'nil' do 233 | let(:cursor_threshold) { nil } 234 | 235 | include_examples :single_cursor_session 236 | end 237 | 238 | context 'large' do 239 | let(:cursor_threshold) { 8 } 240 | 241 | include_examples :single_cursor_session 242 | end 243 | 244 | context 'small' do 245 | specify do 246 | expect(messages.size).to eq(34) 247 | 248 | expect(messages[0]).to eq('[Backfiller] Build dummy task') 249 | expect(messages[1]).to eq('[Backfiller] Start cursor loop') 250 | 251 | expect(messages[2]).to eq('[Backfiller] Open cursor') 252 | expect(messages[3]).to match(/TRANSACTION \(.*\) BEGIN/) 253 
| expect(messages[4]).to include('DECLARE backfill_cursor') 254 | expect(messages[5]).to eq('[Backfiller] Start fetch loop') 255 | expect(messages[6]).to include('FETCH 2 FROM backfill_cursor') 256 | expect(messages[7]).to include("UPDATE backfiller_records SET full_name = 'First01 Last01' WHERE id = 1") 257 | expect(messages[8]).to include("UPDATE backfiller_records SET full_name = 'First02 Last02' WHERE id = 2") 258 | expect(messages[9]).to eq('[Backfiller] Processed 2') 259 | expect(messages[10]).to include('FETCH 2 FROM backfill_cursor') 260 | expect(messages[11]).to include("UPDATE backfiller_records SET full_name = 'First03 Last03' WHERE id = 3") 261 | expect(messages[12]).to include("UPDATE backfiller_records SET full_name = 'First04 Last04' WHERE id = 4") 262 | expect(messages[13]).to eq('[Backfiller] Processed 4') 263 | expect(messages[14]).to include('FETCH 2 FROM backfill_cursor') 264 | expect(messages[15]).to include("UPDATE backfiller_records SET full_name = 'First05 Last05' WHERE id = 5") 265 | expect(messages[16]).to include("UPDATE backfiller_records SET full_name = 'First06 Last06' WHERE id = 6") 266 | expect(messages[17]).to eq('[Backfiller] Processed 6') 267 | expect(messages[18]).to eq('[Backfiller] Close cursor') 268 | expect(messages[19]).to include('CLOSE backfill_cursor') 269 | expect(messages[20]).to match(/TRANSACTION \(.*\) COMMIT/) 270 | expect(messages[21]).to eq('[Backfiller] Total processed 6') 271 | 272 | expect(messages[22]).to eq('[Backfiller] Open cursor') 273 | expect(messages[23]).to match(/TRANSACTION \(.*\) BEGIN/) 274 | expect(messages[24]).to include('DECLARE backfill_cursor') 275 | expect(messages[25]).to eq('[Backfiller] Start fetch loop') 276 | expect(messages[26]).to include('FETCH 2 FROM backfill_cursor') 277 | expect(messages[27]).to include("UPDATE backfiller_records SET full_name = 'First07 Last07' WHERE id = 7") 278 | expect(messages[28]).to eq('[Backfiller] Processed 1') 279 | expect(messages[29]).to include('FETCH 
2 FROM backfill_cursor') 280 | expect(messages[30]).to eq('[Backfiller] Close cursor') 281 | expect(messages[31]).to include('CLOSE backfill_cursor') 282 | expect(messages[32]).to match(/TRANSACTION \(.*\) COMMIT/) 283 | expect(messages[33]).to eq('[Backfiller] Total processed 7') 284 | end 285 | end 286 | end 287 | 288 | describe 'backfill raises an exception' do 289 | before do 290 | File.write task_path, <<~TASK 291 | class Backfill::Dummy 292 | 293 | def select_sql() 294 | raise RuntimeError, 'failed backfill' 295 | end 296 | 297 | def execute_sql(connection, row) 298 | raise RuntimeError, 'failed backfill' 299 | end 300 | end 301 | TASK 302 | end 303 | 304 | it 'connections are returned to the pool' do 305 | expect { subject }.to raise_error(RuntimeError, 'failed backfill') 306 | .and not_change { ActiveRecord::Base.connection_pool.stat[:busy] } 307 | end 308 | end 309 | end 310 | end 311 | --------------------------------------------------------------------------------