├── .about.yml
├── .gitignore
├── .travis.yml
├── CONTRIBUTING.md
├── Gemfile
├── LICENSE.md
├── README.md
├── Rakefile
├── elasticsearch-rails-ha.gemspec
├── lib
    └── elasticsearch
    │   └── rails
    │       ├── ha.rb
    │       └── ha
    │           ├── index_stager.rb
    │           ├── parallel_indexer.rb
    │           ├── tasks.rb
    │           └── version.rb
└── spec
    ├── es_helper.rb
    ├── index_stager_spec.rb
    ├── parallel_indexer_spec.rb
    ├── spec_helper.rb
    └── temp_db_helper.rb


/.about.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | # .about.yml project metadata
 3 | #
 4 | # Short name that acts as the project identifier (required)
 5 | name: elasticsearch-rails-ha-gem
 6 | 
 7 | # Full proper name of the project (required)
 8 | full_name: Elasticsearch Rails high availability extensions RubyGem
 9 | 
10 | # The type of content in the repo
11 | # values: app, docs, policy
12 | type: app
13 | 
14 | # Describes whether a project team, working group/guild, etc. owns the repo (required)
15 | # values: guild, working-group, project
16 | owner_type: project
17 | 
18 | # Name of the main project repo if this is a sub-repo; name of the working group/guild repo if this is a working group/guild subproject
19 | #parent:
20 | 
21 | # Maturity stage of the project (required)
22 | # values: discovery, alpha, beta, live
23 | stage: beta
24 | 
25 | # Whether or not the project is actively maintained (required)
26 | # values: active, deprecated
27 | status: active
28 | 
29 | # Description of the project
30 | description: >
31 |   extensions to the standard Elasticsearch::Rails Rake tasks
32 | # Should be 'true' if the project has a continuous build (required)
33 | # values: true, false
34 | testable: true
35 | 
36 | # Team members contributing to the project (required)
37 | # Items:
38 | # - github: GitHub user name
39 | #   id: Internal team identifier/user name
40 | #   role: Team member's role; leads should be designated as 'lead'
41 | team:
42 | - github: pkarman
43 |   role: lead
44 | 
45 | # Partners for whom the project is developed
46 | #partners:
47 | #- 
48 | 
49 | # Brief descriptions of significant project developments
50 | #milestones:
51 | #- 
52 | 
53 | # Technologies used to build the project
54 | stack:
55 | - Ruby
56 | 
57 | # Brief description of the project's outcomes
58 | #impact:
59 | 
60 | # Services used to supply project status information
61 | # Items:
62 | # - name: Name of the service
63 | #   category: Type of the service
64 | #   url: URL for detailed information
65 | #   badge: URL for the status badge
66 | #services:
67 | #- 
68 | 
69 | # Licenses that apply to the project and/or its components (required)
70 | # Items by property name pattern:
71 | #   .*:
72 | #     name: Name of the license from the Software Package Data Exchange (SPDX): https://spdx.org/licenses/
73 | #     url: URL for the text of the license
74 | licenses:
75 |   'elasticsearch-rails-ha-gem':
76 |     name: CC0
77 |     url: https://github.com/18F/elasticsearch-rails-ha-gem/blob/master/LICENSE.md
78 | 
79 | # Blogs or websites associated with project development
80 | #blog:
81 | #- 
82 | 
83 | # Links to project artifacts
84 | # Items:
85 | # - url: URL for the link
86 | #   text: Anchor text for the link
87 | links:
88 | - url: https://rubygems.org/gems/elasticsearch-rails-ha
89 |   text: elasticsearch-rails-ha RubyGem
90 | 
91 | # Email addresses of points-of-contact
92 | contact:
93 | - url: mailto:peter.karman@gsa.gov
94 |   text: Peter Karman
95 | - url: https://github.com/18F/elasticsearch-rails-ha-gem/issues
96 |   text: 18F/elasticsearch-rails-ha-gem issues
97 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .*.swp
 2 | *.gem
 3 | /coverage/
 4 | /pkg/
 5 | 
 6 | # for a library or gem, you might want to ignore these files since the code is
 7 | # intended to run in multiple environments; otherwise, check them in:
 8 | Gemfile.lock
 9 | .ruby-version
10 | .ruby-gemset
11 | 
12 | # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
13 | .rvmrc
14 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: ruby
2 | rvm:
3 |   - 2.0
4 |   - 2.1
5 |   - 2.2
6 | services:
7 |   - elasticsearch
8 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | ## Welcome!
 2 | 
 3 | We're so glad you're thinking about contributing to an 18F open source project! If you're unsure or afraid of anything, just ask or submit the issue or pull request anyway. The worst that can happen is that you'll be politely asked to change something. We appreciate any sort of contribution, and don't want a wall of rules to get in the way of that.
 4 | 
 5 | Before contributing, we encourage you to read our CONTRIBUTING policy (you are here), our LICENSE, and our README, all of which should be in this repository. If you have any questions, or want to read more about our underlying policies, you can consult the 18F Open Source Policy GitHub repository at https://github.com/18f/open-source-policy, or just shoot us an email/official government letterhead note to [18f@gsa.gov](mailto:18f@gsa.gov).
 6 | 
 7 | ## Public domain
 8 | 
 9 | This project is in the public domain within the United States, and
10 | copyright and related rights in the work worldwide are waived through
11 | the [CC0 1.0 Universal public domain dedication](https://creativecommons.org/publicdomain/zero/1.0/).
12 | 
13 | All contributions to this project will be released under the CC0
14 | dedication. By submitting a pull request, you are agreeing to comply
15 | with this waiver of copyright interest.
16 | 


--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
1 | source 'https://rubygems.org'
2 | 
3 | gemspec
4 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | As a work of the United States Government, this project is in the
 2 | public domain within the United States.
 3 | 
 4 | Additionally, we waive copyright and related rights in the work
 5 | worldwide through the CC0 1.0 Universal public domain dedication.
 6 | 
 7 | ## CC0 1.0 Universal Summary
 8 | 
 9 | This is a human-readable summary of the [Legal Code (read the full text)](https://creativecommons.org/publicdomain/zero/1.0/legalcode).
10 | 
11 | ### No Copyright
12 | 
13 | The person who associated a work with this deed has dedicated the work to
14 | the public domain by waiving all of his or her rights to the work worldwide
15 | under copyright law, including all related and neighboring rights, to the
16 | extent allowed by law.
17 | 
18 | You can copy, modify, distribute and perform the work, even for commercial
19 | purposes, all without asking permission.
20 | 
21 | ### Other Information
22 | 
23 | In no way are the patent or trademark rights of any person affected by CC0,
24 | nor are the rights that other persons may have in the work or in how the
25 | work is used, such as publicity or privacy rights.
26 | 
27 | Unless expressly stated otherwise, the person who associated a work with
28 | this deed makes no warranties about the work, and disclaims liability for
29 | all uses of the work, to the fullest extent permitted by applicable law.
30 | When using or citing the work, you should not imply endorsement by the
31 | author or the affirmer.
32 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # elasticsearch-rails-ha RubyGem
 2 | 
 3 | [![Build Status](https://travis-ci.org/18F/elasticsearch-rails-ha-gem.svg?branch=master)](https://travis-ci.org/18F/elasticsearch-rails-ha-gem)
 4 | 
 5 | Elasticsearch for Rails, high availability extensions.
 6 | 
 7 | See also:
 8 | 
 9 | * [elasticsearch-rails](https://github.com/elastic/elasticsearch-rails)
10 | 
11 | ## Examples
12 | 
13 | Add the high availability tasks to your Rake task file `lib/tasks/elasticsearch.rake`:
14 | 
15 | ```
16 | require 'elasticsearch/rails/ha/tasks'
17 | ```
18 | 
19 | Import all the Articles on a machine with 4 cores available:
20 | 
21 | ```
22 | % bundle exec rake environment elasticsearch:ha:import NPROCS=4 CLASS='Article'
23 | ```
24 | 
25 | Stage an index alongside your live index, but do not make it live yet:
26 | 
27 | ```
28 | % bundle exec rake environment elasticsearch:ha:stage NPROCS=4 CLASS='Article'
29 | ```
30 | 
31 | Promote your staged index:
32 | 
33 | ```
34 | % bundle exec rake environment elasticsearch:ha:promote CLASS='Article'
35 | ```
36 | 
37 | ## Acknowledgements
38 | 
39 | Thanks to [Pop Up Archive](http://popuparchive.com/) for
40 | contributing the [original version of this code](https://github.com/popuparchive/pop-up-archive/blob/master/lib/tasks/search.rake) to the public domain.
41 | 
42 | ## Public domain
43 | 
44 | This project is in the worldwide [public domain](LICENSE.md). As stated in [CONTRIBUTING](CONTRIBUTING.md):
45 | 
46 | > This project is in the public domain within the United States, and copyright and related rights in the work worldwide are waived through the [CC0 1.0 Universal public domain dedication](https://creativecommons.org/publicdomain/zero/1.0/).
47 | >
48 | > All contributions to this project will be released under the CC0
49 | > dedication. By submitting a pull request, you are agreeing to comply
50 | > with this waiver of copyright interest.
51 | 


--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
1 | require 'bundler/gem_tasks'
2 | require 'rspec/core/rake_task'
3 | 
4 | RSpec::Core::RakeTask.new
5 | 
6 | task default: :spec
7 | task test: :spec
8 | 


--------------------------------------------------------------------------------
/elasticsearch-rails-ha.gemspec:
--------------------------------------------------------------------------------
 1 | # coding: utf-8
 2 | lib = File.expand_path('../lib', __FILE__)
 3 | $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
 4 | require 'elasticsearch/rails/ha/version'
 5 | 
 6 | Gem::Specification.new do |s|
 7 |   s.name          = 'elasticsearch-rails-ha'
 8 |   s.version       = Elasticsearch::Rails::HA::VERSION
 9 |   s.authors       = ['Peter Karman']
10 |   s.email         = ['peter.karman@gsa.gov']
11 |   s.summary       = 'High Availability extensions to the Elasticsearch::Rails gem'
12 |   s.description   = (
13 |     'High Availability extensions to the Elasticsearch::Rails gem'
14 |   )
15 |   s.homepage      = 'https://github.com/18F/elasticsearch-rails-ha-gem'
16 |   s.license       = 'CC0'
17 | 
18 |   s.files         = `git ls-files -z *.md bin lib`.split("\x0") + [
19 |   ]
20 |   s.executables   = s.files.grep(%r{^bin/}) { |f| File.basename(f) }
21 | 
22 |   s.add_runtime_dependency 'elasticsearch-model'
23 |   s.add_runtime_dependency 'elasticsearch-rails'
24 |   s.add_runtime_dependency 'elasticsearch-indexstager'
25 |   s.add_runtime_dependency 'ansi'
26 | 
27 |   s.required_ruby_version = ">= 2.0.0"
28 |   s.add_development_dependency 'about_yml'
29 |   s.add_development_dependency "bundler", "~> 1.3"
30 |   s.add_development_dependency "rake"
31 |   s.add_development_dependency "rspec"
32 | 
33 |   s.add_development_dependency "elasticsearch-extensions"
34 |   s.add_development_dependency "sqlite3"
35 |   s.add_development_dependency "rails",  ">= 3.1"
36 | end
37 | 


--------------------------------------------------------------------------------
/lib/elasticsearch/rails/ha.rb:
--------------------------------------------------------------------------------
1 | require_relative 'ha/version'
2 | require_relative 'ha/parallel_indexer'
3 | require_relative 'ha/index_stager'
4 | 


--------------------------------------------------------------------------------
/lib/elasticsearch/rails/ha/index_stager.rb:
--------------------------------------------------------------------------------
 1 | require 'elasticsearch/index_stager'
 2 | 
 3 | module Elasticsearch
 4 |   module Rails
 5 |     module HA
 6 |       class IndexStager < Elasticsearch::IndexStager
 7 |         attr_reader :klass, :live_index_name
 8 | 
 9 |         def initialize(klass)
10 |           @klass = klass.constantize
11 |           @index_name = @klass.index_name
12 |           @es_client = @klass.__elasticsearch__.client
13 |         end
14 | 
15 |         def stage_index_name
16 |           if klass.respond_to?(:stage_index_name)
17 |             klass.stage_index_name
18 |           else
19 |             index_name + "_staged"
20 |           end
21 |         end
22 |       end
23 |     end
24 |   end
25 | end
26 | 


--------------------------------------------------------------------------------
/lib/elasticsearch/rails/ha/parallel_indexer.rb:
--------------------------------------------------------------------------------
  1 | require 'active_record/base'
  2 | require 'ansi'
  3 | require 'pp'
  4 | 
  5 | module Elasticsearch
  6 |   module Rails
  7 |     module HA
  8 |       class ParallelIndexer
  9 | 
 10 |         attr_reader :klass, :idx_name, :nprocs, :batch_size, :max, :force, :verbose, :scope
 11 | 
 12 |         # leverage multiple cores to run indexing in parallel
 13 |         def initialize(opts)
 14 |           @klass    = opts[:klass] or fail "klass required"
 15 |           @idx_name = opts[:idx_name] or fail "idx_name required"
 16 |           @nprocs   = opts[:nprocs] or fail "nprocs required"
 17 |           @batch_size = opts[:batch_size] or fail "batch_size required"
 18 |           @max        = opts[:max]
 19 |           @force      = opts[:force]
 20 |           @verbose    = opts[:verbose]
 21 |           @scope      = opts[:scope]
 22 | 
 23 |           # make sure klass is not a simple string
 24 |           if @klass.is_a?(String)
 25 |             @klass = @klass.constantize
 26 |           end
 27 | 
 28 |           # calculate array of offsets based on nprocs
 29 |           @total_expected = klass.count
 30 |           @pool_size = (@total_expected / @nprocs.to_f).ceil
 31 |         end
 32 | 
 33 |         def run
 34 |           return if @pool_size < 1
 35 | 
 36 |           # get all ids since we can't assume there are no holes in the PK sequencing
 37 |           ids = klass.order('id ASC').pluck(:id)
 38 |           offsets = []
 39 |           ids.each_slice(@pool_size) do |chunk|
 40 |             #puts "chunk: size=#{chunk.size} #{chunk.first}..#{chunk.last}"
 41 |             offsets.push( chunk.first )
 42 |           end
 43 |           if @verbose
 44 |             puts ::ANSI.blue{ "Parallel Indexer: index=#{@idx_name} total=#{@total_expected} nprocs=#{@nprocs} pool_size=#{@pool_size} offsets=#{offsets} " }
 45 |           end
 46 | 
 47 |           if @force
 48 |             @verbose and puts ::ANSI.blue{ "Force creating new index" }
 49 |             klass.__elasticsearch__.create_index! force: true, index: idx_name
 50 |             klass.__elasticsearch__.refresh_index! index: idx_name
 51 |           end
 52 | 
 53 |           @current_db_config = ::ActiveRecord::Base.connection_config
 54 |           # IMPORTANT before forks in offsets loop
 55 |           ::ActiveRecord::Base.connection.disconnect!
 56 | 
 57 |           child_pids = []
 58 |           offsets.each do |start_at|
 59 |             child_pid = fork do
 60 |               run_child(start_at)
 61 |             end
 62 |             if child_pid
 63 |               child_pids << child_pid
 64 |             end
 65 |           end
 66 | 
 67 |           # reconnect in parent
 68 |           ::ActiveRecord::Base.establish_connection(@current_db_config)
 69 | 
 70 |           # Process.waitall seems to hang during tests. Do it manually.
 71 |           child_results = []
 72 | 
 73 |           child_pids.each do |pid|
 74 |             Process.wait(pid)
 75 |             child_results.push [pid, $?]
 76 |           end
 77 | 
 78 |           process_child_results(child_results)
 79 |         end
 80 | 
 81 |         def process_child_results(results)
 82 |           # check exit status of each child so we know if we should throw exception
 83 |           results.each do |pair|
 84 |             pid = pair[0]
 85 |             pstat = pair[1]
 86 |             exit_ok = true
 87 |             if pstat.exited?
 88 |               @verbose and puts ::ANSI.blue{ "PID #{pid} exited with #{pstat.exitstatus}" }
 89 |             end
 90 |             if pstat.signaled?
 91 |               puts ::ANSI.red{ " >> #{pid} exited with uncaught signal #{pstat.termsig}" }
 92 |               exit_ok = false
 93 |             end
 94 | 
 95 |             if !pstat.success?
 96 |               puts ::ANSI.red{ " >> #{pid} was not successful" }
 97 |               exit_ok = false
 98 |             end
 99 | 
100 |             if pair[1].exitstatus != 0
101 |               puts ::ANSI.red{ " >> #{pid} exited with non-zero status" }
102 |               exit_ok = false
103 |             end
104 | 
105 |             if !exit_ok
106 |               raise ::ANSI.red{ "PID #{pair[0]} exited abnormally, so the whole reindex fails" }
107 |             end
108 |           end
109 |         end
110 | 
111 |         def run_child(start_at)
112 |           # IMPORTANT after fork
113 |           ::ActiveRecord::Base.establish_connection(@current_db_config)
114 | 
115 |           # IMPORTANT for tests to determine whether at_end should run
116 |           ENV["I_AM_HA_CHILD"] = "true"
117 | 
118 |           completed = 0
119 |           errors    = []
120 |           @verbose and puts ::ANSI.blue{ "Start worker #{$$} at offset #{start_at}" }
121 |           pbar = ::ANSI::Progressbar.new("#{klass} [#{$$}]", @pool_size, STDOUT) rescue nil
122 |           checkpoint = false
123 |           if pbar
124 |             win_width = pbar.__send__ :get_width
125 |             title_width = (win_width / 4).to_i
126 |             pbar.format("#{klass} [#{$$}]: %3d%% %s %s", :percentage, :bar, :stat)
127 |             pbar.__send__ :show
128 |             pbar.bar_mark = '='
129 |           else
130 |             checkpoint = true
131 |           end
132 | 
133 |           @klass.__elasticsearch__.import return: 'errors',
134 |             index: @idx_name,
135 |             start: start_at,
136 |             scope: @scope,
137 |             batch_size: @batch_size    do |resp|
138 |               # show errors immediately (rather than buffering them)
139 |               errors += resp['items'].select { |k, v| k.values.first['error'] }
140 |               completed += resp['items'].size
141 |               if pbar && @verbose
142 |                 pbar.inc resp['items'].size
143 |               end
144 |               if checkpoint && @verbose
145 |                 puts ::ANSI.blue{ "[#{$$}] #{Time.now.utc.iso8601} : #{completed} records completed" }
146 |               end
147 |               STDERR.flush
148 |               STDOUT.flush
149 |               if errors.size > 0
150 |                 STDOUT.puts "ERRORS in #{$$}:"
151 |                 STDOUT.puts errors.pretty_inspect
152 |               end
153 |               if completed >= @pool_size || (@max && @max.to_i == completed)
154 |                 pbar.finish if pbar
155 |                 @verbose and puts ::ANSI.blue{ "Worker #{$$} finished #{completed} records" }
156 |                 exit!(true) # exit child worker
157 |               end
158 |             end # end do |resp| block
159 |         end
160 | 
161 |       end
162 |     end
163 |   end
164 | end
165 | 


--------------------------------------------------------------------------------
/lib/elasticsearch/rails/ha/tasks.rb:
--------------------------------------------------------------------------------
 1 | # Rake tasks to make parallel indexing and high availability easier.
 2 | 
 3 | require 'elasticsearch/rails/ha'
 4 | 
 5 | namespace :elasticsearch do
 6 |   namespace :ha do
 7 | 
 8 |     desc "import records in parallel"
 9 |     task :import do
10 |       nprocs     = ENV['NPROCS'] || 1
11 |       batch_size = ENV['BATCH']  || 100
12 |       max        = ENV['MAX']    || nil
13 |       klass      = ENV['CLASS'] or fail "CLASS required"
14 | 
15 |       indexer = Elasticsearch::Rails::HA::ParallelIndexer.new(
16 |         klass: klass.constantize,
17 |         idx_name: (ENV['INDEX'] || klass.constantize.index_name),
18 |         nprocs: nprocs.to_i,
19 |         batch_size: batch_size.to_i,
20 |         max: max,
21 |         scope: ENV.fetch('SCOPE', nil),
22 |         force: ENV['FORCE'],
23 |         verbose: !ENV['QUIET']
24 |       )
25 | 
26 |       indexer.run
27 |     end
28 | 
29 |     desc "stage an index"
30 |     task :stage do
31 |       nprocs     = ENV['NPROCS'] || 1
32 |       batch_size = ENV['BATCH']  || 100
33 |       max        = ENV['MAX']    || nil
34 |       klass      = ENV['CLASS'] or fail "CLASS required"
35 | 
36 |       stager = Elasticsearch::Rails::HA::IndexStager.new(klass)
37 |       indexer = Elasticsearch::Rails::HA::ParallelIndexer.new(
38 |         klass: stager.klass,
39 |         idx_name: (ENV['INDEX'] || stager.tmp_index_name),
40 |         nprocs: nprocs.to_i,
41 |         batch_size: batch_size.to_i,
42 |         max: max,
43 |         scope: ENV.fetch('SCOPE', nil),
44 |         force: true,
45 |         verbose: !ENV['QUIET']
46 |       )
47 |       indexer.run
48 |       stager.alias_stage_to_tmp_index
49 |       puts "[#{Time.now.utc.iso8601}] #{klass} index staged as #{stager.stage_index_name}"
50 |     end
51 | 
52 |     desc "promote staged index to live"
53 |     task :promote do
54 |       klass = ENV['CLASS'] or fail "CLASS required"
55 |       stager = Elasticsearch::Rails::HA::IndexStager.new(klass)
56 |       stager.promote(ENV['INDEX'])
57 |       puts "[#{Time.now.utc.iso8601}] #{klass} promoted #{stager.stage_index_name} to #{stager.live_index_name}"
58 |     end
59 | 
60 |   end
61 | end
62 | 


--------------------------------------------------------------------------------
/lib/elasticsearch/rails/ha/version.rb:
--------------------------------------------------------------------------------
1 | module Elasticsearch
2 |   module Rails
3 |     module HA
4 |       VERSION = '1.0.9' # gem version string; read by elasticsearch-rails-ha.gemspec
5 |     end
6 |   end
7 | end
8 | 


--------------------------------------------------------------------------------
/spec/es_helper.rb:
--------------------------------------------------------------------------------
 1 | require 'elasticsearch/extensions/test/cluster'
 2 | require 'elasticsearch/extensions/test/startup_shutdown'
 3 | 
 4 | class ESHelper
 5 |   def self.setup
 6 |     logger = ::Logger.new(STDERR)
 7 |     logger.formatter = lambda { |s, d, p, m| "#{m.ansi(:faint, :cyan)}\n" }
 8 |     ActiveRecord::Base.logger = logger unless ENV['QUIET']
 9 |     ActiveRecord::LogSubscriber.colorize_logging = false
10 |     ActiveRecord::Migration.verbose = false
11 |     tracer = ::Logger.new(STDERR)
12 |     tracer.formatter = lambda { |s, d, p, m| "#{m.gsub(/^.*$/) { |n| '   ' + n }.ansi(:faint)}\n" }
13 |     es_host = "localhost:#{(ENV['TEST_CLUSTER_PORT'] || 9250)}"
14 |     Elasticsearch::Model.client = Elasticsearch::Client.new host: es_host, tracer: (ENV['QUIET'] ? nil : tracer)
15 |   end
16 | 
17 |   def self.startup
18 |     unless ENV["ES_SKIP"] || Elasticsearch::Extensions::Test::Cluster.running?
19 |       Elasticsearch::Extensions::Test::Cluster.start(nodes: 1)
20 |     end
21 |   end
22 | 
23 |   def self.shutdown
24 |     unless ENV["I_AM_HA_CHILD"]
25 |       Elasticsearch::Extensions::Test::Cluster.stop if Elasticsearch::Extensions::Test::Cluster.running?
26 |     end
27 |   end
28 | 
29 |   def self.client
30 |     Elasticsearch::Model.client
31 |   end
32 | end
33 | 


--------------------------------------------------------------------------------
/spec/index_stager_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | require 'pp'
 3 | 
 4 | describe Elasticsearch::Rails::HA::IndexStager do
 5 | 
 6 |   after(:each) do
 7 |     ESHelper.client.indices.delete index: "articles_staged" rescue false # best-effort cleanup; any error is swallowed
 8 |   end
 9 | 
10 |   it "generates index names" do
11 |     stager = Elasticsearch::Rails::HA::IndexStager.new('Article')
12 |     expect(stager.stage_index_name).to eq "articles_staged"
13 |     expect(stager.tmp_index_name).to match(/^articles_\d{14}-\w{8}$/) # index name, 14 digits, dash, 8 word chars
14 |   end
15 | 
16 |   it "stages an index" do
17 |     stager = stage_index
18 |     aliases = ESHelper.client.indices.get_aliases(index: stager.stage_index_name)
19 |     expect(aliases.keys.size).to eq 1
20 |     expect(aliases.keys[0]).to eq stager.tmp_index_name
21 |   end
22 | 
23 |   it "promotes a staged index to live" do
24 |     stager = stage_index
25 |     stager.promote
26 |     Article.__elasticsearch__.refresh_index!
27 | 
28 |     response = Article.search('title:test')
29 |     expect(response.results.size).to eq 2
30 | 
31 |     aliases = ESHelper.client.indices.get_aliases(index: Article.index_name)
32 |     expect(aliases.keys[0]).to eq stager.tmp_index_name
33 |   end
34 | 
35 |   it "handles first-time migration to staged paradigm" do
36 |     live_indexer = Elasticsearch::Rails::HA::ParallelIndexer.new( # index straight into the live index name first
37 |       klass: 'Article',
38 |       idx_name: Article.index_name,
39 |       nprocs: 1,
40 |       batch_size: 5,
41 |       force: true,
42 |       verbose: !ENV['QUIET']
43 |     )
44 |     live_indexer.run
45 | 
46 |     stager = stage_index
47 |     stager.promote
48 |     Article.__elasticsearch__.refresh_index!
49 | 
50 |     aliases = ESHelper.client.indices.get_aliases(index: Article.index_name)
51 |     expect(aliases.keys[0]).to eq stager.tmp_index_name
52 |   end
53 | 
54 |   def stage_index # helper: index Article into the stager's tmp index, alias stage to it, return the stager
55 |     stager = Elasticsearch::Rails::HA::IndexStager.new('Article')
56 |     indexer = Elasticsearch::Rails::HA::ParallelIndexer.new(
57 |       klass: stager.klass,
58 |       idx_name: stager.tmp_index_name,
59 |       nprocs: 1,
60 |       batch_size: 5,
61 |       force: true,
62 |       verbose: !ENV['QUIET']
63 |     )
64 |     indexer.run
65 |     stager.alias_stage_to_tmp_index
66 |     stager
67 |   end
68 | end
69 | 


--------------------------------------------------------------------------------
/spec/parallel_indexer_spec.rb:
--------------------------------------------------------------------------------
 1 | require 'spec_helper'
 2 | 
 3 | describe Elasticsearch::Rails::HA::ParallelIndexer do
 4 |   it "creates index using parallel indexers" do
 5 |     indexer = Elasticsearch::Rails::HA::ParallelIndexer.new(
 6 |       klass: Article,
 7 |       idx_name: Article.index_name,
 8 |       nprocs: 2,
 9 |       batch_size: 2,
10 |       verbose: !ENV["QUIET"]
11 |     )
12 |     Article.__elasticsearch__.create_index! force: true # :force is not passed to the indexer, so create the index here
13 |     indexer.run
14 |     Article.__elasticsearch__.refresh_index!
15 |     response = Article.search('title:test')
16 | 
17 |     expect(response.results.size).to eq 2
18 |   end
19 | end
20 | 


--------------------------------------------------------------------------------
/spec/spec_helper.rb:
--------------------------------------------------------------------------------
 1 | require 'ansi'
 2 | #require 'oj'
 3 | 
 4 | require 'rails/version'
 5 | require 'active_record'
 6 | require 'active_model'
 7 | 
 8 | require 'elasticsearch/model'
 9 | require 'elasticsearch/rails'
10 | require 'elasticsearch/rails/ha'
11 | 
12 | require 'temp_db_helper'
13 | require 'es_helper'
14 | 
15 | RSpec.configure do |config|
16 |   #config.profile_examples = 10
17 | 
18 |   config.order = :random # run examples in random order
19 | 
20 |   Kernel.srand config.seed # seed Kernel's RNG from RSpec's seed
21 | 
22 |   config.before(:suite) do # once before the suite: configure logging/client, then start the cluster
23 |     ESHelper.setup
24 |     ESHelper.startup
25 |   end
26 | 
27 |   config.after(:suite) do # once after the suite: stop the cluster (ESHelper skips this in forked workers)
28 |     ESHelper.shutdown
29 |   end
30 | end
31 | 


--------------------------------------------------------------------------------
/spec/temp_db_helper.rb:
--------------------------------------------------------------------------------
 1 | require 'tempfile'
 2 | 
 3 | # stub so we can setup schemas below
 4 | class Article < ActiveRecord::Base
 5 | end
 6 | 
 7 | class TempDBHelper
 8 |   @@_db_file = nil # memoized Tempfile backing the sqlite database (see db_file)
 9 | 
10 |   def self.setup # drop any existing temp db, then create the schema and seed rows
11 |     if @@_db_file
12 |       refresh_db
13 |     end
14 |     setup_schemas
15 |     seed_data
16 |   end
17 | 
18 |   def self.quiet # truthy when ENV['QUIET'] is set; used to suppress the puts below
19 |     ENV['QUIET']
20 |   end
21 | 
22 |   def self.db_file # creates the Tempfile on first use and reuses it afterwards
23 |     @@_db_file ||= Tempfile.new('elasticsearch-rails-ha-test.db')
24 |   end
25 | 
26 |   def self.refresh_db # remove the current temp file, clear the memo, reconnect
27 |     quiet or puts "Removing temp db file at #{db_file.path}"
28 |     db_file.close!
29 |     db_file.unlink
30 |     @@_db_file = nil
31 |     open_connection
32 |   end
33 | 
34 |   def self.open_connection # point ActiveRecord at the temp file via the sqlite3 adapter
35 |     quiet or puts "Opening db connection to #{db_file.path}"
36 |     ActiveRecord::Base.establish_connection( :adapter => 'sqlite3', :database => db_file.path )
37 |   end
38 | 
39 |   def self.setup_schemas # connect, then create the articles table
40 |     open_connection
41 |     Article.connection.create_table :articles do |t|
42 |       t.string   :title
43 |       t.string   :body
44 |       t.datetime :created_at, :default => 'NOW()'
45 |     end
46 |   end
47 | 
48 |   def self.seed_data # three rows; the specs expect the search 'title:test' to return 2 results
49 |     Article.delete_all
50 |     Article.create! title: 'Test',           body: ''
51 |     Article.create! title: 'Testing Coding', body: ''
52 |     Article.create! title: 'Coding',         body: ''
53 |   end
54 | end
55 | 
56 | TempDBHelper.setup # runs at load time, before Article is reopened below
57 | 
58 | # extend class with ES definitions -- must do this after setup
59 | #ActiveRecord::Base.raise_in_transactional_callbacks = true
60 | class Article < ActiveRecord::Base
61 |   include Elasticsearch::Model
62 |   include Elasticsearch::Model::Callbacks
63 |   settings index: { number_of_shards: 1, number_of_replicas: 0 } do
64 |     mapping do
65 |       indexes :title,      type: 'string', analyzer: 'snowball'
66 |       indexes :body,       type: 'string'
67 |       indexes :created_at, type: 'date'
68 |     end
69 |   end
70 | end
71 | 
72 | 


--------------------------------------------------------------------------------