├── .gitignore
├── bin
    └── eschema
├── examples
    └── db
    │   └── es
    │       └── schema
    │           ├── default.analysis.rb
    │           └── articles.schema.rb
├── lib
    ├── elastic-schema
    │   ├── schema
    │   │   ├── type.rb
    │   │   ├── settings.rb
    │   │   ├── index.rb
    │   │   ├── fields_set.rb
    │   │   ├── mappings.rb
    │   │   ├── definition.rb
    │   │   ├── analysis.rb
    │   │   ├── field.rb
    │   │   └── migration.rb
    │   ├── command.rb
    │   └── cli.rb
    ├── elastic-schema.rb
    └── utils
    │   └── hash.rb
├── elastic-schema.gemspec
├── License.txt
└── README.md


/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | Gemfile.lock
3 | .bundle
4 | bundle
5 | 


--------------------------------------------------------------------------------
/bin/eschema:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env ruby
2 | 
3 | require 'elastic-schema'
4 | ElasticSchema::CLI.new(ARGV).run!
5 | 


--------------------------------------------------------------------------------
/examples/db/es/schema/default.analysis.rb:
--------------------------------------------------------------------------------
 1 | ElasticSchema::Schema::Analysis.new do
 2 |   name :default
 3 | 
 4 |   filter :word_filter, { type: :word_delimiter }
 5 |   analyzer :lowcase_word_delimiter, {
 6 |     type:      :custom,
 7 |     tokenizer: :standard,
 8 |     filter:    %i(lowercase asciifolding word_filter)
 9 |   }
10 | end
11 | 


--------------------------------------------------------------------------------
/lib/elastic-schema/schema/type.rb:
--------------------------------------------------------------------------------
 1 | module ElasticSchema::Schema
 2 |   class Type
 3 |     attr_reader :name, :mappings, :fields
 4 | 
 5 |     def initialize(name, mappings, &block)
 6 |       @name     = name
 7 |       @mappings = mappings
 8 |       instance_eval(&block)
 9 |     end
10 | 
11 |     def field(field_name, field_type = nil, opts = {}, &block)
12 |       fields << Field.new(field_name, field_type, opts, &block)
13 |     end
14 | 
15 |     def fields
16 |       @fields ||= FieldsSet.new(self)
17 |     end
18 | 
19 |     def parent
20 |       mappings
21 |     end
22 | 
23 |     def to_hash
24 |       { name.to_s => fields.to_hash }
25 |     end
26 |   end
27 | end
28 | 


--------------------------------------------------------------------------------
/lib/elastic-schema/schema/settings.rb:
--------------------------------------------------------------------------------
 1 | module ElasticSchema::Schema
 2 |   class Settings
 3 |     attr_accessor :analysis
 4 |     attr_reader   :index
 5 | 
 6 |     def initialize(index, opts = {})
 7 |       @index = index
 8 |       opts.deep_stringify_keys!
 9 | 
10 |       %w(analysis).each do |attr|
11 |         send(:"#{attr}=", opts[attr]) if opts.has_key?(attr)
12 |       end
13 |     end
14 | 
15 |     def to_hash
16 |       main_hash = {}
17 | 
18 |       if analysis && (analysis_hash = Analysis.analysis_for(analysis)).any?
19 |         main_hash.update("analysis" => analysis_hash)
20 |       end
21 | 
22 |       main_hash.any? ? { "settings" => { "index" => main_hash } } : {}
23 |     end
24 |   end
25 | end
26 | 


--------------------------------------------------------------------------------
/lib/elastic-schema/schema/index.rb:
--------------------------------------------------------------------------------
 1 | module ElasticSchema::Schema
 2 |   class Index
 3 |     attr_reader :name, :definition, :mappings, :settings
 4 | 
 5 |     def initialize(name, definition)
 6 |       @name       = name
 7 |       @definition = definition
 8 |     end
 9 | 
10 |     def analysis(name)
11 |       @settings ||= Settings.new(self, analysis: name)
12 |     end
13 | 
14 |     def type(type_name, &block)
15 |       @mappings ||= Mappings.new(self)
16 |       mappings.type(type_name, &block)
17 |     end
18 | 
19 |     def to_hash
20 |       main_hash = {}
21 |       main_hash.update(mappings.to_hash) if mappings
22 |       main_hash.update(settings.to_hash) if settings && settings.to_hash.any?
23 |       { name.to_s => main_hash }
24 |     end
25 |   end
26 | end
27 | 


--------------------------------------------------------------------------------
/lib/elastic-schema.rb:
--------------------------------------------------------------------------------
 1 | require 'elasticsearch'
 2 | require 'utils/hash'
 3 | require 'active_support/core_ext/hash'
 4 | 
 5 | module ElasticSchema
 6 |   autoload :CLI,     'elastic-schema/cli'
 7 |   autoload :Command, 'elastic-schema/command'
 8 | 
 9 |   module Schema
10 |     autoload :Definition, 'elastic-schema/schema/definition'
11 |     autoload :Migration,  'elastic-schema/schema/migration'
12 |     autoload :Field,      'elastic-schema/schema/field'
13 |     autoload :FieldsSet,  'elastic-schema/schema/fields_set'
14 |     autoload :Mappings,   'elastic-schema/schema/mappings'
15 |     autoload :Type,       'elastic-schema/schema/type'
16 |     autoload :Settings,   'elastic-schema/schema/settings'
17 |     autoload :Analysis,   'elastic-schema/schema/analysis'
18 |     autoload :Index,      'elastic-schema/schema/index'
19 |   end
20 | end
21 | 


--------------------------------------------------------------------------------
/lib/elastic-schema/schema/fields_set.rb:
--------------------------------------------------------------------------------
 1 | module ElasticSchema::Schema
 2 | 
 3 |   class FieldsSet
 4 | 
 5 |     FieldAlreadyDefined = Class.new(StandardError)
 6 | 
 7 |     attr_reader :fields, :parent
 8 | 
 9 |     def initialize(parent)
10 |       @fields = []
11 |       @parent = parent
12 |     end
13 | 
14 |     def << field
15 |       field.parent = self
16 |       fail FieldAlreadyDefined.new("'#{field.full_name}' already exists.") if find(field.name)
17 |       fields << field
18 |     end
19 | 
20 |     def find(field_name)
21 |       fields.bsearch { |field| field.name == field_name }
22 |     end
23 | 
24 |     def full_name
25 |       parent.full_name
26 |     end
27 | 
28 |     def to_hash
29 |       return {} if fields.empty?
30 |       { 'properties' => fields.inject({}) { |_fields, field| _fields.update(field.to_hash) } }
31 |     end
32 |   end
33 | 
34 | end
35 | 


--------------------------------------------------------------------------------
/lib/elastic-schema/schema/mappings.rb:
--------------------------------------------------------------------------------
 1 | module ElasticSchema::Schema
 2 | 
 3 |   class Mappings
 4 |     TypeAlreadyDefined = Class.new(StandardError)
 5 | 
 6 |     attr_reader :index, :types
 7 | 
 8 |     def initialize(index)
 9 |       @index = index
10 |       @types = {}
11 |     end
12 | 
13 |     def type(name, &block)
14 |       name = name.to_s
15 | 
16 |       if types.has_key?(name)
17 |         fail TypeAlreadyDefined.new("There is already a schema defined for type '#{name}' in index '#{name}'.")
18 |       end
19 | 
20 |       @types[name] = Type.new(name, self, &block)
21 |     end
22 | 
23 |     def parent
24 |       index
25 |     end
26 | 
27 |     def full_name
28 |       parent.name
29 |     end
30 | 
31 |     def to_hash
32 |       types_hash = types.inject({}) { |_types_hash, (_, type)| _types_hash.update(type.to_hash) }
33 |       { "mappings" => types_hash }
34 |     end
35 |   end
36 | end
37 | 


--------------------------------------------------------------------------------
/examples/db/es/schema/articles.schema.rb:
--------------------------------------------------------------------------------
 1 | ElasticSchema::Schema::Definition.new do
 2 |   index    :articles
 3 |   analysis :default
 4 | 
 5 |   type :article do
 6 |     field :title, :string, analyzer: :lowcase_word_delimiter
 7 |     field :content, :string, analyzer: :lowcase_word_delimiter
 8 |     field :author do
 9 |       field :name do
10 |         field :first_name, :string
11 |         field :last_name, :string
12 |       end
13 |       field :email, :string, index: :not_analyzed
14 |     end
15 |     field :indexed_at, :date, index: :not_analyzed
16 |   end
17 | 
18 |   type :comment do
19 |     field :article_id, :integer
20 |     field :content, :string, analyzer: :lowcase_word_delimiter
21 |     field :author do
22 |       field :name do
23 |         field :first_name, :string
24 |         field :last_name, :string
25 |       end
26 |       field :email, :string, index: :not_analyzed
27 |     end
28 |     field :indexed_at, :date, index: :not_analyzed
29 |   end
30 | end
31 | 


--------------------------------------------------------------------------------
/elastic-schema.gemspec:
--------------------------------------------------------------------------------
 1 | # -*- encoding: utf-8 -*-
 2 | 
 3 | $LOAD_PATH.unshift File.expand_path("../lib", __FILE__)
 4 | 
 5 | Gem::Specification.new do |s|
 6 |   s.name        = "elastic-schema"
 7 |   s.version     = '0.2.9'
 8 |   s.platform    = Gem::Platform::RUBY
 9 |   s.license     = "MIT"
10 |   s.authors     = ["Leandro Camargo"]
11 |   s.email       = "leandroico@gmail.com"
12 |   s.homepage    = "http://github.com/leandro/elastic-schema"
13 |   s.summary     = "Elasticsearch schema manager for Ruby"
14 |   s.description = "A stateful way to approach Elasticsearch document mappings and data migrations"
15 | 
16 |   s.required_ruby_version = '>= 2.0'
17 | 
18 |   s.add_dependency 'elasticsearch-api'
19 |   s.add_dependency 'activesupport'
20 | 
21 |   s.files            = `git ls-files -- lib/*`.split("\n")
22 |   s.files           += ["License.txt"]
23 |   s.executables      = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
24 |   s.extra_rdoc_files = [ "README.md" ]
25 |   s.rdoc_options     = ["--charset=UTF-8"]
26 |   s.require_path     = "lib"
27 | end
28 | 


--------------------------------------------------------------------------------
/License.txt:
--------------------------------------------------------------------------------
 1 | (The MIT License)
 2 | 
 3 | Copyright (c) 2015 Leandro Camargo
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining
 6 | a copy of this software and associated documentation files (the
 7 | "Software"), to deal in the Software without restriction, including
 8 | without limitation the rights to use, copy, modify, merge, publish,
 9 | distribute, sublicense, and/or sell copies of the Software, and to
10 | permit persons to whom the Software is furnished to do so, subject to
11 | the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be
14 | included in all copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/lib/elastic-schema/schema/definition.rb:
--------------------------------------------------------------------------------
 1 | module ElasticSchema::Schema
 2 | 
 3 |   class Definition
 4 | 
 5 |     SchemaConflict = Class.new(StandardError)
 6 |     NoIndexDefined = Class.new(StandardError)
 7 | 
 8 |     @@definitions = {}
 9 | 
10 |     def initialize(&block)
11 |       instance_eval(&block)
12 | 
13 |       if @@definitions[definition_id]
14 |         fail SchemaConflict.new("There is already a schema definition for #{definition_id}")
15 |       end
16 | 
17 |       @@definitions[definition_id] = self
18 |     end
19 | 
20 |     def analysis(name)
21 |       fail NoIndexDefined.new("There is not index defined yet.") if index.nil?
22 |       index.analysis(name)
23 |     end
24 | 
25 |     def type(name, &block)
26 |       fail NoIndexDefined.new("There is not index defined yet.") if index.nil?
27 |       index.type(name, &block)
28 |     end
29 | 
30 |     def index(name = nil)
31 |       return @index if name.nil?
32 |       @index = Index.new(name, self)
33 |     end
34 | 
35 |     def to_hash
36 |       index.to_hash
37 |     end
38 | 
39 |     def self.definitions
40 |       @@definitions
41 |     end
42 | 
43 |     private
44 | 
45 |     def definition_id
46 |       index.name.to_s
47 |     end
48 |   end
49 | 
50 | end
51 | 


--------------------------------------------------------------------------------
/lib/utils/hash.rb:
--------------------------------------------------------------------------------
 1 | class Hash
 2 |   def deep_slice(*keys)
 3 |     keys.inject({}) do |new_hash, key|
 4 |       if key.is_a?(Array)
 5 |         inner_hash = new_hash.include?(key.first) ? new_hash[key.first] : {}
 6 |         inner_keys = key[1..-1]
 7 |         inner_keys = inner_keys.first if inner_keys.size == 1
 8 |         next new_hash unless self[key.first].is_a?(Hash)
 9 |         inner_hash.deep_merge!(self[key.first].deep_slice(inner_keys))
10 |         new_hash.update(key.first => inner_hash)
11 |       else
12 |         next new_hash unless self.has_key?(key)
13 |         new_hash.update(key => self[key])
14 |       end
15 |     end
16 |   end
17 | 
18 |   def deep_transform_values(&block)
19 |     return enum_for(:deep_transform_values) unless block_given?
20 | 
21 |     inject(self.class.new) do |memo, (key, value)|
22 |       value = value.is_a?(Hash) ? value.deep_transform_values(&block) : yield(value)
23 |       memo.update(key => value)
24 |     end
25 |   end
26 | 
27 |   def deep_transform_values!(&block)
28 |     return enum_for(:deep_transform_values) unless block_given?
29 | 
30 |     inject(self) do |memo, (key, value)|
31 |       value = value.is_a?(Hash) ? value.deep_transform_values(&block) : yield(value)
32 |       memo.update(key => value)
33 |     end
34 |   end
35 | end
36 | 


--------------------------------------------------------------------------------
/lib/elastic-schema/schema/analysis.rb:
--------------------------------------------------------------------------------
 1 | module ElasticSchema::Schema
 2 |   class Analysis
 3 |     @@filters   = {}
 4 |     @@analyzers = {}
 5 | 
 6 |     def initialize(&block)
 7 |       instance_eval(&block)
 8 |     end
 9 | 
10 |     def name(name)
11 |       @name = name.to_s
12 |     end
13 | 
14 |     def filter(name, opts = {})
15 |       set_name                       = @name || 'global'
16 |       @@filters[set_name]          ||= {}
17 |       @@filters[set_name][name.to_s] = stringfy_symbols(opts)
18 |     end
19 | 
20 |     def analyzer(name, opts)
21 |       set_name                         = @name || 'global'
22 |       @@analyzers[set_name]          ||= {}
23 |       @@analyzers[set_name][name.to_s] = stringfy_symbols(opts)
24 |     end
25 | 
26 |     def self.analysis_for(name = nil)
27 |       name          = name ? name.to_s : 'global'
28 |       analysis_hash = {}
29 | 
30 |       analysis_hash.update("filter" => @@filters[name]) if @@filters.has_key?(name)
31 |       analysis_hash.update("analyzer" => @@analyzers[name]) if @@analyzers.has_key?(name)
32 |       analysis_hash
33 |     end
34 | 
35 |     private
36 | 
37 |     def stringfy_symbols(hash)
38 |       hash.inject({}) do |_hash, (key, value)|
39 |         value = value.is_a?(Array) ? value.map { |item| item.to_s } : value.to_s
40 |         _hash.update(key.to_s => value)
41 |       end
42 |     end
43 |   end
44 | end
45 | 


--------------------------------------------------------------------------------
/lib/elastic-schema/command.rb:
--------------------------------------------------------------------------------
 1 | module ElasticSchema
 2 | 
 3 |   class Command
 4 | 
 5 |     attr_reader :client, :root, :schema_dir, :options, :schema_file, :analysis_file,
 6 |                 :bulk_size
 7 | 
 8 |     def initialize(options)
 9 |       @options       = options
10 |       @client        = Elasticsearch::Client.new(host: options[:host])
11 |       @root          = File.expand_path(options[:root])
12 |       @schema_dir    = File.join(@root, options[:schema_dir]) if options[:schema_dir]
13 |       @schema_file   = File.join(@root, options[:schema_file]) if options[:schema_file]
14 |       @analysis_file = File.join(@root, options[:analysis_file]) if options[:analysis_file]
15 |       @bulk_size     = options[:bulk_size]
16 |     end
17 | 
18 |     def run(command)
19 |       send(command)
20 |     end
21 | 
22 |     private
23 | 
24 |     # Creates the indices/types and raise an exception if the any of the indices/types already exists
25 |     def create
26 |       opts = { client: client, analysis_files: analysis_files, schema_files: schema_files }
27 |       opts.update(bulk_size: bulk_size) if bulk_size
28 |       Schema::Migration.new(opts).load_definitions.run
29 |     end
30 | 
31 |     def schema_files
32 |       (schema_dir ? Dir[schema_pattern] : [schema_file]).compact
33 |     end
34 | 
35 |     def analysis_files
36 |       (schema_dir ? Dir[analysis_pattern] : [analysis_file]).compact
37 |     end
38 | 
39 |     def schema_pattern
40 |       File.join(schema_dir, '*.schema.rb')
41 |     end
42 | 
43 |     def analysis_pattern
44 |       File.join(schema_dir, '{analysis.rb,*.analysis.rb}')
45 |     end
46 |   end
47 | end
48 | 


--------------------------------------------------------------------------------
/lib/elastic-schema/schema/field.rb:
--------------------------------------------------------------------------------
 1 | module ElasticSchema::Schema
 2 | 
 3 |   class Field
 4 |     attr_accessor :parent
 5 |     attr_reader :name, :type, :children, :attributes
 6 | 
 7 |     def initialize(field_name, field_type = nil, attrs, &block)
 8 |       @name       = field_name.to_s
 9 |       @parent     = attrs.delete(:parent)
10 |       @children   = FieldsSet.new(self)
11 |       @attributes = normalize_attributes(attrs)
12 |       field_type  = (block_given? ? 'object' : 'string') if field_type.nil?
13 |       @type       = field_type.to_s
14 | 
15 |       filter_attributes_for_special_cases
16 |       instance_eval(&block) if block_given?
17 |     end
18 | 
19 |     def field(field_name, field_type = nil, opts = {}, &block)
20 |       children << Field.new(field_name, field_type, opts, &block)
21 |     end
22 | 
23 |     def full_name
24 |       "#{parent.full_name}.#{name}"
25 |     end
26 | 
27 |     def to_hash
28 |       attrs = type == 'object' ? {} : { 'type' => type }
29 |       { name => attrs.merge(attributes).merge(children.to_hash) }
30 |     end
31 | 
32 |     private
33 | 
34 |     def normalize_attributes(attrs)
35 |       value_converter = ->(v) { [TrueClass, FalseClass, NilClass].include?(v.class) ? v : v.to_s }
36 |       attrs.deep_stringify_keys.deep_transform_values(&value_converter)
37 |     end
38 | 
39 |     def filter_attributes_for_special_cases
40 |       case type
41 |       when 'date'
42 |         attributes.update('format' => 'dateOptionalTime') if type == 'date'
43 |         attributes.delete('index')
44 |       when *%w(integer long float double boolean null)
45 |         attributes.delete('index')
46 |         attributes.delete('analyzer')
47 |       when 'attachment'
48 |         @attributes = default_attachment_attributes.deep_merge(attributes)
49 |       end
50 |     end
51 | 
52 |     def default_attachment_attributes
53 |       {
54 |         "fields" => {
55 |           "file"           => { "type" => "string" },
56 |           "author"         => { "type" => "string" },
57 |           "title"          => { "type" => "string" },
58 |           "name"           => { "type" => "string" },
59 |           "date"           => { "type" => "date", "format" => "dateOptionalTime" },
60 |           "keywords"       => { "type" => "string" },
61 |           "content_type"   => { "type" => "string" },
62 |           "content_length" => { "type" => "integer" },
63 |           "language"       => { "type" => "string" }
64 |         }
65 |       }
66 |     end
67 |   end
68 | end
69 | 


--------------------------------------------------------------------------------
/lib/elastic-schema/cli.rb:
--------------------------------------------------------------------------------
 1 | require 'optparse'
 2 | 
 3 | module ElasticSchema
 4 | 
 5 |   class CLI
 6 | 
 7 |     COMMANDS = %w(update check create drop recreate)
 8 | 
 9 |     def self.commands
10 |       COMMANDS
11 |     end
12 | 
13 |     def initialize(argv)
14 |       @argv = argv
15 | 
16 |       # Default options values
17 |       @options = {
18 |         root: Dir.pwd,
19 |         host: '127.0.0.1:9200'
20 |       }
21 | 
22 |       parse!
23 |     end
24 | 
25 |     def parser
26 |       @parser ||= OptionParser.new do |opts|
27 |         opts.banner = "Usage: eschema [options] #{self.class.commands.join('|')}"
28 | 
29 |         opts.separator ""
30 |         opts.separator "Setting options:"
31 | 
32 |         opts.on("-a", "--analysis_file FILE",
33 |                 "define the analysis file to be used (overwritten by -d)") { |analysis_file| @options[:analysis_file] = analysis_file }
34 | 
35 |         opts.on("-b", "--bulk_size SIZE",
36 |                 "set documents bulk size for each bulk iteration performed during documents migration (default: 1000)") { |bulk_size| @options[:bulk_size] = bulk_size }
37 | 
38 |         opts.on("-d", "--schema_dir DIR",
39 |                 "set directory where schema and analysis files are") { |schema_dir| @options[:schema_dir] = schema_dir }
40 | 
41 |         opts.on("-f", "--schema_file FILE",
42 |                 "define one fingle schema file to be used (overwritten by -d)") { |schema_file| @options[:schema_file] = schema_file }
43 | 
44 |         opts.on("-h", "--host HOST",
45 |                 "set address:port to connect to Elasticsearch (default: #{@options[:host]})") { |host| @options[:host] = host }
46 | 
47 |         opts.on("-r", "--root PATH",
48 |                 "set app root directory (default: #{@options[:root]})") { |root| @options[:root] = root }
49 | 
50 |       end
51 |     end
52 | 
53 |     # Parse the options.
54 |     def parse!
55 |       parser.parse! @argv
56 |       @command   = @argv.shift
57 |       @arguments = @argv
58 |     end
59 | 
60 |     # Parse the current shell arguments and run the command.
61 |     # Exits on error.
62 |     def run!
63 |       if self.class.commands.include?(@command)
64 |         run_command
65 |       elsif @command.nil?
66 |         puts "Command required"
67 |         puts @parser
68 |         exit 1
69 |       else
70 |         abort "Unknown command: #{@command}. Available commands: #{self.class.commands.join(', ')}"
71 |       end
72 |     end
73 | 
74 |     def run_command
75 |       Command.new(@options).run(@command)
76 |     end
77 |   end
78 | end
79 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ## Description
  2 | 
  3 | A declarative way to approach Elasticsearch document mappings and data migrations/reindexing.
  4 | 
  5 | The idea is to provide an easy and versionable way to register the mappings of your Elasticsearch indices and types.
  6 | Once any of the mappings and/or settings suffers any change by a developer, this tool kit will provide you means to keep your running elastic search server up-to-date regarding the recent changes.
  7 | 
  8 | The default strategy adopted by this tool is to create a new index with temporary name in order to create a whole new mapping that reflects the up-to-date mapping in the codebase. Once it's done it'll try (by default) to reindex all the data present in old index to the new one and once it's done it'll remove the old index and rename the new one.
  9 | 
 10 | ## Usage
 11 | 
 12 | Go to your Ruby project where you Gemfile is located:
 13 | 
 14 | ```shell
 15 | $ cd ~/projects/my-ruby-project
 16 | $ vim Gemfile
 17 | ```
 18 | 
 19 | Add the following line to your Gemfile
 20 | 
 21 | ```ruby
 22 | gem "elastic-schema", :git => "git://github.com/leandro/elastic-schema.git"
 23 | ```
 24 | 
 25 | Choose a directory where you're going to put your Elasticsearch schemas. Or create one for yourself:
 26 | 
 27 | ```shell
 28 | $ mkdir -p db/es/
 29 | ```
 30 | 
 31 | In order to see a working example, create the following file in the given your chosen directory:
 32 | 
 33 | ```shell
 34 | vim ./db/es/default.analysis.rb
 35 | ```
 36 | 
 37 | ```ruby
 38 | ElasticSchema::Schema::Analysis.new do
 39 |   name :default
 40 | 
 41 |   filter :word_filter, { type: :word_delimiter }
 42 |   analyzer :lowcase_word_delimiter, {
 43 |     type:      :custom,
 44 |     tokenizer: :standard,
 45 |     filter:    %i(lowercase asciifolding word_filter)
 46 |   }
 47 | end
 48 | ```
 49 | 
 50 | And also:
 51 | 
 52 | ```shell
 53 | vim ./db/es/articles.schema.rb
 54 | ```
 55 | 
 56 | ```ruby
 57 | ElasticSchema::Schema::Definition.new do
 58 |   index    :articles
 59 |   analysis :default
 60 | 
 61 |   type :article do
 62 |     field :title, :string, analyzer: :lowcase_word_delimiter
 63 |     field :content, :string, analyzer: :lowcase_word_delimiter
 64 |     field :author do
 65 |       field :name do
 66 |         field :first_name, :string
 67 |         field :last_name, :string
 68 |       end
 69 |       field :email, :string, index: :not_analyzed
 70 |     end
 71 |     field :indexed_at, :date, index: :not_analyzed
 72 |   end
 73 | 
 74 |   type :comment do
 75 |     field :article_id, :integer
 76 |     field :content, :string, analyzer: :lowcase_word_delimiter
 77 |     field :author do
 78 |       field :name do
 79 |         field :first_name, :string
 80 |         field :last_name, :string
 81 |       end
 82 |       field :email, :string, index: :not_analyzed
 83 |     end
 84 |     field :indexed_at, :date, index: :not_analyzed
 85 |   end
 86 | end
 87 | ```
 88 | 
 89 | Then, run bundle install in your app root directory and run:
 90 | 
 91 | ```shell
 92 | $ bundle exec eschema -h 127.0.0.1:9200 -d db/es/ create
 93 | Initiating schema updates: 1 out of 1 will be updated.
 94 | Creating index 'articles_v1436452769'
 95 | Creating type 'article' in index 'articles_v1436452769'
 96 | Creating type 'comment' in index 'articles_v1436452769'
 97 | Creating alias 'articles' to index 'articles_v1436452769'
 98 | ```
 99 | 
100 | And lets say you have some documents inside the index later on:
101 | 
102 | ```shell
103 | curl -XPUT http://127.0.0.1:9200/articles/article/1 -d '{"title": "Article A", "author": {"name": {"first_name": "Leandro", "last_name": "Camargo"}}, "indexed_at": "2015-07-08"}'
104 | curl -XPUT http://127.0.0.1:9200/articles/article/2 -d '{"title": "Article B", "author": {"name": {"first_name": "Leandro", "last_name": "Camargo"}}, "indexed_at": "2015-07-08"}'
105 | curl -XPUT http://127.0.0.1:9200/articles/article/3 -d '{"title": "Article C", "author": {"name": {"first_name": "Leandro", "last_name": "Camargo"}}, "indexed_at": "2015-07-08"}'
106 | curl -XPUT http://127.0.0.1:9200/articles/comment/1 -d '{"article_id": 1, "content": "First comment.", "author": {"name": {"first_name": "Leandro", "last_name": "Camargo"}}, "indexed_at": "2015-07-08"}'
107 | curl -XPUT http://127.0.0.1:9200/articles/comment/2 -d '{"article_id": 1, "content": "Second comment.", "author": {"name": {"first_name": "Leandro", "last_name": "Camargo"}}, "indexed_at": "2015-07-08"}'
108 | ```
109 | 
110 | Now, for instance, you change the analyzer for the 'content' field in your 'comment' type schema:
111 | 
112 | ```ruby
113 | # ...
114 | field :content, :string, analyzer: :snowball
115 | # ...
116 | ```
117 | 
118 | And then runs again the command. And you'll have this nice output:
119 | 
120 | ```shell
121 | $ bundle exec eschema -h 127.0.0.1:9200 -d db/es/ create
122 | Initiating schema updates: 1 out of 1 will be updated.
123 | Creating index 'articles_v1436453128'
124 | Creating type 'article' in index 'articles_v1436453128'
125 | Creating type 'comment' in index 'articles_v1436453128'
126 | Migrating 3 documents from type 'article' in index 'articles' to index 'articles_v1436453128'
127 | Migrating 2 documents from type 'comment' in index 'articles' to index 'articles_v1436453128'
128 | Creating alias 'articles' to index 'articles_v1436453128'
129 | Deleting index 'articles_v1436452769'
130 | ```
131 | 
132 | In case you want to specify one schema file at a time you can use `-f` (for schema file) and `-a` (for analysis file) instead of using `-d`. For further information just run:
133 | 
134 | ```shell
135 | bundle exec eschema --help
136 | ```
137 | 
138 | ## Important observations
139 | 
140 | * All index schema you create, its file name must match the '*.schema.rb' pattern.
141 | * The same goes for analysis settings, where it must match the '*.analysis.rb' or just naming it as 'analysis.rb' will also work.
142 | * If you have indices with multiple types in it, make sure your index schema definition has **all the types definitions** in it, otherwise the missing types will be lost during the migration, given in most cases a new index will be created and the old one will be deleted.
143 | 
144 | ## Missing parts
145 | 
146 | * Allow to make old index deletion to be something optional
147 | * Handle multi-tenant indices arrangements
148 | * Add tests
149 | 
150 | ## Contribute
151 | 
152 | If you want to contribute, please fork this project, make the changes and create a Pull Request mentioning me.
153 | 


--------------------------------------------------------------------------------
/lib/elastic-schema/schema/migration.rb:
--------------------------------------------------------------------------------
  1 | module ElasticSchema::Schema
  2 | 
  3 |   class Migration
  4 | 
  5 |     BULK_SIZE = 1000
  6 | 
  7 |     attr_reader :schema_files, :client, :actual_schemas, :timestamp, :analysis_files,
  8 |                 :bulk_size
  9 | 
 10 |     def initialize(options)
 11 |       @client         = options[:client]
 12 |       @analysis_files = options[:analysis_files]
 13 |       @schema_files   = options[:schema_files]
 14 |       @bulk_size      = (options[:bulk_size] || BULK_SIZE).to_i
 15 |       @actual_schemas = {}
 16 |       @timestamp      = Time.new.to_i
 17 |     end
 18 | 
 19 |     def load_definitions
 20 |       analysis_files.each { |schema_file| require schema_file }
 21 |       schema_files.each { |schema_file| require schema_file }
 22 |       self
 23 |     end
 24 | 
 25 |     def run
 26 |       schemas_to_update = types_to_update
 27 |       total_schemas     = schemas.size
 28 |       needs_update      = schemas_to_update.size
 29 | 
 30 |       if total_schemas > 0
 31 |         if needs_update < 1
 32 |           puts "Woo-hoo! Everything is already up-to-date!"
 33 |         else
 34 |           puts "Initiating schema updates: #{needs_update} out of #{total_schemas} will be updated."
 35 |         end
 36 |       else
 37 |         puts "There are no schemas to be processed in the provided directory."
 38 |       end
 39 | 
 40 |       create_or_update_indices(schemas_to_update)
 41 |     end
 42 | 
 43 |     private
 44 | 
 45 |     def create_or_update_indices(selected_schemas)
 46 |       selected_schemas.each do |index_name, schema|
 47 |         index_body = schema.index.to_hash.values.first
 48 | 
 49 |         if index_exists?(index_name)
 50 |           if must_create_new_index?(schema, index_name)
 51 |             migrate_data(index_name, index_body)
 52 |           else
 53 |             types = updatable_or_creatable_types(schema, index_name)
 54 |             create_or_update_types(schema, types)
 55 |           end
 56 |         else
 57 |           new_index = new_index_name(index_name)
 58 |           create_index(new_index, index_body)
 59 |           alias_index(new_index, index_name)
 60 |         end
 61 |       end
 62 |     end
 63 | 
 64 |     def create_or_update_types(schema, types)
 65 |       mappings = schema.index.mappings.to_hash['mappings']
 66 | 
 67 |       types.each do |type|
 68 |         mapping = mappings[type]
 69 |         put_mapping(schema.index.name, type, { type => mapping })
 70 |       end
 71 |     end
 72 | 
 73 |     # Migrates data from index/type to a new index/type and create an alias to it
 74 |     def migrate_data(index_name, index_body)
 75 |       new_index = new_index_name(index_name)
 76 |       create_index(new_index, index_body)
 77 |       copy_all_documents_between_indices(index_name, new_index)
 78 |       delete_index_with_same_name_as_alias(index_name)
 79 |       alias_index(new_index, index_name)
 80 |       delete_older_indices(index_name)
 81 |     end
 82 | 
 83 |     def alias_index(index, alias_name)
 84 |       puts "Creating alias '#{alias_name}' to index '#{index}'"
 85 |       client.indices.put_alias(index: index, name: alias_name)
 86 |     end
 87 | 
 88 |     def delete_older_indices(alias_name)
 89 |       older_indices = indices_from_alias(alias_name).keys - [new_index_name(alias_name)]
 90 |       older_indices.each { |index| delete_index(index) if index_exists?(index) }
 91 |     end
 92 | 
 93 |     def delete_index_with_same_name_as_alias(alias_name)
 94 |       delete_index(alias_name) if !alias_exists?(alias_name) && index_exists?(alias_name)
 95 |     end
 96 | 
 97 |     def copy_all_documents_between_indices(old_index, new_index)
 98 |       types = actual_schemas[old_index].values.first['mappings'].keys
 99 |       types.each { |type| copy_documents_for_type(type, old_index, new_index) }
100 |     end
101 | 
102 |     def copy_documents_for_type(type, old_index, new_index)
103 |       return unless (doc_count = documents_count(old_index, type)) > 0
104 | 
105 |       puts "Migrating #{doc_count} documents from type '#{type}' in index '#{old_index}' to index '#{new_index}'"
106 | 
107 |       result         = client.search index: old_index, type: type, search_type: 'scan', scroll: '1m', size: bulk_size
108 |       alias_name     = new_index.split("_")[0..-2].join("_")
109 |       fields_filter  = fields_whilelist(alias_name, type)
110 | 
111 |       while (result = client.scroll(scroll_id: result['_scroll_id'], scroll: '1m')) && (docs = result['hits']['hits']).any?
112 |         body = docs.map do |document|
113 |                  bulk_item = { index: { _index: new_index, _type: type } }
114 |                  source    = document['_source'].deep_slice(*fields_filter)
115 |                  bulk_item[:index].update(_id: document['_id'], data: source)
116 |                  bulk_item
117 |                end
118 |         client.bulk(body: body)
119 |       end
120 |     end
121 | 
122 |     def fields_whilelist(alias_name, type)
123 |       mapping = schemas[alias_name].to_hash.values.first['mappings'][type]['properties']
124 |       extract_field_names(mapping).map { |f| f.include?('.') ? f.split('.') : f }
125 |     end
126 | 
127 |     def updatable_or_creatable_types(schema, index_name)
128 |       old_mappings = actual_schemas[index_name].values.first['mappings']
129 |       new_mappings = schema.index.mappings.to_hash['mappings']
130 | 
131 |       new_mappings.keys.select do |type|
132 |         old_fields         = old_mappings[type]['properties'] rescue {}
133 |         new_fields         = new_mappings[type]['properties']
134 |         old_mapping_fields = extract_field_names(old_fields)
135 |         new_mapping_fields = extract_field_names(new_fields)
136 | 
137 |         (new_mapping_fields - old_mapping_fields).any?
138 |       end
139 |     end
140 | 
141 |     def must_create_new_index?(schema, index)
142 |       has_diverging_settings?(schema, index) || has_conflicting_mappings?(schema, index)
143 |     end
144 | 
145 |     def has_conflicting_mappings?(schema, index)
146 |       old_mappings = actual_schemas[index].values.first['mappings']
147 |       new_mappings = schema.index.mappings.to_hash['mappings']
148 | 
149 |       old_mappings.each do |type, old_mapping|
150 |         old_fields = old_mapping['properties']
151 |         new_fields = new_mappings[type]['properties'] rescue nil
152 | 
153 |         next if new_fields.nil?
154 | 
155 |         old_mapping_fields = extract_field_names(old_fields)
156 |         new_mapping_fields = extract_field_names(new_fields)
157 |         shared_fields      = old_mapping_fields & new_mapping_fields
158 | 
159 |         return true if shared_fields != old_mapping_fields
160 | 
161 |         old_mapping_fields = old_mapping_fields.map do |full_name|
162 |           full_name = full_name.split('.').join('.properties.').split('.')
163 |           full_name.size == 1 ? full_name.first : full_name
164 |         end
165 |         new_mapping_fields = new_mapping_fields.map do |full_name|
166 |           full_name = full_name.split('.').join('.properties.').split('.')
167 |           full_name.size == 1 ? full_name.first : full_name
168 |         end
169 | 
170 |         return true if old_fields.deep_slice(*old_mapping_fields) != new_fields.deep_slice(*new_mapping_fields)
171 |       end
172 | 
173 |       return false
174 |     end
175 | 
176 |     # For now we're only comparing analysis settings
177 |     def has_diverging_settings?(schema, index)
178 |       old_settings = actual_schemas[index].values.first['settings']['index']['analysis'] rescue {}
179 |       new_settings = schema.index.settings.to_hash['settings']['index']['analysis'] rescue {}
180 |       new_settings != old_settings
181 |     end
182 | 
183 |     def extract_field_names(mapping, name = '')
184 |       mapping.inject([]) do |names, (key, value)|
185 |         full_name = name.empty? ? key : "#{name}.#{key}"
186 | 
187 |         if value.is_a?(Hash)
188 |           full_name = name if key == 'properties'
189 |           expanded_names = extract_field_names(value, full_name)
190 |         else
191 |           expanded_names = name
192 |         end
193 | 
194 |         names.concat(Array(expanded_names))
195 |       end.uniq.sort
196 |     end
197 | 
198 |     def documents_count(index, type)
199 |       client.count(index: index, type: type)['count']
200 |     end
201 | 
202 |     def indices_from_alias(alias_name)
203 |       client.indices.get_alias(name: alias_name)
204 |     end
205 | 
206 |     def delete_index(index)
207 |       puts "Deleting index '#{index}'"
208 |       client.indices.delete(index: index)
209 |     end
210 | 
211 |     def create_index(index, body)
212 |       puts "Creating index '#{index}'"
213 | 
214 |       types = body["mappings"].keys rescue []
215 |       types.each { |type| puts "Creating type '#{type}' in index '#{index}'" }
216 | 
217 |       client.indices.create(index: index, body: body)
218 |     end
219 | 
220 |     def alias_exists?(alias_name)
221 |       client.indices.exists_alias(name: alias_name)
222 |     end
223 | 
224 |     def index_exists?(index)
225 |       client.indices.exists(index: index)
226 |     end
227 | 
228 |     def put_mapping(index, type, mapping)
229 |       puts "Creating/updating type '#{type}' in index '#{index}'"
230 |       client.indices.put_mapping(index: index, type: type, body: mapping)
231 |     end
232 | 
233 |     def new_index_name(index)
234 |       "#{index}_v#{timestamp}"
235 |     end
236 | 
237 |     # Get all the index/type in ES that diverge from the definitions
238 |     def types_to_update
239 |       schemas.select do |index_name, schema|
240 |         current_schema              = fetch_index(index_name)
241 |         @actual_schemas[index_name] = current_schema
242 |         !has_same_index_structures?(current_schema.values.first || {}, schema.to_hash.values.first)
243 |       end
244 |     end
245 | 
246 |     def has_same_index_structures?(old_index_body, new_index_body)
247 |       old_index_body = old_index_body.deep_slice('mappings', %w(settings index analysis)) rescue {}
248 |       new_index_body = new_index_body.deep_slice('mappings', %w(settings index analysis)) rescue {}
249 |       old_index_body == new_index_body
250 |     end
251 | 
252 |     def fetch_index(index)
253 |       begin
254 |         client.indices.get(index: index)
255 |       rescue Elasticsearch::Transport::Transport::Errors::NotFound
256 |         {}
257 |       end
258 |     end
259 | 
260 |     def schemas
261 |       @schemas ||= ElasticSchema::Schema::Definition.definitions
262 |     end
263 |   end
264 | end
265 | 


--------------------------------------------------------------------------------