├── .gitignore ├── bin └── eschema ├── examples └── db │ └── es │ └── schema │ ├── default.analysis.rb │ └── articles.schema.rb ├── lib ├── elastic-schema │ ├── schema │ │ ├── type.rb │ │ ├── settings.rb │ │ ├── index.rb │ │ ├── fields_set.rb │ │ ├── mappings.rb │ │ ├── definition.rb │ │ ├── analysis.rb │ │ ├── field.rb │ │ └── migration.rb │ ├── command.rb │ └── cli.rb ├── elastic-schema.rb └── utils │ └── hash.rb ├── elastic-schema.gemspec ├── License.txt └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | Gemfile.lock 3 | .bundle 4 | bundle 5 | -------------------------------------------------------------------------------- /bin/eschema: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | require 'elastic-schema' 4 | ElasticSchema::CLI.new(ARGV).run! 5 | -------------------------------------------------------------------------------- /examples/db/es/schema/default.analysis.rb: -------------------------------------------------------------------------------- 1 | ElasticSchema::Schema::Analysis.new do 2 | name :default 3 | 4 | filter :word_filter, { type: :word_delimiter } 5 | analyzer :lowcase_word_delimiter, { 6 | type: :custom, 7 | tokenizer: :standard, 8 | filter: %i(lowercase asciifolding word_filter) 9 | } 10 | end 11 | -------------------------------------------------------------------------------- /lib/elastic-schema/schema/type.rb: -------------------------------------------------------------------------------- 1 | module ElasticSchema::Schema 2 | class Type 3 | attr_reader :name, :mappings, :fields 4 | 5 | def initialize(name, mappings, &block) 6 | @name = name 7 | @mappings = mappings 8 | instance_eval(&block) 9 | end 10 | 11 | def field(field_name, field_type = nil, opts = {}, &block) 12 | fields << Field.new(field_name, field_type, opts, &block) 13 | end 14 | 15 | def fields 16 | @fields ||= FieldsSet.new(self) 17 | end 18 | 19 | def parent 20 | mappings 21 | end 22 | 23 | def to_hash 24 | { name.to_s => fields.to_hash } 25 | end 26 | end 27 | end 28 | -------------------------------------------------------------------------------- /lib/elastic-schema/schema/settings.rb: -------------------------------------------------------------------------------- 1 | module ElasticSchema::Schema 2 | class Settings 3 | attr_accessor :analysis 4 | attr_reader :index 5 | 6 | def initialize(index, opts = {}) 7 | @index = index 8 | opts.deep_stringify_keys! 9 | 10 | %w(analysis).each do |attr| 11 | send(:"#{attr}=", opts[attr]) if opts.has_key?(attr) 12 | end 13 | end 14 | 15 | def to_hash 16 | main_hash = {} 17 | 18 | if analysis && (analysis_hash = Analysis.analysis_for(analysis)).any? 19 | main_hash.update("analysis" => analysis_hash) 20 | end 21 | 22 | main_hash.any? ? { "settings" => { "index" => main_hash } } : {} 23 | end 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /lib/elastic-schema/schema/index.rb: -------------------------------------------------------------------------------- 1 | module ElasticSchema::Schema 2 | class Index 3 | attr_reader :name, :definition, :mappings, :settings 4 | 5 | def initialize(name, definition) 6 | @name = name 7 | @definition = definition 8 | end 9 | 10 | def analysis(name) 11 | @settings ||= Settings.new(self, analysis: name) 12 | end 13 | 14 | def type(type_name, &block) 15 | @mappings ||= Mappings.new(self) 16 | mappings.type(type_name, &block) 17 | end 18 | 19 | def to_hash 20 | main_hash = {} 21 | main_hash.update(mappings.to_hash) if mappings 22 | main_hash.update(settings.to_hash) if settings && settings.to_hash.any? 23 | { name.to_s => main_hash } 24 | end 25 | end 26 | end 27 | -------------------------------------------------------------------------------- /lib/elastic-schema.rb: -------------------------------------------------------------------------------- 1 | require 'elasticsearch' 2 | require 'utils/hash' 3 | require 'active_support/core_ext/hash' 4 | 5 | module ElasticSchema 6 | autoload :CLI, 'elastic-schema/cli' 7 | autoload :Command, 'elastic-schema/command' 8 | 9 | module Schema 10 | autoload :Definition, 'elastic-schema/schema/definition' 11 | autoload :Migration, 'elastic-schema/schema/migration' 12 | autoload :Field, 'elastic-schema/schema/field' 13 | autoload :FieldsSet, 'elastic-schema/schema/fields_set' 14 | autoload :Mappings, 'elastic-schema/schema/mappings' 15 | autoload :Type, 'elastic-schema/schema/type' 16 | autoload :Settings, 'elastic-schema/schema/settings' 17 | autoload :Analysis, 'elastic-schema/schema/analysis' 18 | autoload :Index, 'elastic-schema/schema/index' 19 | end 20 | end 21 | -------------------------------------------------------------------------------- /lib/elastic-schema/schema/fields_set.rb: -------------------------------------------------------------------------------- 1 | module ElasticSchema::Schema 2 | 3 | class FieldsSet 4 | 5 | FieldAlreadyDefined = Class.new(StandardError) 6 | 7 | attr_reader :fields, :parent 8 | 9 | def initialize(parent) 10 | @fields = [] 11 | @parent = parent 12 | end 13 | 14 | def << field 15 | field.parent = self 16 | fail FieldAlreadyDefined.new("'#{field.full_name}' already exists.") if find(field.name) 17 | fields << field 18 | end 19 | 20 | def find(field_name) 21 | fields.bsearch { |field| field.name == field_name } 22 | end 23 | 24 | def full_name 25 | parent.full_name 26 | end 27 | 28 | def to_hash 29 | return {} if fields.empty? 30 | { 'properties' => fields.inject({}) { |_fields, field| _fields.update(field.to_hash) } } 31 | end 32 | end 33 | 34 | end 35 | -------------------------------------------------------------------------------- /lib/elastic-schema/schema/mappings.rb: -------------------------------------------------------------------------------- 1 | module ElasticSchema::Schema 2 | 3 | class Mappings 4 | TypeAlreadyDefined = Class.new(StandardError) 5 | 6 | attr_reader :index, :types 7 | 8 | def initialize(index) 9 | @index = index 10 | @types = {} 11 | end 12 | 13 | def type(name, &block) 14 | name = name.to_s 15 | 16 | if types.has_key?(name) 17 | fail TypeAlreadyDefined.new("There is already a schema defined for type '#{name}' in index '#{name}'.") 18 | end 19 | 20 | @types[name] = Type.new(name, self, &block) 21 | end 22 | 23 | def parent 24 | index 25 | end 26 | 27 | def full_name 28 | parent.name 29 | end 30 | 31 | def to_hash 32 | types_hash = types.inject({}) { |_types_hash, (_, type)| _types_hash.update(type.to_hash) } 33 | { "mappings" => types_hash } 34 | end 35 | end 36 | end 37 | -------------------------------------------------------------------------------- /examples/db/es/schema/articles.schema.rb: -------------------------------------------------------------------------------- 1 | ElasticSchema::Schema::Definition.new do 2 | index :articles 3 | analysis :default 4 | 5 | type :article do 6 | field :title, :string, analyzer: :lowcase_word_delimiter 7 | field :content, :string, analyzer: :lowcase_word_delimiter 8 | field :author do 9 | field :name do 10 | field :first_name, :string 11 | field :last_name, :string 12 | end 13 | field :email, :string, index: :not_analyzed 14 | end 15 | field :indexed_at, :date, index: :not_analyzed 16 | end 17 | 18 | type :comment do 19 | field :article_id, :integer 20 | field :content, :string, analyzer: :lowcase_word_delimiter 21 | field :author do 22 | field :name do 23 | field :first_name, :string 24 | field :last_name, :string 25 | end 26 | field :email, :string, index: :not_analyzed 27 | end 28 | field :indexed_at, :date, index: :not_analyzed 29 | end 30 | end 31 | -------------------------------------------------------------------------------- /elastic-schema.gemspec: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | $LOAD_PATH.unshift File.expand_path("../lib", __FILE__) 4 | 5 | Gem::Specification.new do |s| 6 | s.name = "elastic-schema" 7 | s.version = '0.2.9' 8 | s.platform = Gem::Platform::RUBY 9 | s.license = "MIT" 10 | s.authors = ["Leandro Camargo"] 11 | s.email = "leandroico@gmail.com" 12 | s.homepage = "http://github.com/leandro/elastic-schema" 13 | s.summary = "Elasticsearch schema manager for Ruby" 14 | s.description = "A stateful way to approach Elasticsearch document mappings and data migrations" 15 | 16 | s.required_ruby_version = '>= 2.0' 17 | 18 | s.add_dependency 'elasticsearch-api' 19 | s.add_dependency 'activesupport' 20 | 21 | s.files = `git ls-files -- lib/*`.split("\n") 22 | s.files += ["License.txt"] 23 | s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) } 24 | s.extra_rdoc_files = [ "README.md" ] 25 | s.rdoc_options = ["--charset=UTF-8"] 26 | s.require_path = "lib" 27 | end 28 | -------------------------------------------------------------------------------- /License.txt: -------------------------------------------------------------------------------- 1 | (The MIT License) 2 | 3 | Copyright (c) 2015 Leandro Camargo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /lib/elastic-schema/schema/definition.rb: -------------------------------------------------------------------------------- 1 | module ElasticSchema::Schema 2 | 3 | class Definition 4 | 5 | SchemaConflict = Class.new(StandardError) 6 | NoIndexDefined = Class.new(StandardError) 7 | 8 | @@definitions = {} 9 | 10 | def initialize(&block) 11 | instance_eval(&block) 12 | 13 | if @@definitions[definition_id] 14 | fail SchemaConflict.new("There is already a schema definition for #{definition_id}") 15 | end 16 | 17 | @@definitions[definition_id] = self 18 | end 19 | 20 | def analysis(name) 21 | fail NoIndexDefined.new("There is not index defined yet.") if index.nil? 22 | index.analysis(name) 23 | end 24 | 25 | def type(name, &block) 26 | fail NoIndexDefined.new("There is not index defined yet.") if index.nil? 27 | index.type(name, &block) 28 | end 29 | 30 | def index(name = nil) 31 | return @index if name.nil? 32 | @index = Index.new(name, self) 33 | end 34 | 35 | def to_hash 36 | index.to_hash 37 | end 38 | 39 | def self.definitions 40 | @@definitions 41 | end 42 | 43 | private 44 | 45 | def definition_id 46 | index.name.to_s 47 | end 48 | end 49 | 50 | end 51 | -------------------------------------------------------------------------------- /lib/utils/hash.rb: -------------------------------------------------------------------------------- 1 | class Hash 2 | def deep_slice(*keys) 3 | keys.inject({}) do |new_hash, key| 4 | if key.is_a?(Array) 5 | inner_hash = new_hash.include?(key.first) ? new_hash[key.first] : {} 6 | inner_keys = key[1..-1] 7 | inner_keys = inner_keys.first if inner_keys.size == 1 8 | next new_hash unless self[key.first].is_a?(Hash) 9 | inner_hash.deep_merge!(self[key.first].deep_slice(inner_keys)) 10 | new_hash.update(key.first => inner_hash) 11 | else 12 | next new_hash unless self.has_key?(key) 13 | new_hash.update(key => self[key]) 14 | end 15 | end 16 | end 17 | 18 | def deep_transform_values(&block) 19 | return enum_for(:deep_transform_values) unless block_given? 20 | 21 | inject(self.class.new) do |memo, (key, value)| 22 | value = value.is_a?(Hash) ? value.deep_transform_values(&block) : yield(value) 23 | memo.update(key => value) 24 | end 25 | end 26 | 27 | def deep_transform_values!(&block) 28 | return enum_for(:deep_transform_values) unless block_given? 29 | 30 | inject(self) do |memo, (key, value)| 31 | value = value.is_a?(Hash) ? value.deep_transform_values(&block) : yield(value) 32 | memo.update(key => value) 33 | end 34 | end 35 | end 36 | -------------------------------------------------------------------------------- /lib/elastic-schema/schema/analysis.rb: -------------------------------------------------------------------------------- 1 | module ElasticSchema::Schema 2 | class Analysis 3 | @@filters = {} 4 | @@analyzers = {} 5 | 6 | def initialize(&block) 7 | instance_eval(&block) 8 | end 9 | 10 | def name(name) 11 | @name = name.to_s 12 | end 13 | 14 | def filter(name, opts = {}) 15 | set_name = @name || 'global' 16 | @@filters[set_name] ||= {} 17 | @@filters[set_name][name.to_s] = stringfy_symbols(opts) 18 | end 19 | 20 | def analyzer(name, opts) 21 | set_name = @name || 'global' 22 | @@analyzers[set_name] ||= {} 23 | @@analyzers[set_name][name.to_s] = stringfy_symbols(opts) 24 | end 25 | 26 | def self.analysis_for(name = nil) 27 | name = name ? name.to_s : 'global' 28 | analysis_hash = {} 29 | 30 | analysis_hash.update("filter" => @@filters[name]) if @@filters.has_key?(name) 31 | analysis_hash.update("analyzer" => @@analyzers[name]) if @@analyzers.has_key?(name) 32 | analysis_hash 33 | end 34 | 35 | private 36 | 37 | def stringfy_symbols(hash) 38 | hash.inject({}) do |_hash, (key, value)| 39 | value = value.is_a?(Array) ? value.map { |item| item.to_s } : value.to_s 40 | _hash.update(key.to_s => value) 41 | end 42 | end 43 | end 44 | end 45 | -------------------------------------------------------------------------------- /lib/elastic-schema/command.rb: -------------------------------------------------------------------------------- 1 | module ElasticSchema 2 | 3 | class Command 4 | 5 | attr_reader :client, :root, :schema_dir, :options, :schema_file, :analysis_file, 6 | :bulk_size 7 | 8 | def initialize(options) 9 | @options = options 10 | @client = Elasticsearch::Client.new(host: options[:host]) 11 | @root = File.expand_path(options[:root]) 12 | @schema_dir = File.join(@root, options[:schema_dir]) if options[:schema_dir] 13 | @schema_file = File.join(@root, options[:schema_file]) if options[:schema_file] 14 | @analysis_file = File.join(@root, options[:analysis_file]) if options[:analysis_file] 15 | @bulk_size = options[:bulk_size] 16 | end 17 | 18 | def run(command) 19 | send(command) 20 | end 21 | 22 | private 23 | 24 | # Creates the indices/types and raise an exception if the any of the indices/types already exists 25 | def create 26 | opts = { client: client, analysis_files: analysis_files, schema_files: schema_files } 27 | opts.update(bulk_size: bulk_size) if bulk_size 28 | Schema::Migration.new(opts).load_definitions.run 29 | end 30 | 31 | def schema_files 32 | (schema_dir ? Dir[schema_pattern] : [schema_file]).compact 33 | end 34 | 35 | def analysis_files 36 | (schema_dir ? Dir[analysis_pattern] : [analysis_file]).compact 37 | end 38 | 39 | def schema_pattern 40 | File.join(schema_dir, '*.schema.rb') 41 | end 42 | 43 | def analysis_pattern 44 | File.join(schema_dir, '{analysis.rb,*.analysis.rb}') 45 | end 46 | end 47 | end 48 | -------------------------------------------------------------------------------- /lib/elastic-schema/schema/field.rb: -------------------------------------------------------------------------------- 1 | module ElasticSchema::Schema 2 | 3 | class Field 4 | attr_accessor :parent 5 | attr_reader :name, :type, :children, :attributes 6 | 7 | def initialize(field_name, field_type = nil, attrs, &block) 8 | @name = field_name.to_s 9 | @parent = attrs.delete(:parent) 10 | @children = FieldsSet.new(self) 11 | @attributes = normalize_attributes(attrs) 12 | field_type = (block_given? ? 'object' : 'string') if field_type.nil? 13 | @type = field_type.to_s 14 | 15 | filter_attributes_for_special_cases 16 | instance_eval(&block) if block_given? 17 | end 18 | 19 | def field(field_name, field_type = nil, opts = {}, &block) 20 | children << Field.new(field_name, field_type, opts, &block) 21 | end 22 | 23 | def full_name 24 | "#{parent.full_name}.#{name}" 25 | end 26 | 27 | def to_hash 28 | attrs = type == 'object' ? {} : { 'type' => type } 29 | { name => attrs.merge(attributes).merge(children.to_hash) } 30 | end 31 | 32 | private 33 | 34 | def normalize_attributes(attrs) 35 | value_converter = ->(v) { [TrueClass, FalseClass, NilClass].include?(v.class) ? v : v.to_s } 36 | attrs.deep_stringify_keys.deep_transform_values(&value_converter) 37 | end 38 | 39 | def filter_attributes_for_special_cases 40 | case type 41 | when 'date' 42 | attributes.update('format' => 'dateOptionalTime') if type == 'date' 43 | attributes.delete('index') 44 | when *%w(integer long float double boolean null) 45 | attributes.delete('index') 46 | attributes.delete('analyzer') 47 | when 'attachment' 48 | @attributes = default_attachment_attributes.deep_merge(attributes) 49 | end 50 | end 51 | 52 | def default_attachment_attributes 53 | { 54 | "fields" => { 55 | "file" => { "type" => "string" }, 56 | "author" => { "type" => "string" }, 57 | "title" => { "type" => "string" }, 58 | "name" => { "type" => "string" }, 59 | "date" => { "type" => "date", "format" => "dateOptionalTime" }, 60 | "keywords" => { "type" => "string" }, 61 | "content_type" => { "type" => "string" }, 62 | "content_length" => { "type" => "integer" }, 63 | "language" => { "type" => "string" } 64 | } 65 | } 66 | end 67 | end 68 | end 69 | -------------------------------------------------------------------------------- /lib/elastic-schema/cli.rb: -------------------------------------------------------------------------------- 1 | require 'optparse' 2 | 3 | module ElasticSchema 4 | 5 | class CLI 6 | 7 | COMMANDS = %w(update check create drop recreate) 8 | 9 | def self.commands 10 | COMMANDS 11 | end 12 | 13 | def initialize(argv) 14 | @argv = argv 15 | 16 | # Default options values 17 | @options = { 18 | root: Dir.pwd, 19 | host: '127.0.0.1:9200' 20 | } 21 | 22 | parse! 23 | end 24 | 25 | def parser 26 | @parser ||= OptionParser.new do |opts| 27 | opts.banner = "Usage: eschema [options] #{self.class.commands.join('|')}" 28 | 29 | opts.separator "" 30 | opts.separator "Setting options:" 31 | 32 | opts.on("-a", "--analysis_file FILE", 33 | "define the analysis file to be used (overwritten by -d)") { |analysis_file| @options[:analysis_file] = analysis_file } 34 | 35 | opts.on("-b", "--bulk_size SIZE", 36 | "set documents bulk size for each bulk iteration performed during documents migration (default: 1000)") { |bulk_size| @options[:bulk_size] = bulk_size } 37 | 38 | opts.on("-d", "--schema_dir DIR", 39 | "set directory where schema and analysis files are") { |schema_dir| @options[:schema_dir] = schema_dir } 40 | 41 | opts.on("-f", "--schema_file FILE", 42 | "define one fingle schema file to be used (overwritten by -d)") { |schema_file| @options[:schema_file] = schema_file } 43 | 44 | opts.on("-h", "--host HOST", 45 | "set address:port to connect to Elasticsearch (default: #{@options[:host]})") { |host| @options[:host] = host } 46 | 47 | opts.on("-r", "--root PATH", 48 | "set app root directory (default: #{@options[:root]})") { |root| @options[:root] = root } 49 | 50 | end 51 | end 52 | 53 | # Parse the options. 54 | def parse! 55 | parser.parse! @argv 56 | @command = @argv.shift 57 | @arguments = @argv 58 | end 59 | 60 | # Parse the current shell arguments and run the command. 61 | # Exits on error. 62 | def run! 63 | if self.class.commands.include?(@command) 64 | run_command 65 | elsif @command.nil? 66 | puts "Command required" 67 | puts @parser 68 | exit 1 69 | else 70 | abort "Unknown command: #{@command}. Available commands: #{self.class.commands.join(', ')}" 71 | end 72 | end 73 | 74 | def run_command 75 | Command.new(@options).run(@command) 76 | end 77 | end 78 | end 79 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Description 2 | 3 | A declarative way to approach Elasticsearch document mappings and data migrations/reindexing. 4 | 5 | The idea is to provide an easy and versionable way to register the mappings of your Elasticsearch indices and types. 6 | Once any of the mappings and/or settings suffers any change by a developer, this tool kit will provide you means to keep your running elastic search server up-to-date regarding the recent changes. 7 | 8 | The default strategy adopted by this tool is to create a new index with temporary name in order to create a whole new mapping that reflects the up-to-date mapping in the codebase. Once it's done it'll try (by default) to reindex all the data present in old index to the new one and once it's done it'll remove the old index and rename the new one. 9 | 10 | ## Usage 11 | 12 | Go to your Ruby project where you Gemfile is located: 13 | 14 | ```shell 15 | $ cd ~/projects/my-ruby-project 16 | $ vim Gemfile 17 | ``` 18 | 19 | Add the following line to your Gemfile 20 | 21 | ```ruby 22 | gem "elastic-schema", :git => "git://github.com/leandro/elastic-schema.git" 23 | ``` 24 | 25 | Choose a directory where you're going to put your Elasticsearch schemas. Or create one for yourself: 26 | 27 | ```shell 28 | $ mkdir -p db/es/ 29 | ``` 30 | 31 | In order to see a working example, create the following file in the given your chosen directory: 32 | 33 | ```shell 34 | vim ./db/es/default.analysis.rb 35 | ``` 36 | 37 | ```ruby 38 | ElasticSchema::Schema::Analysis.new do 39 | name :default 40 | 41 | filter :word_filter, { type: :word_delimiter } 42 | analyzer :lowcase_word_delimiter, { 43 | type: :custom, 44 | tokenizer: :standard, 45 | filter: %i(lowercase asciifolding word_filter) 46 | } 47 | end 48 | ``` 49 | 50 | And also: 51 | 52 | ```shell 53 | vim ./db/es/articles.schema.rb 54 | ``` 55 | 56 | ```ruby 57 | ElasticSchema::Schema::Definition.new do 58 | index :articles 59 | analysis :default 60 | 61 | type :article do 62 | field :title, :string, analyzer: :lowcase_word_delimiter 63 | field :content, :string, analyzer: :lowcase_word_delimiter 64 | field :author do 65 | field :name do 66 | field :first_name, :string 67 | field :last_name, :string 68 | end 69 | field :email, :string, index: :not_analyzed 70 | end 71 | field :indexed_at, :date, index: :not_analyzed 72 | end 73 | 74 | type :comment do 75 | field :article_id, :integer 76 | field :content, :string, analyzer: :lowcase_word_delimiter 77 | field :author do 78 | field :name do 79 | field :first_name, :string 80 | field :last_name, :string 81 | end 82 | field :email, :string, index: :not_analyzed 83 | end 84 | field :indexed_at, :date, index: :not_analyzed 85 | end 86 | end 87 | ``` 88 | 89 | Then, run bundle install in your app root directory and run: 90 | 91 | ```shell 92 | $ bundle exec eschema -h 127.0.0.1:9200 -d db/es/ create 93 | Initiating schema updates: 1 out of 1 will be updated. 94 | Creating index 'articles_v1436452769' 95 | Creating type 'article' in index 'articles_v1436452769' 96 | Creating type 'comment' in index 'articles_v1436452769' 97 | Creating alias 'articles' to index 'articles_v1436452769' 98 | ``` 99 | 100 | And lets say you have some documents inside the index later on: 101 | 102 | ```shell 103 | curl -XPUT http://127.0.0.1:9200/articles/article/1 -d '{"title": "Article A", "author": {"name": {"first_name": "Leandro", "last_name": "Camargo"}}, "indexed_at": "2015-07-08"}' 104 | curl -XPUT http://127.0.0.1:9200/articles/article/2 -d '{"title": "Article B", "author": {"name": {"first_name": "Leandro", "last_name": "Camargo"}}, "indexed_at": "2015-07-08"}' 105 | curl -XPUT http://127.0.0.1:9200/articles/article/3 -d '{"title": "Article C", "author": {"name": {"first_name": "Leandro", "last_name": "Camargo"}}, "indexed_at": "2015-07-08"}' 106 | curl -XPUT http://127.0.0.1:9200/articles/comment/1 -d '{"article_id": 1, "content": "First comment.", "author": {"name": {"first_name": "Leandro", "last_name": "Camargo"}}, "indexed_at": "2015-07-08"}' 107 | curl -XPUT http://127.0.0.1:9200/articles/comment/2 -d '{"article_id": 1, "content": "Second comment.", "author": {"name": {"first_name": "Leandro", "last_name": "Camargo"}}, "indexed_at": "2015-07-08"}' 108 | ``` 109 | 110 | Now, for instance, you change the analyzer for the 'content' field in your 'comment' type schema: 111 | 112 | ```ruby 113 | # ... 114 | field :content, :string, analyzer: :snowball 115 | # ... 116 | ``` 117 | 118 | And then runs again the command. And you'll have this nice output: 119 | 120 | ```shell 121 | $ bundle exec eschema -h 127.0.0.1:9200 -d db/es/ create 122 | Initiating schema updates: 1 out of 1 will be updated. 123 | Creating index 'articles_v1436453128' 124 | Creating type 'article' in index 'articles_v1436453128' 125 | Creating type 'comment' in index 'articles_v1436453128' 126 | Migrating 3 documents from type 'article' in index 'articles' to index 'articles_v1436453128' 127 | Migrating 2 documents from type 'comment' in index 'articles' to index 'articles_v1436453128' 128 | Creating alias 'articles' to index 'articles_v1436453128' 129 | Deleting index 'articles_v1436452769' 130 | ``` 131 | 132 | In case you want to specify one schema file at a time you can use `-f` (for schema file) and `-a` (for analysis file) instead of using `-d`. For further information just run: 133 | 134 | ```shell 135 | bundle exec eschema --help 136 | ``` 137 | 138 | ## Important observations 139 | 140 | * All index schema you create, its file name must match the '*.schema.rb' pattern. 141 | * The same goes for analysis settings, where it must match the '*.analysis.rb' or just naming it as 'analysis.rb' will also work. 142 | * If you have indices with multiple types in it, make sure your index schema definition has **all the types definitions** in it, otherwise the missing types will be lost during the migration, given in most cases a new index will be created and the old one will be deleted. 143 | 144 | ## Missing parts 145 | 146 | * Allow to make old index deletion to be something optional 147 | * Handle multi-tenant indices arrangements 148 | * Add tests 149 | 150 | ## Contribute 151 | 152 | If you want to contribute, please fork this project, make the changes and create a Pull Request mentioning me. 153 | -------------------------------------------------------------------------------- /lib/elastic-schema/schema/migration.rb: -------------------------------------------------------------------------------- 1 | module ElasticSchema::Schema 2 | 3 | class Migration 4 | 5 | BULK_SIZE = 1000 6 | 7 | attr_reader :schema_files, :client, :actual_schemas, :timestamp, :analysis_files, 8 | :bulk_size 9 | 10 | def initialize(options) 11 | @client = options[:client] 12 | @analysis_files = options[:analysis_files] 13 | @schema_files = options[:schema_files] 14 | @bulk_size = (options[:bulk_size] || BULK_SIZE).to_i 15 | @actual_schemas = {} 16 | @timestamp = Time.new.to_i 17 | end 18 | 19 | def load_definitions 20 | analysis_files.each { |schema_file| require schema_file } 21 | schema_files.each { |schema_file| require schema_file } 22 | self 23 | end 24 | 25 | def run 26 | schemas_to_update = types_to_update 27 | total_schemas = schemas.size 28 | needs_update = schemas_to_update.size 29 | 30 | if total_schemas > 0 31 | if needs_update < 1 32 | puts "Woo-hoo! Everything is already up-to-date!" 33 | else 34 | puts "Initiating schema updates: #{needs_update} out of #{total_schemas} will be updated." 35 | end 36 | else 37 | puts "There are no schemas to be processed in the provided directory." 38 | end 39 | 40 | create_or_update_indices(schemas_to_update) 41 | end 42 | 43 | private 44 | 45 | def create_or_update_indices(selected_schemas) 46 | selected_schemas.each do |index_name, schema| 47 | index_body = schema.index.to_hash.values.first 48 | 49 | if index_exists?(index_name) 50 | if must_create_new_index?(schema, index_name) 51 | migrate_data(index_name, index_body) 52 | else 53 | types = updatable_or_creatable_types(schema, index_name) 54 | create_or_update_types(schema, types) 55 | end 56 | else 57 | new_index = new_index_name(index_name) 58 | create_index(new_index, index_body) 59 | alias_index(new_index, index_name) 60 | end 61 | end 62 | end 63 | 64 | def create_or_update_types(schema, types) 65 | mappings = schema.index.mappings.to_hash['mappings'] 66 | 67 | types.each do |type| 68 | mapping = mappings[type] 69 | put_mapping(schema.index.name, type, { type => mapping }) 70 | end 71 | end 72 | 73 | # Migrates data from index/type to a new index/type and create an alias to it 74 | def migrate_data(index_name, index_body) 75 | new_index = new_index_name(index_name) 76 | create_index(new_index, index_body) 77 | copy_all_documents_between_indices(index_name, new_index) 78 | delete_index_with_same_name_as_alias(index_name) 79 | alias_index(new_index, index_name) 80 | delete_older_indices(index_name) 81 | end 82 | 83 | def alias_index(index, alias_name) 84 | puts "Creating alias '#{alias_name}' to index '#{index}'" 85 | client.indices.put_alias(index: index, name: alias_name) 86 | end 87 | 88 | def delete_older_indices(alias_name) 89 | older_indices = indices_from_alias(alias_name).keys - [new_index_name(alias_name)] 90 | older_indices.each { |index| delete_index(index) if index_exists?(index) } 91 | end 92 | 93 | def delete_index_with_same_name_as_alias(alias_name) 94 | delete_index(alias_name) if !alias_exists?(alias_name) && index_exists?(alias_name) 95 | end 96 | 97 | def copy_all_documents_between_indices(old_index, new_index) 98 | types = actual_schemas[old_index].values.first['mappings'].keys 99 | types.each { |type| copy_documents_for_type(type, old_index, new_index) } 100 | end 101 | 102 | def copy_documents_for_type(type, old_index, new_index) 103 | return unless (doc_count = documents_count(old_index, type)) > 0 104 | 105 | puts "Migrating #{doc_count} documents from type '#{type}' in index '#{old_index}' to index '#{new_index}'" 106 | 107 | result = client.search index: old_index, type: type, search_type: 'scan', scroll: '1m', size: bulk_size 108 | alias_name = new_index.split("_")[0..-2].join("_") 109 | fields_filter = fields_whilelist(alias_name, type) 110 | 111 | while (result = client.scroll(scroll_id: result['_scroll_id'], scroll: '1m')) && (docs = result['hits']['hits']).any? 112 | body = docs.map do |document| 113 | bulk_item = { index: { _index: new_index, _type: type } } 114 | source = document['_source'].deep_slice(*fields_filter) 115 | bulk_item[:index].update(_id: document['_id'], data: source) 116 | bulk_item 117 | end 118 | client.bulk(body: body) 119 | end 120 | end 121 | 122 | def fields_whilelist(alias_name, type) 123 | mapping = schemas[alias_name].to_hash.values.first['mappings'][type]['properties'] 124 | extract_field_names(mapping).map { |f| f.include?('.') ? f.split('.') : f } 125 | end 126 | 127 | def updatable_or_creatable_types(schema, index_name) 128 | old_mappings = actual_schemas[index_name].values.first['mappings'] 129 | new_mappings = schema.index.mappings.to_hash['mappings'] 130 | 131 | new_mappings.keys.select do |type| 132 | old_fields = old_mappings[type]['properties'] rescue {} 133 | new_fields = new_mappings[type]['properties'] 134 | old_mapping_fields = extract_field_names(old_fields) 135 | new_mapping_fields = extract_field_names(new_fields) 136 | 137 | (new_mapping_fields - old_mapping_fields).any? 138 | end 139 | end 140 | 141 | def must_create_new_index?(schema, index) 142 | has_diverging_settings?(schema, index) || has_conflicting_mappings?(schema, index) 143 | end 144 | 145 | def has_conflicting_mappings?(schema, index) 146 | old_mappings = actual_schemas[index].values.first['mappings'] 147 | new_mappings = schema.index.mappings.to_hash['mappings'] 148 | 149 | old_mappings.each do |type, old_mapping| 150 | old_fields = old_mapping['properties'] 151 | new_fields = new_mappings[type]['properties'] rescue nil 152 | 153 | next if new_fields.nil? 154 | 155 | old_mapping_fields = extract_field_names(old_fields) 156 | new_mapping_fields = extract_field_names(new_fields) 157 | shared_fields = old_mapping_fields & new_mapping_fields 158 | 159 | return true if shared_fields != old_mapping_fields 160 | 161 | old_mapping_fields = old_mapping_fields.map do |full_name| 162 | full_name = full_name.split('.').join('.properties.').split('.') 163 | full_name.size == 1 ? full_name.first : full_name 164 | end 165 | new_mapping_fields = new_mapping_fields.map do |full_name| 166 | full_name = full_name.split('.').join('.properties.').split('.') 167 | full_name.size == 1 ? full_name.first : full_name 168 | end 169 | 170 | return true if old_fields.deep_slice(*old_mapping_fields) != new_fields.deep_slice(*new_mapping_fields) 171 | end 172 | 173 | return false 174 | end 175 | 176 | # For now we're only comparing analysis settings 177 | def has_diverging_settings?(schema, index) 178 | old_settings = actual_schemas[index].values.first['settings']['index']['analysis'] rescue {} 179 | new_settings = schema.index.settings.to_hash['settings']['index']['analysis'] rescue {} 180 | new_settings != old_settings 181 | end 182 | 183 | def extract_field_names(mapping, name = '') 184 | mapping.inject([]) do |names, (key, value)| 185 | full_name = name.empty? ? key : "#{name}.#{key}" 186 | 187 | if value.is_a?(Hash) 188 | full_name = name if key == 'properties' 189 | expanded_names = extract_field_names(value, full_name) 190 | else 191 | expanded_names = name 192 | end 193 | 194 | names.concat(Array(expanded_names)) 195 | end.uniq.sort 196 | end 197 | 198 | def documents_count(index, type) 199 | client.count(index: index, type: type)['count'] 200 | end 201 | 202 | def indices_from_alias(alias_name) 203 | client.indices.get_alias(name: alias_name) 204 | end 205 | 206 | def delete_index(index) 207 | puts "Deleting index '#{index}'" 208 | client.indices.delete(index: index) 209 | end 210 | 211 | def create_index(index, body) 212 | puts "Creating index '#{index}'" 213 | 214 | types = body["mappings"].keys rescue [] 215 | types.each { |type| puts "Creating type '#{type}' in index '#{index}'" } 216 | 217 | client.indices.create(index: index, body: body) 218 | end 219 | 220 | def alias_exists?(alias_name) 221 | client.indices.exists_alias(name: alias_name) 222 | end 223 | 224 | def index_exists?(index) 225 | client.indices.exists(index: index) 226 | end 227 | 228 | def put_mapping(index, type, mapping) 229 | puts "Creating/updating type '#{type}' in index '#{index}'" 230 | client.indices.put_mapping(index: index, type: type, body: mapping) 231 | end 232 | 233 | def new_index_name(index) 234 | "#{index}_v#{timestamp}" 235 | end 236 | 237 | # Get all the index/type in ES that diverge from the definitions 238 | def types_to_update 239 | schemas.select do |index_name, schema| 240 | current_schema = fetch_index(index_name) 241 | @actual_schemas[index_name] = current_schema 242 | !has_same_index_structures?(current_schema.values.first || {}, schema.to_hash.values.first) 243 | end 244 | end 245 | 246 | def has_same_index_structures?(old_index_body, new_index_body) 247 | old_index_body = old_index_body.deep_slice('mappings', %w(settings index analysis)) rescue {} 248 | new_index_body = new_index_body.deep_slice('mappings', %w(settings index analysis)) rescue {} 249 | old_index_body == new_index_body 250 | end 251 | 252 | def fetch_index(index) 253 | begin 254 | client.indices.get(index: index) 255 | rescue Elasticsearch::Transport::Transport::Errors::NotFound 256 | {} 257 | end 258 | end 259 | 260 | def schemas 261 | @schemas ||= ElasticSchema::Schema::Definition.definitions 262 | end 263 | end 264 | end 265 | --------------------------------------------------------------------------------