├── .gitignore ├── .rspec ├── .rubocop.yml ├── .travis.yml ├── Gemfile ├── Gemfile.ci ├── Gemfile.dev ├── LICENSE.txt ├── README.md ├── Rakefile ├── _config.yml ├── lib └── sequel │ └── plugins │ ├── elasticsearch.rb │ └── elasticsearch │ ├── result.rb │ └── version.rb ├── sequel-elasticsearch.gemspec └── spec ├── sequel └── plugins │ ├── elasticsearch │ └── result_spec.rb │ └── elasticsearch_spec.rb ├── spec_helper.rb └── support └── scroll_one.json /.gitignore: -------------------------------------------------------------------------------- 1 | /.bundle/ 2 | /.yardoc 3 | /Gemfile.*.lock 4 | /_yardoc/ 5 | /coverage/ 6 | /doc/ 7 | /pkg/ 8 | /spec/reports/ 9 | /tmp/ 10 | /vendor 11 | -------------------------------------------------------------------------------- /.rspec: -------------------------------------------------------------------------------- 1 | --format documentation 2 | --color 3 | --require spec_helper 4 | -------------------------------------------------------------------------------- /.rubocop.yml: -------------------------------------------------------------------------------- 1 | Metrics/LineLength: 2 | Max: 120 3 | Style/Documentation: 4 | Enabled: false 5 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | os: 2 | - linux 3 | dist: xenial 4 | services: 5 | - elasticsearch 6 | language: ruby 7 | rvm: 8 | - 2.4 9 | - 2.5 10 | - 2.6 11 | - 2.7 12 | gemfile: Gemfile.ci 13 | env: 14 | global: 15 | - CC_TEST_REPORTER_ID=f35fd15664b071c621a239733eb3b063caf333ac7e795d3a20690942f64caf62 16 | before_install: 17 | - gem install bundler 18 | before_script: 19 | - curl -L https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64 > ./cc-test-reporter 20 | - chmod +x ./cc-test-reporter 21 | - "./cc-test-reporter before-build" 22 | - export TZ=Africa/Johannesburg 23 | - sleep 10 24 | after_script: 25 | - 
"./cc-test-reporter after-build --exit-code $TRAVIS_TEST_RESULT" 26 | deploy: 27 | provider: rubygems 28 | gem: sequel-elasticsearch 29 | on: 30 | tags: true 31 | repo: jrgns/sequel-elasticsearch 32 | api_key: 33 | secure: xP8zLfBbDTTUpJP8DUL9llfGIGwuTkifmySfoTo7YHOoEC9hFhviT2+PdeoTLywjAIsIwjvghzB2rBERcAPkbu1ahVow9+/0n3jV4UmgRNm2bFcAlIEb8j2gSUlK/ETrIXRBUTUnMg60qhaxIIPd5VQY4eOv2EbmujUqfJgN6xJTTQkiuN4G/UK6Cbhi9d1YDcviY/un9XKIRZQuUvkHQ9vBpK2xJEmisymg0ljK/uj9/dAjKUtyZ7c3QKjcTcQeZ/YKxkKuwaTO5TyHTfKkzUS6M9wS280zbHj3KS98xYX1NTGqDTlq1YPPKAtedhh1+xiCoT50BcIFHmOAymTfvMeGrRQp4fxGOaLMV3q9hP91PAY/Eul3B4gJrAulDahc5xy4TECvM+GdIitNHWpXchH3iE70vWybjFFOTRrf4ftjh20BbeaGJG2FVLD4haC8Y73B6Q8Yf2/qu9Wc+vyXkCXRwiGBl+x1N1gmRoF2w5jyMfEnAJ/7wN4sTcLRvN8PViOKbZ9cNyjgsdwoJoqcH045HJhyRALl5pXeoTANzyPw7Dm2yGyzw2Rnv3TqJD4eeJr8dQyQgR83P7yl/GDrG493QY0Mkl8iMEmS/GFTX6L4p7qqiJqDYBtIuW4/XET2Ui7HrV/5midzTiC5nGNZdHkQkLNMlkIvaSpROXV058I= 34 | after_success: 35 | - bundle exec codeclimate-test-reporter 36 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | source 'https://rubygems.org' 4 | 5 | # Specify your gem's dependencies in sequel-elasticsearch.gemspec 6 | gemspec 7 | -------------------------------------------------------------------------------- /Gemfile.ci: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | # Specify your gem's dependencies in proxes.gemspec 4 | gemspec 5 | 6 | gem 'sqlite3' 7 | gem 'codeclimate' 8 | -------------------------------------------------------------------------------- /Gemfile.dev: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | gem 'elasticsearch' 4 | gem 'rake' 5 | gem 'rspec' 6 | gem 'rubocop' 7 | gem 'sequel' 8 | gem 'simplecov' 9 | gem 'sqlite3' 10 | gem 
'webmock' 11 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2018 Jurgens du Toit 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Sequel::Elasticsearch 2 | 3 | Sequel::Elasticsearch allows you to transparently mirror your database, or specific tables, to Elasticsearch. It's especially useful if you want the power of search through Elasticsearch, but keep the sanity and structure of a relational database. 
4 | 5 | [![Build Status](https://travis-ci.org/jrgns/sequel-elasticsearch.svg?branch=master)](https://travis-ci.org/jrgns/sequel-elasticsearch) 6 | [![Maintainability](https://api.codeclimate.com/v1/badges/ff453fe81303a2fa7c02/maintainability)](https://codeclimate.com/github/jrgns/sequel-elasticsearch/maintainability) 7 | [![Test Coverage](https://api.codeclimate.com/v1/badges/ff453fe81303a2fa7c02/test_coverage)](https://codeclimate.com/github/jrgns/sequel-elasticsearch/test_coverage) 8 | 9 | ## Installation 10 | 11 | Add this line to your application's Gemfile: 12 | 13 | ```ruby 14 | gem 'sequel-elasticsearch' 15 | ``` 16 | 17 | And then execute: 18 | 19 | $ bundle 20 | 21 | Or install it yourself as: 22 | 23 | $ gem install sequel-elasticsearch 24 | 25 | ## Usage 26 | 27 | Require the gem with: 28 | 29 | ```ruby 30 | require 'sequel/plugins/elasticsearch' 31 | ``` 32 | 33 | You'll need an Elasticsearch cluster to sync your data to. By default the gem will try to connect to `http://localhost:9200`. Set the `ELASTICSEARCH_URL` ENV variable to the URL of your cluster. 34 | 35 | This is a Sequel plugin, so you can enable it DB wide: 36 | 37 | ```ruby 38 | Sequel::Model.plugin :elasticsearch 39 | 40 | ``` 41 | 42 | Or per model: 43 | 44 | ```ruby 45 | Document.plugin Sequel::Plugins::Elasticsearch 46 | 47 | # or 48 | 49 | class Document < Sequel::Model 50 | plugin :elasticsearch 51 | end 52 | ``` 53 | 54 | There are a couple of options you can set: 55 | 56 | ```ruby 57 | Sequel::Model.plugin :elasticsearch, 58 | elasticsearch: { log: true }, # Options to pass to the Elasticsearch ruby client 59 | index: 'all-my-data', # The index in which the data should be stored. Defaults to the table name associated with the model 60 | type: 'is-mine' # The type in which the data should be stored. 61 | ``` 62 | 63 | And that's it! Just transact as you normally would, and your records will be created and updated in the Elasticsearch cluster.
64 | 65 | ### Indexing 66 | 67 | Ensure that you create the [index mappings](https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping.html) for your data before using this plugin, otherwise you might get some weird results. 68 | 69 | The records will by default be indexed using the `values` call of the model. Should you need to customize what's indexed, you can define an `indexed_values` method (or `as_indexed_json` method if you prefer the Rails way). 70 | 71 | ### Searching 72 | 73 | Your model is now searchable through Elasticsearch. Just pass down a string that's parsable as a [query string query](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html). 74 | 75 | ```ruby 76 | Document.es('title:Sequel') 77 | Document.es('title:Sequel AND body:Elasticsearch') 78 | ``` 79 | 80 | The result from the `es` method is an enumerable containing `Sequel::Model` instances of your model: 81 | 82 | ```ruby 83 | results = Document.es('title:Sequel') 84 | results.each { |e| p e } 85 | # Outputs 86 | # #<Document @values={:id=>1, :title=>"Sequel", :body=>"Document 1"}> 87 | # #<Document @values={:id=>2, :title=>"Sequel", :body=>"Document 2"}> 88 | ``` 89 | 90 | The result also contains the meta info about the Elasticsearch query result: 91 | 92 | ```ruby 93 | results = Document.es('title:Sequel') 94 | p results.count # The number of documents included in this result 95 | p results.total # The total number of documents in the index that match the search 96 | p results.timed_out # If the search timed out or not 97 | p results.took # How long, in milliseconds, the search took 98 | ``` 99 | 100 | You can also use the scroll API to search and fetch large datasets: 101 | 102 | ```ruby 103 | # Get a dataset that will stay consistent for 5 minutes and extend that time with 1 minute on every iteration 104 | scroll = Document.es('test', scroll: '5m') 105 | p scroll.scroll_id # Outputs the scroll_id for this specific scrolling snapshot 106 | puts "Found #{scroll.count} of
#{scroll.total} documents" 107 | scroll.each { |e| p e } 108 | while (scroll = Document.es(scroll, scroll: '1m')) && scroll.empty? == false do 109 | puts "Found #{scroll.count} of #{scroll.total} documents" 110 | scroll.each { |e| p e } 111 | end 112 | ``` 113 | 114 | ### Import 115 | 116 | You can import the whole dataset, or specify a dataset to be imported. `import!` adds or updates the records in the index you specify, or otherwise in the most recently created index behind the alias (falling back to the model's own index). To create a new, timestamped index, import all the records from the dataset into it, and create (or update) an alias that points to the newly created index, call `reindex!` with the same arguments. 117 | 118 | ```ruby 119 | Document.import! # Import all the Document records. Use the default settings. 120 | 121 | Document.import!(dataset: Document.where(active: true)) # Import all the active Document records 122 | 123 | Document.import!( 124 | index: 'active-documents', # Use the active-documents index 125 | dataset: Document.where(active: true), # Only index active documents 126 | batch_size: 20 # Send documents to Elasticsearch in batches of 20 records 127 | ) 128 | ``` 129 | 130 | ## Development 131 | 132 | After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment. 133 | 134 | To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org). 135 | 136 | ## Contributing 137 | 138 | Bug reports and pull requests are welcome on GitHub at https://github.com/jrgns/sequel-elasticsearch. 139 | 140 | Features that still need to be built: 141 | 142 | - [x] An `es` method to search through the data on the cluster. 143 | - [x] Let `es` return an enumerator of `Sequel::Model` instances.
144 | - [ ] A rake task to create or suggest mappings for a table. 145 | 146 | ## License 147 | 148 | The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT). 149 | 150 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | libdir = File.expand_path(File.dirname(__FILE__) + '/lib') 4 | $LOAD_PATH.unshift(libdir) unless $LOAD_PATH.include?(libdir) 5 | 6 | require 'bundler/gem_tasks' 7 | begin 8 | require 'rspec/core/rake_task' 9 | RSpec::Core::RakeTask.new(:spec) 10 | rescue LoadError 11 | puts 'Did not load RSpec' 12 | end 13 | 14 | task default: :spec 15 | 16 | desc 'Propose mappings based on a Sequel model' 17 | task :sequel_mappings do 18 | end 19 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-architect -------------------------------------------------------------------------------- /lib/sequel/plugins/elasticsearch.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'elasticsearch' 4 | require 'sequel/plugins/elasticsearch/result' 5 | 6 | # Sequel: The Database Toolkit for Ruby 7 | module Sequel 8 | # Sequel Plugins - http://sequel.jeremyevans.net/plugins.html 9 | module Plugins 10 | # The Sequel::Plugins::Elasticsearch model plugin 11 | # 12 | # @example Simple usage 13 | # 14 | # require 'sequel/plugins/elasticsearch' 15 | # Document.plugin Sequel::Plugins::Elasticsearch 16 | # Document.es('test') 17 | # 18 | module Elasticsearch 19 | # Apply the plugin to the specified model 20 | def self.apply(model, _opts = OPTS) 21 | model 22 | end 23 | 24 | # Configure the plugin 25 | def self.configure(model, opts = OPTS) 26 | model.elasticsearch_opts
= opts[:elasticsearch] || {} 27 | model.elasticsearch_index = (opts[:index] || model.table_name.to_s.downcase).to_sym 28 | model.elasticsearch_type = opts[:type]&.to_sym 29 | model 30 | end 31 | 32 | # The class methods that will be added to the Sequel::Model 33 | module ClassMethods 34 | # The extra options that will be passed to the Elasticsearch client. 35 | attr_accessor :elasticsearch_opts 36 | # The Elasticsearch index to which the documents will be written. 37 | attr_accessor :elasticsearch_index 38 | # The Elasticsearch type to which the documents will be written. 39 | attr_accessor :elasticsearch_type 40 | 41 | # Return the Elasticsearch client used to communicate with the cluster. 42 | def es_client 43 | @es_client = ::Elasticsearch::Client.new elasticsearch_opts 44 | end 45 | 46 | # Execute a search on the Model's Elasticsearch index without catching Errors. 47 | def es!(query = '', opts = {}) 48 | opts = { 49 | index: elasticsearch_index, 50 | type: elasticsearch_type 51 | }.merge(opts) 52 | query.is_a?(String) ? opts[:q] = query : opts[:body] = query 53 | Result.new es_client.search(opts), self 54 | end 55 | 56 | # Fetch the next page in a scroll without catching Errors. 57 | def scroll!(scroll_id, duration) 58 | scroll_id = scroll_id.scroll_id if scroll_id.is_a? Result 59 | return nil unless scroll_id 60 | 61 | Result.new es_client.scroll(body: scroll_id, scroll: duration), self 62 | end 63 | 64 | # Execute a search or a scroll on the Model's Elasticsearch index. 65 | # This method is "safe" in that it will catch the more common Errors. 66 | def es(query = '', opts = {}) 67 | call_es { query.is_a?(Result) ? scroll!(query, opts) : es!(query, opts) } 68 | end 69 | 70 | # Wrapper method in which error handling is done for Elasticsearch calls. 
71 | def call_es 72 | yield 73 | rescue ::Elasticsearch::Transport::Transport::Errors::NotFound, 74 | ::Elasticsearch::Transport::Transport::Error, 75 | Faraday::ConnectionFailed => e 76 | db.loggers.first.warn e if db.loggers.count.positive? 77 | nil 78 | end 79 | 80 | # Import the whole dataset into Elasticsearch. 81 | # 82 | # This assumes that a template that covers all the possible index names 83 | # has been created. See +timestamped_index+ for examples of the indices 84 | # that will be created. 85 | # 86 | # This adds or updates records to the last index created by this utility. 87 | # Use the +reindex!+ method to create a completely new index and alias. 88 | def import!(index: nil, dataset: nil, batch_size: 100) 89 | dataset ||= self.dataset 90 | index_name = index || last_index || elasticsearch_index 91 | 92 | # Index all the documents 93 | body = [] 94 | dataset.each_page(batch_size) do |ds| 95 | body = [] 96 | ds.all.each do |row| 97 | print '.' 98 | body << { update: import_object(index_name, row) } 99 | end 100 | puts '/' 101 | es_client.bulk body: body 102 | body = nil 103 | end 104 | end 105 | # Build the bulk +update+ action (an upsert of the row's +as_indexed_json+) for a single row. 106 | def import_object(idx, row) 107 | val = { 108 | _index: idx, 109 | _id: row.document_id, 110 | data: { doc: row.as_indexed_json, doc_as_upsert: true } 111 | } 112 | val[:_type] = elasticsearch_type if elasticsearch_type 113 | val 114 | end 115 | 116 | # Creates a new index in Elasticsearch from the specified dataset, as 117 | # well as an alias to the new index. 118 | # 119 | # See the documentation on +import!+ for more details. 120 | def reindex!(index: nil, dataset: nil, batch_size: 100) 121 | index_name = index || timestamped_index 122 | import!(index: index_name, dataset: dataset, batch_size: batch_size) 123 | 124 | # Create an alias to the newly created index 125 | alias_index(index_name) 126 | end 127 | 128 | # Remove previous aliases and point the `elasticsearch_index` to the new index.
129 | def alias_index(new_index) 130 | es_client.indices.update_aliases body: { 131 | actions: [ 132 | { remove: { index: "#{elasticsearch_index}*", alias: elasticsearch_index } }, 133 | { add: { index: new_index, alias: elasticsearch_index } } 134 | ] 135 | } 136 | end 137 | 138 | # Find the last created index that matches the specified index name. 139 | def last_index 140 | es_client.indices.get_alias(name: elasticsearch_index)&.keys&.sort&.first 141 | rescue ::Elasticsearch::Transport::Transport::Errors::NotFound 142 | nil 143 | end 144 | 145 | # Generate a timestamped index name. 146 | # This will use the current timestamp to construct index names like this: 147 | # 148 | # base-name-20191004.123456 149 | def timestamped_index 150 | time_str = Time.now.strftime('%Y%m%d.%H%M%S') # TODO: Make the format configurable 151 | "#{elasticsearch_index}-#{time_str}".to_sym 152 | end 153 | end 154 | 155 | # The instance methods that will be added to the Sequel::Model 156 | module InstanceMethods 157 | def elasticsearch_index 158 | self.class.elasticsearch_index 159 | end 160 | 161 | def elasticsearch_type 162 | self.class.elasticsearch_type 163 | end 164 | 165 | # Sequel::Model after_create hook to add the new record to the Elasticsearch index. 166 | # It's "safe" in that it won't raise an error if it fails. 167 | def after_create 168 | super 169 | self.class.call_es { _index_document } 170 | end 171 | 172 | # Sequel::Model after_destroy hook to remove the record from the Elasticsearch index. 173 | # It's "safe" in that it won't raise an error if it fails. 174 | def after_destroy 175 | super 176 | self.class.call_es { _destroy_document } 177 | end 178 | 179 | # Sequel::Model after_update hook to update the record in the Elasticsearch index. 180 | # It's "safe" in that it won't raise an error if it fails. 
181 | def after_update 182 | super 183 | self.class.call_es { _index_document } 184 | end 185 | 186 | # Return the Elasticsearch client used to communicate with the cluster. 187 | def es_client 188 | self.class.es_client 189 | end 190 | 191 | # Mirror the Elasticsearch Rails plugin. Use this to override what data 192 | # is sent to Elasticsearch 193 | def as_indexed_json 194 | indexed_values 195 | end 196 | 197 | # Internal reference for index_document. Override this for alternate 198 | # implementations of indexing the document. 199 | def _index_document(opts = {}) 200 | index_document(opts) 201 | end 202 | 203 | # Create or update the document on the Elasticsearch cluster. 204 | def index_document(opts = {}) 205 | params = document_path(opts) 206 | params[:body] = as_indexed_json 207 | es_client.index params 208 | end 209 | 210 | # Internal reference for destroy_document. Override this for alternate 211 | # implementations of removing the document. 212 | def _destroy_document(opts = {}) 213 | destroy_document(opts) 214 | end 215 | 216 | # Remove the document from the Elasticsearch cluster. 217 | def destroy_document(opts = {}) 218 | es_client.delete document_path(opts) 219 | end 220 | 221 | # Determine the complete path to a document (/index/type/id) in the Elasticsearch cluster. 222 | def document_path(opts = {}) 223 | { 224 | index: opts.delete(:index) || elasticsearch_index, 225 | type: opts.delete(:type) || elasticsearch_type, 226 | id: opts.delete(:id) || document_id 227 | } 228 | end 229 | 230 | # Determine the ID to be used for the document in the Elasticsearch cluster. 231 | # It will join the values of a multi field primary key with an underscore. 232 | def document_id 233 | doc_id = pk 234 | doc_id = doc_id.join('_') if doc_id.is_a? 
Array 235 | doc_id 236 | end 237 | 238 | private 239 | 240 | # Values to be indexed 241 | def indexed_values 242 | # TODO: Deprecate this method in favour of as_indexed_json 243 | values.each_key { |k| values[k] = values[k].strftime('%FT%T%:z') if values[k].is_a?(Time) } 244 | end 245 | end 246 | end 247 | end 248 | end 249 | -------------------------------------------------------------------------------- /lib/sequel/plugins/elasticsearch/result.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module Sequel 4 | module Plugins 5 | module Elasticsearch 6 | # A wrapper around Elasticsearch results to make it behave more like a Sequel Dataset. 7 | class Result 8 | include Enumerable 9 | 10 | # The original result returned from the Elasticsearch client 11 | attr_reader :result 12 | # The scroll id, if set, from the result 13 | attr_reader :scroll_id 14 | # The total number of documents in the Elasticsearch result 15 | attr_reader :total 16 | # The time, in miliseconds, the Elasticsearch call took to complete 17 | attr_reader :took 18 | # If the Elasticsearch call timed out or note 19 | attr_reader :timed_out 20 | # The model class associated with this result 21 | attr_reader :model 22 | 23 | # Initialize the Result 24 | # 25 | # * +result+ The result returns from the Elasticsearch client / +.es+ call. 26 | # * +model+ The model class on which the results should be applied. 27 | def initialize(result, model = nil) 28 | return unless result && result['hits'] 29 | 30 | @result = result 31 | @scroll_id = result['_scroll_id'] 32 | @total = result['hits']['total'] 33 | @timed_out = result['timed_out'] 34 | @took = result['took'] 35 | @model = model 36 | 37 | result['hits']['hits'] = result['hits']['hits'].map { |h| convert(h) } 38 | end 39 | 40 | # Each implementation for the Enumerable. Yield each element in the +result['hits']['hits']+ array. 
41 | def each 42 | return [] unless result['hits'] && result['hits']['hits'].count.positive? 43 | 44 | result['hits']['hits'].each { |h| yield h } 45 | end 46 | 47 | # Send back the complete result set 48 | def all 49 | result['hits']['hits'] 50 | end 51 | 52 | # Send all undefined methods to the +result['hits']['hits']+ array. 53 | def method_missing(meth, *args, &block) 54 | respond_to_missing?(meth) ? result['hits']['hits'].send(meth, *args, &block) : super 55 | end 56 | 57 | # Send all undefined methods to the +result['hits']['hits']+ array. 58 | def respond_to_missing?(meth, include_private = false) 59 | result['hits']['hits'].respond_to?(meth, include_private) || super 60 | end 61 | 62 | private 63 | 64 | # Convert an Elasticsearch hit to a Sequel::Model 65 | def convert(hit) 66 | return hit unless model 67 | 68 | source = hit['_source'].each_with_object({}) { |(k, v), h| h[k.to_sym] = v } 69 | model.call source 70 | end 71 | end 72 | end 73 | end 74 | end 75 | -------------------------------------------------------------------------------- /lib/sequel/plugins/elasticsearch/version.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module Sequel 4 | # The Sequel::Elasticsearch model plugin 5 | # 6 | # See https://jrgns.github.io/sequel-elasticsearch 7 | module Elasticsearch 8 | # The Gem's version. 
9 | VERSION = '0.4.13' 10 | end 11 | end 12 | -------------------------------------------------------------------------------- /sequel-elasticsearch.gemspec: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | lib = File.expand_path('lib', __dir__) 4 | $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) 5 | require 'sequel/plugins/elasticsearch/version' 6 | 7 | Gem::Specification.new do |spec| 8 | spec.name = 'sequel-elasticsearch' 9 | spec.version = Sequel::Elasticsearch::VERSION 10 | spec.authors = ['Jurgens du Toit'] 11 | spec.email = ['jrgns@jadeit.co.za'] 12 | 13 | spec.summary = 'A plugin for the Sequel gem to sync data to Elasticsearch.' 14 | spec.description = 'A plugin for the Sequel gem to sync data to Elasticsearch.' 15 | spec.homepage = 'https://github.com/jrgns/sequel-elasticsearch' 16 | spec.license = 'MIT' 17 | 18 | spec.files = `git ls-files -z`.split("\x0").reject do |f| 19 | f.match(%r{^(test|spec|features)/}) 20 | end 21 | spec.bindir = 'exe' 22 | spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) } 23 | spec.require_paths = ['lib'] 24 | 25 | spec.add_dependency 'elasticsearch', '>= 1.0' 26 | spec.add_dependency 'sequel', '>= 4.0' 27 | 28 | spec.add_development_dependency 'bundler', '>= 1.13' 29 | spec.add_development_dependency 'rake', '~> 12.3.3' 30 | spec.add_development_dependency 'rspec', '~> 3.0' 31 | spec.add_development_dependency 'rubocop', '>= 0.52' 32 | spec.add_development_dependency 'rubocop-rspec', '>= 1.37' 33 | spec.add_development_dependency 'simplecov', '~> 0.15', '< 0.18' 34 | spec.add_development_dependency 'sqlite3', '~> 1.4' 35 | spec.add_development_dependency 'timecop', '>= 0.9' 36 | spec.add_development_dependency 'webmock', '~> 3.2' 37 | end 38 | -------------------------------------------------------------------------------- /spec/sequel/plugins/elasticsearch/result_spec.rb: 
-------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'json' 4 | require 'sequel' 5 | require 'elasticsearch' 6 | require 'sequel/plugins/elasticsearch' 7 | 8 | describe Sequel::Plugins::Elasticsearch::Result do 9 | def fixture(name) 10 | File.read("spec/support/#{name}") 11 | end 12 | 13 | let(:result) do 14 | { 15 | 'took' => 234, 16 | 'timed_out' => false, 17 | 'hits' => { 18 | 'hits' => [{ one: 'one', two: 'two' }, { one: 'three', two: 'four' }], 19 | 'total' => 2 20 | } 21 | } 22 | end 23 | 24 | let(:scroll_result) { result.merge('_scroll_id' => '123scrollid') } 25 | 26 | describe '.new' do 27 | let(:es_result) do 28 | described_class.new(result) 29 | end 30 | 31 | it 'creates an enumerable' do 32 | expect(es_result).to be_a Enumerable 33 | end 34 | 35 | it 'handles an empty result' do 36 | expect { described_class.new(nil) }.not_to raise_error 37 | end 38 | 39 | it 'sets the result total property' do 40 | expect(es_result.total).not_to be nil 41 | expect(es_result.total).to eq result['hits']['total'] 42 | end 43 | 44 | it 'sets the result timed_out property' do 45 | expect(es_result.timed_out).not_to be nil 46 | expect(es_result.timed_out).to eq result['timed_out'] 47 | end 48 | 49 | it 'sets the result took property' do 50 | expect(es_result.took).not_to be nil 51 | expect(es_result.took).to eq result['took'] 52 | end 53 | 54 | it 'accesses the enumerable elements correctly' do 55 | expect(es_result).to include one: 'one', two: 'two' 56 | expect(es_result).to include one: 'three', two: 'four' 57 | expect(es_result).not_to include one: 'five', two: 'six' 58 | end 59 | 60 | it 'reports the size of the hits array correctly' do 61 | expect(es_result.count).to eq result['hits']['hits'].count 62 | end 63 | end 64 | 65 | describe '#method_missing' do 66 | let(:es_result) do 67 | described_class.new(result) 68 | end 69 | 70 | it 'sends all methods to the hits array' do 71 | 
expect(es_result.count).to eq 2 72 | end 73 | end 74 | 75 | context 'scrollable' do 76 | let(:es_result) do 77 | described_class.new(scroll_result) 78 | end 79 | 80 | it 'sets the result scroll_id property' do 81 | expect(es_result.scroll_id).not_to be nil 82 | expect(es_result.scroll_id).to eq scroll_result['_scroll_id'] 83 | end 84 | 85 | it 'iterates through the whole result set' do 86 | skip 'feature still pending' 87 | stub_request(:get, 'http://localhost:9200/_search?q=test&scroll=1m&size=2') 88 | .to_return(status: 200, body: fixture('scroll_one.json'), headers: { 'Content-Type' => 'application/json' }) 89 | stub_request(:get, 'http://localhost:9200/_search/scroll') 90 | .to_return(status: 200, body: fixture('scroll_two.json'), headers: { 'Content-Type' => 'application/json' }) 91 | 92 | client = Elasticsearch::Client.new 93 | result = described_class.new client.search(q: 'test', scroll: '1m', size: 2) 94 | expect(result.total).to eq 5 95 | expect(result.count).to eq 2 96 | expect(result.map { |e| e }.count).to eq 5 97 | end 98 | end 99 | end 100 | -------------------------------------------------------------------------------- /spec/sequel/plugins/elasticsearch_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen-string-literal: true 2 | 3 | require 'sequel' 4 | require 'sequel/plugins/elasticsearch' 5 | require 'sequel/plugins/elasticsearch/result' 6 | require 'timecop' 7 | 8 | # rubocop: disable Metrics/BlockLength 9 | describe Sequel::Plugins::Elasticsearch do 10 | before(:all) do 11 | DB.create_table!(:documents) do 12 | primary_key :id 13 | String :title 14 | String :content, text: true 15 | Integer :views 16 | TrueClass :active 17 | DateTime :created_at 18 | end 19 | 20 | DB.create_table!(:complex_documents) do 21 | Integer :one 22 | Integer :two 23 | primary_key %i[one two] 24 | String :title 25 | String :content, text: true 26 | end 27 | end 28 | 29 | let(:model) do 30 | 
Class.new(Sequel::Model(:documents)) 31 | end 32 | 33 | describe '.configure' do 34 | it 'defaults to the model table name for the index' do 35 | model.plugin :elasticsearch 36 | expect(model.send(:elasticsearch_index)).to eq :documents 37 | end 38 | 39 | it 'allows you to specify the index' do 40 | model.plugin :elasticsearch, index: :customIndex 41 | expect(model.elasticsearch_index).to eq :customIndex 42 | end 43 | 44 | it 'uses the specified index' do 45 | model.plugin :elasticsearch, index: :customIndex 46 | stub_request(:put, %r{http://localhost:9200/customIndex/_doc/\d+}) 47 | doc = model.new.save 48 | expect(WebMock).to have_requested(:put, "http://localhost:9200/customIndex/_doc/#{doc.id}") 49 | end 50 | 51 | it 'only uses type if given' do 52 | model.plugin :elasticsearch 53 | expect(model.send(:elasticsearch_type)).to be_nil 54 | end 55 | 56 | it 'allows you to specify the type' do 57 | model.plugin :elasticsearch, type: :customType 58 | expect(model.send(:elasticsearch_type)).to eq :customType 59 | end 60 | 61 | it 'uses the specified type' do 62 | model.plugin :elasticsearch, type: :customType 63 | WebMock.allow_net_connect! 64 | doc = model.new.save 65 | expect(WebMock).to have_requested(:put, "http://localhost:9200/#{model.table_name}/customType/#{doc.id}") 66 | end 67 | 68 | it 'uses the default type' do 69 | model.plugin :elasticsearch 70 | WebMock.allow_net_connect! 71 | doc = model.new(content: Time.now).save 72 | expect(WebMock).to have_requested(:put, "http://localhost:9200/#{model.table_name}/_doc/#{doc.id}") 73 | end 74 | 75 | it 'allows you to pass down Elasticsearch client options' do 76 | model.plugin :elasticsearch, elasticsearch: { log: true } 77 | expect(model.new.es_client.transport.options).to include log: true 78 | end 79 | end 80 | 81 | describe 'ClassMethods' do 82 | describe '.es' do 83 | before do 84 | WebMock.allow_net_connect! 
85 | model.plugin :elasticsearch 86 | end 87 | 88 | it 'does a basic query string search' do 89 | model.es('test') 90 | expect(WebMock).to have_requested(:get, 'http://localhost:9200/documents/_search?q=test') 91 | end 92 | 93 | it 'does a complex query search' do 94 | model.es(query: { match: { title: 'test' } }) 95 | expect(WebMock) 96 | .to have_requested(:post, 'http://localhost:9200/documents/_search') 97 | .with(body: '{"query":{"match":{"title":"test"}}}') 98 | end 99 | 100 | it 'handles not found exceptions' do 101 | expect { model.es('test') }.not_to raise_error 102 | stub_request(:get, %r{http://localhost:9200/documents/_search.*}) 103 | .to_return(status: 404) 104 | end 105 | 106 | it 'handles connection failed exceptions' do 107 | stub_request(:get, %r{http://localhost:9200/documents/_search.*}) 108 | allow(Faraday::Connection).to receive(:get).and_raise(Faraday::ConnectionFailed) 109 | expect { model.es('test') }.not_to raise_error 110 | end 111 | 112 | it 'returns an enumerable' do 113 | stub_request(:get, %r{http://localhost:9200/documents/_search.*}) 114 | expect(model.es('test')).to be_a Enumerable 115 | end 116 | 117 | it 'handles scroll requests' do 118 | stub = stub_request(:get, 'http://localhost:9200/documents/_search?q=test&scroll=1m') 119 | model.es('test', scroll: '1m') 120 | expect(stub).to have_been_requested.once 121 | end 122 | 123 | it 'handles scroll results' 124 | end 125 | 126 | describe '.es!' do 127 | it 'does not handle exceptions' do 128 | stub_request(:get, %r{http://localhost:9200/documents/_search.*}) 129 | .to_return(status: 500) 130 | model.plugin :elasticsearch 131 | expect { model.es!('test') }.to raise_error Elasticsearch::Transport::Transport::Error 132 | end 133 | end 134 | 135 | describe '.scroll!' 
do 136 | before do 137 | model.plugin :elasticsearch 138 | end 139 | 140 | it 'accepts a scroll_id' do 141 | stub = stub_request(:post, 'http://localhost:9200/_search/scroll?scroll%5Bscroll%5D=1m') 142 | 143 | model.scroll!('somescrollid', scroll: '1m') 144 | expect(stub).to have_been_requested.once 145 | end 146 | 147 | it 'accepts a Result' do 148 | result = Sequel::Plugins::Elasticsearch::Result.new('_scroll_id' => 'somescrollid') 149 | allow(result).to receive(:scroll_id).and_return('somescrollid') 150 | stub = stub_request(:post, 'http://localhost:9200/_search/scroll?scroll%5Bscroll%5D=1m') 151 | .to_return(status: 200) 152 | 153 | model.scroll!(result, scroll: '1m') 154 | 155 | expect(stub).to have_been_requested.once 156 | end 157 | 158 | it 'does not handle exceptions' do 159 | stub_request(:get, 'http://localhost:9200/_search/scroll?scroll=1m&scroll_id=somescrollid') 160 | .to_return(status: 500) 161 | expect { model.scroll!('somescrollid', '1m') }.to raise_error Elasticsearch::Transport::Transport::Error # Getting Faraday::ConnectionFailed ?? 
162 | end 163 | end 164 | 165 | describe '.timestamped_index' do 166 | it 'returns the index appended with a timestamp' do 167 | model.plugin :elasticsearch 168 | Timecop.freeze(Time.local(2019, 12, 4, 21, 26, 12)) do 169 | expect(model.timestamped_index).to eq :'documents-20191204.212612' 170 | end 171 | end 172 | end 173 | end 174 | 175 | describe 'InstanceMethods' do 176 | let(:simple_doc) do 177 | @subj ||= begin 178 | subj = Class.new(Sequel::Model(:documents)) 179 | subj.plugin :elasticsearch 180 | subj 181 | end 182 | end 183 | 184 | let(:complex_doc) do 185 | @subj ||= begin 186 | subj = Class.new(Sequel::Model(:complex_documents)) 187 | subj.plugin :elasticsearch 188 | subj 189 | end 190 | end 191 | 192 | describe '#es_client' do 193 | it 'returns an Elasticsearch Transport Client' do 194 | expect(simple_doc.new.send(:es_client)).to be_a Elasticsearch::Transport::Client 195 | end 196 | end 197 | 198 | describe '#document_id' do 199 | it 'returns the value of the primary key for simple primary keys' do 200 | stub_request(:put, %r{http://localhost:9200/documents/_doc/\d+}) 201 | doc = simple_doc.new.save 202 | expect(doc.send(:document_id)).to eq doc.id 203 | end 204 | 205 | it 'returns the value of the primary key for composite primary keys' do 206 | complex_doc.insert(one: 1, two: 2) 207 | doc = complex_doc.first 208 | expect(doc.send(:document_id)).to eq "#{doc.one}_#{doc.two}" 209 | end 210 | end 211 | 212 | describe '#as_indexed_json' do 213 | let(:doc) do 214 | simple_doc.new( 215 | title: 'title', 216 | content: 'content', 217 | views: 4, 218 | active: true, 219 | created_at: Time.parse('2018-02-07T22:18:42+02:00') 220 | ) 221 | end 222 | 223 | it 'correctly formats dates and other types' do 224 | expect(doc.as_indexed_json).to include( 225 | title: 'title', content: 'content', views: 4, active: true, created_at: '2018-02-07T22:18:42+02:00' 226 | ) 227 | end 228 | 229 | it 'can be extended' do 230 | doc = simple_doc.new 231 | def doc.as_indexed_json 
232 | { test: 'this' } 233 | end 234 | expect(doc.as_indexed_json).to include(test: 'this') 235 | end 236 | end 237 | 238 | describe '#document_path' do 239 | it 'returns the document index, type and id for documents' do 240 | stub_request(:put, %r{http://localhost:9200/documents/_doc/\d+}) 241 | doc = simple_doc.new.save 242 | expect(doc.document_path).to include index: simple_doc.table_name 243 | expect(doc.document_path).to include id: doc.id 244 | end 245 | end 246 | 247 | describe '#save' do 248 | it 'indexes the document using the document path and model values' do 249 | stub_request(:put, %r{http://localhost:9200/documents/_doc/\d+}) 250 | doc = simple_doc.new.save 251 | expect(WebMock) 252 | .to have_requested(:put, "http://localhost:9200/#{simple_doc.table_name}/_doc/#{doc.id}") 253 | end 254 | end 255 | 256 | describe '#update' do 257 | let(:doc) do 258 | doc = simple_doc.new.save 259 | doc.title = 'updated' 260 | doc.save 261 | doc 262 | end 263 | 264 | it 'indexes the document using the document path and model values' do 265 | stub_request(:put, %r{http://localhost:9200/documents/_doc/\d+}) 266 | expect(WebMock) 267 | .to have_requested(:put, "http://localhost:9200/#{simple_doc.table_name}/_doc/#{doc.id}") 268 | .times(2) 269 | end 270 | end 271 | 272 | describe '#destroy' do 273 | let(:id) do 274 | doc = simple_doc.new.save 275 | id = doc.pk 276 | doc.destroy 277 | id 278 | end 279 | 280 | it 'destroys the document using the document path' do 281 | stub_request(:put, %r{http://localhost:9200/documents/_doc/\d+}) 282 | stub_request(:delete, %r{http://localhost:9200/documents/_doc/\d+}) 283 | expect(WebMock) 284 | .to have_requested(:delete, "http://localhost:9200/#{simple_doc.table_name}/_doc/#{id}") 285 | end 286 | end 287 | end 288 | end 289 | # rubocop: enable Metrics/BlockLength 290 | -------------------------------------------------------------------------------- /spec/spec_helper.rb: 
-------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | ENV['RACK_ENV'] ||= 'test' 4 | ENV['DATABASE_URL'] ||= 'sqlite::memory:' 5 | ENV['ELASTICSEARCH_URL'] ||= 'http://localhost:9200' 6 | 7 | require 'sequel' 8 | require 'webmock/rspec' 9 | require 'simplecov' 10 | SimpleCov.start 11 | 12 | DB = Sequel.connect ENV['DATABASE_URL'] 13 | 14 | RSpec.configure do |config| 15 | config.filter_run :focus 16 | config.run_all_when_everything_filtered = true 17 | end 18 | -------------------------------------------------------------------------------- /spec/support/scroll_one.json: -------------------------------------------------------------------------------- 1 | { 2 | "_scroll_id": "DnF1ZXJ5VGhlbkZldGNoCgAAAAAAAACLFnpFbXRybktRUndPbnlkQ1BkcUpOR1EAAAAAAAAAkRZ6RW10cm5LUVJ3T255ZENQZHFKTkdRAAAAAAAAAJIWekVtdHJuS1FSd09ueWRDUGRxSk5HUQAAAAAAAACTFnpFbXRybktRUndPbnlkQ1BkcUpOR1EAAAAAAAAAlBZ6RW10cm5LUVJ3T255ZENQZHFKTkdRAAAAAAAAAIwWekVtdHJuS1FSd09ueWRDUGRxSk5HUQAAAAAAAACNFnpFbXRybktRUndPbnlkQ1BkcUpOR1EAAAAAAAAAjhZ6RW10cm5LUVJ3T255ZENQZHFKTkdRAAAAAAAAAI8WekVtdHJuS1FSd09ueWRDUGRxSk5HUQAAAAAAAACQFnpFbXRybktRUndPbnlkQ1BkcUpOR1E=", 3 | "_shards": { 4 | "failed": 0, 5 | "skipped": 0, 6 | "successful": 10, 7 | "total": 10 8 | }, 9 | "hits": { 10 | "hits": [ 11 | { 12 | "_id": "AV-3Skh5ek-DGROVf5Zo", 13 | "_index": "logstash-2017.11.13", 14 | "_score": 1.0, 15 | "_source": { 16 | "@timestamp": "2017-11-13T21:30:19.247Z", 17 | "@version": "1", 18 | "host": "somehost", 19 | "message": "I, [2017-11-13T22:18:01.997727 #5298] INFO -- : Message 1", 20 | "path": "/home/jrgns/Code/EagerELK/proxes/logs/proxes.log" 21 | }, 22 | "_type": "logs" 23 | }, 24 | { 25 | "_id": "AV-3Skh5ek-DGROVf5Zr", 26 | "_index": "logstash-2017.11.13", 27 | "_score": 1.0, 28 | "_source": { 29 | "@timestamp": "2017-11-13T21:30:19.247Z", 30 | "@version": "1", 31 | "host": "somehost", 32 | "message": "I, [2017-11-13T22:18:02.205017 #5298] INFO -- : Message 2", 
33 | "path": "/home/jrgns/Code/EagerELK/proxes/logs/proxes.log" 34 | }, 35 | "_type": "logs" 36 | } 37 | ], 38 | "max_score": 1.0, 39 | "total": 5 40 | }, 41 | "timed_out": false, 42 | "took": 40 43 | } 44 | --------------------------------------------------------------------------------