├── .gitignore
├── .rspec
├── .rubocop.yml
├── .travis.yml
├── Gemfile
├── Gemfile.ci
├── Gemfile.dev
├── LICENSE.txt
├── README.md
├── Rakefile
├── _config.yml
├── lib
    └── sequel
    │   └── plugins
    │       ├── elasticsearch.rb
    │       └── elasticsearch
    │           ├── result.rb
    │           └── version.rb
├── sequel-elasticsearch.gemspec
└── spec
    ├── sequel
        └── plugins
        │   ├── elasticsearch
        │       └── result_spec.rb
        │   └── elasticsearch_spec.rb
    ├── spec_helper.rb
    └── support
        └── scroll_one.json


/.gitignore:
--------------------------------------------------------------------------------
 1 | /.bundle/
 2 | /.yardoc
 3 | /Gemfile.*.lock
 4 | /_yardoc/
 5 | /coverage/
 6 | /doc/
 7 | /pkg/
 8 | /spec/reports/
 9 | /tmp/
10 | /vendor
11 | 


--------------------------------------------------------------------------------
/.rspec:
--------------------------------------------------------------------------------
1 | --format documentation
2 | --color
3 | --require spec_helper
4 | 


--------------------------------------------------------------------------------
/.rubocop.yml:
--------------------------------------------------------------------------------
1 | Metrics/LineLength:
2 |   Max: 120
3 | Style/Documentation:
4 |   Enabled: false
5 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | os:
 2 | - linux
 3 | dist: xenial
 4 | services:
 5 | - elasticsearch
 6 | language: ruby
 7 | rvm:
 8 | - '2.4'
 9 | - '2.5'
10 | - '2.6'
11 | - '2.7'
12 | gemfile: Gemfile.ci
13 | env:
14 |   global:
15 |   - CC_TEST_REPORTER_ID=f35fd15664b071c621a239733eb3b063caf333ac7e795d3a20690942f64caf62
16 | before_install:
17 | - gem install bundler
18 | before_script:
19 | - curl -L https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64 > ./cc-test-reporter
20 | - chmod +x ./cc-test-reporter
21 | - "./cc-test-reporter before-build"
22 | - export TZ=Africa/Johannesburg
23 | - sleep 10
24 | after_script:
25 | - "./cc-test-reporter after-build --exit-code $TRAVIS_TEST_RESULT"
26 | deploy:
27 |   provider: rubygems
28 |   gem: sequel-elasticsearch
29 |   on:
30 |     tags: true
31 |     repo: jrgns/sequel-elasticsearch
32 |   api_key:
33 |     secure: xP8zLfBbDTTUpJP8DUL9llfGIGwuTkifmySfoTo7YHOoEC9hFhviT2+PdeoTLywjAIsIwjvghzB2rBERcAPkbu1ahVow9+/0n3jV4UmgRNm2bFcAlIEb8j2gSUlK/ETrIXRBUTUnMg60qhaxIIPd5VQY4eOv2EbmujUqfJgN6xJTTQkiuN4G/UK6Cbhi9d1YDcviY/un9XKIRZQuUvkHQ9vBpK2xJEmisymg0ljK/uj9/dAjKUtyZ7c3QKjcTcQeZ/YKxkKuwaTO5TyHTfKkzUS6M9wS280zbHj3KS98xYX1NTGqDTlq1YPPKAtedhh1+xiCoT50BcIFHmOAymTfvMeGrRQp4fxGOaLMV3q9hP91PAY/Eul3B4gJrAulDahc5xy4TECvM+GdIitNHWpXchH3iE70vWybjFFOTRrf4ftjh20BbeaGJG2FVLD4haC8Y73B6Q8Yf2/qu9Wc+vyXkCXRwiGBl+x1N1gmRoF2w5jyMfEnAJ/7wN4sTcLRvN8PViOKbZ9cNyjgsdwoJoqcH045HJhyRALl5pXeoTANzyPw7Dm2yGyzw2Rnv3TqJD4eeJr8dQyQgR83P7yl/GDrG493QY0Mkl8iMEmS/GFTX6L4p7qqiJqDYBtIuW4/XET2Ui7HrV/5midzTiC5nGNZdHkQkLNMlkIvaSpROXV058I=
34 | after_success:
35 | - bundle exec codeclimate-test-reporter
36 | 


--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 | 
3 | source 'https://rubygems.org'
4 | 
5 | # Specify your gem's dependencies in sequel-elasticsearch.gemspec
6 | gemspec
7 | 


--------------------------------------------------------------------------------
/Gemfile.ci:
--------------------------------------------------------------------------------
1 | source 'https://rubygems.org'
2 | 
3 | # Specify your gem's dependencies in sequel-elasticsearch.gemspec
4 | gemspec
5 | 
6 | gem 'sqlite3'
7 | gem 'codeclimate'
8 | 


--------------------------------------------------------------------------------
/Gemfile.dev:
--------------------------------------------------------------------------------
 1 | source 'https://rubygems.org'
 2 | 
 3 | gem 'elasticsearch'
 4 | gem 'rake'
 5 | gem 'rspec'
 6 | gem 'rubocop'
 7 | gem 'sequel'
 8 | gem 'simplecov'
 9 | gem 'sqlite3'
10 | gem 'webmock'
11 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2018 Jurgens du Toit
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Sequel::Elasticsearch
  2 | 
  3 | Sequel::Elasticsearch allows you to transparently mirror your database, or specific tables, to Elasticsearch. It's especially useful if you want the power of search through Elasticsearch, but keep the sanity and structure of a relational database.
  4 | 
  5 | [![Build Status](https://travis-ci.org/jrgns/sequel-elasticsearch.svg?branch=master)](https://travis-ci.org/jrgns/sequel-elasticsearch)
  6 | [![Maintainability](https://api.codeclimate.com/v1/badges/ff453fe81303a2fa7c02/maintainability)](https://codeclimate.com/github/jrgns/sequel-elasticsearch/maintainability)
  7 | [![Test Coverage](https://api.codeclimate.com/v1/badges/ff453fe81303a2fa7c02/test_coverage)](https://codeclimate.com/github/jrgns/sequel-elasticsearch/test_coverage)
  8 | 
  9 | ## Installation
 10 | 
 11 | Add this line to your application's Gemfile:
 12 | 
 13 | ```ruby
 14 | gem 'sequel-elasticsearch'
 15 | ```
 16 | 
 17 | And then execute:
 18 | 
 19 |     $ bundle
 20 | 
 21 | Or install it yourself as:
 22 | 
 23 |     $ gem install sequel-elasticsearch
 24 | 
 25 | ## Usage
 26 | 
 27 | Require the gem with:
 28 | 
 29 | ```ruby
 30 | require 'sequel/plugins/elasticsearch'
 31 | ```
 32 | 
 33 | You'll need an Elasticsearch cluster to sync your data to. By default the gem will try to connect to `http://localhost:9200`. Set the `ELASTICSEARCH_URL` ENV variable to the URL of your cluster.
 34 | 
 35 | This is a Sequel plugin, so you can enable it DB wide:
 36 | 
 37 | ```ruby
 38 | Sequel::Model.plugin :elasticsearch
 39 | 
 40 | ```
 41 | 
 42 | Or per model:
 43 | 
 44 | ```ruby
 45 | Document.plugin Sequel::Plugins::Elasticsearch
 46 | 
 47 | # or
 48 | 
 49 | class Document < Sequel::Model
 50 |   plugin :elasticsearch
 51 | end
 52 | ```
 53 | 
 54 | There's a couple of options you can set:
 55 | 
 56 | ```ruby
 57 | Sequel::Model.plugin :elasticsearch,
 58 |   elasticsearch: { log: true }, # Options to pass to the Elasticsearch ruby client
 59 |   index: 'all-my-data', # The index in which the data should be stored. Defaults to the table name associated with the model
 60 |   type: 'is-mine' # The type in which the data should be stored.
 61 | ```
 62 | 
 63 | And that's it! Just transact as you normally would, and your records will be created and updated in the Elasticsearch cluster.
 64 | 
 65 | ### Indexing
 66 | 
 67 | Ensure that you create the [index mappings](https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping.html) for your data before using this plugin, otherwise you might get some weird results.
 68 | 
 69 | The records will by default be indexed using the `values` call of the model. Should you need to customize what's indexed, you can define an `indexed_values` method (or `as_indexed_json` method if you prefer the Rails way).
 70 | 
 71 | ### Searching
 72 | 
 73 | Your model is now searchable through Elasticsearch. Just pass down a string that's parsable as a [query string query](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html).
 74 | 
 75 | ```ruby
 76 | Document.es('title:Sequel')
 77 | Document.es('title:Sequel AND body:Elasticsearch')
 78 | ```
 79 | 
 80 | The result from the `es` method is an enumerable containing `Sequel::Model` instances of your model:
 81 | 
 82 | ```ruby
 83 | results = Document.es('title:Sequel')
 84 | results.each { |e| p e }
 85 | # Outputs
 86 | # #<Document @values={:id=>1, :title=>"Sequel", :body=>"Document 1"}>
 87 | # #<Document @values={:id=>2, :title=>"Sequel", :body=>"Document 2"}>
 88 | ```
 89 | 
 90 | The result also contains the meta info about the Elasticsearch query result:
 91 | 
 92 | ```ruby
 93 | results = Document.es('title:Sequel')
 94 | p results.count # The number of documents included in this result
 95 | p results.total # The total number of documents in the index that matches the search
 96 | p results.timed_out # If the search timed out or not
 97 | p results.took # How long, in milliseconds, the search took
 98 | ```
 99 | 
100 | You can also use the scroll API to search and fetch large datasets:
101 | 
102 | ```ruby
103 | # Get a dataset that will stay consistent for 5 minutes and extend that time with 1 minute on every iteration
104 | scroll = Document.es('test', scroll: '5m')
105 | p scroll.scroll_id # Outputs the scroll_id for this specific scrolling snapshot
106 | puts "Found #{scroll.count} of #{scroll.total} documents"
107 | scroll.each { |e| p e }
108 | while (scroll = Document.es(scroll, scroll: '1m')) && scroll.empty? == false do
109 |   puts "Found #{scroll.count} of #{scroll.total} documents"
110 |   scroll.each { |e| p e }
111 | end
112 | ```
113 | 
114 | ### Import
115 | 
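116 | You can import the whole dataset, or specify a dataset to be imported. `import!` adds or updates the records in the index that the model's alias currently points to (or in the model's own index if there is no alias yet). Use `reindex!`, which takes the same arguments, to create a new, timestamped index for your dataset, import all the records from that dataset into it, and create (or update) an alias that points to the newly created index.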
117 | 
118 | ```ruby
119 | Document.import! # Import all the Document records. Use the default settings.
120 | 
121 | Document.import!(dataset: Document.where(active: true)) # Import all the active Document records
122 | 
123 | Document.import!(
124 |     index: 'active-documents', # Use the active-documents index
125 |     dataset: Document.where(active: true), # Only index active documents
126 |     batch_size: 20 # Send documents to Elasticsearch in batches of 20 records
127 | )
128 | ```
129 | 
130 | ## Development
131 | 
132 | After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
133 | 
134 | To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
135 | 
136 | ## Contributing
137 | 
138 | Bug reports and pull requests are welcome on GitHub at https://github.com/jrgns/sequel-elasticsearch.
139 | 
140 | Features that still need to be built:
141 | 
142 | - [x] An `es` method to search through the data on the cluster.
143 | - [x] Let `es` return an enumerator of `Sequel::Model` instances.
144 | - [ ] A rake task to create or suggest mappings for a table.
145 | 
146 | ## License
147 | 
148 | The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
149 | 
150 | 


--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | libdir = File.expand_path(File.dirname(__FILE__) + '/lib')
 4 | $LOAD_PATH.unshift(libdir) unless $LOAD_PATH.include?(libdir)
 5 | 
 6 | require 'bundler/gem_tasks'
 7 | begin
 8 |   require 'rspec/core/rake_task'
 9 |   RSpec::Core::RakeTask.new(:spec)
10 | rescue LoadError
11 |   puts 'Did not load RSpec'
12 | end
13 | 
14 | task default: :spec
15 | 
16 | desc 'Propose mappings based on a Sequel model'
17 | task :sequel_mappings do
18 | end
19 | 


--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-architect


--------------------------------------------------------------------------------
/lib/sequel/plugins/elasticsearch.rb:
--------------------------------------------------------------------------------
  1 | # frozen_string_literal: true
  2 | 
  3 | require 'elasticsearch'
  4 | require 'sequel/plugins/elasticsearch/result'
  5 | 
  6 | # Sequel: The Database Toolkit for Ruby
  7 | module Sequel
  8 |   # Sequel Plugins - http://sequel.jeremyevans.net/plugins.html
  9 |   module Plugins
 10 |     # The Sequel::Elasticsearch model plugin
 11 |     #
 12 |     # @example Simple usage
 13 |     #
 14 |     #     require 'sequel/plugins/elasticsearch'
 15 |     #     Document.plugin :elasticsearch
 16 |     #     Document.es('test')
 17 |     #
 18 |     module Elasticsearch
 19 |       # Apply the plugin to the specified model. No setup happens here; the model is returned as is.
 20 |       def self.apply(model, _opts = OPTS)
 21 |         model
 22 |       end
 23 | 
 24 |       # Configure the plugin: store the client options, index (default: the table name) and type on the model.
 25 |       def self.configure(model, opts = OPTS)
 26 |         model.elasticsearch_opts = opts[:elasticsearch] || {}
 27 |         model.elasticsearch_index = (opts[:index] || model.table_name.to_s.downcase).to_sym
 28 |         model.elasticsearch_type = opts[:type]&.to_sym
 29 |         model
 30 |       end
 31 | 
 32 |       # The class methods that will be added to the Sequel::Model
 33 |       module ClassMethods
 34 |         # The extra options that will be passed to the Elasticsearch client.
 35 |         attr_accessor :elasticsearch_opts
 36 |         # The Elasticsearch index to which the documents will be written.
 37 |         attr_accessor :elasticsearch_index
 38 |         # The Elasticsearch type to which the documents will be written.
 39 |         attr_accessor :elasticsearch_type
 40 | 
 41 |         # Return the Elasticsearch client used to communicate with the cluster.
 42 |         # NOTE(review): a new client is built from +elasticsearch_opts+ on every call.
 43 |         def es_client
 44 |           @es_client = ::Elasticsearch::Client.new elasticsearch_opts
 45 |         end
 46 | 
 47 |         # Execute a search on the Model's Elasticsearch index without catching Errors.
 48 |         # A String query is sent as the +q+ parameter, anything else as the request +body+.
 49 |         def es!(query = '', opts = {})
 50 |           opts = { index: elasticsearch_index, type: elasticsearch_type }.merge(opts)
 51 |           query.is_a?(String) ? opts[:q] = query : opts[:body] = query
 52 |           Result.new es_client.search(opts), self
 53 |         end
 54 | 
 55 |         # Fetch the next page in a scroll without catching Errors. +scroll_id+ is a scroll id
 56 |         # or a previous Result. +duration+ is the keep-alive (e.g. '1m') or an options Hash
 57 |         # such as +{ scroll: '1m' }+ (what +es+ passes on), which is unwrapped to its +:scroll+ value.
 58 |         def scroll!(scroll_id, duration)
 59 |           scroll_id = scroll_id.scroll_id if scroll_id.is_a? Result
 60 |           return nil unless scroll_id
 61 | 
 62 |           duration = duration[:scroll] if duration.is_a?(Hash)
 63 |           Result.new es_client.scroll(body: scroll_id, scroll: duration), self
 64 |         end
 65 | 
 66 |         # Execute a search or a scroll on the Model's Elasticsearch index.
 67 |         # This method is "safe" in that it will catch the more common Errors.
 68 |         def es(query = '', opts = {})
 69 |           call_es { query.is_a?(Result) ? scroll!(query, opts) : es!(query, opts) }
 70 |         end
 71 | 
 72 |         # Wrap Elasticsearch calls: listed errors are warned to the first DB logger (if any) and nil is returned.
 73 |         def call_es
 74 |           yield
 75 |         rescue ::Elasticsearch::Transport::Transport::Errors::NotFound,
 76 |                ::Elasticsearch::Transport::Transport::Error,
 77 |                Faraday::ConnectionFailed => e
 78 |           db.loggers.first.warn e if db.loggers.count.positive?
 79 |           nil
 80 |         end
 81 | 
 82 |         # Import the whole dataset into Elasticsearch.
 83 |         #
 84 |         # This assumes that a template that covers all the possible index names
 85 |         # has been created. See +timestamped_index+ for examples of the indices
 86 |         # that will be created.
 87 |         #
 88 |         # This adds or updates records to the last index created by this utility.
 89 |         # Use the +reindex!+ method to create a completely new index and alias.
 90 |         def import!(index: nil, dataset: nil, batch_size: 100)
 91 |           dataset ||= self.dataset
 92 |           index_name = index || last_index || elasticsearch_index
 93 | 
 94 |           # Send the documents to Elasticsearch one page (one bulk request) at a time
 95 |           dataset.each_page(batch_size) do |ds|
 96 |             body = ds.all.map do |row|
 97 |               print '.'
 98 |               { update: import_object(index_name, row) }
 99 |             end
100 |             puts '/'
101 |             es_client.bulk body: body
102 |           end
103 |         end
104 | 
105 |         # Build the bulk +update+ action (an upsert of +as_indexed_json+) for a single row.
106 |         def import_object(idx, row)
107 |           val = {
108 |             _index: idx,
109 |             _id: row.document_id,
110 |             data: { doc: row.as_indexed_json, doc_as_upsert: true }
111 |           }
112 |           val[:_type] = elasticsearch_type if elasticsearch_type
113 |           val
114 |         end
115 | 
116 |         # Creates a new index in Elasticsearch from the specified dataset, as
117 |         # well as an alias to the new index.
118 |         #
119 |         # See the documentation on +import!+ for more details.
120 |         def reindex!(index: nil, dataset: nil, batch_size: 100)
121 |           index_name = index || timestamped_index
122 |           import!(index: index_name, dataset: dataset, batch_size: batch_size)
123 | 
124 |           # Create an alias to the newly created index
125 |           alias_index(index_name)
126 |         end
127 | 
128 |         # Remove previous aliases and point the `elasticsearch_index` to the new index.
129 |         def alias_index(new_index)
130 |           es_client.indices.update_aliases body: {
131 |             actions: [
132 |               { remove: { index: "#{elasticsearch_index}*", alias: elasticsearch_index } },
133 |               { add: { index: new_index, alias: elasticsearch_index } }
134 |             ]
135 |           }
136 |         end
137 | 
138 |         # Find the last created index (highest sorting name) carrying the +elasticsearch_index+ alias.
139 |         def last_index
140 |           es_client.indices.get_alias(name: elasticsearch_index)&.keys&.max
141 |         rescue ::Elasticsearch::Transport::Transport::Errors::NotFound
142 |           nil
143 |         end
144 | 
145 |         # Generate a timestamped index name.
146 |         # This will use the current timestamp to construct index names like this:
147 |         #
148 |         #    base-name-20191004.123456
149 |         def timestamped_index
150 |           time_str = Time.now.strftime('%Y%m%d.%H%M%S') # TODO: Make the format configurable
151 |           "#{elasticsearch_index}-#{time_str}".to_sym
152 |         end
153 |       end
154 | 
155 |       # The instance methods that will be added to the Sequel::Model
156 |       module InstanceMethods
157 |         def elasticsearch_index
158 |           self.class.elasticsearch_index
159 |         end
160 | 
161 |         def elasticsearch_type
162 |           self.class.elasticsearch_type
163 |         end
164 | 
165 |         # Sequel::Model after_create hook to add the new record to the Elasticsearch index.
166 |         # It's "safe" in that it won't raise an error if it fails.
167 |         def after_create
168 |           super
169 |           self.class.call_es { _index_document }
170 |         end
171 | 
172 |         # Sequel::Model after_destroy hook to remove the record from the Elasticsearch index.
173 |         # It's "safe" in that it won't raise an error if it fails.
174 |         def after_destroy
175 |           super
176 |           self.class.call_es { _destroy_document }
177 |         end
178 | 
179 |         # Sequel::Model after_update hook to update the record in the Elasticsearch index.
180 |         # It's "safe" in that it won't raise an error if it fails.
181 |         def after_update
182 |           super
183 |           self.class.call_es { _index_document }
184 |         end
185 | 
186 |         # Return the Elasticsearch client used to communicate with the cluster.
187 |         def es_client
188 |           self.class.es_client
189 |         end
190 | 
191 |         # Mirror the Elasticsearch Rails plugin. Use this to override what data
192 |         # is sent to Elasticsearch
193 |         def as_indexed_json
194 |           indexed_values
195 |         end
196 | 
197 |         # Internal reference for index_document. Override this for alternate
198 |         # implementations of indexing the document.
199 |         def _index_document(opts = {})
200 |           index_document(opts)
201 |         end
202 | 
203 |         # Create or update the document on the Elasticsearch cluster.
204 |         def index_document(opts = {})
205 |           params = document_path(opts)
206 |           params[:body] = as_indexed_json
207 |           es_client.index params
208 |         end
209 | 
210 |         # Internal reference for destroy_document. Override this for alternate
211 |         # implementations of removing the document.
212 |         def _destroy_document(opts = {})
213 |           destroy_document(opts)
214 |         end
215 | 
216 |         # Remove the document from the Elasticsearch cluster.
217 |         def destroy_document(opts = {})
218 |           es_client.delete document_path(opts)
219 |         end
220 | 
221 |         # Determine the complete path to a document (/index/type/id) in the Elasticsearch cluster.
222 |         def document_path(opts = {})
223 |           {
224 |             index: opts.delete(:index) || elasticsearch_index,
225 |             type: opts.delete(:type) || elasticsearch_type,
226 |             id: opts.delete(:id) || document_id
227 |           }
228 |         end
229 | 
230 |         # Determine the ID to be used for the document in the Elasticsearch cluster.
231 |         # It will join the values of a multi field primary key with an underscore.
232 |         def document_id
233 |           doc_id = pk
234 |           doc_id = doc_id.join('_') if doc_id.is_a? Array
235 |           doc_id
236 |         end
237 | 
238 |         private
239 | 
240 |         # Values to be indexed
241 |         def indexed_values
242 |           # TODO: Deprecate this method in favour of as_indexed_json
243 |           values.each_key { |k| values[k] = values[k].strftime('%FT%T%:z') if values[k].is_a?(Time) }
244 |         end
245 |       end
246 |     end
247 |   end
248 | end
249 | 


--------------------------------------------------------------------------------
/lib/sequel/plugins/elasticsearch/result.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module Sequel
 4 |   module Plugins
 5 |     module Elasticsearch
 6 |       # A wrapper around Elasticsearch results to make it behave more like a Sequel Dataset.
 7 |       class Result
 8 |         include Enumerable
 9 | 
10 |         # The original result returned from the Elasticsearch client
11 |         attr_reader :result
12 |         # The scroll id, if set, from the result
13 |         attr_reader :scroll_id
14 |         # The total number of documents in the Elasticsearch result
15 |         attr_reader :total
16 |         # The time, in miliseconds, the Elasticsearch call took to complete
17 |         attr_reader :took
18 |         # If the Elasticsearch call timed out or note
19 |         attr_reader :timed_out
20 |         # The model class associated with this result
21 |         attr_reader :model
22 | 
23 |         # Initialize the Result
24 |         #
25 |         # * +result+ The result returns from the Elasticsearch client / +.es+ call.
26 |         # * +model+ The model class on which the results should be applied.
27 |         def initialize(result, model = nil)
28 |           return unless result && result['hits']
29 | 
30 |           @result = result
31 |           @scroll_id = result['_scroll_id']
32 |           @total = result['hits']['total']
33 |           @timed_out = result['timed_out']
34 |           @took = result['took']
35 |           @model = model
36 | 
37 |           result['hits']['hits'] = result['hits']['hits'].map { |h| convert(h) }
38 |         end
39 | 
40 |         # Each implementation for the Enumerable. Yield each element in the +result['hits']['hits']+ array.
41 |         def each
42 |           return [] unless result['hits'] && result['hits']['hits'].count.positive?
43 | 
44 |           result['hits']['hits'].each { |h| yield h }
45 |         end
46 | 
47 |         # Send back the complete result set
48 |         def all
49 |           result['hits']['hits']
50 |         end
51 | 
52 |         # Send all undefined methods to the +result['hits']['hits']+ array.
53 |         def method_missing(meth, *args, &block)
54 |           respond_to_missing?(meth) ? result['hits']['hits'].send(meth, *args, &block) : super
55 |         end
56 | 
57 |         # Send all undefined methods to the +result['hits']['hits']+ array.
58 |         def respond_to_missing?(meth, include_private = false)
59 |           result['hits']['hits'].respond_to?(meth, include_private) || super
60 |         end
61 | 
62 |         private
63 | 
64 |         # Convert an Elasticsearch hit to a Sequel::Model
65 |         def convert(hit)
66 |           return hit unless model
67 | 
68 |           source = hit['_source'].each_with_object({}) { |(k, v), h| h[k.to_sym] = v }
69 |           model.call source
70 |         end
71 |       end
72 |     end
73 |   end
74 | end
75 | 


--------------------------------------------------------------------------------
/lib/sequel/plugins/elasticsearch/version.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | module Sequel
 4 |   # The Sequel::Elasticsearch model plugin
 5 |   #
 6 |   # See https://jrgns.github.io/sequel-elasticsearch
 7 |   module Elasticsearch
 8 |     # The Gem's version.
 9 |     VERSION = '0.4.13'
10 |   end
11 | end
12 | 


--------------------------------------------------------------------------------
/sequel-elasticsearch.gemspec:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | lib = File.expand_path('lib', __dir__)
 4 | $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
 5 | require 'sequel/plugins/elasticsearch/version'
 6 | 
 7 | Gem::Specification.new do |spec|
 8 |   spec.name          = 'sequel-elasticsearch'
 9 |   spec.version       = Sequel::Elasticsearch::VERSION
10 |   spec.authors       = ['Jurgens du Toit']
11 |   spec.email         = ['jrgns@jadeit.co.za']
12 | 
13 |   spec.summary       = 'A plugin for the Sequel gem to sync data to Elasticsearch.'
14 |   spec.description   = 'A plugin for the Sequel gem to sync data to Elasticsearch.'
15 |   spec.homepage      = 'https://github.com/jrgns/sequel-elasticsearch'
16 |   spec.license       = 'MIT'
17 | 
18 |   spec.files         = `git ls-files -z`.split("\x0").reject do |f|
19 |     f.match(%r{^(test|spec|features)/})
20 |   end
21 |   spec.bindir        = 'exe'
22 |   spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
23 |   spec.require_paths = ['lib']
24 | 
25 |   spec.add_dependency 'elasticsearch', '>= 1.0'
26 |   spec.add_dependency 'sequel', '>= 4.0'
27 | 
28 |   spec.add_development_dependency 'bundler', '>= 1.13'
29 |   spec.add_development_dependency 'rake', '~> 12.3.3'
30 |   spec.add_development_dependency 'rspec', '~> 3.0'
31 |   spec.add_development_dependency 'rubocop', '>= 0.52'
32 |   spec.add_development_dependency 'rubocop-rspec', '>= 1.37'
33 |   spec.add_development_dependency 'simplecov', '~> 0.15', '< 0.18'
34 |   spec.add_development_dependency 'sqlite3', '~> 1.4'
35 |   spec.add_development_dependency 'timecop', '>= 0.9'
36 |   spec.add_development_dependency 'webmock', '~> 3.2'
37 | end
38 | 


--------------------------------------------------------------------------------
/spec/sequel/plugins/elasticsearch/result_spec.rb:
--------------------------------------------------------------------------------
  1 | # frozen_string_literal: true
  2 | 
  3 | require 'json'
  4 | require 'sequel'
  5 | require 'elasticsearch'
  6 | require 'sequel/plugins/elasticsearch'
  7 | 
  8 | describe Sequel::Plugins::Elasticsearch::Result do
  9 |   def fixture(name)
 10 |     File.read("spec/support/#{name}")
 11 |   end
 12 | 
 13 |   let(:result) do
 14 |     {
 15 |       'took' => 234,
 16 |       'timed_out' => false,
 17 |       'hits' => {
 18 |         'hits' => [{ one: 'one', two: 'two' }, { one: 'three', two: 'four' }],
 19 |         'total' => 2
 20 |       }
 21 |     }
 22 |   end
 23 | 
 24 |   let(:scroll_result) { result.merge('_scroll_id' => '123scrollid') }
 25 | 
 26 |   describe '.new' do
 27 |     let(:es_result) do
 28 |       described_class.new(result)
 29 |     end
 30 | 
 31 |     it 'creates an enumerable' do
 32 |       expect(es_result).to be_a Enumerable
 33 |     end
 34 | 
 35 |     it 'handles an empty result' do
 36 |       expect { described_class.new(nil) }.not_to raise_error
 37 |     end
 38 | 
 39 |     it 'sets the result total property' do
 40 |       expect(es_result.total).not_to be nil
 41 |       expect(es_result.total).to eq result['hits']['total']
 42 |     end
 43 | 
 44 |     it 'sets the result timed_out property' do
 45 |       expect(es_result.timed_out).not_to be nil
 46 |       expect(es_result.timed_out).to eq result['timed_out']
 47 |     end
 48 | 
 49 |     it 'sets the result took property' do
 50 |       expect(es_result.took).not_to be nil
 51 |       expect(es_result.took).to eq result['took']
 52 |     end
 53 | 
 54 |     it 'accesses the enumerable elements correctly' do
 55 |       expect(es_result).to include one: 'one', two: 'two'
 56 |       expect(es_result).to include one: 'three', two: 'four'
 57 |       expect(es_result).not_to include one: 'five', two: 'six'
 58 |     end
 59 | 
 60 |     it 'reports the size of the hits array correctly' do
 61 |       expect(es_result.count).to eq result['hits']['hits'].count
 62 |     end
 63 |   end
 64 | 
 65 |   describe '#method_missing' do
 66 |     let(:es_result) do
 67 |       described_class.new(result)
 68 |     end
 69 | 
 70 |     it 'sends all methods to the hits array' do
 71 |       expect(es_result.count).to eq 2
 72 |     end
 73 |   end
 74 | 
 75 |   context 'scrollable' do
 76 |     let(:es_result) do
 77 |       described_class.new(scroll_result)
 78 |     end
 79 | 
 80 |     it 'sets the result scroll_id property' do
 81 |       expect(es_result.scroll_id).not_to be nil
 82 |       expect(es_result.scroll_id).to eq scroll_result['_scroll_id']
 83 |     end
 84 | 
 85 |     it 'iterates through the whole result set' do
 86 |       skip 'feature still pending'
 87 |       stub_request(:get, 'http://localhost:9200/_search?q=test&scroll=1m&size=2')
 88 |         .to_return(status: 200, body: fixture('scroll_one.json'), headers: { 'Content-Type' => 'application/json' })
 89 |       stub_request(:get, 'http://localhost:9200/_search/scroll')
 90 |         .to_return(status: 200, body: fixture('scroll_two.json'), headers: { 'Content-Type' => 'application/json' })
 91 | 
 92 |       client = Elasticsearch::Client.new
 93 |       result = described_class.new client.search(q: 'test', scroll: '1m', size: 2)
 94 |       expect(result.total).to eq 5
 95 |       expect(result.count).to eq 2
 96 |       expect(result.map { |e| e }.count).to eq 5
 97 |     end
 98 |   end
 99 | end
100 | 


--------------------------------------------------------------------------------
/spec/sequel/plugins/elasticsearch_spec.rb:
--------------------------------------------------------------------------------
  1 | # frozen-string-literal: true
  2 | 
  3 | require 'sequel'
  4 | require 'sequel/plugins/elasticsearch'
  5 | require 'sequel/plugins/elasticsearch/result'
  6 | require 'timecop'
  7 | 
  8 | # rubocop: disable Metrics/BlockLength
  9 | describe Sequel::Plugins::Elasticsearch do
 10 |   before(:all) do
 11 |     DB.create_table!(:documents) do # backs the `model` and `simple_doc` classes; single-column primary key
 12 |       primary_key :id
 13 |       String :title
 14 |       String :content, text: true
 15 |       Integer :views
 16 |       TrueClass :active
 17 |       DateTime :created_at
 18 |     end
 19 | 
 20 |     DB.create_table!(:complex_documents) do # backs `complex_doc`; exercises composite primary keys
 21 |       Integer :one
 22 |       Integer :two
 23 |       primary_key %i[one two] # composite key over both integer columns
 24 |       String :title
 25 |       String :content, text: true
 26 |     end
 27 |   end
 28 | 
 29 |   let(:model) do
 30 |     Class.new(Sequel::Model(:documents))
 31 |   end
 32 | 
 33 |   describe '.configure' do
 34 |     it 'defaults to the model table name for the index' do
 35 |       model.plugin :elasticsearch
 36 |       expect(model.send(:elasticsearch_index)).to eq :documents
 37 |     end
 38 | 
 39 |     it 'allows you to specify the index' do
 40 |       model.plugin :elasticsearch, index: :customIndex
 41 |       expect(model.elasticsearch_index).to eq :customIndex
 42 |     end
 43 | 
 44 |     it 'uses the specified index' do
 45 |       model.plugin :elasticsearch, index: :customIndex
 46 |       stub_request(:put, %r{http://localhost:9200/customIndex/_doc/\d+})
 47 |       doc = model.new.save
 48 |       expect(WebMock).to have_requested(:put, "http://localhost:9200/customIndex/_doc/#{doc.id}")
 49 |     end
 50 | 
 51 |     it 'only uses type if given' do
 52 |       model.plugin :elasticsearch
 53 |       expect(model.send(:elasticsearch_type)).to be_nil
 54 |     end
 55 | 
 56 |     it 'allows you to specify the type' do
 57 |       model.plugin :elasticsearch, type: :customType
 58 |       expect(model.send(:elasticsearch_type)).to eq :customType
 59 |     end
 60 | 
 61 |     it 'uses the specified type' do
 62 |       model.plugin :elasticsearch, type: :customType
 63 |       WebMock.allow_net_connect!
 64 |       doc = model.new.save
 65 |       expect(WebMock).to have_requested(:put, "http://localhost:9200/#{model.table_name}/customType/#{doc.id}")
 66 |     end
 67 | 
 68 |     it 'uses the default type' do
 69 |       model.plugin :elasticsearch
 70 |       WebMock.allow_net_connect!
 71 |       doc = model.new(content: Time.now).save
 72 |       expect(WebMock).to have_requested(:put, "http://localhost:9200/#{model.table_name}/_doc/#{doc.id}")
 73 |     end
 74 | 
 75 |     it 'allows you to pass down Elasticsearch client options' do
 76 |       model.plugin :elasticsearch, elasticsearch: { log: true }
 77 |       expect(model.new.es_client.transport.options).to include log: true
 78 |     end
 79 |   end
 80 | 
 81 |   describe 'ClassMethods' do
 82 |     describe '.es' do
 83 |       before do
 84 |         WebMock.allow_net_connect!
 85 |         model.plugin :elasticsearch
 86 |       end
 87 | 
 88 |       it 'does a basic query string search' do
 89 |         model.es('test')
 90 |         expect(WebMock).to have_requested(:get, 'http://localhost:9200/documents/_search?q=test')
 91 |       end
 92 | 
 93 |       it 'does a complex query search' do
 94 |         model.es(query: { match: { title: 'test' } })
 95 |         expect(WebMock)
 96 |           .to have_requested(:post, 'http://localhost:9200/documents/_search')
 97 |           .with(body: '{"query":{"match":{"title":"test"}}}')
 98 |       end
 99 | 
100 |       it 'handles not found exceptions' do
101 |         expect { model.es('test') }.not_to raise_error
102 |         stub_request(:get, %r{http://localhost:9200/documents/_search.*})
103 |           .to_return(status: 404)
104 |       end
105 | 
106 |       it 'handles connection failed exceptions' do
107 |         stub_request(:get, %r{http://localhost:9200/documents/_search.*})
108 |         allow(Faraday::Connection).to receive(:get).and_raise(Faraday::ConnectionFailed)
109 |         expect { model.es('test') }.not_to raise_error
110 |       end
111 | 
112 |       it 'returns an enumerable' do
113 |         stub_request(:get, %r{http://localhost:9200/documents/_search.*})
114 |         expect(model.es('test')).to be_a Enumerable
115 |       end
116 | 
117 |       it 'handles scroll requests' do
118 |         stub = stub_request(:get, 'http://localhost:9200/documents/_search?q=test&scroll=1m')
119 |         model.es('test', scroll: '1m')
120 |         expect(stub).to have_been_requested.once
121 |       end
122 | 
123 |       it 'handles scroll results'
124 |     end
125 | 
126 |     describe '.es!' do
127 |       it 'does not handle exceptions' do
128 |         stub_request(:get, %r{http://localhost:9200/documents/_search.*})
129 |           .to_return(status: 500)
130 |         model.plugin :elasticsearch
131 |         expect { model.es!('test') }.to raise_error Elasticsearch::Transport::Transport::Error
132 |       end
133 |     end
134 | 
135 |     describe '.scroll!' do
136 |       before do
137 |         model.plugin :elasticsearch
138 |       end
139 | 
140 |       it 'accepts a scroll_id' do
141 |         stub = stub_request(:post, 'http://localhost:9200/_search/scroll?scroll%5Bscroll%5D=1m')
142 | 
143 |         model.scroll!('somescrollid', scroll: '1m')
144 |         expect(stub).to have_been_requested.once
145 |       end
146 | 
147 |       it 'accepts a Result' do
148 |         result = Sequel::Plugins::Elasticsearch::Result.new('_scroll_id' => 'somescrollid')
149 |         allow(result).to receive(:scroll_id).and_return('somescrollid')
150 |         stub = stub_request(:post, 'http://localhost:9200/_search/scroll?scroll%5Bscroll%5D=1m')
151 |                .to_return(status: 200)
152 | 
153 |         model.scroll!(result, scroll: '1m')
154 | 
155 |         expect(stub).to have_been_requested.once
156 |       end
157 | 
158 |       it 'does not handle exceptions' do
159 |         stub_request(:get, 'http://localhost:9200/_search/scroll?scroll=1m&scroll_id=somescrollid')
160 |           .to_return(status: 500)
161 |         expect { model.scroll!('somescrollid', '1m') }.to raise_error Elasticsearch::Transport::Transport::Error # Getting Faraday::ConnectionFailed ??
162 |       end
163 |     end
164 | 
165 |     describe '.timestamped_index' do
166 |       it 'returns the index appended with a timestamp' do
167 |         model.plugin :elasticsearch
168 |         Timecop.freeze(Time.local(2019, 12, 4, 21, 26, 12)) do
169 |           expect(model.timestamped_index).to eq :'documents-20191204.212612'
170 |         end
171 |       end
172 |     end
173 |   end
174 | 
175 |   describe 'InstanceMethods' do
176 |     let(:simple_doc) do
177 |       @subj ||= begin
178 |         subj = Class.new(Sequel::Model(:documents))
179 |         subj.plugin :elasticsearch
180 |         subj
181 |       end
182 |     end
183 | 
184 |     let(:complex_doc) do
185 |       @subj ||= begin
186 |         subj = Class.new(Sequel::Model(:complex_documents))
187 |         subj.plugin :elasticsearch
188 |         subj
189 |       end
190 |     end
191 | 
192 |     describe '#es_client' do
193 |       it 'returns an Elasticsearch Transport Client' do
194 |         expect(simple_doc.new.send(:es_client)).to be_a Elasticsearch::Transport::Client
195 |       end
196 |     end
197 | 
198 |     describe '#document_id' do
199 |       it 'returns the value of the primary key for simple primary keys' do
200 |         stub_request(:put, %r{http://localhost:9200/documents/_doc/\d+})
201 |         doc = simple_doc.new.save
202 |         expect(doc.send(:document_id)).to eq doc.id
203 |       end
204 | 
205 |       it 'returns the value of the primary key for composite primary keys' do
206 |         complex_doc.insert(one: 1, two: 2)
207 |         doc = complex_doc.first
208 |         expect(doc.send(:document_id)).to eq "#{doc.one}_#{doc.two}"
209 |       end
210 |     end
211 | 
212 |     describe '#as_indexed_json' do
213 |       let(:doc) do
214 |         simple_doc.new(
215 |           title: 'title',
216 |           content: 'content',
217 |           views: 4,
218 |           active: true,
219 |           created_at: Time.parse('2018-02-07T22:18:42+02:00')
220 |         )
221 |       end
222 | 
223 |       it 'correctly formats dates and other types' do
224 |         expect(doc.as_indexed_json).to include(
225 |           title: 'title', content: 'content', views: 4, active: true, created_at: '2018-02-07T22:18:42+02:00'
226 |         )
227 |       end
228 | 
229 |       it 'can be extended' do
230 |         doc = simple_doc.new
231 |         def doc.as_indexed_json
232 |           { test: 'this' }
233 |         end
234 |         expect(doc.as_indexed_json).to include(test: 'this')
235 |       end
236 |     end
237 | 
238 |     describe '#document_path' do
239 |       it 'returns the document index, type and id for documents' do
240 |         stub_request(:put, %r{http://localhost:9200/documents/_doc/\d+})
241 |         doc = simple_doc.new.save
242 |         expect(doc.document_path).to include index: simple_doc.table_name
243 |         expect(doc.document_path).to include id: doc.id
244 |       end
245 |     end
246 | 
247 |     describe '#save' do
248 |       it 'indexes the document using the document path and model values' do
249 |         stub_request(:put, %r{http://localhost:9200/documents/_doc/\d+})
250 |         doc = simple_doc.new.save
251 |         expect(WebMock)
252 |           .to have_requested(:put, "http://localhost:9200/#{simple_doc.table_name}/_doc/#{doc.id}")
253 |       end
254 |     end
255 | 
256 |     describe '#update' do
257 |       let(:doc) do
258 |         doc = simple_doc.new.save
259 |         doc.title = 'updated'
260 |         doc.save
261 |         doc
262 |       end
263 | 
264 |       it 'indexes the document using the document path and model values' do
265 |         stub_request(:put, %r{http://localhost:9200/documents/_doc/\d+})
266 |         expect(WebMock)
267 |           .to have_requested(:put, "http://localhost:9200/#{simple_doc.table_name}/_doc/#{doc.id}")
268 |           .times(2)
269 |       end
270 |     end
271 | 
272 |     describe '#destroy' do
273 |       let(:id) do
274 |         doc = simple_doc.new.save
275 |         id = doc.pk
276 |         doc.destroy
277 |         id
278 |       end
279 | 
280 |       it 'destroys the document using the document path' do
281 |         stub_request(:put, %r{http://localhost:9200/documents/_doc/\d+})
282 |         stub_request(:delete, %r{http://localhost:9200/documents/_doc/\d+})
283 |         expect(WebMock)
284 |           .to have_requested(:delete, "http://localhost:9200/#{simple_doc.table_name}/_doc/#{id}")
285 |       end
286 |     end
287 |   end
288 | end
289 | # rubocop: enable Metrics/BlockLength
290 | 


--------------------------------------------------------------------------------
/spec/spec_helper.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | ENV['RACK_ENV'] ||= 'test'
 4 | ENV['DATABASE_URL'] ||= 'sqlite::memory:'
 5 | ENV['ELASTICSEARCH_URL'] ||= 'http://localhost:9200'
 6 | 
 7 | require 'sequel'
 8 | require 'webmock/rspec'
 9 | require 'simplecov'
10 | SimpleCov.start
11 | 
12 | DB = Sequel.connect ENV['DATABASE_URL']
13 | 
14 | RSpec.configure do |config|
15 |   config.filter_run :focus # restrict the run to examples tagged :focus
16 |   config.run_all_when_everything_filtered = true # nothing tagged :focus => run the whole suite
17 | end
18 | 


--------------------------------------------------------------------------------
/spec/support/scroll_one.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "_scroll_id": "DnF1ZXJ5VGhlbkZldGNoCgAAAAAAAACLFnpFbXRybktRUndPbnlkQ1BkcUpOR1EAAAAAAAAAkRZ6RW10cm5LUVJ3T255ZENQZHFKTkdRAAAAAAAAAJIWekVtdHJuS1FSd09ueWRDUGRxSk5HUQAAAAAAAACTFnpFbXRybktRUndPbnlkQ1BkcUpOR1EAAAAAAAAAlBZ6RW10cm5LUVJ3T255ZENQZHFKTkdRAAAAAAAAAIwWekVtdHJuS1FSd09ueWRDUGRxSk5HUQAAAAAAAACNFnpFbXRybktRUndPbnlkQ1BkcUpOR1EAAAAAAAAAjhZ6RW10cm5LUVJ3T255ZENQZHFKTkdRAAAAAAAAAI8WekVtdHJuS1FSd09ueWRDUGRxSk5HUQAAAAAAAACQFnpFbXRybktRUndPbnlkQ1BkcUpOR1E=",
 3 |     "_shards": {
 4 |         "failed": 0,
 5 |         "skipped": 0,
 6 |         "successful": 10,
 7 |         "total": 10
 8 |     },
 9 |     "hits": {
10 |         "hits": [
11 |             {
12 |                 "_id": "AV-3Skh5ek-DGROVf5Zo",
13 |                 "_index": "logstash-2017.11.13",
14 |                 "_score": 1.0,
15 |                 "_source": {
16 |                     "@timestamp": "2017-11-13T21:30:19.247Z",
17 |                     "@version": "1",
18 |                     "host": "somehost",
19 |                     "message": "I, [2017-11-13T22:18:01.997727 #5298]  INFO -- : Message 1",
20 |                     "path": "/home/jrgns/Code/EagerELK/proxes/logs/proxes.log"
21 |                 },
22 |                 "_type": "logs"
23 |             },
24 |             {
25 |                 "_id": "AV-3Skh5ek-DGROVf5Zr",
26 |                 "_index": "logstash-2017.11.13",
27 |                 "_score": 1.0,
28 |                 "_source": {
29 |                     "@timestamp": "2017-11-13T21:30:19.247Z",
30 |                     "@version": "1",
31 |                     "host": "somehost",
32 |                     "message": "I, [2017-11-13T22:18:02.205017 #5298]  INFO -- : Message 2",
33 |                     "path": "/home/jrgns/Code/EagerELK/proxes/logs/proxes.log"
34 |                 },
35 |                 "_type": "logs"
36 |             }
37 |         ],
38 |         "max_score": 1.0,
39 |         "total": 5
40 |     },
41 |     "timed_out": false,
42 |     "took": 40
43 | }
44 | 


--------------------------------------------------------------------------------