├── .rspec ├── lib ├── cloudsearchable │ ├── version.rb │ ├── cloud_search.rb │ ├── config.rb │ ├── config │ │ └── options.rb │ ├── field.rb │ ├── domain.rb │ └── query_chain.rb └── cloudsearchable.rb ├── Gemfile ├── .travis.yml ├── .gitignore ├── spec ├── spec_helper.rb ├── cloudsearchable │ ├── config_spec.rb │ ├── config │ │ └── option_spec.rb │ ├── field_spec.rb │ ├── cloud_search_spec.rb │ ├── cloudsearchable_spec.rb │ ├── domain_spec.rb │ └── query_chain_spec.rb └── test_classes │ └── cloud_searchable_test_class.rb ├── Rakefile ├── LICENSE.txt ├── cloudsearchable.gemspec └── README.md /.rspec: -------------------------------------------------------------------------------- 1 | --color 2 | --format documentation 3 | --order random 4 | -------------------------------------------------------------------------------- /lib/cloudsearchable/version.rb: -------------------------------------------------------------------------------- 1 | module Cloudsearchable 2 | VERSION = "1.0.0" 3 | end 4 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | # gem's dependencies in cloudsearchable.gemspec 4 | gemspec 5 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: ruby 2 | rvm: 3 | - 1.9.3 4 | - 2.1.6 5 | - 2.2.3 6 | - jruby-1.7.11 7 | - jruby-9.0.3.0 8 | - rbx-2.5.3 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | *.rbc 3 | .bundle 4 | .config 5 | coverage 6 | InstalledFiles 7 | lib/bundler/man 8 | pkg 9 | rdoc 10 | spec/reports 11 | test/tmp 12 | test/version_tmp 13 | tmp 14 | 15 | # YARD artifacts 16 | .yardoc 17 | _yardoc 18 | doc/ 19 
| 20 | .DS_Store 21 | *.swp 22 | *.swo 23 | -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | require 'rubygems' 2 | require 'bundler/setup' 3 | 4 | $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib')) 5 | $LOAD_PATH.unshift(File.dirname(__FILE__)) 6 | 7 | require 'cloudsearchable' 8 | 9 | RSpec.configure do |config| 10 | config.raise_errors_for_deprecations! 11 | end -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require 'rspec/core/rake_task' 2 | RSpec::Core::RakeTask.new 3 | 4 | # make spec test the default task 5 | task :default => :spec 6 | 7 | require 'yard' 8 | YARD::Rake::YardocTask.new do |t| 9 | t.files = ['lib/**/*.rb', "README", "LICENSE"] # optional 10 | t.options = ['-m', 'markdown'] # optional 11 | end 12 | -------------------------------------------------------------------------------- /spec/cloudsearchable/config_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe Cloudsearchable::Config do 4 | it 'can be set in a block' do 5 | Cloudsearchable.configure do |config| 6 | config.domain_prefix = 'dev-llarue-' 7 | end 8 | 9 | expect(Cloudsearchable.configure.domain_prefix).to eq 'dev-llarue-' 10 | end 11 | 12 | it 'aliases configure to config' do 13 | expect(Cloudsearchable.config.domain_prefix).to eq Cloudsearchable.configure.domain_prefix 14 | end 15 | end 16 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | 15 | -------------------------------------------------------------------------------- /spec/cloudsearchable/config/option_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | #require 'cloudsearchable/config/options' 3 | 4 | describe Cloudsearchable::Config::Options do 5 | 6 | module ConfigTest 7 | extend self 8 | include Cloudsearchable::Config::Options 9 | 10 | option :foo 11 | option :timezone, :default => "PST" 12 | option :warnings, :default => false 13 | end 14 | 15 | subject { ConfigTest } 16 | 17 | it 'sets and gets' do 18 | subject.foo = 5 19 | expect(subject.foo).to eq 5 20 | end 21 | 22 | it 'defaults' do 23 | expect(subject.warnings).to_not be_nil 24 | expect(subject.warnings).to eq subject.settings[:warnings] 25 | end 26 | 27 | it 'resets option' do 28 | subject.timezone = "EST" 29 | expect(subject.timezone).to eq "EST" 30 | subject.reset_timezone 31 | expect(subject.timezone).to eq "PST" 32 | end 33 | 34 | it 'resets all options' do 35 | subject.foo = 5 36 | subject.reset 37 | expect(subject.foo).to be_nil 38 | end 39 | 40 | end 41 | -------------------------------------------------------------------------------- /lib/cloudsearchable/cloud_search.rb: -------------------------------------------------------------------------------- 1 | require 'aws-sdk' 2 | require 'json' 3 | 4 | module CloudSearch 5 | API_VERSION = 
"2011-02-01" 6 | 7 | def self.client 8 | @client ||= Aws::CloudSearch::Client.new 9 | end 10 | 11 | def self.client=(client) 12 | @client = client 13 | end 14 | 15 | # 16 | # Send an SDF document to CloudSearch via http post request. 17 | # Returns parsed JSON response, or raises an exception 18 | # 19 | def self.post_sdf endpoint, sdf 20 | self.post_sdf_list endpoint, [sdf] 21 | end 22 | 23 | def self.post_sdf_list endpoint, sdf_list 24 | uri = URI.parse("http://#{endpoint}/#{API_VERSION}/documents/batch") 25 | 26 | req = Net::HTTP::Post.new(uri.path) 27 | req.body = JSON.generate sdf_list 28 | req["Content-Type"] = "application/json" 29 | 30 | response = Net::HTTP.start(uri.host, uri.port){|http| http.request(req)} 31 | 32 | if response.is_a? Net::HTTPSuccess 33 | JSON.parse response.body 34 | else 35 | # Raise an exception based on the response see http://ruby-doc.org/stdlib-1.9.2/libdoc/net/http/rdoc/Net/HTTP.html 36 | response.error! 37 | end 38 | 39 | end 40 | end 41 | -------------------------------------------------------------------------------- /spec/cloudsearchable/field_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'ostruct' 3 | 4 | describe Cloudsearchable::Field do 5 | before :all do 6 | ENV['AWS_REGION'] = 'us-east-1' 7 | end 8 | 9 | it 'has a name' do 10 | field = described_class.new 'fnord', :literal 11 | expect(field.name).to eq(:fnord) 12 | end 13 | 14 | it 'can find its value' do 15 | test_value = nil 16 | field = described_class.new('foo', :literal, :source => Proc.new { test_value }) 17 | test_value = 123 18 | expect(field.value_for(Object.new)).to eq(test_value) 19 | 20 | record = OpenStruct.new :a => test_value 21 | field2 = described_class.new('bar', :literal, :source => :a) 22 | expect(field.value_for(record)).to eq(test_value) 23 | end 24 | 25 | it 'generates a field definition' do 26 | domain_name = 'narnia' 27 | field = described_class.new('fnord', 
:literal, :search_enabled => true) 28 | expect(CloudSearch.client).to receive(:define_index_field) do |call| 29 | expect(call[:domain_name]).to eq(domain_name) 30 | expect(call[:index_field][:literal_options][:search_enabled]).to be_truthy 31 | end 32 | field.define_in_domain domain_name 33 | end 34 | end 35 | -------------------------------------------------------------------------------- /lib/cloudsearchable/config.rb: -------------------------------------------------------------------------------- 1 | require "cloudsearchable/config/options" 2 | 3 | module Cloudsearchable 4 | 5 | # basic configuration for Cloudsearchable. Most of this code was patterned from Dynamoid. 6 | module Config 7 | extend self 8 | include Options 9 | 10 | option :domain_prefix, :default => defined?(Rails) ? "#{Rails.env}" : "" 11 | option :fatal_warnings, :default => defined?(Rails) && Rails.env.production? ? false : true 12 | option :logger, :default => defined?(Rails) 13 | 14 | # The default logger either the Rails logger or just stdout. 15 | def default_logger 16 | defined?(Rails) && Rails.respond_to?(:logger) ? Rails.logger : ::Logger.new($stdout) 17 | end 18 | 19 | # Returns the assigned logger instance. 20 | def logger 21 | @logger ||= default_logger 22 | end 23 | 24 | # If you want to, set the logger manually to any output you'd like. Or pass false or nil to disable logging entirely. 25 | def logger=(logger) 26 | case logger 27 | when false, nil then @logger = nil 28 | when true then @logger = default_logger 29 | else 30 | @logger = logger if logger.respond_to?(:info) 31 | end 32 | end 33 | 34 | end 35 | end 36 | -------------------------------------------------------------------------------- /lib/cloudsearchable/config/options.rb: -------------------------------------------------------------------------------- 1 | module Cloudsearchable 2 | module Config 3 | 4 | # Encapsulates logic for setting options. 
5 | module Options 6 | def self.included base 7 | base.extend ClassMethods 8 | end 9 | 10 | module ClassMethods 11 | # Get the defaults or initialize a new empty hash. 12 | def defaults 13 | @defaults ||= {} 14 | end 15 | 16 | # Reset the configuration options to the defaults. 17 | def reset 18 | settings.replace(defaults) 19 | end 20 | 21 | # Get the settings or initialize a new empty hash. 22 | def settings 23 | @settings ||= {} 24 | end 25 | 26 | # Define a configuration option with a default. Example usage: 27 | # Options.option(:persist_in_safe_mode, :default => false) 28 | def option(name, options = {}) 29 | defaults[name] = settings[name] = options[:default] 30 | 31 | define_method name do 32 | settings[name] 33 | end 34 | 35 | define_method "#{name}=" do |value| 36 | settings[name] = value 37 | end 38 | 39 | define_method "#{name}?" do 40 | !!settings[name] 41 | end 42 | 43 | define_method "reset_#{name}" do 44 | settings[name] = defaults[name] 45 | end 46 | end 47 | end 48 | end 49 | 50 | end 51 | end 52 | -------------------------------------------------------------------------------- /spec/test_classes/cloud_searchable_test_class.rb: -------------------------------------------------------------------------------- 1 | require 'ostruct' 2 | require 'active_model' 3 | 4 | # A class that includes Cloudsearchable 5 | CloudSearchableSampleClassFactory = Proc.new do 6 | Class.new do 7 | include ActiveModel::Dirty 8 | extend ActiveModel::Callbacks 9 | define_model_callbacks :touch, :save 10 | 11 | # Your class must implement #id, #version, and #destroyed? 12 | attr_accessor :id, :customer, :name, :lock_version, :destroyed 13 | 14 | include Cloudsearchable 15 | 16 | # Anonymous classes don't have names, so set one: 17 | def self.name 18 | "TestClass" 19 | end 20 | 21 | def destroyed? 22 | !! @destroyed 23 | end 24 | 25 | # This is the default index. You probably only need one. 
26 | index_in_cloudsearch do |idx| 27 | # Fetch the customer_id field from customer 28 | literal :customer_id, :result_enabled => true, :search_enabled => true, :source => Proc.new { customer } 29 | 30 | # Map the 'name' Ruby attribute to a field called 'test_name' 31 | text :test_name, :result_enabled => false, :search_enabled => true, :source => :name 32 | 33 | # uint fields can be used in result ranking functions 34 | uint :helpfulness, :result_enabled => true, :search_enabled => false do; 1234 end 35 | end 36 | 37 | # A named index. 38 | index_in_cloudsearch :test_index do |idx| 39 | literal :id, :search_enabled => true 40 | end 41 | end 42 | end 43 | -------------------------------------------------------------------------------- /spec/cloudsearchable/cloud_search_spec.rb: -------------------------------------------------------------------------------- 1 | require 'net/http' 2 | require 'spec_helper' 3 | require 'test_classes/cloud_searchable_test_class' 4 | 5 | describe CloudSearch do 6 | 7 | let(:item) do 8 | CloudSearchableSampleClassFactory.call.new.tap do |instance| 9 | instance.destroyed = false 10 | instance.lock_version = 1 11 | instance.id = 1 12 | instance.customer = '1234' 13 | end 14 | end 15 | let(:sdf_document){item.class.cloudsearch_index(:test_index).send :addition_sdf, item, item.id, item.lock_version} 16 | let(:endpoint){'https://fake_end_point.amazon.com'} 17 | 18 | class MockHTTPOK < Net::HTTPOK 19 | attr :body 20 | def initialize body 21 | @body = body 22 | end 23 | end 24 | 25 | class MockHTTPBadRequest < Net::HTTPBadRequest 26 | def initialize; end 27 | end 28 | 29 | let(:success_response){ MockHTTPOK.new( {"status" => "success", "adds" => 1, "deletes" => 0}.to_json ) } 30 | 31 | it 'json parses the response' do 32 | allow(Net::HTTP).to receive(:start).and_return(success_response) 33 | 34 | response = described_class.post_sdf endpoint, sdf_document 35 | expect(response).to eq JSON.parse(success_response.body) 36 | end 37 | 38 | it 
'triggers error! on response its no not a Net::HTTPSuccess' do 39 | response = MockHTTPBadRequest.new 40 | allow(Net::HTTP).to receive(:start).and_return(response) 41 | 42 | expect(response).to receive(:error!) 43 | described_class.post_sdf endpoint, sdf_document 44 | end 45 | 46 | end 47 | -------------------------------------------------------------------------------- /lib/cloudsearchable/field.rb: -------------------------------------------------------------------------------- 1 | require 'active_support/core_ext/hash' 2 | 3 | # 4 | # Class the represents the schema of a domain in CloudSearch 5 | # 6 | # In general, it will be instantiated by a helper class or module, like Cloudsearch_enabled. 7 | # 8 | module Cloudsearchable 9 | # Represents a single field in a CloudSearch index. 10 | # 11 | class Field 12 | FieldTypes = [:literal, :uint, :text].freeze 13 | # Maps the type of field to the name of the options hash when defining the field 14 | FieldTypeOptionsNames = {:literal => :literal_options, :uint => :u_int_options, :text => :text_options}.freeze 15 | # Maps from field type to the allowed set of options for the field 16 | FieldTypeOptionsKeys = { 17 | literal: [:default_value, :facet_enabled, :search_enabled, :result_enabled].freeze, 18 | uint: [:default_value].freeze, 19 | text: [:default_value, :facet_enabled, :result_enabled].freeze 20 | }.freeze 21 | attr_reader :name, :type, :source, :options 22 | 23 | def initialize(name, type, options = {}) 24 | raise ArgumentError, "Invalid field type '#{type}'" unless FieldTypes.include?(type) 25 | @name = name.to_sym 26 | @type = type.to_sym 27 | @source = options[:source] || @name 28 | @options = options.slice(*FieldTypeOptionsKeys[@type]) 29 | end 30 | 31 | def value_for record 32 | if @source.respond_to?(:call) 33 | record.instance_exec &@source 34 | else 35 | record.send @source 36 | end 37 | end 38 | 39 | def define_in_domain domain_name 40 | CloudSearch.client.define_index_field( 41 | :domain_name => 
domain_name, 42 | :index_field => definition 43 | ) 44 | end 45 | 46 | def definition 47 | # http://docs.amazonwebservices.com/cloudsearch/latest/developerguide/API_IndexField.html 48 | { 49 | :index_field_name => name.to_s, 50 | :index_field_type => type.to_s, 51 | FieldTypeOptionsNames[type] => options 52 | } 53 | end 54 | protected :definition 55 | end 56 | end 57 | -------------------------------------------------------------------------------- /cloudsearchable.gemspec: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | lib = File.expand_path('../lib', __FILE__) 3 | $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) 4 | require 'cloudsearchable/version' 5 | 6 | Gem::Specification.new do |spec| 7 | spec.name = "cloudsearchable" 8 | spec.version = Cloudsearchable::VERSION 9 | spec.authors = ['Lane LaRue', 10 | 'Philip White', 11 | ] 12 | spec.email = ["llarue@amazon.com"] 13 | spec.description = %q{ActiveRecord-like query interface for AWS Cloud Search} 14 | spec.summary = %q{ActiveRecord-like query interface for AWS Cloud Search} 15 | spec.homepage = "" 16 | spec.license = "MIT" 17 | 18 | # generated with `git ls-files`.split($/) 19 | spec.files = [ 20 | ".rspec", 21 | "Gemfile", 22 | "LICENSE.txt", 23 | "README.md", 24 | "Rakefile", 25 | "cloudsearchable.gemspec", 26 | "lib/cloudsearchable.rb", 27 | "lib/cloudsearchable/cloud_search.rb", 28 | "lib/cloudsearchable/config.rb", 29 | "lib/cloudsearchable/config/options.rb", 30 | "lib/cloudsearchable/domain.rb", 31 | "lib/cloudsearchable/field.rb", 32 | "lib/cloudsearchable/query_chain.rb", 33 | "lib/cloudsearchable/version.rb", 34 | "spec/cloudsearchable/cloud_search_spec.rb", 35 | "spec/cloudsearchable/cloudsearchable_spec.rb", 36 | "spec/cloudsearchable/config/option_spec.rb", 37 | "spec/cloudsearchable/config_spec.rb", 38 | "spec/cloudsearchable/domain_spec.rb", 39 | "spec/cloudsearchable/field_spec.rb", 40 | "spec/cloudsearchable/query_chain_spec.rb", 
41 | "spec/spec_helper.rb", 42 | "spec/test_classes/cloud_searchable_test_class.rb" 43 | ] 44 | 45 | spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) } 46 | spec.test_files = spec.files.grep(%r{^(test|spec|features)/}) 47 | spec.require_paths = ["lib"] 48 | 49 | spec.add_development_dependency "bundler", "~> 1.3" 50 | spec.add_development_dependency "rake" 51 | spec.add_development_dependency "yard" 52 | spec.add_dependency 'aws-sdk', "~> 2" 53 | 54 | # testing dependencies 55 | spec.add_development_dependency "rspec", '~> 3' 56 | spec.add_development_dependency "activemodel" 57 | end 58 | -------------------------------------------------------------------------------- /spec/cloudsearchable/cloudsearchable_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'test_classes/cloud_searchable_test_class' 3 | 4 | describe Cloudsearchable do 5 | let(:clazz){ CloudSearchableSampleClassFactory.call } 6 | 7 | it 'can describe an index that returns ids for the class type' do 8 | test_index = clazz.cloudsearch_index 9 | expect(test_index).to be_a(Cloudsearchable::Domain) 10 | expect(test_index.fields.count).to eq 4 #3 explicit + 1 for the id of the object 11 | end 12 | 13 | it 'has a default index' do 14 | expect(clazz.cloudsearch_index).to be_a(Cloudsearchable::Domain) 15 | expect(clazz.cloudsearch_index(:test_index)).to_not eq(clazz.cloudsearch_index) 16 | end 17 | 18 | it 'names domains consistent with CloudSearch limitations' do 19 | expect(clazz.cloudsearch_index(:test_index).name).to be =~ /^[a-z][a-z0-9\-]+$/ 20 | end 21 | 22 | describe 'an ordinary object' do 23 | #An instance of the searchable class 24 | let(:inst) do 25 | inst = clazz.new 26 | #arbitrary but plausible values for the core fields 27 | inst.destroyed = false 28 | inst.id = 42 29 | inst.lock_version = 18 30 | inst.customer = OpenStruct.new :id => 123 31 | inst.name = "My Name" 32 | inst 33 | end 34 | 35 | it 
'supplies the right values to the fields' do 36 | test_index = clazz.cloudsearch_index 37 | expect(test_index.fields[:test_class_id].value_for(inst)).to eq(inst.id) 38 | expect(test_index.fields[:customer_id].value_for(inst)).to eq(inst.customer) 39 | expect(test_index.fields[:test_name].value_for(inst)).to eq (inst.name) 40 | expect(test_index.fields[:helpfulness].value_for(inst)).to eq(1234) 41 | end 42 | 43 | it 'reindexes when told to' do 44 | expect(clazz.cloudsearch_index( )).to receive(:post_record).with(inst, inst.id, inst.lock_version) 45 | expect(clazz.cloudsearch_index(:test_index)).to receive(:post_record).with(inst, inst.id, inst.lock_version) 46 | inst.update_indexes 47 | end 48 | 49 | it 'generates a sensible addition sdf document' do 50 | sdf = clazz.cloudsearch_index.send :addition_sdf, inst, inst.id, inst.lock_version 51 | expect(sdf[:fields][:helpfulness]).to eq(1234) 52 | end 53 | end 54 | 55 | describe 'a destroyed object' do 56 | #An instance of the searchable class 57 | let(:inst) do 58 | inst = clazz.new 59 | #arbitrary but plausible values for the core fields 60 | inst.destroyed = true 61 | inst 62 | end 63 | 64 | it 'reindexes when told to' do 65 | expect(clazz.cloudsearch_index( )).to receive(:delete_record).with(inst.id, inst.lock_version) 66 | expect(clazz.cloudsearch_index(:test_index)).to receive(:delete_record).with(inst.id, inst.lock_version) 67 | inst.update_indexes 68 | end 69 | end 70 | 71 | end 72 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Cloudsearchable 2 | An ActiveRecord-style ORM query interface for [Amazon CloudSearch](https://aws.amazon.com/cloudsearch/). 3 | 4 | [![Build Status](https://travis-ci.org/awslabs/cloudsearchable.svg)](https://travis-ci.org/awslabs/cloudsearchable) 5 | 6 | ## Installation 7 | Add to your Gemfile: gem 'cloudsearchable'. 
Run bundle or: gem install cloudsearchable. 8 | 9 | ## Usage 10 | ### 1. Mix Cloudsearchable into your class 11 | ```ruby 12 | class Customer 13 | include Cloudsearchable 14 | 15 | attr_accessor :id, :customer, :name, :lock_version 16 | 17 | # This is the default index. You probably only need one. 18 | index_in_cloudsearch do |idx| 19 | # Fetch the customer_id field from customer 20 | literal :customer_id, :result_enabled => true, :search_enabled => true, :source => Proc.new { customer } 21 | 22 | # Map the 'name' Ruby attribute to a field called 'test_name' 23 | text :test_name, :result_enabled => false, :search_enabled => true, :source => :name 24 | 25 | # uint fields can be used in result ranking functions 26 | uint :helpfulness, :result_enabled => true, :search_enabled => false do; 1234 end 27 | end 28 | 29 | # A named index. 30 | index_in_cloudsearch :test_index do |idx| 31 | literal :id, :search_enabled => true 32 | end 33 | end 34 | ``` 35 | ### 2. Index some objects 36 | ```ruby 37 | c = Customer.new 38 | c.add_to_indexes 39 | c.update_indexes 40 | c.remove_from_indexes 41 | ``` 42 | ### 3. 
Start querying 43 | ```ruby 44 | Customer.search.where(customer_id: 12345) 45 | Customer.search.where(customer_id: 12345).order('-helpfulness') # ordering 46 | Customer.search.where(customer_id: 12345).limit(10) # limit, default 100000 47 | Customer.search.where(customer_id: 12345).offset(100) # offset 48 | Customer.search.where(customer_id: 12345).found_count # count 49 | 50 | Customer.search.where(customer_id: 12345).where(helpfulness: 42) # query chain 51 | Customer.search.where(customer_id: 12345, helpfulness: 42) # query chain from hash 52 | Customer.search.where(:category, :any, ["big", "small"]) # multiple values 53 | Customer.search.where(:customer_id, :!=, 1234) # "not equal to" operator 54 | Customer.search.text('test') # text search 55 | Customer.search.text('test').where(:featured, :==, 'f') # text search with other fields 56 | 57 | Customer.search.where(:helpfulness, :within_range, 0..123) # uint range query, string range works too 58 | Customer.search.where(:helpfulness, :>, 123) # uint greater than 59 | Customer.search.where(:helpfulness, :>=, 123) # uint greater than or equal to 60 | Customer.search.where(:helpfulness, :<, 123) # uint less than 61 | Customer.search.where(:helpfulness, :<=, 123) # uint less than or equal to 62 | ``` 63 | These queries return a Cloudsearchable::Query; calling .to_a or .found_count will fetch the results. 64 | ```ruby 65 | Customer.search.where(customer_id: 12345).each do |customer| 66 | p "#{customer.class}: #{customer.name}" 67 | end 68 | # Customer: foo 69 | # Customer: bar 70 | ``` 71 | ### Configuration 72 | ```ruby 73 | # config\initializers\cloudsearchable_config.rb 74 | 75 | require 'cloudsearchable' 76 | 77 | Cloudsearchable.configure do |config| 78 | config.domain_prefix = "dev-lane-" 79 | end 80 | ``` 81 | Supported Options 82 | * domain_prefix - A name prefix string for your domains in CloudSearch. Defaults to Rails.env, or "" if Rails is undefined. 
83 | * config.fatal_warnings - raises WarningInQueryResult exception on warning. Defaults to true, except in a Rails production environment, where it defaults to false. 84 | * config.logger - a custom logger. Defaults to the Rails logger if Rails is defined, otherwise to a logger writing to stdout. 85 | 86 | ### ActiveSupport Notifications 87 | 88 | Requests to AWS CloudSearch are instrumented using [ActiveSupport Notifications](http://api.rubyonrails.org/classes/ActiveSupport/Notifications.html). To consume these instrumented events, register a subscriber in your application. For example, to receive notifications for search requests: 89 | 90 | ```ruby 91 | ActiveSupport::Notifications.subscribe('cloudsearchable.execute_query') do |*args| 92 | event = ActiveSupport::Notifications::Event.new(*args) 93 | # Your code here ... 94 | end 95 | ``` 96 | 97 | #### Instrumented events: 98 | 99 | 1. cloudsearchable.execute_query - Instruments search requests 100 | 2. cloudsearchable.post_record - Instruments record addition 101 | 3. cloudsearchable.delete_record - Instruments record deletion 102 | 4. cloudsearchable.describe_domains - Instruments requests for domain information 103 | 104 | ### Other Features 105 | 106 | Cloudsearchable provides access to the underlying AWS client objects, such as `CloudSearch.client` and `class.cloudsearch_domains`. For example, here is how to drop the domains associated with the Customer class: 107 | 108 | ```ruby 109 | client = CloudSearch.client 110 | Customer.cloudsearch_domains.each do |key, domain| 111 | domain_name = domain.name 112 | puts "...dropping #{domain_name}" 113 | client.delete_domain(:domain_name => domain_name) 114 | end 115 | ``` 116 | 117 | See spec tests and source code for more information. 118 | 119 | ## Credits 120 | 121 | * [Logan Bowers](https://github.com/loganb) 122 | * [Peter Abrahamsen](https://github.com/rainhead) 123 | * [Lane LaRue](https://github.com/luxx) 124 | * [Philip White](https://github.com/philipmw) 125 | 126 | Apache 2.0 License 127 | 128 | ## Contributing 129 | 130 | 1. Fork it 131 | 2. 
Create your feature branch (`git checkout -b my-new-feature`) 132 | 3. Run the tests (`rake spec`) 133 | 4. Commit your changes (`git commit -am 'Add some feature'`) 134 | 5. Push to the branch (`git push origin my-new-feature`) 135 | 6. Create a new Pull Request 136 | -------------------------------------------------------------------------------- /lib/cloudsearchable/domain.rb: -------------------------------------------------------------------------------- 1 | # 2 | # Class that represents the schema of a domain in CloudSearch 3 | # 4 | # In general, it will be instantiated by a helper class or module, like Cloudsearch_enabled. 5 | # 6 | module Cloudsearchable 7 | class Domain 8 | class DomainNotFound < StandardError; end 9 | 10 | attr_reader :name, :fields 11 | 12 | def initialize name 13 | @name = "#{Cloudsearchable::Config.domain_prefix}#{name}" 14 | @fields = {} 15 | end 16 | 17 | # Defines an index field of the given type. 18 | # @param name field name 19 | # @param type field type - one of :literal, :uint, or :text 20 | # @option options [Boolean] :search_enabled (true) 21 | # @option options [Boolean] :result_enabled (true) 22 | # @option options [Symbol or Proc] :source The name of a method to call on a record to fetch 23 | # the value of the field, or else a Proc to be evaluated in the context of the record. 24 | # Defaults to a method with the same name as the field. 
25 | def add_field(name, type, options = {}) 26 | field = Field.new(name, type, options) 27 | raise "Field #{name} already exists on index #{self.name}" if @fields.has_key?(field.name) 28 | @fields[field.name] = field 29 | end 30 | 31 | # Creates the domain and defines its index fields in Cloudsearch 32 | # Will blindly recreate index fields, no-op if the index already exists 33 | def create 34 | Cloudsearchable.logger.info "Creating domain #{name}" 35 | CloudSearch.client.create_domain(:domain_name => name) 36 | 37 | #Create the fields for the index 38 | fields.values.each do |field| 39 | Cloudsearchable.logger.info " ...creating #{field.type} field #{name}" 40 | field.define_in_domain self.name 41 | end 42 | Cloudsearchable.logger.info " ...done!" 43 | end 44 | 45 | def reindex 46 | CloudSearch.client.index_documents(:domain_name => name) 47 | end 48 | 49 | # 50 | # This queries the status of the domain from Cloudsearch and determines if 51 | # the domain needs to be reindexed. If so, it will initiate the reindex and 52 | # wait timeout seconds for it to complete. Default is 0. Reindexings tend 53 | # to take 15-30 minutes. 
54 | # 55 | # @return true if the changes are applied, false if the domain is still reindexing 56 | # 57 | def apply_changes(timeout = 0) 58 | d = cloudsearch_domain(true)[:domain_status_list][0] 59 | if(d[:requires_index_documents]) 60 | reindex 61 | end 62 | 63 | #We'll potentially sleep until the reindex has completed 64 | end_time = Time.now + timeout 65 | sleep_time = 1 66 | loop do 67 | d = cloudsearch_domain(true)[:domain_status_list][0] 68 | break unless (d[:processing] && Time.now < end_time) 69 | 70 | sleep(sleep_time) 71 | sleep_time = [2 * sleep_time, end_time - Time.now].min #exponential backoff 72 | end 73 | 74 | !d[:processing] #processing is true as long as it is reindexing 75 | end 76 | 77 | # Add or replace the CloudSearch document for a particular version of a record 78 | def post_record record, record_id, version 79 | ActiveSupport::Notifications.instrument('cloudsearchable.post_record') do 80 | CloudSearch.post_sdf doc_endpoint, addition_sdf(record, record_id, version) 81 | end 82 | end 83 | 84 | # Delete the CloudSearch document for a particular record (version must be greater than the last version pushed) 85 | def delete_record record_id, version 86 | ActiveSupport::Notifications.instrument('cloudsearchable.delete_record') do 87 | CloudSearch.post_sdf doc_endpoint, deletion_sdf(record_id, version) 88 | end 89 | end 90 | 91 | def execute_query(params) 92 | uri = URI("http://#{search_endpoint}/#{CloudSearch::API_VERSION}/search") 93 | uri.query = URI.encode_www_form(params) 94 | Cloudsearchable.logger.info "CloudSearch execute: #{uri.to_s}" 95 | res = ActiveSupport::Notifications.instrument('cloudsearchable.execute_query') do 96 | Net::HTTP.get_response(uri).body 97 | end 98 | JSON.parse(res) 99 | end 100 | 101 | def deletion_sdf record_id, version 102 | { 103 | :type => "delete", 104 | :id => document_id(record_id), 105 | :version => version 106 | } 107 | end 108 | 109 | def addition_sdf record, record_id, version 110 | { 111 | :type => "add", 
112 | :id => document_id(record_id), 113 | :version => version, 114 | :lang => "en", # FIXME - key off of marketplace 115 | :fields => sdf_fields(record) 116 | } 117 | end 118 | 119 | # Generate a documentID that follows the CS restrictions 120 | def document_id record_id 121 | Digest::MD5.hexdigest record_id.to_s 122 | end 123 | 124 | protected 125 | 126 | # 127 | # AWS Cloudsearchable Domain 128 | # 129 | # @param force_reload force a re-fetch from the domain 130 | # 131 | def cloudsearch_domain(force_reload = false) 132 | if(force_reload || !@domain) 133 | @domain = ActiveSupport::Notifications.instrument('cloudsearchable.describe_domains') do 134 | CloudSearch.client.describe_domains(:domain_names => [name]) 135 | end 136 | else 137 | @domain 138 | end 139 | 140 | status = @domain[:domain_status_list] 141 | if status.nil? || status && status.empty? 142 | raise(DomainNotFound, "Cloudsearchable could not find the domain '#{name}' in AWS. Check the name and the availability region.") 143 | end 144 | 145 | @domain 146 | end 147 | 148 | def sdf_fields record 149 | fields.values.inject({}) do |sdf, field| 150 | value = field.value_for(record) 151 | sdf[field.name] = value if value 152 | sdf 153 | end 154 | end 155 | 156 | # AWS CloudSearch Domain API to get search endpoint 157 | def search_endpoint 158 | @search_endpoint ||= cloudsearch_domain[:domain_status_list].first[:search_service][:endpoint] 159 | end 160 | 161 | # AWS CloudSearch Domain API to get doc endpoint 162 | def doc_endpoint 163 | @doc_endpoint ||= cloudsearch_domain[:domain_status_list].first[:doc_service][:endpoint] 164 | end 165 | 166 | end 167 | end 168 | -------------------------------------------------------------------------------- /lib/cloudsearchable.rb: -------------------------------------------------------------------------------- 1 | require 'cloudsearchable/version' 2 | 3 | require 'cloudsearchable/domain' 4 | require 'cloudsearchable/field' 5 | require 'cloudsearchable/query_chain' 6 | 
require 'cloudsearchable/cloud_search' 7 | require 'cloudsearchable/config' 8 | 9 | require 'active_support/inflector' 10 | require 'active_support/core_ext/string' 11 | require 'active_support/notifications' 12 | 13 | module Cloudsearchable 14 | def self.configure 15 | block_given? ? yield(Cloudsearchable::Config) : Cloudsearchable::Config 16 | end 17 | 18 | def self.config 19 | configure 20 | end 21 | 22 | def self.logger 23 | Cloudsearchable::Config.logger 24 | end 25 | 26 | def self.included(base) 27 | base.extend ClassMethods 28 | end 29 | 30 | def cloudsearch_domains= *args 31 | self.class.cloudsearch_domains = args 32 | end 33 | 34 | def cloudsearch_domains 35 | self.class.cloudsearch_domains 36 | end 37 | 38 | def update_indexes 39 | if destroyed? 40 | remove_from_indexes 41 | else 42 | add_to_indexes 43 | end 44 | end 45 | 46 | def add_to_indexes 47 | cloudsearch_domains.map do |name, domain| 48 | domain.post_record(self, id, lock_version) 49 | end 50 | end 51 | 52 | def remove_from_indexes 53 | cloudsearch_domains.map do |name, domain| 54 | domain.delete_record(id, lock_version) 55 | end 56 | end 57 | 58 | protected 59 | 60 | class DSL 61 | attr_reader :domain, :base 62 | 63 | def initialize domain, base 64 | @domain = domain 65 | @base = base 66 | end 67 | 68 | def uint name, options = {}, &block 69 | field name, :uint, options, &block 70 | end 71 | 72 | def text name, options = {}, &block 73 | field name, :text, options, &block 74 | end 75 | 76 | def literal name, options = {}, &block 77 | field name, :literal, options, &block 78 | end 79 | 80 | def field name, type, options = {}, &block 81 | # This block is executed in the context of the record 82 | if block_given? 
83 | options[:source] = block.to_proc 84 | end 85 | domain.add_field name, type, options 86 | end 87 | end 88 | 89 | module ClassMethods 90 | def cloudsearch_domains= domains 91 | @cloudsearch_domains = domains 92 | end 93 | 94 | def cloudsearch_domains 95 | @cloudsearch_domains || {} 96 | end 97 | 98 | # 99 | # Declares a Cloudsearchable index that returns a list of object of this class. 100 | # 101 | # @param name (optional) optional name for the index. If not specified, a default (unnamed) index for the class will be created 102 | # @param options (optional) Hash defining an index 103 | # 104 | # @option options [String] :name Name of the index 105 | # 106 | # 107 | def index_in_cloudsearch(name = nil, &block) 108 | locator_field = :"#{cloudsearch_prefix.singularize}_id" 109 | # Fetches the existing search domain, or generates a new one 110 | unless domain = cloudsearch_domains[name] 111 | domain = new_cloudsearch_index(name).tap do |d| 112 | # This id field is used to reify search results 113 | d.add_field(locator_field, :literal, 114 | :result_enabled => true, :search_enabled => true, 115 | :source => :id) 116 | end 117 | self.cloudsearch_domains = self.cloudsearch_domains.merge({name => domain}) 118 | end 119 | 120 | if block_given? 
121 | dsl = DSL.new(domain, self) 122 | dsl.instance_exec &block 123 | end 124 | 125 | # Define the search method 126 | search_method_name = "search#{name && ('_' + name.to_s)}".to_sym 127 | define_singleton_method search_method_name do 128 | Query.new(self, cloudsearch_index(name), locator_field) 129 | end 130 | end 131 | 132 | def cloudsearch_index name = nil 133 | cloudsearch_domains[name] 134 | end 135 | 136 | # 137 | # Prefix name used for indexes, defaults to class name underscored 138 | # 139 | def cloudsearch_prefix 140 | name.pluralize.underscore.gsub('/', '_') 141 | end 142 | 143 | def new_cloudsearch_index name 144 | name = [cloudsearch_prefix, name].compact.join('-').gsub('_','-') 145 | Cloudsearchable::Domain.new name 146 | end 147 | 148 | # By default use 'find' to materialize items 149 | def materialize_method method_name = nil 150 | @materialize_method = method_name unless method_name.nil? 151 | @materialize_method.nil? ? :find : @materialize_method 152 | end 153 | end 154 | 155 | # 156 | # Wraps a Cloudsearchable::QueryChain, provides methods to execute and reify 157 | # a query into search result objects 158 | # 159 | class Query 160 | include Enumerable 161 | 162 | attr_reader :query, :class 163 | 164 | # 165 | # @param clazz [ActiveRecord::Model] The class of the Model object that 166 | # is being searched. The result set will be objects of this type. 167 | # @param domain [Domain] Cloudsearchable Domain to search 168 | # @param identity_field [Symbol] name of the field that contains the id of 169 | # the clazz (e.g. 
:collection_id) 170 | # 171 | def initialize(clazz, domain, identity_field) 172 | @query = Cloudsearchable::QueryChain.new(domain, fatal_warnings: Cloudsearchable.config.fatal_warnings) 173 | @class = clazz 174 | @query.returning(identity_field) 175 | @identity_field = identity_field 176 | end 177 | 178 | [:where, :text, :order, :limit, :offset, :returning].each do |method_name| 179 | # Passthrough methods, see CloudSearch::Domain for docs 180 | define_method method_name do |*args| 181 | @query.send(method_name, *args) 182 | self 183 | end 184 | end 185 | 186 | # Pass through to Cloudsearchable::Domain#materialize!, then retrieve objects from database 187 | # TODO: this does NOT preserve order! 188 | def materialize!(*args) 189 | @results ||= begin 190 | record_ids = @query.map{|result_hit| result_hit['data'][@identity_field.to_s].first}.reject{|r| r.nil?} 191 | @class.send(@class.materialize_method, record_ids) 192 | end 193 | end 194 | 195 | def each &block 196 | # Returns an enumerator 197 | return enum_for(__method__) unless block_given? 198 | materialize! 199 | @results.respond_to?(:each) ? 
@results.each { |o| yield o } : [@results].send(:each, &block) 200 | end 201 | 202 | def found_count 203 | query.found_count 204 | end 205 | 206 | end 207 | end 208 | -------------------------------------------------------------------------------- /spec/cloudsearchable/domain_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'test_classes/cloud_searchable_test_class' 3 | 4 | describe Cloudsearchable::Domain do 5 | before(:each) do 6 | fake_client = double('client') 7 | allow(CloudSearch).to receive(:client).and_return(fake_client) 8 | end 9 | 10 | # 11 | # First call to describe_domains returns it needs reindexing, 12 | # Second call returns that it is processing, 13 | # Third call returns that it is done processing 14 | # 15 | let(:needs_rebuild_domain) do 16 | described_class.new('nrb-index').tap do |dom| 17 | resp = describe_domain_response(dom.name) 18 | allow(CloudSearch.client).to receive(:describe_domains).with(:domain_names => [dom.name]). 
19 | and_return( 20 | describe_domain_response(dom.name, :required_index_documents => true), 21 | describe_domain_response(dom.name, :processing => true), 22 | describe_domain_response(dom.name) 23 | ) 24 | end 25 | end 26 | 27 | # A domain name named 'my-index' 28 | let(:domain) do 29 | described_class.new('my-index').tap do |dom| 30 | allow(CloudSearch.client).to receive(:describe_domains).and_return(describe_domain_response(dom.name)) 31 | allow(CloudSearch.client).to receive(:describe_domains).with(:domain_names => [dom.name]).and_return(describe_domain_response(dom.name)) 32 | end 33 | end 34 | 35 | let(:empty_domain) do 36 | described_class.new('my-index').tap do |dom| 37 | allow(CloudSearch.client).to receive(:describe_domains).and_return({}) 38 | end 39 | end 40 | 41 | it 'can be instantiated' do 42 | index = domain 43 | expect(index.name).to end_with('my-index') 44 | end 45 | 46 | it 'can haz a literal field' do 47 | index = domain 48 | index.add_field(:literary, :literal) { nil } 49 | expect(index.fields[:literary].type).to eq(:literal) 50 | end 51 | 52 | it 'can be initialized with a nested class' do 53 | class OuterClassForCloudSearch 54 | class InnerClass 55 | include Cloudsearchable 56 | end 57 | end 58 | 59 | expect(OuterClassForCloudSearch::InnerClass.cloudsearch_prefix).to match(/^[A-Za-z0-9_-]+$/) 60 | Object.instance_eval { remove_const :OuterClassForCloudSearch } 61 | end 62 | 63 | context "SDF documents" do 64 | let(:object) { OpenStruct.new(:field_with_nil_value => nil, :field_with_present_value => 42) } 65 | subject do 66 | described_class.new('my-index').tap do |d| 67 | d.add_field(:field_with_nil_value, :literal) 68 | d.add_field(:field_with_present_value, :literal) 69 | end 70 | end 71 | 72 | it "generates present fields" do 73 | expect(subject.addition_sdf(object, "id", 1)[:fields][:field_with_present_value]).to eq 42 74 | end 75 | 76 | it "does not generate nil fields" do 77 | expect(subject.addition_sdf(object, "id", 
1)[:fields][:field_with_nil_value]).to be_nil 78 | end 79 | end 80 | 81 | it 'raises if the domain cannot be found' do 82 | expect { empty_domain.send(:search_endpoint) }.to raise_error( Cloudsearchable::Domain::DomainNotFound, 83 | /Cloudsearchable could not find the domain/) 84 | end 85 | 86 | # 87 | # A test for the accidental clashing of domain caching 88 | # on a class variable 89 | # 90 | it 'caches endpoints for multiple domains' do 91 | expect(domain.send(:search_endpoint)).to_not eq(needs_rebuild_domain.send(:search_endpoint)) 92 | end 93 | 94 | it 'endpoint selected is based on the domain name' do 95 | expect(domain.send(:search_endpoint)).to eq describe_domain_response(domain.name)[:domain_status_list][0][:search_service][:endpoint] 96 | expect(domain.send(:doc_endpoint)).to eq describe_domain_response(domain.name)[:domain_status_list][0][:doc_service][:endpoint] 97 | end 98 | 99 | it 'sleeps, waiting for reindexing' do 100 | expect(CloudSearch.client).to receive(:index_documents).with(:domain_name => needs_rebuild_domain.name) 101 | expect(CloudSearch.client).to receive(:describe_domains).exactly(3).times 102 | expect(needs_rebuild_domain.apply_changes(3)).to be_truthy 103 | end 104 | 105 | protected 106 | 107 | # 108 | # A mockup of the response to a describe_domain request 109 | # 110 | def describe_domain_response(domain_name, options = {}) 111 | { 112 | :domain_status_list=> [ 113 | { 114 | :search_partition_count=>1, 115 | :search_service=>{ 116 | :arn=>"arn:aws:cs:us-east-1:510523556749:search/#{domain_name}", 117 | :endpoint=>"search-#{domain_name}-7bq6utq4fdrwax5r6irje7xlra.us-east-1.cloudsearch.amazonaws.com" 118 | }, 119 | :num_searchable_docs=>23, 120 | :search_instance_type=>"search.m1.small", 121 | :created=>true, 122 | :domain_id=>"510523556749/#{domain_name}", 123 | :processing=> options.fetch(:processing, false), 124 | :search_instance_count=>1, 125 | :domain_name=>"#{domain_name}", 126 | :requires_index_documents=> 
options.fetch(:required_index_documents,false), 127 | :deleted=>false, 128 | :doc_service=>{ 129 | :arn=>"arn:aws:cs:us-east-1:510523556749:doc/#{domain_name}", 130 | :endpoint=>"doc-#{domain_name}-7bq6utq4fdrwax5r6irje7xlra.us-east-1.cloudsearch.amazonaws.com" 131 | } 132 | }, 133 | {:search_partition_count=>1, 134 | :search_service=> 135 | {:arn=> 136 | "arn:aws:cs:us-east-1:510523556749:search/dev-llarue-collection-items", 137 | :endpoint=> 138 | "search-dev-llarue-collection-items-hjopg2yzhcjdd4qxeglr2v5v7m.us-east-1.cloudsearch.amazonaws.com"}, 139 | :num_searchable_docs=>2, 140 | :search_instance_type=>"search.m1.small", 141 | :created=>true, 142 | :domain_id=>"510523556749/dev-llarue-collection-items", 143 | :processing=>false, 144 | :search_instance_count=>1, 145 | :domain_name=>"dev-llarue-collection-items", 146 | :requires_index_documents=>false, 147 | :deleted=>false, 148 | :doc_service=> 149 | {:arn=>"arn:aws:cs:us-east-1:510523556749:doc/dev-llarue-collection-items", 150 | :endpoint=> 151 | "doc-dev-llarue-collection-items-hjopg2yzhcjdd4qxeglr2v5v7m.us-east-1.cloudsearch.amazonaws.com"}} 152 | ], 153 | :response_metadata=>{ 154 | :request_id=>"7d9487a7-1c9f-11e2-9f96-0958b8a97a74" 155 | } 156 | } 157 | end 158 | end 159 | -------------------------------------------------------------------------------- /lib/cloudsearchable/query_chain.rb: -------------------------------------------------------------------------------- 1 | module Cloudsearchable 2 | class NoClausesError < StandardError; end 3 | class WarningInQueryResult < StandardError; end 4 | 5 | # 6 | # An object that represents a query to cloud search 7 | # 8 | class QueryChain 9 | include Enumerable 10 | 11 | attr_reader :domain, :fields 12 | 13 | # options: 14 | # - fatal_warnings: if true, raises a WarningInQueryResult exception on warning. 
Defaults to false 15 | def initialize(domain, options = {}) 16 | @fatal_warnings = options.fetch(:fatal_warnings, false) 17 | @domain = domain 18 | @q = nil 19 | @clauses = [] 20 | @rank = nil 21 | @limit = 100000 # 10 is the CloudSearch default, 2kb limit will probably hit before this will 22 | @offset = nil 23 | @fields = Set.new 24 | @results = nil 25 | end 26 | 27 | # 28 | # This method can be called in several different forms. 29 | # 30 | # To do an equality search on several fields, you can pass a single hash, e.g.: 31 | # 32 | # Collection.search.where(customer_id: "12345", another_field: "Some value") 33 | # 34 | # To do a search on a single field, you can pass three parameters in the 35 | # form: where(field, op, value) 36 | # 37 | # Collection.search.where(:customer_id, :==, 12345) 38 | # 39 | # The value you provide must be of the same type as the field. For text and literal 40 | # values, provide a string value. For uint fields, provide a numeric value. 41 | # 42 | # To search for any of several possible values for a field, use the :any operator: 43 | # 44 | # Collection.search.where(:product_group, :any, %w{gl_kitchen gl_grocery}) 45 | # 46 | # Equality and inequality operators (:==, :!=, :<, :<=, :>, :>=) are supported on 47 | # integers, and equality operators are supported on all scalars. 48 | # Currently, special operators against arrays (any and all) are not yet implemented. 49 | # 50 | def where(field_or_hash, op = nil, value = nil) 51 | raise if materialized? 52 | 53 | if field_or_hash.is_a? Hash 54 | field_or_hash.each_pair do |k, v| 55 | where(k, :==, v) 56 | end 57 | elsif field_or_hash.is_a? Symbol 58 | if (field = domain.fields[field_or_hash.to_sym]).nil? 
59 | raise "cannot query on field '#{field_or_hash}' because it is not a member of this index" 60 | end 61 | @clauses << clause_for(field_or_hash, field.type, op, value) 62 | else 63 | raise "field_or_hash must be a Hash or Symbol, not a #{field_or_hash.class}" 64 | end 65 | 66 | self 67 | end 68 | 69 | # 70 | # Allows searching by text, overwriting any existing text search. 71 | # 72 | # Collection.search.text('mens shoes') 73 | # 74 | # For more examples see http://docs.aws.amazon.com/cloudsearch/latest/developerguide/searching.text.html 75 | # 76 | def text(text) 77 | raise if materialized? 78 | @q = text 79 | self 80 | end 81 | 82 | # 83 | # Set a rank expression on the query, overwriting any existing expression. Defaults to "-text_relevance" 84 | # 85 | # Collection.search.order('created_at') # order by the created_at field ascending 86 | # Collection.search.order('-created_at') # descending order 87 | # 88 | # For more examples see http://docs.amazonwebservices.com/cloudsearch/latest/developerguide/tuneranking.html 89 | # 90 | def order rank_expression 91 | raise if materialized? 92 | raise "order clause must be a string, not a #{rank_expression.class}" unless rank_expression.is_a? String 93 | @rank = rank_expression.to_s 94 | self 95 | end 96 | 97 | # 98 | # Limit the number of results returned from query to the given count. 99 | # 100 | # Collection.search.limit(25) 101 | # 102 | def limit count 103 | raise if materialized? 104 | raise "limit value must be must respond to to_i, #{count.class} does not" unless count.respond_to? :to_i 105 | @limit = count.to_i 106 | self 107 | end 108 | 109 | # 110 | # Offset the results returned by the query by the given count. 111 | # 112 | # Collection.search.offset(250) 113 | # 114 | def offset count 115 | raise if materialized? 116 | raise "limit value must be must respond to to_i, #{count.class} does not" unless count.respond_to? 
:to_i 117 | @offset = count.to_i 118 | self 119 | end 120 | 121 | # 122 | # Adds a one or more fields to the returned result set, e.g.: 123 | # 124 | # my_query.returning(:collection_id) 125 | # my_query.returning(:collection_id, :created_at) 126 | # 127 | # x = [:collection_id, :created_at] 128 | # my_query.returning(x) 129 | # 130 | def returning(*fields) 131 | raise if materialized? 132 | 133 | fields.flatten! 134 | fields.each do |f| 135 | @fields << f 136 | end 137 | self 138 | end 139 | 140 | # 141 | # True if the query has been materialized (e.g. the search has been 142 | # executed). 143 | # 144 | def materialized? 145 | !@results.nil? 146 | end 147 | 148 | # 149 | # Executes the query, getting a result set, returns true if work was done, 150 | # false if the query was already materialized. 151 | # Raises exception if there was a warning and not in production. 152 | # 153 | def materialize! 154 | return false if materialized? 155 | 156 | @results = domain.execute_query(to_q) 157 | 158 | if @results && @results["info"] && messages = @results["info"]["messages"] 159 | messages.each do |message| 160 | if message["severity"] == "warning" 161 | Cloudsearchable.logger.warn "Cloud Search Warning: #{message["code"]}: #{message["message"]}" 162 | raise(WarningInQueryResult, "#{message["code"]}: #{message["message"]}") if @fatal_warnings 163 | end 164 | end 165 | end 166 | 167 | true 168 | end 169 | 170 | def found_count 171 | materialize! 172 | if @results['hits'] 173 | @results['hits']['found'] 174 | else 175 | raise "improperly formed response. hits parameter not available. messages: #{@results["messages"]}" 176 | end 177 | end 178 | 179 | def each(&block) 180 | materialize! 181 | if @results['hits'] 182 | @results['hits']['hit'].each(&block) 183 | else 184 | raise "improperly formed response. hits parameter not available. 
messages: #{@results["messages"]}" 185 | end 186 | end 187 | 188 | # 189 | # Turns this Query object into a query string hash that goes on the CloudSearch URL 190 | # 191 | def to_q 192 | raise NoClausesError, "no search terms were specified" if (@clauses.nil? || @clauses.empty?) && (@q.nil? || @q.empty?) 193 | 194 | bq = (@clauses.count > 1) ? "(and #{@clauses.join(' ')})" : @clauses.first 195 | { 196 | q: @q, 197 | bq: bq, 198 | rank: @rank, 199 | size: @limit, 200 | start: @offset, 201 | :'return-fields' => @fields.reduce("") { |s,f| s << f.to_s } 202 | } 203 | end 204 | 205 | private 206 | 207 | def clause_for(field, type, op, value) 208 | # Operations for which 'value' is not a scalar 209 | if op == :any 210 | '(or ' + value.map { |v| "#{field}:#{query_clause_value(type, v)}" }.join(' ') + ')' 211 | elsif op == :within_range && type == :uint 212 | "#{field}:#{value.to_s}" 213 | else 214 | value = query_clause_value(type, value) 215 | 216 | # Some operations are applicable to all types. 217 | case op 218 | when :==, :eq 219 | "#{field}:#{value}" 220 | when :!= 221 | "(not #{field}:#{value})" 222 | else 223 | # Operation-specific, type-specific operations on scalars 224 | case type 225 | when :uint 226 | case op 227 | when :> 228 | "#{field}:#{value+1}.." 229 | when :< 230 | "#{field}:..#{value-1}" 231 | when :>= 232 | "#{field}:#{value}.." 233 | when :<= 234 | "#{field}:..#{value}" 235 | else 236 | raise "op #{op} is unrecognized for value #{value} of type #{type}" 237 | end 238 | else 239 | raise "op #{op} is unrecognized for value #{value} of type #{type}" 240 | end 241 | end 242 | end 243 | end 244 | 245 | def query_clause_value(type, value) 246 | if type == :uint 247 | Integer(value) 248 | elsif !value.nil? 
249 | "'#{value.to_s}'" 250 | else 251 | raise "Value #{value} cannot be converted to query string on type #{type}" 252 | end 253 | end 254 | end 255 | end 256 | -------------------------------------------------------------------------------- /spec/cloudsearchable/query_chain_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | require 'test_classes/cloud_searchable_test_class' 3 | 4 | describe Cloudsearchable::Query do 5 | let(:clazz){ CloudSearchableSampleClassFactory.call } 6 | 7 | it "doesn't build queries without a query term" do 8 | expect do 9 | query = clazz.search.limit(10).query.to_q 10 | end.to raise_exception 11 | end 12 | 13 | describe '#where' do 14 | it 'can build a simple search query' do 15 | expect(clazz.search.where(:customer_id, :eq, 'A1234').query.to_q[:bq]).to be =~ /customer_id:'A1234'/ 16 | end 17 | 18 | it 'rejects field names that were not defined in the index' do 19 | expect { clazz.search.where(:mispeled_field, :eq, 12345) }.to raise_exception 20 | end 21 | 22 | it 'chains' do 23 | query = clazz.search.where(customer_id: 'A1234').where(helpfulness: 42).query.to_q[:bq] 24 | expect(query).to be =~ /customer_id:'A1234'/ 25 | expect(query).to be =~ /helpfulness:42/ 26 | end 27 | 28 | it 'can build a query with "not equal to" condition' do 29 | expect(clazz.search.where(:customer_id, :!=, 'A1234').query.to_q[:bq]).to be =~ /\(not customer_id:'A1234'\)/ 30 | end 31 | 32 | it 'can build a query from a hash' do 33 | query = clazz.search.where(customer_id: 'A1234', helpfulness: 42).query.to_q[:bq] 34 | expect(query).to be =~ /customer_id:'A1234'/ 35 | expect(query).to be =~ /helpfulness:42/ 36 | end 37 | 38 | context 'literal data type' do 39 | it 'supports equality' do 40 | expect(clazz.search.where(:customer_id, :==, 'ABC').query.to_q[:bq]).to eq "customer_id:'ABC'" 41 | end 42 | 43 | it 'supports :any' do 44 | expect(clazz.search.where(:customer_id, :any, ['ABC', 
'DEF']).query.to_q[:bq]).to eq "(or customer_id:'ABC' customer_id:'DEF')" 45 | end 46 | 47 | it 'accepts a value as an integer' do 48 | expect(clazz.search.where(customer_id: 123).query.to_q[:bq]).to be =~ /customer_id:'123'/ 49 | end 50 | 51 | it 'rejects nil value' do 52 | expect { clazz.search.where(customer_id: nil) }.to raise_exception 53 | end 54 | end 55 | 56 | context 'uint data type' do 57 | it 'supports range query' do 58 | expect(clazz.search.where(:helpfulness, :within_range, "0..#{123}").query.to_q[:bq]).to be =~ /helpfulness:0..123/ 59 | end 60 | 61 | it 'supports range query using a ruby range' do 62 | expect(clazz.search.where(:helpfulness, :within_range, 0..123).query.to_q[:bq]).to be =~ /helpfulness:0..123/ 63 | end 64 | 65 | it 'supports equality' do 66 | expect(clazz.search.where(:helpfulness, :==, 123).query.to_q[:bq]).to eq 'helpfulness:123' 67 | end 68 | 69 | it 'supports not-equality' do 70 | expect(clazz.search.where(:helpfulness, :!=, 123).query.to_q[:bq]).to eq '(not helpfulness:123)' 71 | end 72 | 73 | it 'supports greater-than' do 74 | expect(clazz.search.where(:helpfulness, :>, 123).query.to_q[:bq]).to be =~ /helpfulness:124../ 75 | end 76 | 77 | it 'supports greater-than-or-equal-to' do 78 | expect(clazz.search.where(:helpfulness, :>=, 123).query.to_q[:bq]).to be =~ /helpfulness:123../ 79 | end 80 | 81 | it 'supports less-than' do 82 | expect(clazz.search.where(:helpfulness, :<, 123).query.to_q[:bq]).to be =~ /helpfulness:..122/ 83 | end 84 | 85 | it 'supports less-than-or-equal-to' do 86 | expect(clazz.search.where(:helpfulness, :<=, 123).query.to_q[:bq]).to be =~ /helpfulness:..123/ 87 | end 88 | 89 | it 'supports :any' do 90 | expect(clazz.search.where(:helpfulness, :any, [123, 456]).query.to_q[:bq]).to eq '(or helpfulness:123 helpfulness:456)' 91 | end 92 | 93 | it 'accepts a value as a string' do 94 | expect(clazz.search.where(helpfulness: '123').query.to_q[:bq]).to be =~ /helpfulness:123/ 95 | end 96 | 97 | [Object.new, nil, 
'123a'].each do |v| 98 | it "rejects value #{v} of type #{v.class}" do 99 | expect { clazz.search.where(helpfulness: v) }.to raise_exception 100 | end 101 | end 102 | end 103 | 104 | [:>, :>=, :<, :<=].each do |op| 105 | [Object.new, nil, '123a'].each do |v| 106 | it "does not permit op #{op} on value #{v} of type #{v.class}" do 107 | expect { clazz.search.where(:helpfulness, op, v).query.to_q[:bq] }.to raise_error 108 | end 109 | end 110 | end 111 | end 112 | 113 | 114 | it 'supports querying for any of several values of a field' do 115 | expect(clazz.search.where(:test_name, :any, %w{big small}).query.to_q[:bq]).to include("(or test_name:'big' test_name:'small')") 116 | end 117 | 118 | it 'supports text method' do 119 | query = clazz.search.text('test').query.to_q[:q] 120 | expect(query).to be =~ /test/ 121 | end 122 | 123 | it 'supports chaining text and where clauses together' do 124 | query = clazz.search.text('test').where(:helpfulness, :==, 123).query 125 | expect(query.to_q[:q]).to be =~ /test/ 126 | expect(query.to_q[:bq]).to be =~ /helpfulness:123/ 127 | end 128 | 129 | it 'supports ordering with a rank expression' do 130 | expect(clazz.search.where(customer_id: 12345).order('-helpfulness').query.to_q[:rank]).to eq '-helpfulness' 131 | end 132 | 133 | it 'supports limit' do 134 | expect(clazz.search.where(customer_id: 12345).limit(10).query.to_q[:size]).to eq 10 135 | end 136 | 137 | it 'has high default limit' do 138 | expect(clazz.search.where(customer_id: 12345).query.to_q[:size]).to eq 100000 139 | end 140 | 141 | it 'supports offset' do 142 | expect(clazz.search.where(customer_id: 12345).offset(100).query.to_q[:start]).to eq 100 143 | end 144 | 145 | context 'queries' do 146 | before(:each) do 147 | expect(clazz.cloudsearch_index).to receive(:execute_query).and_return(cloudsearch_response) 148 | end 149 | 150 | context 'query warning' do 151 | before(:each) do 152 | allow(clazz).to receive(:find).and_return([]) 153 | expect(Cloudsearchable.logger).to 
receive(:warn).with(/CS-InvalidFieldOrRankAliasInRankParameter/) 154 | end 155 | 156 | let(:query){clazz.search.where(customer_id: 12345).order("-adult")} 157 | let(:cloudsearch_response) do 158 | # generated by ranking a literal field that is search-enabled but not result-enabled 159 | { 160 | "rank" => "adult", 161 | "match-expr" => "(label initialized_at:1363464074..1366056074)", 162 | "hits" => { 163 | "found" => 285, 164 | "start" => 0, 165 | "hit" => [ 166 | {"id" => "40bdd5072b6dbae6245fe4ee837d22e3","data" => {"test_class_id" => ["PCxSz65GIcZTtc0UpRdT-i--w-1365550370"]}}, 167 | {"id" => "00af8f5f96aa1db7aff77be5651b3bb1","data" => {"test_class_id" => ["PCxhksJTLRYnoGXvwZik82Fkw-1365020313"]}}, 168 | {"id" => "00b6ac84e3ae402e7698959bf692a53e","data" => {"test_class_id" => ["PCxs-fIVZnBcTzZ4MtfDguS1A-1365020274"]}}, 169 | {"id" => "018fdee653bff74abd12ac30152a2837","data" => {"test_class_id" => ["PCxmAGHFtAgyqUrgI3HgM_P6Q-1365548349"]}}, 170 | {"id" => "01d062d24c389906eea2d16b8193eb56","data" => {"test_class_id" => ["PCxqjaTmwydKM82NqymbryNfg-1365470479"]}}, 171 | {"id" => "01e3ee5d848a30385a4e90eb851b094d","data" => {"test_class_id" => ["PCxSz65GIcZTtc0UpRdT-i--w-1365550369"]}}, 172 | {"id" => "01fca44cc596adb295ca6ee9f9f36499","data" => {"test_class_id" => ["PCx7XKbKwOVf1VvEWvTl5c1Eg-1365020176"]}}, 173 | {"id" => "02b85c9835b5045065ee389954a60c5f","data" => {"test_class_id" => ["PCxp_xid_WeTfTmb5MySEfxhQ-1365115565"]}}, 174 | {"id" => "040c01be434552a1d9e99eef9db87bdd","data" => {"test_class_id" => ["PCxLOYzA4bCt7-bP6wsZnl-ow-1365020297"]}}, 175 | {"id" => "048567c755e30d6d64d757508f1feaa0","data" => {"test_class_id" => ["PCxJhhnpYkeSKrOxteQo5Jckw-1365115667"]}} 176 | ] 177 | }, 178 | "info" => { 179 | "rid" => "7df344e77e1076a903e1f2dc1effcf3dde0a89442fb459d00a6e60ac64b8bbfcab1fbc5b35c10949", 180 | "time-ms" => 3, 181 | "cpu-time-ms" => 0, 182 | "messages" => [ 183 | { 184 | "severity" => "warning", 185 | "code" => 
"CS-InvalidFieldOrRankAliasInRankParameter", 186 | "host" => "7df344e77e1076a9884a6c43665da57c", 187 | "message" => "Unable to create score object for rank 'adult'" 188 | } 189 | ] 190 | } 191 | } 192 | end 193 | 194 | it 'causes WarningInQueryResult exception' do 195 | expect{ query.to_a }.to raise_error(Cloudsearchable::WarningInQueryResult) 196 | end 197 | 198 | it 'takes a :fatal_warnings option, and when set to false, does not raise' do 199 | sample_query = Cloudsearchable::QueryChain.new(double, fatal_warnings: false) 200 | expect(sample_query.instance_variable_get(:@fatal_warnings)).to be_falsey 201 | 202 | q = query 203 | q.query.instance_variable_set(:@fatal_warnings, false) 204 | expect{ q.to_a }.to_not raise_error 205 | end 206 | end 207 | 208 | context 'valid query results' do 209 | let(:customer_id){ '12345' } 210 | let(:other_customer_id){ 'foo' } 211 | 212 | let(:cloudsearch_response) do 213 | { 214 | "rank"=>"-text_relevance", 215 | "match-expr"=>"(label customer_id:'12345')", 216 | "hits"=>{ 217 | "found"=>11, 218 | "start"=>0, 219 | "info"=>{ 220 | "rid"=>"e2467862eecf73ec8dfcfe0cba1893abbe2e8803402f4da65b1195593c0f78ec3e8f1d29f6e40723", 221 | "time-ms"=>2, 222 | "cpu-time-ms"=>0 223 | }, 224 | "hit"=>[ 225 | {"id"=>"0633e1c9793f5288c58b664356533e81", "data"=>{"test_class_id"=>["ANINSTANCEID"]}}, 226 | # {"id"=>"04931ebede796ae8b435f1fd5291e772", "data"=>{"test_class_id"=>["PCxTj26ZRmV_EnHigQWx0S06w"]}}, 227 | # {"id"=>"72159a172d3043bfcdadb5244862b9ee", "data"=>{"test_class_id"=>["PCxS_apFtZMrKuqyPhFNstzMQ"]}}, 228 | # {"id"=>"1eb815b075bc005e97dc5827e53b9615", "data"=>{"test_class_id"=>["PCxSksjDUBehPWhYYW2Dtj4KQ"]}}, 229 | # {"id"=>"3e4950b829456b13bf1460b25a7aca26", "data"=>{"test_class_id"=>["PCx1oiyh6vrHGSeLvis4USMfQ"]}}, 230 | # {"id"=>"00b441f55fff86d2d746227988da77a9", "data"=>{"test_class_id"=>["PCxpt-aW8topsnTGs-AIkzWCA"]}}, 231 | # {"id"=>"919ea27d21bbdc07ead4688a0d7ceca1", "data"=>{"test_class_id"=>["PCxFHGLbGJ2mzau_a6-gh5ORw"]}}, 232 
| # {"id"=>"c663c2d9af342b0038fc808322143cfd", "data"=>{"test_class_id"=>["PCxyFwShwjWBp_WiXB0rFb2WA"]}}, 233 | # {"id"=>"de8f00af5636393e2553c4b4710d3393", "data"=>{"test_class_id"=>["PCxnqXfm8McflBgi4HsYoUXVw"]}}, 234 | # {"id"=>"e297cf21741a4c43697ea2586164a987", "data"=>{"test_class_id"=>["PCxrdk8gAEVbkuUCazu2-qLjQ"]}} 235 | ] 236 | } 237 | } 238 | end 239 | 240 | it 'materializes' do 241 | expect(clazz).to receive(:find).with(["ANINSTANCEID"]).and_return([customer_id]) 242 | query = clazz.search.where(customer_id: 12345) 243 | expect(query.to_a).to eq [customer_id] 244 | end 245 | 246 | it 'materializes db results only once' do 247 | expected_results = [customer_id, other_customer_id] 248 | expect(clazz).to receive(:find).once.and_return(expected_results) 249 | 250 | query = clazz.search.where(customer_id: 12345) 251 | query.materialize! 252 | query.materialize! 253 | end 254 | 255 | it 'does not materialize if only asking for found_count' do 256 | expect(clazz).to_not receive(:find) 257 | clazz.search.where(customer_id: 12345).found_count 258 | end 259 | 260 | it 'supports each for multiple results' do 261 | expected_results = [customer_id, other_customer_id] 262 | expect(clazz).to receive(:find).with(["ANINSTANCEID"]).and_return(expected_results) 263 | 264 | results = clazz.search.where(customer_id: 12345).to_a 265 | (0..results.length).each{ |i| expect(results[i]).to eq expected_results[i] } 266 | end 267 | 268 | it 'supports each for single results' do 269 | expect(clazz).to receive(:find).with(["ANINSTANCEID"]).and_return(customer_id) 270 | 271 | results = clazz.search.where(customer_id: 12345).to_a 272 | results.each{ |r| expect(r).to eq customer_id } 273 | end 274 | 275 | it 'supports each for nil result' do 276 | expect(clazz).to receive(:find).with(["ANINSTANCEID"]).and_return(nil) 277 | 278 | results = clazz.search.where(customer_id: 12345).to_a 279 | results.each{ |r| expect(r).to be_nil } 280 | end 281 | 282 | it 'uses materialized method' do 283 | 
expect(clazz).to receive(:another_find).with(["ANINSTANCEID"]).and_return(customer_id) 284 | clazz.materialize_method :another_find 285 | clazz.search.where(customer_id: 12345).to_a 286 | end 287 | 288 | it 'returns the correct found count' do 289 | expect(clazz.search.where(customer_id: 12345).found_count).to eq 11 290 | end 291 | end 292 | 293 | context 'invalid query results' do 294 | let(:cloudsearch_response) do 295 | { 296 | "error"=>"info", 297 | "rid"=>"6ddcaa561c05c4cc85ddb10cb46568af2ef64b0583910e32210f551c238586e40fc3abe629ca87b250796d395a628af6", 298 | "time-ms"=>20, 299 | "cpu-time-ms"=>0, 300 | "messages"=>[ 301 | { 302 | "severity"=>"fatal", 303 | "code"=>"CS-UnknownFieldInMatchExpression", 304 | "message"=>"Field 'asdf' is not defined in the metadata for this collection." 305 | } 306 | ] 307 | } 308 | end 309 | 310 | it 'raises an exception when requesting found count with an error response' do 311 | expect { clazz.search.where(customer_id: 12345).found_count }.to raise_error 312 | end 313 | end 314 | 315 | context 'empty results with non-empty data' do 316 | let(:cloudsearch_response) do 317 | { 318 | # Empty-yet-present data may occur with a NOT query, such as "(not customer_id:'XYZ')". 
319 | # Refer to: https://aws.amazon.com/support/case?caseId=107084141&language=en 320 | "rank" => "-text_relevance", 321 | "match-expr" => "(not customer_id:'A3E4T85Q6WPY4F')", 322 | "hits" => { 323 | "found" => 2087, 324 | "start" => 0, 325 | "hit" => [ 326 | {"id" => "fb9fb53e32c4b3714cf39be4b855d34b", "data" => { "test_class_id" => []}}, 327 | ] 328 | }, 329 | "info" => { 330 | "rid" => "621cf310b88f32076b1908e45b4930aafb872497bdbf3b5e64065619c0dcec96bbe513281093d6c7", 331 | "time-ms" => 3, 332 | "cpu-time-ms" => 0 333 | } 334 | } 335 | end 336 | 337 | it 'does not raise an exception' do 338 | expect(clazz).to receive(:find).with([]).and_return(nil) 339 | clazz.search.where(:customer_id, :!=, 'ABCDE') 340 | expect { clazz.search.where(:customer_id, :!=, 'ABCDE').to_a }.to_not raise_error 341 | end 342 | end 343 | end 344 | 345 | end 346 | --------------------------------------------------------------------------------