├── experiments └── rubis │ ├── .gitignore │ ├── package.json │ ├── rubis-truncate.sql │ ├── rubis-update.sql │ ├── fake.js │ ├── README.md │ └── rubis-schema.sql ├── spec ├── .rubocop.yml ├── support │ ├── dummy_cost_model.rb │ ├── entities.rb │ └── backend.rb ├── backend │ ├── mongo_backend_spec.rb │ └── cassandra_backend_spec.rb ├── loader │ ├── csv_loader_spec.rb │ └── mysql_loader_spec.rb ├── schema_spec.rb ├── results_spec.rb ├── serialize_spec.rb ├── random_spec.rb ├── spec_helper.rb ├── cost_spec.rb ├── backend_spec.rb ├── query_graph_spec.rb ├── workload_spec.rb ├── enumerator_spec.rb ├── search_spec.rb ├── util_spec.rb ├── model_spec.rb └── indexes_spec.rb ├── Gemfile ├── assets ├── NSERC-logo.png └── packagecloud-logo.png ├── .scrutinizer.yml ├── workloads ├── .rubocop.yml ├── ebay.rb ├── eac.rb ├── rubis_synthetic.rb └── rubis.rb ├── .rspec ├── .yardopts ├── .github ├── dependabot.yml └── workflows │ └── ci.yml ├── .simplecov ├── .editorconfig ├── Rakefile ├── .rubocop.yml ├── yard_extensions.rb ├── schemas ├── ebay.rb ├── rubis_baseline.rb └── rubis_expert.rb ├── .gitignore ├── lib ├── nose │ ├── cost │ │ ├── entity_count.rb │ │ ├── field_size.rb │ │ ├── request_count.rb │ │ └── cassandra.rb │ ├── loader.rb │ ├── plans │ │ ├── limit.rb │ │ ├── sort.rb │ │ ├── update.rb │ │ └── filter.rb │ ├── random │ │ ├── watts_strogatz.rb │ │ └── barbasi_albert.rb │ ├── debug.rb │ ├── loader │ │ ├── random.rb │ │ ├── sql.rb │ │ ├── csv.rb │ │ └── mysql.rb │ ├── cost.rb │ ├── timing.rb │ ├── schema.rb │ ├── statements │ │ ├── delete.rb │ │ ├── update.rb │ │ ├── insert.rb │ │ ├── query.rb │ │ └── connection.rb │ ├── proxy.rb │ ├── model.rb │ ├── model │ │ └── entity.rb │ ├── plans.rb │ ├── search │ │ └── constraints.rb │ ├── backend │ │ └── file.rb │ └── enumerator.rb └── nose.rb ├── CITATION.cff ├── models ├── ebay.rb ├── eac.rb └── rubis.rb ├── templates └── workload.erb ├── plans └── ebay.rb ├── README.md ├── nose.gemspec └── CONTRIBUTING.md 
/experiments/rubis/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | -------------------------------------------------------------------------------- /spec/.rubocop.yml: -------------------------------------------------------------------------------- 1 | Documentation: 2 | Enabled: false 3 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | source 'https://rubygems.org' 4 | 5 | gemspec 6 | -------------------------------------------------------------------------------- /assets/NSERC-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelmior/NoSE/HEAD/assets/NSERC-logo.png -------------------------------------------------------------------------------- /.scrutinizer.yml: -------------------------------------------------------------------------------- 1 | tools: 2 | external_code_coverage: 3 | timeout: 600 # 10 minute timeout 4 | -------------------------------------------------------------------------------- /workloads/.rubocop.yml: -------------------------------------------------------------------------------- 1 | GlobalVars: 2 | Enabled: false 3 | 4 | LineLength: 5 | Enabled: false 6 | -------------------------------------------------------------------------------- /.rspec: -------------------------------------------------------------------------------- 1 | --color 2 | --format documentation 3 | --require spec_helper 4 | --tag ~mongo 5 | --tag ~mysql 6 | -------------------------------------------------------------------------------- /assets/packagecloud-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelmior/NoSE/HEAD/assets/packagecloud-logo.png 
-------------------------------------------------------------------------------- /.yardopts: -------------------------------------------------------------------------------- 1 | --private 2 | --query '!(docstring.blank? && [:inspect, :to_s, :to_color, :hash, :state, :call].include?(name))' 3 | - 4 | LICENSE.md 5 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: 'bundler' 4 | directory: '/' 5 | schedule: 6 | interval: 'weekly' 7 | -------------------------------------------------------------------------------- /.simplecov: -------------------------------------------------------------------------------- 1 | SimpleCov.formatters = [ 2 | SimpleCov::Formatter::HTMLFormatter 3 | ] 4 | 5 | SimpleCov.start do 6 | add_filter '/spec/' 7 | add_filter '/vendor/' 8 | end 9 | -------------------------------------------------------------------------------- /experiments/rubis/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "rubis-fake", 3 | "version": "0.0.1", 4 | "license": "MIT", 5 | "dependencies": { 6 | "mysql-faker": "0.0.5" 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # editorconfig.org 2 | 3 | root = true 4 | 5 | [*] 6 | indent_style = space 7 | indent_size = 2 8 | end_of_line = lf 9 | charset = utf-8 10 | trim_trailing_whitespace = true 11 | insert_final_newline = true 12 | -------------------------------------------------------------------------------- /experiments/rubis/rubis-truncate.sql: -------------------------------------------------------------------------------- 1 | TRUNCATE TABLE `bids`; 2 | TRUNCATE TABLE `buynow`; 3 | TRUNCATE TABLE `categories`; 4 | TRUNCATE 
TABLE `comments`; 5 | TRUNCATE TABLE `items`; 6 | TRUNCATE TABLE `old_items`; 7 | TRUNCATE TABLE `regions`; 8 | TRUNCATE TABLE `users`; 9 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'rspec/core/rake_task' 4 | require 'yard' 5 | require 'yard-thor' 6 | require_relative 'yard_extensions' 7 | 8 | # XXX: Patch OpenStruct for yard-thor 9 | class OpenStruct 10 | def delete(name) 11 | delete_field name 12 | end 13 | end 14 | 15 | RSpec::Core::RakeTask.new(:spec) 16 | YARD::Rake::YardocTask.new(:doc) 17 | 18 | task default: :spec 19 | -------------------------------------------------------------------------------- /.rubocop.yml: -------------------------------------------------------------------------------- 1 | Metrics/AbcSize: 2 | Max: 20 3 | 4 | Metrics/ClassLength: 5 | Max: 200 6 | 7 | Metrics/CyclomaticComplexity: 8 | Max: 10 9 | 10 | Metrics/MethodLength: 11 | Max: 20 12 | 13 | Metrics/ModuleLength: 14 | Exclude: 15 | - 'spec/**/*' 16 | 17 | Metrics/PerceivedComplexity: 18 | Max: 10 19 | 20 | Style/ClassAndModuleChildren: 21 | Exclude: 22 | - 'spec/**/*' 23 | 24 | Style/SingleLineBlockParams: 25 | Enabled: false 26 | 27 | Style/SignalException: 28 | EnforcedStyle: semantic 29 | -------------------------------------------------------------------------------- /yard_extensions.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | # Handler to add methods for Parslet rules 4 | class ParsletHandler < YARD::Handlers::Ruby::Base 5 | handles method_call(:rule) 6 | namespace_only 7 | 8 | # Add a method for each Parlset rule 9 | def process 10 | name = statement.parameters.first.jump(:tstring_content, :ident).source 11 | object = YARD::CodeObjects::MethodObject.new namespace, name 12 | register(object) 13 | 
parse_block(statement.last.last, owner: object) 14 | end 15 | end 16 | -------------------------------------------------------------------------------- /spec/support/dummy_cost_model.rb: -------------------------------------------------------------------------------- 1 | module NoSE 2 | module Cost 3 | RSpec.shared_examples 'dummy cost model' do 4 | let(:cost_model) do 5 | # Simple cost model which just counts the number of indexes 6 | class DummyCost < NoSE::Cost::Cost 7 | include Subtype 8 | 9 | def index_lookup_cost(_step) 10 | 1 11 | end 12 | 13 | def insert_cost(_step) 14 | 1 15 | end 16 | 17 | def delete_cost(_step) 18 | 1 19 | end 20 | end 21 | 22 | DummyCost.new 23 | end 24 | end 25 | end 26 | end 27 | -------------------------------------------------------------------------------- /spec/backend/mongo_backend_spec.rb: -------------------------------------------------------------------------------- 1 | require 'nose/backend/mongo' 2 | 3 | module NoSE 4 | module Backend 5 | describe MongoBackend do 6 | include_examples 'backend processing', mongo: true do 7 | let(:config) do 8 | { 9 | name: 'mongo', 10 | uri: 'mongodb://localhost:27017/', 11 | database: 'nose' 12 | } 13 | end 14 | 15 | let(:backend) do 16 | MongoBackend.new plans.schema.model, plans.schema.indexes.values, 17 | [], [], config 18 | end 19 | end 20 | 21 | it 'is a type of backend' do 22 | expect(MongoBackend.subtype_name).to eq 'mongo' 23 | end 24 | end 25 | end 26 | end 27 | -------------------------------------------------------------------------------- /schemas/ebay.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require_relative '../lib/nose.rb' 4 | 5 | NoSE::Schema.new do 6 | Model 'ebay' 7 | 8 | Index 'users_by_id' do 9 | Hash users.UserID 10 | Extra users['*'] 11 | Path users.UserID 12 | end 13 | 14 | Index 'items_by_id' do 15 | Hash items.ItemID 16 | Extra items['*'] 17 | Path items.ItemID 18 | end 19 | 20 | 
Index 'likes_by_user' do 21 | Hash users.UserID 22 | Ordered likes.LikedAt, likes.LikeID, items.ItemID 23 | Path users.UserID, users.likes, likes.item 24 | end 25 | 26 | Index 'likes_by_item' do 27 | Hash items.ItemID 28 | Ordered likes.LikedAt, likes.LikeID, users.UserID 29 | Path items.ItemID, items.likes, likes.user 30 | end 31 | end 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | *.rbc 3 | /.config 4 | /coverage/ 5 | /InstalledFiles 6 | /pkg/ 7 | /spec/reports/ 8 | /test/tmp/ 9 | /test/version_tmp/ 10 | /tmp/ 11 | 12 | ## Documentation cache and generated files: 13 | /.yardoc/ 14 | /_yardoc/ 15 | /doc/ 16 | /rdoc/ 17 | /man/ 18 | 19 | ## Environment normalisation: 20 | /.bundle/ 21 | /lib/bundler/man/ 22 | 23 | # for a library or gem, you might want to ignore these files since the code is 24 | # intended to run in multiple environments; otherwise, check them in: 25 | Gemfile.lock 26 | .ruby-version 27 | .ruby-gemset 28 | 29 | # unless supporting rvm < 1.11.0 or doing something fancy, ignore this: 30 | .rvmrc 31 | 32 | ## Configuration files 33 | *.yml 34 | !.scrutinizer.yml 35 | !.rubocop.yml 36 | !.github/** 37 | 38 | ## Log files 39 | gurobi.log 40 | -------------------------------------------------------------------------------- /lib/nose/cost/entity_count.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | module Cost 5 | # A cost model which estimates the number of entities transferred 6 | class EntityCountCost < Cost 7 | include Subtype 8 | 9 | # Rough cost estimate as the number of entities retrieved at each step 10 | # @return [Numeric] 11 | def index_lookup_cost(step) 12 | # Simply count the number of entities at each step 13 | step.state.cardinality 14 | end 15 | 16 | # Cost estimate as number of entities deleted 17 | 
def delete_cost(step) 18 | step.state.cardinality 19 | end 20 | 21 | # Cost estimate as number of entities inserted 22 | def insert_cost(step) 23 | step.state.cardinality 24 | end 25 | end 26 | end 27 | end 28 | -------------------------------------------------------------------------------- /workloads/ebay.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | # Insipired by the blog post below on data modeling in Cassandra 4 | # www.ebaytechblog.com/2012/07/16/cassandra-data-modeling-best-practices-part-1/ 5 | 6 | NoSE::Workload.new do 7 | Model 'ebay' 8 | 9 | # Define queries and their relative weights 10 | Q 'SELECT users.* FROM users WHERE users.UserID = ? -- 1' 11 | Q 'SELECT items.* FROM items WHERE items.ItemID = ?' 12 | Q 'SELECT items.* FROM items.likes.user WHERE user.UserID = ? ORDER BY likes.LikedAt' 13 | Q 'SELECT users.* FROM users.likes.item WHERE item.ItemID = ? ORDER BY likes.LikedAt' 14 | 15 | Q 'INSERT INTO items SET ItemID = ?, Title = ?, Desc = ?' 16 | Q 'INSERT INTO users SET UserID = ?, Name = ?, Email = ?' 17 | Q 'INSERT INTO likes SET LikeID = ?, LikedAt = ? 
AND CONNECT TO user(?), item(?)' 18 | end 19 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | # YAML 1.2 2 | --- 3 | version: 1.0.3 4 | authors: 5 | - 6 | affiliation: "University of Waterloo" 7 | family-names: Mior 8 | given-names: "Michael Joseph" 9 | orcid: "https://orcid.org/0000-0002-4057-8726" 10 | - 11 | affiliation: "University of Waterloo" 12 | family-names: Salem 13 | given-names: Kenneth 14 | - 15 | affiliation: "Qatar Computing Research Institute" 16 | family-names: Aboulnaga 17 | given-names: Ashraf 18 | - 19 | affiliation: "HP Vertica" 20 | family-names: Liu 21 | given-names: Rui 22 | cff-version: "1.0.3" 23 | date-released: 2016-09-11 24 | doi: "10.5281/zenodo.2578159" 25 | license: "GPL-3.0-only" 26 | message: "If you use this software, please cite it using these metadata." 27 | repository-code: "https://github.com/michaelmior/NoSE" 28 | title: "NoSE: Automated schema design for NoSQL applications" 29 | ... 
30 | -------------------------------------------------------------------------------- /models/ebay.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | # Insipired by the blog post below on data modeling in Cassandra 4 | # www.ebaytechblog.com/2012/07/16/cassandra-data-modeling-best-practices-part-1/ 5 | 6 | # rubocop:disable all 7 | 8 | NoSE::Model.new do 9 | # Define entities along with the size and cardinality of their fields 10 | # as well as an estimated number of each entity 11 | (Entity 'users' do 12 | ID 'UserID' 13 | String 'Name', 50 14 | String 'Email', 50 15 | end) * 100 16 | 17 | (Entity 'items' do 18 | ID 'ItemID' 19 | String 'Title', 50 20 | String 'Desc', 200 21 | end) * 1_000 22 | 23 | (Entity 'likes' do 24 | ID 'LikeID' 25 | Date 'LikedAt' 26 | end) * 10_000 27 | 28 | HasOne 'user', 'likes', 29 | {'likes' => 'users'} 30 | HasOne 'item', 'likes', 31 | {'likes' => 'items'} 32 | end 33 | -------------------------------------------------------------------------------- /lib/nose/cost/field_size.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | module Cost 5 | # A cost model which estimates the total size of data transferred 6 | class FieldSizeCost < Cost 7 | include Subtype 8 | 9 | # Rough cost estimate as the size of data returned 10 | # @return [Numeric] 11 | def index_lookup_cost(step) 12 | # If we have an answer to the query, we only need 13 | # to fetch the data fields which are selected 14 | fields = step.index.all_fields 15 | fields &= step.state.query.select if step.state.answered? 
16 | 17 | step.state.cardinality * fields.sum_by(&:size) 18 | end 19 | 20 | # Cost estimate as the size of an index entry 21 | def delete_cost(step) 22 | step.index.entry_size 23 | end 24 | 25 | # Cost estimate as the size of an index entry 26 | def insert_cost(step) 27 | step.index.entry_size 28 | end 29 | end 30 | end 31 | end 32 | -------------------------------------------------------------------------------- /lib/nose/cost/request_count.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | module Cost 5 | # A cost model which estimates the number of requests to the backend 6 | class RequestCountCost < Cost 7 | include Subtype 8 | 9 | # Rough cost estimate as the number of requests made 10 | # @return [Numeric] 11 | def index_lookup_cost(step) 12 | # We always start with a single lookup, then the number 13 | # of lookups is determined by the cardinality at the preceding step 14 | if step.parent.is_a?(Plans::RootPlanStep) 15 | 1 16 | else 17 | step.state.cardinality 18 | end 19 | end 20 | 21 | # Cost estimate as number of entities deleted 22 | def delete_cost(step) 23 | step.state.cardinality 24 | end 25 | 26 | # Cost estimate as number of entities inserted 27 | def insert_cost(step) 28 | step.state.cardinality 29 | end 30 | end 31 | end 32 | end 33 | -------------------------------------------------------------------------------- /experiments/rubis/rubis-update.sql: -------------------------------------------------------------------------------- 1 | alter table regions add column dummy tinyint(1); 2 | update regions set dummy=1; 3 | alter table comments change column from_user_id from_user int(10) unsigned not null, algorithm=inplace; 4 | alter table comments change column to_user_id to_user int(10) unsigned not null, algorithm=inplace; 5 | alter table categories add column dummy tinyint(1), algorithm=inplace; 6 | update categories set dummy=1; 7 | alter table comments 
change column item_id item int(10) unsigned not null, algorithm=inplace; 8 | alter table bids change column item_id item int(10) unsigned not null, algorithm=inplace; 9 | alter table bids change column user_id user int(10) unsigned not null, algorithm=inplace; 10 | rename table buy_now to buynow; 11 | alter table buynow change column buyer_id buyer int(10) unsigned not null, algorithm=inplace; 12 | alter table buynow change column item_id item int(10) unsigned not null, algorithm=inplace; 13 | alter table users change column nickname nickname varchar(50) not null, algorithm=inplace; 14 | -------------------------------------------------------------------------------- /lib/nose.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | # Namespace module for the whole project 4 | module NoSE 5 | end 6 | 7 | require_relative 'nose/util' 8 | 9 | require_relative 'nose/backend' 10 | require_relative 'nose/cost' 11 | require_relative 'nose/debug' 12 | require_relative 'nose/enumerator' 13 | require_relative 'nose/indexes' 14 | require_relative 'nose/loader' 15 | require_relative 'nose/model' 16 | require_relative 'nose/parser' 17 | require_relative 'nose/plans' 18 | require_relative 'nose/proxy' 19 | require_relative 'nose/query_graph' 20 | require_relative 'nose/random' 21 | require_relative 'nose/schema' 22 | require_relative 'nose/search' 23 | require_relative 'nose/statements' 24 | require_relative 'nose/timing' 25 | require_relative 'nose/workload' 26 | 27 | require_relative 'nose/serialize' 28 | 29 | # :nocov: 30 | require 'logging' 31 | 32 | logger = Logging.logger['nose'] 33 | logger.level = (ENV['NOSE_LOG'] || 'info').downcase.to_sym 34 | 35 | logger.add_appenders Logging.appenders.stderr 36 | logger = nil # rubocop:disable Lint/UselessAssignment 37 | # :nocov: 38 | -------------------------------------------------------------------------------- /lib/nose/cost/cassandra.rb: 
-------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | module Cost 5 | # A cost model which estimates the number of requests to the backend 6 | class CassandraCost < Cost 7 | include Subtype 8 | 9 | # Rough cost estimate as the number of requests made 10 | # @return [Numeric] 11 | def index_lookup_cost(step) 12 | return nil if step.state.nil? 13 | rows = step.state.cardinality 14 | parts = step.state.hash_cardinality 15 | 16 | @options[:index_cost] + parts * @options[:partition_cost] + 17 | rows * @options[:row_cost] 18 | end 19 | 20 | # Cost estimate as number of entities deleted 21 | def delete_cost(step) 22 | return nil if step.state.nil? 23 | step.state.cardinality * @options[:delete_cost] 24 | end 25 | 26 | # Cost estimate as number of entities inserted 27 | def insert_cost(step) 28 | return nil if step.state.nil? 29 | step.state.cardinality * @options[:insert_cost] 30 | end 31 | end 32 | end 33 | end 34 | -------------------------------------------------------------------------------- /lib/nose/loader.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | # Loaders which insert data into indexes from external sources 5 | module Loader 6 | # Superclass for all data loaders 7 | class LoaderBase 8 | def initialize(workload = nil, backend = nil) 9 | @workload = workload 10 | @backend = backend 11 | end 12 | 13 | # :nocov: 14 | # @abstract Subclasses should produce a workload 15 | # @return [void] 16 | def workload(_config) 17 | fail NotImplementedError 18 | end 19 | # :nocov: 20 | 21 | # :nocov: 22 | # @abstract Subclasses should load data for the given list of indexes 23 | # @return [void] 24 | def load(_indexes, _config, _show_progress = false, _limit = nil, 25 | _skip_existing = true) 26 | fail NotImplementedError 27 | end 28 | # :nocov: 29 | 30 | # @abstract Subclasses should generate a 
model from the external source 31 | # :nocov: 32 | def model(_config) 33 | fail NotImplementedError 34 | end 35 | # :nocov: 36 | end 37 | end 38 | end 39 | -------------------------------------------------------------------------------- /models/eac.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | # rubocop:disable all 3 | 4 | NoSE::Model.new do 5 | # Define entities along with the size and cardinality of their fields 6 | # as well as an estimated number of each entity 7 | 8 | (Entity 'Player' do 9 | ID 'PlayerID' 10 | String 'PlayerName' 11 | Integer 'PlayerFlags' 12 | Boolean 'IsAdmin' 13 | end) * 100_000 14 | 15 | (Entity 'Session' do 16 | ID 'SessionID' 17 | Date 'TimeStarted' 18 | Date 'TimeEnded' 19 | end) * 100_000 20 | 21 | (Entity 'PlayerState' do 22 | ID 'StateID' 23 | Float 'PosX' 24 | Float 'PosY' 25 | Float 'PosZ' 26 | Date 'ClientTimestamp' 27 | Date 'ServerTimestamp' 28 | end) * 4_000_000 29 | 30 | (Entity 'Server' do 31 | ID 'ServerID' 32 | String 'ServerIP' 33 | String 'ServerName' 34 | end) * 5_000 35 | 36 | HasOne 'player', 'sessions', 37 | {'Session' => 'Player'} 38 | 39 | HasOne 'server', 'sessions', 40 | {'Session' => 'Server'} 41 | 42 | HasOne 'session', 'states', 43 | {'PlayerState' => 'Session'} 44 | end 45 | -------------------------------------------------------------------------------- /lib/nose/plans/limit.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | module Plans 5 | # Limit results from a previous lookup 6 | # This should only ever occur at the end of a plan 7 | class LimitPlanStep < PlanStep 8 | attr_reader :limit 9 | 10 | def initialize(limit, state = nil) 11 | super() 12 | @limit = limit 13 | 14 | return if state.nil? 
15 | @state = state.dup 16 | @state.cardinality = @limit 17 | end 18 | 19 | # Two limit steps are equal if they have the same value for the limit 20 | def ==(other) 21 | other.instance_of?(self.class) && @limit == other.limit 22 | end 23 | alias eql? == 24 | 25 | def hash 26 | @limit 27 | end 28 | 29 | # Check if we can apply a limit 30 | # @return [LimitPlanStep] 31 | def self.apply(_parent, state) 32 | # TODO: Apply if have IDs of the last entity set 33 | # with no filter/sort needed 34 | 35 | return nil if state.query.limit.nil? 36 | return nil unless state.answered? check_limit: false 37 | 38 | LimitPlanStep.new state.query.limit, state 39 | end 40 | end 41 | end 42 | end 43 | -------------------------------------------------------------------------------- /templates/workload.erb: -------------------------------------------------------------------------------- 1 | <% foreign_keys = [] %> 2 | # rubocop:disable all 3 | 4 | workload = NoSE::Workload.new do 5 | # Define entities along with the size and cardinality of their fields 6 | # as well as an estimated number of each entity 7 | <% workload.model.entities.each_value do |entity| %> 8 | (Entity '<%= entity.name %>' do 9 | <% entity.fields.each_value do |field| %> 10 | <% 11 | if field.is_a? 
NoSE::Fields::ForeignKeyField 12 | foreign_keys << field 13 | next 14 | end 15 | %> 16 | <%= field.subtype_name name_case: :camel %> '<%= field.name %>',<%= 17 | case [field.class] 18 | when [NoSE::Fields::StringField] 19 | "#{field.size}, " 20 | else 21 | '' 22 | end 23 | %> count: <%= field.cardinality %> 24 | 25 | <% end %> 26 | end) * <%= entity.count %> 27 | 28 | 29 | <% end %> 30 | 31 | <% foreign_keys.each do |key| %> 32 | ForeignKey '<%= key.name %>', '<%= key.parent.name %>', '<%= key.entity.name %>', count: <%= key.cardinality %> 33 | 34 | <% end %> 35 | 36 | # Define queries and their relative weights 37 | <% workload.statement_weights.each do |statement, weight| %> 38 | Q '<%= statement.query %>', <%= weight %> 39 | 40 | <% end %> 41 | end 42 | # rubocop:enable all 43 | -------------------------------------------------------------------------------- /spec/loader/csv_loader_spec.rb: -------------------------------------------------------------------------------- 1 | require 'nose/loader/csv' 2 | 3 | module NoSE 4 | module Loader 5 | describe CsvLoader do 6 | include_context 'entities' 7 | include FakeFS::SpecHelpers 8 | 9 | before(:each) do 10 | FileUtils.mkdir_p '/tmp/csv' 11 | 12 | File.open '/tmp/csv/User.csv', 'w' do |file| 13 | file.puts <<-EOF.gsub(/^ {10}/, '') 14 | UserId,Username,City 15 | 1,Alice,Chicago 16 | EOF 17 | end 18 | end 19 | 20 | it 'can load data into a backend' do 21 | backend = instance_spy Backend::Backend 22 | allow(backend).to receive(:by_id_graph).and_return(false) 23 | 24 | index = Index.new [user['City']], [user['UserId']], 25 | [user['Username']], 26 | QueryGraph::Graph.from_path([user.id_field]) 27 | loader = CsvLoader.new workload, backend 28 | loader.load([index], directory: '/tmp/csv') 29 | 30 | expect(backend).to have_received(:index_insert_chunk).with( 31 | index, 32 | [{ 33 | 'User_UserId' => '1', 34 | 'User_Username' => 'Alice', 35 | 'User_City' => 'Chicago' 36 | }] 37 | ) 38 | end 39 | end 40 | end 41 | end 42 | 
-------------------------------------------------------------------------------- /spec/schema_spec.rb: -------------------------------------------------------------------------------- 1 | module NoSE 2 | describe Schema do 3 | it 'can model a simple index' do 4 | schema = Schema.new do 5 | Model 'rubis' 6 | 7 | Index 'users_by_id' do 8 | Hash users.id 9 | Extra users['*'] 10 | Path users.id 11 | end 12 | end 13 | 14 | model = schema.model 15 | users = model.entities['users'] 16 | 17 | expect(schema.indexes.values).to match_array [ 18 | Index.new([users['id']], [], users.fields.values, 19 | QueryGraph::Graph.from_path([users['id']])) 20 | ] 21 | end 22 | 23 | it 'can model an index over multiple entities' do 24 | schema = Schema.new do 25 | Model 'rubis' 26 | 27 | Index 'user_region' do 28 | Hash users.id 29 | Ordered regions.id 30 | Extra regions.name 31 | Path users.id, users.region 32 | end 33 | end 34 | 35 | model = schema.model 36 | users = model.entities['users'] 37 | regions = model.entities['regions'] 38 | 39 | expect(schema.indexes.values).to match_array [ 40 | Index.new([users['id']], [regions['id']], [regions['name']], 41 | QueryGraph::Graph.from_path([users['id'], users['region']])) 42 | ] 43 | end 44 | end 45 | end 46 | -------------------------------------------------------------------------------- /lib/nose/random/watts_strogatz.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | module Random 5 | # Generates a random graph using the Watts-Strogatz model 6 | class WattsStrogatzNetwork < Network 7 | def initialize(params = {}) 8 | super params 9 | 10 | @beta = params.fetch :beta, 0.5 11 | @node_degree = params.fetch :node_degree, 2 12 | @nodes = 0..(@nodes_nb - 1) 13 | 14 | @entities = @nodes.map do |node| 15 | create_entity node 16 | end 17 | 18 | build_initial_links 19 | rewire_links 20 | add_foreign_keys 21 | end 22 | 23 | private 24 | 25 | # Set up the 
initial links between all nodes 26 | # @return [void] 27 | def build_initial_links 28 | @nodes.each do |node| 29 | (@node_degree / 2).times do |i| 30 | add_link node, (node + i + 1) % @nodes_nb 31 | end 32 | end 33 | end 34 | 35 | # Rewire all links between nodes 36 | # @return [void] 37 | def rewire_links 38 | (@node_degree / 2).times do |i| 39 | @nodes.each do |node| 40 | next unless rand < @beta 41 | 42 | neighbour = (node + i + 1) % @nodes_nb 43 | remove_link node, neighbour 44 | add_link node, new_neighbour(node, neighbour) 45 | end 46 | end 47 | end 48 | end 49 | end 50 | end 51 | -------------------------------------------------------------------------------- /spec/results_spec.rb: -------------------------------------------------------------------------------- 1 | module NoSE 2 | module Search 3 | describe Results do 4 | include_context 'entities' 5 | 6 | let(:problem) do 7 | OpenStruct.new( 8 | objective_type: Objective::COST, 9 | query_vars: [] 10 | ) 11 | end 12 | 13 | let(:results) do 14 | r = Results.new problem 15 | r.workload = workload 16 | r.enumerated_indexes = [] 17 | r.indexes = [] 18 | r.plans = [] 19 | r.update_plans = [] 20 | r.indexes = [] 21 | r.total_cost = 0 22 | r.total_size = 0 23 | 24 | r 25 | end 26 | 27 | it 'can ensure only enumerated indexes are used' do 28 | index = Index.new [tweet['TweetId']], [], [tweet['Body']], 29 | QueryGraph::Graph.from_path([tweet.id_field]) 30 | 31 | results.indexes = [index] 32 | expect { results.validate }.to \ 33 | raise_error InvalidResultsException 34 | end 35 | 36 | it 'checks for the correct cost objective value' do 37 | results.total_cost = 1 38 | expect { results.validate }.to raise_error InvalidResultsException 39 | end 40 | 41 | it 'checks for the correct size objective value' do 42 | problem.objective_type = Objective::SPACE 43 | results.total_size = 1 44 | expect { results.validate }.to raise_error InvalidResultsException 45 | end 46 | end 47 | end 48 | end 49 | 
-------------------------------------------------------------------------------- /lib/nose/debug.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | # rubocop:disable Lint/HandleExceptions 3 | begin 4 | require 'binding_of_caller' 5 | require 'pry' 6 | rescue LoadError 7 | # Ignore in case we are not in development mode 8 | end 9 | # rubocop:enable Lint/HandleExceptions 10 | 11 | module NoSE 12 | # Various helpful debugging snippets 13 | module Debug 14 | # Convenience method to break in IndexLookupStep when 15 | # a particular set of indexes is reach when planning 16 | # @return [void] 17 | def self.break_on_indexes(*index_keys) 18 | apply = binding.of_caller(1) 19 | parent = apply.eval 'parent' 20 | index = apply.eval 'index' 21 | current_keys = parent.parent_steps.indexes.map(&:key) << index.key 22 | 23 | # rubocop:disable Lint/Debugger 24 | binding.pry if current_keys == index_keys 25 | # rubocop:enable Lint/Debugger 26 | end 27 | 28 | # Export entities in a model as global 29 | # variales for easier access when debugging 30 | # @return [void] 31 | def self.export_model(model) 32 | model.entities.each do |name, entity| 33 | # rubocop:disable Lint/Eval 34 | eval("$#{name} = entity") 35 | # rubocop:enable Lint/Eval 36 | 37 | entity.fields.merge(entity.foreign_keys).each do |field_name, field| 38 | entity.define_singleton_method field_name.to_sym, -> { field } 39 | end 40 | end 41 | 42 | nil 43 | end 44 | end 45 | end 46 | -------------------------------------------------------------------------------- /lib/nose/random/barbasi_albert.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | module Random 5 | # Generates a random graph using the Barbasi-Albert model 6 | class BarbasiAlbertNetwork < Network 7 | def initialize(params = {}) 8 | super params 9 | 10 | # We assume for now that m0 = m = 2 11 | 12 | 
# Check if an external sort can be used (if a sort is the last step)
Fields::IDField } 36 | resolved_predicates = state.eq.empty? && state.range.nil? 37 | can_order = !(state.order_by.to_set & parent.fields).empty? 38 | return nil unless fetched_all_ids && resolved_predicates && can_order 39 | 40 | new_state = state.dup 41 | new_state.order_by = [] 42 | new_step = SortPlanStep.new(state.order_by, new_state) 43 | new_step.state.freeze 44 | 45 | new_step 46 | end 47 | end 48 | end 49 | end 50 | -------------------------------------------------------------------------------- /lib/nose/loader/random.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | module Loader 5 | # Load some random data (mostly useful for testing) 6 | class RandomLoader < LoaderBase 7 | def initialize(workload = nil, backend = nil) 8 | @logger = Logging.logger['nose::loader::randomloader'] 9 | 10 | @workload = workload 11 | @backend = backend 12 | end 13 | 14 | # Load a generated set of indexes with data from MySQL 15 | # @return [void] 16 | def load(indexes, config, show_progress = false, limit = nil, 17 | skip_existing = true) 18 | limit = 1 if limit.nil? 19 | 20 | indexes.map!(&:to_id_graph).uniq! 
if @backend.by_id_graph 21 | indexes.uniq.each do |index| 22 | load_index index, config, show_progress, limit, skip_existing 23 | end 24 | end 25 | 26 | private 27 | 28 | # Load a single index into the backend 29 | # @return [void] 30 | def load_index(index, _config, show_progress, limit, skip_existing) 31 | # Skip this index if it's not empty 32 | if skip_existing && !@backend.index_empty?(index) 33 | @logger.info "Skipping index #{index.inspect}" if show_progress 34 | return 35 | end 36 | @logger.info index.inspect if show_progress 37 | 38 | chunk = Array.new(limit) do 39 | Hash[index.all_fields.map do |field| 40 | [field.id, field.random_value] 41 | end] 42 | end 43 | 44 | @backend.index_insert_chunk index, chunk 45 | end 46 | end 47 | end 48 | end 49 | -------------------------------------------------------------------------------- /workloads/eac.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | NoSE::Workload.new do 4 | Model 'eac' 5 | 6 | # Server session exists 7 | Q 'SELECT Server.ServerID FROM Server WHERE ' \ 8 | 'Server.ServerID = ?', 3 9 | 10 | # Get sessions by GUID 11 | Q 'SELECT Session.SessionID FROM ' \ 12 | 'Session.player WHERE player.PlayerID = ?', 3 13 | 14 | # Get player session 15 | Q 'SELECT states.PosX, states.PosY, states.PosZ, ' \ 16 | 'states.ServerTimestamp FROM ' \ 17 | 'Server.sessions.states WHERE Server.ServerID = ? AND ' \ 18 | 'sessions.player.PlayerID = ? ORDER BY states.ServerTimestamp', 6 19 | 20 | # Get new data 21 | Q 'SELECT states.PosX, states.PosY, states.PosZ, ' \ 22 | 'states.ServerTimestamp, sessions.player.PlayerID FROM ' \ 23 | 'Server.sessions.states WHERE sessions.player.IsAdmin = 0 AND ' \ 24 | 'Server.ServerID = ? AND states.ServerTimestamp > ? AND ' \ 25 | 'states.ServerTimestamp <= ? 
ORDER BY states.ServerTimestamp', 6 26 | 27 | # Get server information 28 | Q 'SELECT Server.ServerName, Server.ServerIP FROM ' \ 29 | 'Server WHERE Server.ServerID = ?', 2 30 | 31 | # Add new player 32 | Q 'INSERT INTO Player SET PlayerID=?, PlayerName=?, PlayerFlags=?, ' \ 33 | 'IsAdmin=?', 4 34 | 35 | # Record new state 36 | Q 'INSERT INTO PlayerState SET StateID=?, PosX=?, PosY=?, PosZ=?, ' \ 37 | 'ClientTimestamp=?, ServerTimestamp=? AND CONNECT TO session(?)', 71 38 | 39 | Q 'INSERT INTO Session SET SessionID=?, TimeStarted=?, TimeEnded=? ' \ 40 | 'AND CONNECT TO server(?), player(?)', 4 41 | 42 | Q 'INSERT INTO Server SET ServerID=?, ServerIP=?, ' \ 43 | 'ServerName=?', 1 44 | end 45 | -------------------------------------------------------------------------------- /lib/nose/plans/update.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | module Plans 5 | # A superclass for steps which modify indexes 6 | class UpdatePlanStep < PlanStep 7 | attr_reader :index 8 | attr_accessor :state 9 | 10 | def initialize(index, type, state = nil) 11 | super() 12 | @index = index 13 | @type = type 14 | 15 | return if state.nil? 16 | @state = state.dup 17 | @state.freeze 18 | end 19 | 20 | # :nocov: 21 | def to_color 22 | "#{super} #{@index.to_color} * #{@state.cardinality}" 23 | end 24 | # :nocov: 25 | 26 | # Two insert steps are equal if they use the same index 27 | def ==(other) 28 | other.instance_of?(self.class) && @index == other.index && \ 29 | @type == other.instance_variable_get(:@type) 30 | end 31 | alias eql? == 32 | 33 | def hash 34 | [@index, @type].hash 35 | end 36 | end 37 | 38 | # A step which inserts data into a given index 39 | class InsertPlanStep < UpdatePlanStep 40 | attr_reader :fields 41 | 42 | def initialize(index, state = nil, fields = Set.new) 43 | super index, :insert, state 44 | @fields = if fields.empty? 
# A step which deletes data from a given index
expect(StatementRepresenter.represent(query).to_hash).to eq(query.text) 51 | end 52 | end 53 | end 54 | end 55 | -------------------------------------------------------------------------------- /spec/random_spec.rb: -------------------------------------------------------------------------------- 1 | module NoSE 2 | module Random 3 | shared_examples 'a network' do 4 | it 'has a default of 10 entities' do 5 | expect(network.entities).to have(10).items 6 | end 7 | 8 | it 'generates entities with IDs' do 9 | expect(network.entities).to all(satisfy do |entity| 10 | entity.fields.values.any? { |field| field.is_a? Fields::IDField } 11 | end) 12 | end 13 | 14 | it 'does not generate disconnected entities' do 15 | expect(network.entities).to all(satisfy do |entity| 16 | connected = !entity.foreign_keys.empty? 17 | connected ||= network.entities.any? do |other| 18 | other.foreign_keys.each_value.map(&:entity).include? entity 19 | end 20 | 21 | connected 22 | end) 23 | end 24 | end 25 | 26 | describe BarbasiAlbertNetwork do 27 | let(:network) { BarbasiAlbertNetwork.new } 28 | it_behaves_like 'a network' 29 | end 30 | 31 | describe WattsStrogatzNetwork do 32 | let(:network) { BarbasiAlbertNetwork.new } 33 | it_behaves_like 'a network' 34 | end 35 | 36 | describe StatementGenerator do 37 | include_context 'entities' 38 | 39 | subject(:sgen) { StatementGenerator.new(workload.model) } 40 | 41 | before(:each) { ::Random.srand 0 } 42 | 43 | it 'generates valid insertions' do 44 | expect(sgen.random_insert).to be_a Insert 45 | end 46 | 47 | it 'generates valid updates' do 48 | expect(sgen.random_update).to be_a Update 49 | end 50 | 51 | it 'generates valid deletions' do 52 | expect(sgen.random_delete).to be_a Delete 53 | end 54 | 55 | it 'generates valid queries' do 56 | expect(sgen.random_query).to be_a Query 57 | end 58 | 59 | it 'generates random graphs' do 60 | expect(sgen.random_graph(4)).to be_a QueryGraph::Graph 61 | end 62 | end 63 | end 64 | end 65 | 
-------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | 2 | name: CI 3 | on: 4 | push: 5 | branches: 6 | - '*' 7 | tags: 8 | - 'v*' 9 | pull_request: 10 | branches: 11 | - main 12 | jobs: 13 | test: 14 | runs-on: ubuntu-latest 15 | strategy: 16 | matrix: 17 | ruby-version: ['3.1.4', '3.2.3', '3.3.0'] 18 | services: 19 | mysql: 20 | image: mysql:5.6 21 | env: 22 | MYSQL_ALLOW_EMPTY_PASSWORD: 1 23 | MYSQL_DATABASE: nose 24 | ports: 25 | - 3306:3306 26 | options: >- 27 | --health-cmd="mysqladmin ping" 28 | --health-interval=10s 29 | --health-timeout=5s 30 | --health-retries=5 31 | mongo: 32 | image: mongo:5 33 | ports: 34 | - 27017:27017 35 | options: >- 36 | --health-cmd=mongo 37 | --health-interval=10s 38 | --health-timeout=5s 39 | --health-retries=5 40 | cassandra: 41 | image: cassandra:3 42 | ports: 43 | - 9042:9042 44 | options: >- 45 | --health-cmd="cqlsh --debug" 46 | --health-start-period=30s 47 | --health-interval=10s 48 | --health-timeout=5s 49 | --health-retries=5 50 | steps: 51 | - name: Install required packages 52 | run: sudo apt update && sudo apt-get install coinor-cbc coinor-libcbc-dev 53 | - uses: actions/checkout@v4 54 | - uses: ruby/setup-ruby@v1 55 | with: 56 | ruby-version: ${{ matrix.ruby-version }} 57 | bundler-cache: true 58 | - name: Populate MySQL 59 | run: mysql -h 127.0.0.1 -uroot -Dnose < spec/support/data/mysql.sql 60 | - name: Test 61 | run: | 62 | sed -i '/^--tag/d' .rspec 63 | bundle exec rspec 64 | - name: Upload coverage to Codecov 65 | uses: codecov/codecov-action@v4 66 | with: 67 | fail_ci_if_error: true 68 | token: ${{ secrets.CODECOV_TOKEN }} 69 | -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | # This file was generated by the `rspec --init` command. 
Conventionally, all 2 | # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`. 3 | # Require this file using `require "spec_helper"` to ensure that it is only 4 | # loaded once. 5 | # 6 | # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration 7 | 8 | require 'simplecov' 9 | SimpleCov.start 10 | 11 | require 'simplecov-cobertura' 12 | SimpleCov.formatter = SimpleCov::Formatter::CoberturaFormatter 13 | 14 | require 'rspec/collection_matchers' 15 | require 'fakefs/safe' 16 | require 'fakefs/spec_helpers' 17 | 18 | Dir['./spec/support/**/*.rb'].sort.each { |f| require f } 19 | 20 | RSpec.configure do |config| 21 | config.expect_with :rspec do |expectations| 22 | expectations.syntax = :expect 23 | expectations.include_chain_clauses_in_custom_matcher_descriptions = true 24 | end 25 | 26 | # rspec-mocks config goes here. You can use an alternate test double 27 | # library (such as bogus or mocha) by changing the `mock_with` option here. 28 | config.mock_with :rspec do |mocks| 29 | # Prevents you from mocking or stubbing a method that does not exist on 30 | # a real object. This is generally recommended, and will default to 31 | # `true` in RSpec 4. 32 | mocks.verify_partial_doubles = true 33 | end 34 | 35 | config.run_all_when_everything_filtered = true 36 | 37 | # Run specs in random order to surface order dependencies. If you find an 38 | # order dependency and want to debug it, you can fix the order by providing 39 | # the seed, which is printed after each run. 40 | # --seed 1234 41 | config.order = 'random' 42 | 43 | # Seed global randomization in this process using the `--seed` CLI option. 44 | # Setting this allows you to use `--seed` to deterministically reproduce 45 | # test failures related to randomization by passing the same `--seed` value 46 | # as the one that triggered the failure. 
47 | Kernel.srand config.seed 48 | end 49 | 50 | require 'nose' 51 | 52 | # Disable parallelism for tests 53 | Parallel.instance_variable_set(:@processor_count, 0) 54 | -------------------------------------------------------------------------------- /plans/ebay.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | NoSE::Plans::ExecutionPlans.new do 4 | Schema 'ebay' 5 | 6 | Group 'GetUser' do 7 | Plan 'GetUser' do 8 | Select users['*'] 9 | Param users.UserID, :== 10 | Lookup 'users_by_id', [users.UserID, :==] 11 | end 12 | end 13 | 14 | Group 'GetItem' do 15 | Plan 'GetItem' do 16 | Select items['*'] 17 | Param items.ItemID, :== 18 | Lookup 'items_by_id', [items.ItemID, :==] 19 | end 20 | end 21 | 22 | Group 'GetUserLikes' do 23 | Plan 'GetUserLikes' do 24 | Select items['*'] 25 | Param users.UserID, :== 26 | Lookup 'likes_by_user', [users.UserID, :==] 27 | Lookup 'items_by_id', [items.ItemID, :==] 28 | end 29 | end 30 | 31 | Group 'GetItemLikes' do 32 | Plan 'GetItemLikes' do 33 | Select users['*'] 34 | Param items.ItemID, :== 35 | Lookup 'likes_by_item', [items.ItemID, :==] 36 | Lookup 'users_by_id', [users.UserID, :==] 37 | end 38 | end 39 | 40 | Group 'AddLike' do 41 | Plan 'AddItemLike' do 42 | Param items.ItemID, :== 43 | Param likes.LikeID, :== 44 | Param likes.LikedAt, :== 45 | Param users.UserID, :== 46 | Insert 'likes_by_item' 47 | end 48 | 49 | Plan 'AddUserLike' do 50 | Param users.UserID, :== 51 | Param likes.LikeID, :== 52 | Param likes.LikedAt, :== 53 | Param items.ItemID, :== 54 | Insert 'likes_by_user' 55 | end 56 | end 57 | 58 | Group 'AddUser' do 59 | Plan 'AddUser' do 60 | Param users.UserID, :== 61 | Param users.Name, :== 62 | Param users.Email, :== 63 | Insert 'users_by_id' 64 | end 65 | end 66 | 67 | Group 'AddItem' do 68 | Plan 'AddItem' do 69 | Param items.ItemID, :== 70 | Param items.Title, :== 71 | Param items.Desc, :== 72 | Insert 'items_by_id' 73 | end 74 | end 75 
# Cost models for steps of backend statement execution
debugging purposes because we need a cost 58 | # @return [Integer] 59 | def pruned_cost(_step) 60 | 0 61 | end 62 | end 63 | end 64 | end 65 | 66 | require_relative 'cost/cassandra' 67 | require_relative 'cost/entity_count' 68 | require_relative 'cost/field_size' 69 | require_relative 'cost/request_count' 70 | -------------------------------------------------------------------------------- /spec/support/entities.rb: -------------------------------------------------------------------------------- 1 | module NoSE 2 | RSpec.shared_examples 'entities' do 3 | let(:workload) do 4 | # rubocop:disable Lint/Void 5 | Workload.new do 6 | (Entity 'User' do 7 | ID 'UserId' 8 | String 'Username', 10 9 | String 'City', count: 5 10 | String 'Country' 11 | 12 | etc 13 | end) * 10 14 | 15 | (Entity 'Link' do 16 | ID 'LinkId' 17 | String 'URL' 18 | end) * 100 19 | 20 | (Entity 'Tweet' do 21 | ID 'TweetId' 22 | String 'Body', 140, count: 5 23 | Date 'Timestamp' 24 | Integer 'Retweets' 25 | end) * 1000 26 | 27 | HasOne 'User', 'Tweets', 28 | {'Tweet' => 'User'} 29 | 30 | HasOne 'Favourite', 'Favourited', 31 | {'User' => 'Tweet'} 32 | 33 | HasOne 'Link', 'Tweets', 34 | {'Tweet' => 'Link'} 35 | end 36 | end 37 | # rubocop:enable Lint/Void 38 | 39 | let(:tweet) { workload.model['Tweet'] } 40 | let(:user) { workload.model['User'] } 41 | let(:link) { workload.model['Link'] } 42 | let(:query) do 43 | Statement.parse 'SELECT Link.URL FROM Link.Tweets.User ' \ 44 | 'WHERE User.Username = ? 
LIMIT 5', workload.model 45 | end 46 | 47 | let(:index) do 48 | Index.new [user['Username']], 49 | [tweet['Timestamp'], user['UserId'], tweet['TweetId']], 50 | [tweet['Body']], 51 | QueryGraph::Graph.from_path([user.id_field, user['Tweets']]), 52 | saved_key: 'TweetIndex' 53 | end 54 | 55 | let(:users) do 56 | [{ 57 | 'User_UserId' => '18a9a155-c9c7-43b5-9ab0-5967c49f56e9', 58 | 'User_Username' => 'Bob' 59 | }] 60 | end 61 | 62 | let(:tweets) do 63 | [{ 64 | 'Tweet_Timestamp' => Time.now, 65 | 'Tweet_TweetId' => 'e2dee9ee-5297-4f91-a3f7-9dd169008407', 66 | 'Tweet_Body' => 'This is a test' 67 | }] 68 | end 69 | 70 | let(:links) do 71 | [{ 72 | 'Link_LinkId' => '4a5339d8-e619-4ad5-a1be-c0bbceb1cdab', 73 | 'Link_URL' => 'http://www.example.com/' 74 | }] 75 | end 76 | end 77 | end 78 | -------------------------------------------------------------------------------- /schemas/rubis_baseline.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require_relative '../lib/nose.rb' 4 | 5 | NoSE::Schema.new do 6 | Model 'rubis' 7 | 8 | SimpleIndex 'categories' 9 | SimpleIndex 'regions' 10 | SimpleIndex 'items' 11 | SimpleIndex 'comments' 12 | 13 | Index 'users_by_region' do 14 | Hash regions.id 15 | Ordered users.id 16 | Extra users.nickname 17 | Path regions.id, regions.users 18 | end 19 | 20 | Index 'users' do 21 | Hash users.id 22 | Ordered regions.id 23 | Extra users['*'] 24 | Path users.id, users.region 25 | end 26 | 27 | Index 'bids' do 28 | Hash bids.id 29 | Ordered users.id, items.id 30 | Extra bids['*'] 31 | Path users.id, users.bids, bids.item 32 | end 33 | 34 | Index 'buynow' do 35 | Hash buynow.id 36 | Ordered items.id 37 | Extra buynow['*'] 38 | Path buynow.id, buynow.item 39 | end 40 | 41 | Index 'all_categories' do 42 | Hash categories.dummy 43 | Ordered categories.id 44 | Path categories.id 45 | end 46 | 47 | Index 'all_regions' do 48 | Hash regions.dummy 49 | Ordered regions.id 50 | Path 
regions.id 51 | end 52 | 53 | Index 'bids_by_item' do 54 | Hash items.id 55 | Ordered bids.id 56 | Path items.id, items.bids 57 | end 58 | 59 | Index 'items_by_category' do 60 | Hash categories.id 61 | Ordered items.end_date, items.id 62 | Path categories.id, categories.items 63 | end 64 | 65 | Index 'items_by_region' do 66 | Hash regions.id 67 | Ordered categories.id, items.end_date, items.id, users.id 68 | Path regions.id, regions.users, users.items_sold, items.category 69 | end 70 | 71 | Index 'comments_by_user' do 72 | Hash users.id 73 | Ordered comments.id 74 | Path users.id, users.comments_received 75 | end 76 | 77 | Index 'user_items_sold' do 78 | Hash users.id 79 | Ordered items.end_date, items.id 80 | Path users.id, users.items_sold 81 | end 82 | 83 | Index 'buynow_by_user' do 84 | Hash users.id 85 | Ordered buynow.date, buynow.id 86 | Path users.id, users.bought_now 87 | end 88 | 89 | Index 'bids_by_user' do 90 | Hash users.id 91 | Ordered bids.date, bids.id 92 | Path users.id, users.bids 93 | end 94 | end 95 | -------------------------------------------------------------------------------- /spec/cost_spec.rb: -------------------------------------------------------------------------------- 1 | module NoSE 2 | module Cost 3 | describe Cost do 4 | it 'should register all subclasses' do 5 | expect(Cost.subclasses).to have_key 'NoSE::Cost::RequestCountCost' 6 | expect(Cost.subclasses).to have_key 'NoSE::Cost::EntityCountCost' 7 | expect(Cost.subclasses).to have_key 'NoSE::Cost::FieldSizeCost' 8 | end 9 | end 10 | 11 | describe RequestCountCost do 12 | include_context 'entities' 13 | 14 | it 'is a type of cost' do 15 | expect(RequestCountCost.subtype_name).to eq 'request_count' 16 | end 17 | 18 | it 'counts a single request for a single step plan' do 19 | planner = Plans::QueryPlanner.new workload.model, 20 | [tweet.simple_index], subject 21 | plan = planner.min_plan \ 22 | Statement.parse 'SELECT Tweet.* FROM Tweet ' \ 23 | 'WHERE Tweet.TweetId = ?', 
workload.model 24 | expect(plan.cost).to eq 1 25 | end 26 | end 27 | 28 | describe EntityCountCost do 29 | include_context 'entities' 30 | 31 | it 'is a type of cost' do 32 | expect(EntityCountCost.subtype_name).to eq 'entity_count' 33 | end 34 | 35 | it 'counts multiple requests when multiple entities are selected' do 36 | query = Statement.parse 'SELECT Tweet.* FROM Tweet.User ' \ 37 | 'WHERE User.UserId = ?', workload.model 38 | planner = Plans::QueryPlanner.new workload.model, 39 | [query.materialize_view], subject 40 | plan = planner.min_plan query 41 | expect(plan.cost).to eq 100 42 | end 43 | end 44 | 45 | describe FieldSizeCost do 46 | include_context 'entities' 47 | 48 | it 'is a type of cost' do 49 | expect(FieldSizeCost.subtype_name).to eq 'field_size' 50 | end 51 | 52 | it 'measures the size of the selected data' do 53 | index = tweet.simple_index 54 | planner = Plans::QueryPlanner.new workload.model, [index], subject 55 | plan = planner.min_plan \ 56 | Statement.parse 'SELECT Tweet.* FROM Tweet ' \ 57 | 'WHERE Tweet.TweetId = ?', workload.model 58 | expect(plan.cost).to eq index.all_fields.sum_by(&:size) 59 | end 60 | end 61 | end 62 | end 63 | -------------------------------------------------------------------------------- /lib/nose/timing.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | # Tracks the runtime of various functions and outputs a measurement 5 | class Timer 6 | # Start tracking function runtime 7 | # @return [void] 8 | def self.enable 9 | traced = { 10 | IndexEnumerator => [ 11 | :indexes_for_workload, 12 | :support_indexes, 13 | :combine_indexes 14 | ], 15 | Search::Search => [ 16 | :query_costs, 17 | :update_costs, 18 | :search_overlap, 19 | :solve_mipper 20 | ], 21 | Search::Problem => [ 22 | :setup_model, 23 | :add_variables, 24 | :add_constraints, 25 | :define_objective, 26 | :total_cost, 27 | :add_update_costs, 28 | :total_size, 29 | 
:total_indexes, 30 | :solve 31 | ], 32 | MIPPeR::CbcModel => [ 33 | :add_constraints, 34 | :add_variables, 35 | :update, 36 | :optimize 37 | ] 38 | } 39 | @old_methods = Hash.new { |h, k| h[k] = {} } 40 | 41 | # Redefine each method to capture timing information on each call 42 | traced.each do |cls, methods| 43 | methods.each do |method| 44 | old_method = cls.instance_method(method) 45 | cls.send(:define_method, method) do |*args| 46 | $stderr.puts "#{cls}##{method}\tSTART" 47 | 48 | start = Time.now.utc 49 | result = old_method.bind(self).call(*args) 50 | elapsed = Time.now.utc - start 51 | 52 | # Allow a block to be called with the timing results 53 | yield cls, method, elapsed if block_given? 54 | 55 | $stderr.puts "#{cls}##{method}\tEND\t#{elapsed}" 56 | 57 | result 58 | end 59 | 60 | # Save a copy of the old method for later 61 | @old_methods[cls][method] = old_method 62 | end 63 | end 64 | end 65 | 66 | # Stop tracking function runtime 67 | # @return [void] 68 | def self.disable 69 | @old_methods.each do |cls, methods| 70 | methods.each do |method, old_method| 71 | cls.send(:define_method, method, old_method) 72 | end 73 | end 74 | 75 | # Remove the saved method definitions 76 | @old_methods.clear 77 | end 78 | end 79 | end 80 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # NoSQL Schema Evaluator (NoSE) 2 | 3 | [![CI](https://github.com/michaelmior/NoSE/actions/workflows/ci.yml/badge.svg)](https://github.com/michaelmior/NoSE/actions/workflows/ci.yml) 4 | [![Depfu](https://badges.depfu.com/badges/69de42ee3415b077a040beadc8941f1e/overview.svg)](https://depfu.com/github/michaelmior/NoSE?project_id=6964) 5 | [![codecov](https://codecov.io/gh/michaelmior/NoSE/branch/main/graph/badge.svg?token=knALGf3kHn)](https://codecov.io/gh/michaelmior/NoSE) 6 | [![Docker Build Status](https://img.shields.io/docker/build/michaelmior/nose.svg)]() 
7 | 8 | This is a work in progress tool to provide automated physical schema design for NoSQL data stores. 9 | NoSE is licensed under the [GPLv3 license](LICENSE.md). 10 | 11 | ## Getting Started 12 | 13 | If you want to quickly try NoSE, you can get a shell with all necessary dependencies using [Docker](https://www.docker.com/) as follows 14 | 15 | docker run --interactive --tty --rm michaelmior/nose /bin/bash 16 | 17 | For continued use, installing a development version of the NoSE CLI is more flexible. 18 | Instructions can be found in the [nose-cli](https://github.com/michaelmior/nose-cli) repository. 19 | 20 | ## Publications 21 | 22 | Mior, M.J.; Kenneth Salem; Ashraf Aboulnaga; Rui Liu, [NoSE: Schema Design for NoSQL Applications](https://www.researchgate.net/publication/296485511_NoSE_Schema_Design_for_NoSQL_Applications), in Data Engineering (ICDE), July 2017. 23 | 24 | Mior, M.J.; Kenneth Salem; Ashraf Aboulnaga; Rui Liu, [NoSE: Schema Design for NoSQL Applications](https://www.researchgate.net/publication/318126769_NoSE_Schema_Design_for_NoSQL_Applications), Transactions on Knowledge and Data Engineering, 16-20 May 2016. 25 | 26 | ![ACM DL Author-ize service](http://dl.acm.org/images/oa.gif) Michael J. Mior. 2014. [Automated schema design for NoSQL databases](http://dl.acm.org/authorize?N71145). In Proceedings of the 2014 SIGMOD PhD symposium (SIGMOD'14 PhD Symposium). ACM, New York, NY, USA, 41-45. 27 | 28 | ## Acknowledgements 29 | 30 | This work was supported by the Natural Sciences and Engineering Research Council of Canada ([NSERC](http://nserc.gc.ca)). 31 | 32 | [![NSERC](assets/NSERC-logo.png)](http://nserc.gc.ca) 33 | 34 | Hosting of [Coin-OR packages](https://packagecloud.io/michaelmior/coinor/) is generously provided by packagecloud. 
35 | 36 | [![packagecloud](assets/packagecloud-logo.png)](https://packagecloud.io) 37 | -------------------------------------------------------------------------------- /schemas/rubis_expert.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require_relative '../lib/nose.rb' 4 | 5 | NoSE::Schema.new do 6 | Model 'rubis' 7 | 8 | Index 'users_by_region' do 9 | Hash regions.id 10 | Ordered users.id 11 | Extra users.nickname 12 | Path regions.id, regions.users 13 | end 14 | 15 | Index 'user_data' do 16 | Hash users.id 17 | Ordered regions.id 18 | Extra users['*'], regions.name 19 | Path users.id, users.region 20 | end 21 | 22 | Index 'user_buynow' do 23 | Hash users.id 24 | Ordered buynow.date, buynow.id, items.id 25 | Extra buynow.qty 26 | Path users.id, users.bought_now, buynow.item 27 | end 28 | 29 | Index 'user_items_bid_on' do 30 | Hash users.id 31 | Ordered items.end_date, bids.id, items.id 32 | Extra bids.qty 33 | Path users.id, users.bids, bids.item 34 | end 35 | 36 | Index 'user_items_sold' do 37 | Hash users.id 38 | Ordered items.end_date, items.id 39 | Path users.id, users.items_sold 40 | end 41 | 42 | Index 'user_comments_received' do 43 | Hash users.id 44 | Ordered comments.id, items.id 45 | Extra comments['*'] 46 | Path users.id, users.comments_received, comments.item 47 | end 48 | 49 | Index 'commenter' do 50 | Hash comments.id 51 | Ordered users.id 52 | Extra users.nickname 53 | Path comments.id, comments.from_user 54 | end 55 | 56 | Index 'items_with_category' do 57 | Hash items.id 58 | Ordered categories.id 59 | Extra items['*'] 60 | Path items.id, items.category 61 | end 62 | 63 | Index 'item_bids' do 64 | Hash items.id 65 | Ordered bids.id, users.id 66 | Extra items.max_bid, users.nickname, bids.qty, bids.bid, bids.date 67 | Path items.id, items.bids, bids.user 68 | end 69 | 70 | Index 'items_by_category' do 71 | Hash categories.id 72 | Ordered items.end_date, items.id 73 | 
Path categories.id, categories.items 74 | end 75 | 76 | Index 'category_list' do 77 | Hash categories.dummy 78 | Ordered categories.id 79 | Extra categories.name 80 | Path categories.id 81 | end 82 | 83 | Index 'region_list' do 84 | Hash regions.dummy 85 | Ordered regions.id 86 | Extra regions.name 87 | Path regions.id 88 | end 89 | 90 | Index 'regions' do 91 | Hash regions.id 92 | Extra regions.name 93 | Path regions.id 94 | end 95 | end 96 | -------------------------------------------------------------------------------- /experiments/rubis/fake.js: -------------------------------------------------------------------------------- 1 | var Table = require('mysql-faker').Table, 2 | insert = require('mysql-faker').insert; 3 | 4 | var categories = (new Table('categories', 500)); 5 | categories.lorem_words('name', 2); 6 | 7 | var regions = (new Table('regions', 50)); 8 | regions.lorem_words('name', 2); 9 | 10 | var users = (new Table('users', 200000)); 11 | users.name_firstName('firstname') 12 | .name_lastName('lastname') 13 | .random_uuid('nickname') 14 | .internet_password('password') 15 | .internet_email('email') 16 | .random_number('rating', {min: -50, max: 200}) 17 | .finance_amount('balance') 18 | .date_past('creation_date') 19 | .random_number('region', {min: 1, max: regions.count}); 20 | 21 | var items = (new Table('items', 2000000)); 22 | items.lorem_words('name') 23 | .lorem_paragraph('description') 24 | .finance_amount('initial_price') 25 | .random_number('quantity', {min: 0, max: 10}) 26 | .finance_amount('reserve_price') 27 | .finance_amount('buy_now') 28 | .random_number('nb_of_bids', {min: 0, max: 100}) 29 | .finance_amount('max_bid') 30 | .date_past('start_date') 31 | .date_past('end_date') 32 | .random_number('seller', {min: 1, max: users.count}) 33 | .random_number('category', {min: 1, max: categories.count}); 34 | 35 | var bids = (new Table('bids', 20000000)); 36 | bids.random_number('qty', {min: 1, max: 5}) 37 | .finance_amount('bid') 38 | 
.finance_amount('max_bid') 39 | .date_past('date') 40 | .random_number('user', {min: 1, max: users.count}) 41 | .random_number('item', {min: 1, max: items.count}); 42 | 43 | var comments = (new Table('comments', 10000000)); 44 | comments.random_number('rating', {min: -5, max: 5}) 45 | .date_past('date') 46 | .lorem_sentences('comment') 47 | .random_number('from_user', {min: 1, max: users.count}) 48 | .random_number('to_user', {min: 1, max: users.count}) 49 | .random_number('item', {min: 1, max: items.count}); 50 | 51 | var buy_now = (new Table('buynow', 2000000)); 52 | buy_now.random_number('qty', {min: 1, max: 3}) 53 | .date_past('date') 54 | .random_number('buyer', {min: 1, max: users.count}) 55 | .random_number('item', {min: 1, max: users.count}); 56 | 57 | insert([ 58 | categories, 59 | regions, 60 | users, 61 | items, 62 | bids, 63 | comments, 64 | buy_now 65 | ], { 66 | host: 'localhost', 67 | user: 'root', 68 | password: 'root', 69 | database: 'rubis' 70 | }, true); 71 | -------------------------------------------------------------------------------- /lib/nose/schema.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | # Simple DSL for constructing indexes 5 | class Schema 6 | attr_reader :model, :indexes 7 | 8 | def initialize(&block) 9 | @indexes = {} 10 | instance_eval(&block) if block_given? 
11 | end 12 | 13 | # Find the schema with the given name 14 | def self.load(name) 15 | filename = File.expand_path "../../../schemas/#{name}.rb", __FILE__ 16 | contents = File.read(filename) 17 | binding.eval contents, filename 18 | end 19 | 20 | # rubocop:disable MethodName 21 | 22 | # Set the model to be used by the schema 23 | # @return [void] 24 | def Model(name) 25 | @model = Model.load name 26 | NoSE::DSL.mixin_fields @model.entities, IndexDSL 27 | end 28 | 29 | # Add a simple index for an entity 30 | # @return [void] 31 | def SimpleIndex(entity) 32 | @indexes[entity] = @model[entity].simple_index 33 | end 34 | 35 | # Wrap commands for defining index attributes 36 | # @return [void] 37 | def Index(key, &block) 38 | # Apply the DSL 39 | dsl = IndexDSL.new(self) 40 | dsl.instance_eval(&block) if block_given? 41 | index = Index.new dsl.hash_fields, dsl.order_fields, dsl.extra, 42 | QueryGraph::Graph.from_path(dsl.path_keys), 43 | saved_key: key 44 | @indexes[index.key] = index 45 | end 46 | 47 | # rubocop:enable MethodName 48 | end 49 | 50 | # DSL for index creation within a schema 51 | class IndexDSL 52 | attr_reader :hash_fields, :order_fields, :extra, :path_keys 53 | 54 | def initialize(schema) 55 | @schema = schema 56 | @hash_fields = [] 57 | @order_fields = [] 58 | @extra = [] 59 | @path_keys = [] 60 | end 61 | 62 | # rubocop:disable MethodName 63 | 64 | # Define a list of hash fields 65 | # @return [void] 66 | def Hash(*fields) 67 | @hash_fields += fields.flatten 68 | end 69 | 70 | # Define a list of ordered fields 71 | # @return [void] 72 | def Ordered(*fields) 73 | @order_fields += fields.flatten 74 | end 75 | 76 | # Define a list of extra fields 77 | # @return [void] 78 | def Extra(*fields) 79 | @extra += fields.flatten 80 | end 81 | 82 | # Define the keys for the index path 83 | # @return [void] 84 | def Path(*keys) 85 | @path_keys += keys 86 | end 87 | 88 | # rubocop:enable MethodName 89 | end 90 | end 91 | 
-------------------------------------------------------------------------------- /models/rubis.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | # rubocop:disable all 3 | 4 | NoSE::Model.new do 5 | # Define entities along with the size and cardinality of their fields 6 | # as well as an estimated number of each entity 7 | 8 | (Entity 'categories' do 9 | ID 'id' 10 | String 'name', 20 11 | Integer 'dummy', count: 1 12 | end) * 50 13 | 14 | (Entity 'regions' do 15 | ID 'id' 16 | String 'name', 25 17 | Integer 'dummy', count: 1 18 | end) * 5 19 | 20 | (Entity 'users' do 21 | ID 'id' 22 | String 'firstname', 6 23 | String 'lastname', 7 24 | String 'nickname', 12 25 | String 'password', 15 26 | String 'email', 23 27 | Integer 'rating', count: 50 28 | Float 'balance', count: 10_000 29 | Date 'creation_date' 30 | end) * 2_000 31 | 32 | (Entity 'items' do 33 | ID 'id' 34 | String 'name', 19 35 | String 'description', 197 36 | Float 'initial_price' 37 | Integer 'quantity', count: 100 38 | Float 'reserve_price' 39 | Float 'buy_now' 40 | Integer 'nb_of_bids', count: 100 41 | Float 'max_bid' 42 | Date 'start_date' 43 | Date 'end_date' 44 | end) * 20_000 45 | 46 | (Entity 'bids' do 47 | ID 'id' 48 | Integer 'qty', count: 5 49 | Float 'bid' 50 | Date 'date' 51 | end) * 200_000 52 | 53 | (Entity 'comments' do 54 | ID 'id' 55 | Integer 'rating', count: 10 56 | Date 'date' 57 | String 'comment', 130 58 | end) * 100_000 59 | 60 | (Entity 'buynow' do 61 | ID 'id' 62 | Integer 'qty', count: 4 63 | Date 'date' 64 | end) * 40_000 65 | 66 | HasOne 'region', 'users', 67 | {'users' => 'regions'} 68 | 69 | HasOne 'seller', 'items_sold', 70 | {'items' => 'users'} 71 | 72 | HasOne 'category', 'items', 73 | {'items' => 'categories'} 74 | 75 | HasOne 'user', 'bids', 76 | {'bids' => 'users'} 77 | 78 | HasOne 'item', 'bids', 79 | {'bids' => 'items'} 80 | 81 | HasOne 'from_user', 'comments_sent', 82 | {'comments' => 'users'} 
83 | 84 | HasOne 'to_user', 'comments_received', 85 | {'comments' => 'users'} 86 | 87 | HasOne 'item', 'comments', 88 | {'comments' => 'items'} 89 | 90 | HasOne 'buyer', 'bought_now', 91 | {'buynow' => 'users'} 92 | 93 | HasOne 'item', 'bought_now', 94 | {'buynow' => 'items'} 95 | end 96 | 97 | # rubocop:enable all 98 | -------------------------------------------------------------------------------- /lib/nose/statements/delete.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | # A representation of a delete in the workload 5 | class Delete < Statement 6 | include StatementConditions 7 | include StatementSupportQuery 8 | 9 | def initialize(params, text, group: nil, label: nil) 10 | super params, text, group: group, label: label 11 | 12 | populate_conditions params 13 | end 14 | 15 | # Build a new delete from a provided parse tree 16 | # @return [Delete] 17 | def self.parse(tree, params, text, group: nil, label: nil) 18 | conditions_from_tree tree, params 19 | 20 | Delete.new params, text, group: group, label: label 21 | end 22 | 23 | # Produce the SQL text corresponding to this delete 24 | # @return [String] 25 | def unparse 26 | delete = "DELETE #{entity.name} " 27 | delete += "FROM #{from_path @key_path}" 28 | delete << where_clause 29 | 30 | delete 31 | end 32 | 33 | def ==(other) 34 | other.is_a?(Delete) && 35 | @graph == other.graph && 36 | entity == other.entity && 37 | @conditions == other.conditions 38 | end 39 | alias eql? == 40 | 41 | def hash 42 | @hash ||= [@graph, entity, @conditions].hash 43 | end 44 | 45 | # Index contains the entity to be deleted 46 | def modifies_index?(index) 47 | index.graph.entities.include? 
entity 48 | end 49 | 50 | # Specifies that deletes require deletion 51 | def requires_delete?(_index) 52 | true 53 | end 54 | 55 | # Get the support queries for deleting from an index 56 | def support_queries(index) 57 | return [] unless modifies_index? index 58 | select = (index.hash_fields + index.order_fields.to_set) - 59 | @conditions.each_value.map(&:field).to_set 60 | return [] if select.empty? 61 | 62 | support_queries = [] 63 | 64 | # Build a support query which gets the IDs of the entities being deleted 65 | graph = @graph.dup 66 | support_fields = select.select do |field| 67 | field.parent == entity 68 | end.to_set 69 | support_fields << entity.id_field \ 70 | unless @conditions.each_value.map(&:field).include? entity.id_field 71 | conditions = Hash[@conditions.map { |k, v| [k.dup, v.dup] }] 72 | 73 | support_queries << build_support_query(entity, index, graph, 74 | support_fields, conditions) 75 | support_queries.compact + support_queries_for_entity(index, select) 76 | end 77 | 78 | # The condition fields are provided with the deletion 79 | def given_fields 80 | @conditions.each_value.map(&:field) 81 | end 82 | end 83 | end 84 | -------------------------------------------------------------------------------- /nose.gemspec: -------------------------------------------------------------------------------- 1 | Gem::Specification.new do |s| 2 | s.name = 'nose' 3 | s.version = '0.1.4' 4 | s.license = 'GPL-3.0' 5 | s.summary = 'Schema design for NoSQL applications' 6 | s.author = 'Michael Mior' 7 | s.email = 'mmior@uwaterloo.ca' 8 | s.files = Dir['lib/**/*'] + 9 | Dir['templates/*'] + 10 | Dir['models/*'] + 11 | Dir['workloads/*'] + 12 | Dir['plans/*'] + 13 | Dir['schemas/*'] + 14 | Dir['data/**/*'] 15 | s.homepage = 'https://michael.mior.ca/projects/NoSE/' 16 | 17 | s.add_dependency 'faker', '~> 2.16.0', '>= 1.7.0' 18 | s.add_dependency 'formatador', '~> 0.2.5' 19 | s.add_dependency 'json-schema', '~> 2.8.0', '>= 2.8.0' 20 | s.add_dependency 'logging', '>= 
2.2', '< 2.4' 21 | s.add_dependency 'mipper', '~> 0.1.0' 22 | s.add_dependency 'parallel', '>= 1.20.1', '< 1.25.0' 23 | s.add_dependency 'parslet', '>= 1.8', '< 2.1' 24 | s.add_dependency 'pickup', '~> 0.0.11' 25 | s.add_dependency 'pry', '~> 0.13.1' 26 | s.add_dependency 'rake', '~> 12.3.2', '>= 12.0.0' 27 | s.add_dependency 'representable', '~> 3.2.0', '>= 3.0.0' 28 | s.add_dependency 'ruby-graphviz', '~> 1.2.2', '>= 1.2.0' 29 | s.add_dependency 'ruby-mysql', '~> 2.9.14', '>= 2.9.0' # for the proxy because it's pure Ruby 30 | s.add_dependency 'sequel', '>= 5.41', '< 5.80' 31 | s.add_dependency 'smarter_csv', '1.10.2' 32 | s.add_dependency 'sorted_set', '~> 1.0', '>= 1.0.3' 33 | 34 | # Required for Cassandra backend 35 | s.add_dependency 'cassandra-driver', '~> 3.2.5', '>= 3.1.0' 36 | 37 | # Required for MongoDB backend 38 | s.add_dependency 'mongo', '>= 2.14', '< 2.21' 39 | 40 | s.add_development_dependency 'fakefs', '~> 2.5.0' 41 | s.add_development_dependency 'memory_profiler', '~> 1.0.0' 42 | s.add_development_dependency 'pry-byebug', '~> 3.9.0' 43 | s.add_development_dependency 'pry-doc', '~> 1.5.0' 44 | s.add_development_dependency 'pry-stack_explorer', '~> 0.5.1' 45 | s.add_development_dependency 'ronn', '~> 0.7.3' 46 | s.add_development_dependency 'rspec', '~> 3.10.0' 47 | s.add_development_dependency 'rspec-core', '~> 3.10.0' 48 | s.add_development_dependency 'rspec-collection_matchers', '~> 1.2.0', '>= 1.1.0' 49 | s.add_development_dependency 'ruby-prof', '~> 1.4.2' 50 | s.add_development_dependency 'simplecov', '~> 0.22.0' 51 | s.add_development_dependency 'simplecov-cobertura', '~> 2.1' 52 | s.add_development_dependency 'yard', '~> 0.9.4' 53 | 54 | # Below for MRI only (TODO JRuby gemspec) 55 | s.add_dependency 'rbtree', '~> 0.4.2' # for more efficient SortedSet implementation 56 | s.add_dependency 'mysql2', '~> 0.5.2' # this is used for the loader for performance 57 | s.add_development_dependency 'pry-rescue', '~> 1.6.0' 58 | 
s.add_development_dependency 'binding_of_caller', '~> 0.8.0' 59 | end 60 | -------------------------------------------------------------------------------- /spec/backend_spec.rb: -------------------------------------------------------------------------------- 1 | module NoSE 2 | module Backend 3 | describe Backend::SortStatementStep do 4 | include_context 'entities' 5 | 6 | it 'can sort a list of results' do 7 | results = [ 8 | { 'User_Username' => 'Bob' }, 9 | { 'User_Username' => 'Alice' } 10 | ] 11 | step = Plans::SortPlanStep.new [user['Username']] 12 | 13 | step_class = Backend::SortStatementStep 14 | prepared = step_class.new nil, [], {}, step, nil, nil 15 | results = prepared.process nil, results 16 | 17 | expect(results).to eq [ 18 | { 'User_Username' => 'Alice' }, 19 | { 'User_Username' => 'Bob' } 20 | ] 21 | end 22 | end 23 | 24 | describe Backend::FilterStatementStep do 25 | include_context 'entities' 26 | 27 | it 'can filter results by an equality predicate' do 28 | results = [ 29 | { 'User_Username' => 'Alice' }, 30 | { 'User_Username' => 'Bob' } 31 | ] 32 | step = Plans::FilterPlanStep.new [user['Username']], nil 33 | query = Statement.parse 'SELECT User.* FROM User ' \ 34 | 'WHERE User.Username = "Bob"', workload.model 35 | 36 | step_class = Backend::FilterStatementStep 37 | prepared = step_class.new nil, [], {}, step, nil, nil 38 | results = prepared.process query.conditions, results 39 | 40 | expect(results).to eq [ 41 | { 'User_Username' => 'Bob' } 42 | ] 43 | end 44 | 45 | it 'can filter results by a range predicate' do 46 | results = [ 47 | { 'User_Username' => 'Alice' }, 48 | { 'User_Username' => 'Bob' } 49 | ] 50 | step = Plans::FilterPlanStep.new [], [user['Username']] 51 | query = Statement.parse 'SELECT User.* FROM User WHERE ' \ 52 | 'User.Username < "B" AND ' \ 53 | 'User.City = "New York"', workload.model 54 | 55 | step_class = Backend::FilterStatementStep 56 | prepared = step_class.new nil, [], {}, step, nil, nil 57 | results = 
prepared.process query.conditions, results 58 | 59 | expect(results).to eq [ 60 | { 'User_Username' => 'Alice' } 61 | ] 62 | end 63 | end 64 | 65 | describe Backend::FilterStatementStep do 66 | include_context 'entities' 67 | 68 | it 'can limit results' do 69 | results = [ 70 | { 'User_Username' => 'Alice' }, 71 | { 'User_Username' => 'Bob' } 72 | ] 73 | step = Plans::LimitPlanStep.new 1 74 | step_class = Backend::LimitStatementStep 75 | prepared = step_class.new nil, [], {}, step, nil, nil 76 | results = prepared.process({}, results) 77 | 78 | expect(results).to eq [ 79 | { 'User_Username' => 'Alice' } 80 | ] 81 | end 82 | end 83 | end 84 | end 85 | -------------------------------------------------------------------------------- /lib/nose/proxy.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | # Query processing proxies to transparently execute queries against a backend 5 | module Proxy 6 | # A proxy server to interpret our query language and implement query plans 7 | class ProxyBase 8 | attr_reader :logger 9 | def initialize(config, result, backend) 10 | @logger = Logging.logger['nose::proxy'] 11 | 12 | @result = result 13 | @backend = backend 14 | @config = config 15 | 16 | @continue = true 17 | end 18 | 19 | # Start the proxy server 20 | # @return [void] 21 | def start 22 | @logger.info "Starting server on port #{@config[:port]}" 23 | 24 | server_socket = TCPServer.new('127.0.0.1', @config[:port]) 25 | server_socket.listen(100) 26 | 27 | @read_sockets = [server_socket] 28 | @write_sockets = [] 29 | loop do 30 | break unless @continue && select_connection(server_socket) 31 | end 32 | end 33 | 34 | # @abstract Subclasses should process a new connection 35 | # on the given socket 36 | # :nocov: 37 | # @return [void] 38 | def handle_connection(_socket) 39 | fail NotImplementedError 40 | end 41 | # :nocov: 42 | 43 | # @abstract Subclasses should dispose of state associated 
with the socket 44 | # :nocov: 45 | # @return [void] 46 | def remove_connection(_socket) 47 | fail NotImplementedError 48 | end 49 | # :nocov: 50 | 51 | # Stop accepting connections 52 | # @return [void] 53 | def stop 54 | @continue = false 55 | end 56 | 57 | private 58 | 59 | # Select sockets which are available to be processed 60 | # @return [void] 61 | def select_connection(server_socket) 62 | read, write, error = IO.select @read_sockets, @write_sockets, 63 | @read_sockets + @write_sockets, 5 64 | return true if read.nil? 65 | 66 | # Check if we have a new incoming connection 67 | if read.include? server_socket 68 | accept_connection server_socket 69 | read.delete server_socket 70 | elsif error.include? server_socket 71 | @logger.error 'Server socket died' 72 | return false 73 | end 74 | 75 | # Remove all sockets which have errors 76 | error.each { |socket| remove_connection socket } 77 | @read_sockets -= error 78 | @write_sockets -= error 79 | 80 | # Handle connections on each available socket 81 | process_connections read + write 82 | end 83 | 84 | # Accept the new connection 85 | # @return [void] 86 | def accept_connection(server_socket) 87 | client_socket, = server_socket.accept 88 | @read_sockets << client_socket 89 | @write_sockets << client_socket 90 | end 91 | 92 | # Process all pending connections 93 | # @return [void] 94 | def process_connections(sockets) 95 | sockets.each do |socket| 96 | @write_sockets.delete socket 97 | @read_sockets.delete socket unless handle_connection socket 98 | end 99 | end 100 | end 101 | end 102 | end 103 | -------------------------------------------------------------------------------- /lib/nose/plans/filter.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | module Plans 5 | # A query plan performing a filter without an index 6 | class FilterPlanStep < PlanStep 7 | attr_reader :eq, :range 8 | 9 | def initialize(eq, range, state = 
nil) 10 | @eq = eq 11 | @range = range 12 | super() 13 | 14 | return if state.nil? 15 | @state = state.dup 16 | update_state 17 | @state.freeze 18 | end 19 | 20 | # Two filtering steps are equal if they filter on the same fields 21 | # @return [Boolean] 22 | def ==(other) 23 | other.instance_of?(self.class) && \ 24 | @eq == other.eq && @range == other.range 25 | end 26 | 27 | def hash 28 | [@eq.map(&:id), @range.nil? ? nil : @range.id].hash 29 | end 30 | 31 | # :nocov: 32 | def to_color 33 | "#{super} #{@eq.to_color} #{@range.to_color} " + 34 | begin 35 | "#{@parent.state.cardinality} " \ 36 | "-> #{state.cardinality}" 37 | rescue NoMethodError 38 | '' 39 | end 40 | end 41 | # :nocov: 42 | 43 | # Check if filtering can be done (we have all the necessary fields) 44 | def self.apply(parent, state) 45 | # Get fields and check for possible filtering 46 | filter_fields, eq_filter, range_filter = filter_fields parent, state 47 | return nil if filter_fields.empty? 48 | 49 | FilterPlanStep.new eq_filter, range_filter, state \ 50 | if required_fields?(filter_fields, parent) 51 | end 52 | 53 | # Get the fields we can possibly filter on 54 | def self.filter_fields(parent, state) 55 | eq_filter = state.eq.select { |field| parent.fields.include? field } 56 | filter_fields = eq_filter.dup 57 | if state.range && parent.fields.include?(state.range) 58 | range_filter = state.range 59 | filter_fields << range_filter 60 | else 61 | range_filter = nil 62 | end 63 | 64 | [filter_fields, eq_filter, range_filter] 65 | end 66 | private_class_method :filter_fields 67 | 68 | # Check that we have all the fields we are filtering 69 | # @return [Boolean] 70 | def self.required_fields?(filter_fields, parent) 71 | filter_fields.map do |field| 72 | next true if parent.fields.member? field 73 | 74 | # We can also filter if we have a foreign key 75 | # XXX for now we assume this value is the same 76 | next unless field.is_a? IDField 77 | parent.fields.any? 
do |pfield| 78 | pfield.is_a?(ForeignKeyField) && pfield.entity == field.parent 79 | end 80 | end.all? 81 | end 82 | private_class_method :required_fields? 83 | 84 | private 85 | 86 | # Apply the filters and perform a uniform estimate on the cardinality 87 | # @return [void] 88 | def update_state 89 | @state.eq -= @eq 90 | @state.cardinality *= @eq.map { |field| 1.0 / field.cardinality } \ 91 | .inject(1.0, &:*) 92 | return unless @range 93 | 94 | @state.range = nil 95 | @state.cardinality *= 0.1 96 | end 97 | end 98 | end 99 | end 100 | -------------------------------------------------------------------------------- /lib/nose/statements/update.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | # A representation of an update in the workload 5 | class Update < Statement 6 | include StatementConditions 7 | include StatementSettings 8 | include StatementSupportQuery 9 | 10 | def initialize(params, text, group: nil, label: nil) 11 | super params, text, group: group, label: label 12 | 13 | populate_conditions params 14 | @settings = params[:settings] 15 | end 16 | 17 | # Build a new update from a provided parse tree 18 | # @return [Update] 19 | def self.parse(tree, params, text, group: nil, label: nil) 20 | conditions_from_tree tree, params 21 | settings_from_tree tree, params 22 | 23 | Update.new params, text, group: group, label: label 24 | end 25 | 26 | # Produce the SQL text corresponding to this update 27 | # @return [String] 28 | def unparse 29 | update = "UPDATE #{entity.name} " 30 | update += "FROM #{from_path @key_path} " 31 | update << settings_clause 32 | update << where_clause 33 | 34 | update 35 | end 36 | 37 | def ==(other) 38 | other.is_a?(Update) && 39 | @graph == other.graph && 40 | entity == other.entity && 41 | @settings == other.settings && 42 | @conditions == other.conditions 43 | end 44 | alias eql? 
== 45 | 46 | def hash 47 | @hash ||= [@graph, entity, @settings, @conditions].hash 48 | end 49 | 50 | # Specifies that updates require insertion 51 | def requires_insert?(_index) 52 | true 53 | end 54 | 55 | # Specifies that updates require deletion 56 | def requires_delete?(index) 57 | !(settings.map(&:field).to_set & 58 | (index.hash_fields + index.order_fields.to_set)).empty? 59 | end 60 | 61 | # Get the support queries for updating an index 62 | # @return [Array] 63 | def support_queries(index) 64 | return [] unless modifies_index? index 65 | 66 | # Get the updated fields and check if an update is necessary 67 | set_fields = settings.map(&:field).to_set 68 | 69 | # We only need to fetch all the fields if we're updating a key 70 | updated_key = !(set_fields & 71 | (index.hash_fields + index.order_fields)).empty? 72 | 73 | select = if updated_key 74 | index.all_fields 75 | else 76 | index.hash_fields + index.order_fields 77 | end - set_fields - @conditions.each_value.map(&:field) 78 | return [] if select.empty? 79 | 80 | support_queries = [] 81 | 82 | graph = @graph.dup 83 | support_fields = select.select do |field| 84 | field.parent == entity 85 | end.to_set 86 | support_fields << entity.id_field \ 87 | unless @conditions.each_value.map(&:field).include? 
entity.id_field 88 | 89 | support_queries << build_support_query(entity, index, graph, 90 | support_fields, conditions) 91 | support_queries.compact + support_queries_for_entity(index, select) 92 | end 93 | 94 | # The condition fields are provided with the update 95 | # Note that we don't include the settings here because we 96 | # care about the previously existing values in the database 97 | def given_fields 98 | @conditions.each_value.map(&:field) 99 | end 100 | end 101 | end 102 | -------------------------------------------------------------------------------- /lib/nose/model.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require_relative 'model/entity' 4 | require_relative 'model/fields' 5 | 6 | require 'graphviz' 7 | 8 | module NoSE 9 | # A conceptual data model of a set of entities 10 | class Model 11 | # The subdirectory models are loaded from 12 | LOAD_PATH = 'models' 13 | include Loader 14 | 15 | attr_reader :entities 16 | 17 | def initialize(&block) 18 | @entities = {} 19 | 20 | # Apply the DSL 21 | WorkloadDSL.new(self).instance_eval(&block) if block_given? 22 | end 23 | 24 | # Compare all entities 25 | # @return [Boolean] 26 | def ==(other) 27 | other.is_a?(Model) && @entities = other.entities 28 | end 29 | alias eql? == 30 | 31 | # Retrieve an entity by name 32 | # @return [Entity] 33 | def [](name) 34 | return @entities[name] if @entities.key? name 35 | fail EntityNotFound 36 | end 37 | 38 | # Add an {Entity} to the workload 39 | # @return [Entity] 40 | def add_entity(entity) 41 | fail InvalidEntity, 'no primary key defined' if entity.id_field.nil? 
42 | @entities[entity.name] = entity 43 | end 44 | 45 | # Find a field given an +Enumerable+ of identifiers 46 | # @return [Field] 47 | def find_field(field) 48 | if field.count > 2 49 | find_field_chain field 50 | else 51 | find_entity_field(*field) 52 | end 53 | end 54 | 55 | # Output a PNG representation of entities in the model 56 | def output(format, filename, include_fields = false) 57 | graph = GraphViz.new :G, type: :digraph 58 | nodes = add_graph_nodes graph, include_fields 59 | add_graph_edges graph, nodes 60 | 61 | graph.output(**{ format => filename }) 62 | end 63 | 64 | private 65 | 66 | # Add the nodes (entities) to a GraphViz object 67 | def add_graph_nodes(graph, include_fields) 68 | Hash[@entities.each_value.map do |entity| 69 | label = "#{entity.name}\n" 70 | if include_fields 71 | label += entity.fields.each_value.map do |field| 72 | type = field.class.name.sub(/^NoSE::(.*?)(Field)?$/, '\1') 73 | "#{field.name}: #{type}" 74 | end.join("\n") 75 | end 76 | 77 | [entity.name, graph.add_nodes(label)] 78 | end] 79 | end 80 | 81 | # Add the edges (foreign keys) to a GraphViz object 82 | def add_graph_edges(graph, nodes) 83 | @entities.each_value do |entity| 84 | entity.foreign_keys.each_value do |key| 85 | graph.add_edges nodes[entity.name], nodes[key.entity.name] 86 | end 87 | end 88 | end 89 | 90 | # Find a field in an entity where the entity may be a string or an object 91 | def find_field_chain(field) 92 | # Do a foreign key lookup 93 | field = field.dup 94 | key_field = @entities[field[0]][field[1]] 95 | field[0..1] = key_field ? 
key_field.entity.name : field[1] 96 | find_field field 97 | end 98 | 99 | # Find a field in an entity where the entity may be a string or an object 100 | def find_entity_field(entity, field) 101 | entity = entities[entity] if entity.is_a?(String) 102 | entity[field] 103 | end 104 | end 105 | 106 | # Raised when looking up an entity in the workload which does not exist 107 | class EntityNotFound < StandardError 108 | end 109 | 110 | # Raised when attempting to add an invalid entity to a workload 111 | class InvalidEntity < StandardError 112 | end 113 | end 114 | -------------------------------------------------------------------------------- /workloads/rubis_synthetic.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | NoSE::Workload.new do 4 | Model 'rubis' 5 | 6 | # Define queries and their relative weights 7 | 8 | Q 'SELECT comments.date, comments.comment FROM comments.item ' \ 9 | 'WHERE item.id = ? ORDER BY comments.date' 10 | # 1. SELECT item_id as E_item, date as O_date, from_user_id, date, comment 11 | # FROM comments; 12 | # I2227598752 13 | 14 | Q 'SELECT users.id, users.nickname, users.rating FROM users.region ' \ 15 | 'WHERE region.id = ? ORDER BY users.rating LIMIT 50' 16 | # 2. SELECT region as E_region, rating as O_rating, id, nickname, rating 17 | # FROM users; 18 | # I1083340549 19 | 20 | Q 'SELECT items.id, items.name, items.description, items.max_bid ' \ 21 | 'FROM items.seller.region WHERE region.id = ? LIMIT 50' 22 | # 3. SELECT region as E_region, items.id, name, description, max_bid FROM 23 | # items join users on items.seller=users.id WHERE items.seller.region; 24 | # I4186334592 25 | 26 | Q 'SELECT comments.date, comments.comment FROM ' \ 27 | 'comments.item.seller.region WHERE item.quantity = ? AND region.id = ? ' \ 28 | 'LIMIT 50' 29 | # 4. 
SELECT category AS E_category, region as E_region, from_user_id, date, 30 | # comment FROM comments join items on comments.item_id=items.id join 31 | # users on items.seller=users.id; 32 | # I3254083673 33 | 34 | Q 'SELECT bids.bid, bids.date FROM bids.item.seller.region WHERE ' \ 35 | 'region.id = ? AND item.quantity = ? AND ' \ 36 | 'item.end_date < "2015-06-11T14:00:00-04:00"' 37 | # 5. SELECT region as E_region, category as E_category, 38 | # end_date as O_end_date, bids.id as O_id, bid, date FROM bids join 39 | # items on bids.item_id=items.id join users on items.seller=users.id 40 | # I1184534160 41 | 42 | Q 'SELECT comments.comment, comments.date FROM comments.item.seller ' \ 43 | 'WHERE seller.id = ?' 44 | # 6. SELECT seller AS E_seller, comments.id AS O_id, from_user_id, comment, 45 | # date FROM comments join items on comments.item_id=items.id; 46 | # I638854407 47 | 48 | Q 'SELECT items.id, items.name FROM items.category WHERE category.id = ? ' \ 49 | 'LIMIT 1000' 50 | # 7. SELECT category as E_category, id, name FROM items; 51 | # I3358488952 52 | 53 | Q 'SELECT comments.comment FROM comments.item.category ' \ 54 | 'WHERE category.id = ? ORDER BY comments.date LIMIT 100' 55 | # 8. SELECT category AS E_category, date AS O_date, comment FROM comments 56 | # join items ON comments.item_id=items.id; 57 | # I127205473 58 | 59 | # RegisterItem 60 | Q 'INSERT INTO items SET id=?, name=?, description=?, initial_price=?, ' \ 61 | 'quantity=?, reserve_price=?, buy_now=?, nb_of_bids=0, max_bid=0, ' \ 62 | 'start_date=?, end_date=?' 63 | Q 'CONNECT items(?) TO category(?)' 64 | Q 'CONNECT items(?) TO seller(?)' 65 | 66 | # RegisterUser 67 | Q 'INSERT INTO users SET id=?, firstname=?, lastname=?, nickname=?, ' \ 68 | 'password=?, email=?, rating=0, balance=0, creation_date=?' 69 | Q 'CONNECT users(?) TO region(?)' 70 | 71 | # StoreBid 72 | Q 'INSERT INTO bids SET id=?, qty=?, bid=?, date=?' 73 | Q 'CONNECT bids(?) TO item(?)' 74 | Q 'CONNECT bids(?) 
TO user(?)' 75 | Q 'SELECT items.nb_of_bids FROM items WHERE items.id=?' 76 | Q 'UPDATE items SET nb_of_bids=? WHERE items.id=?' 77 | 78 | # StoreComment 79 | Q 'UPDATE users SET rating=? WHERE users.id=?' 80 | Q 'INSERT INTO comments SET id=?, rating=?, date=?, comment=?' 81 | # Q 'CONNECT comments(?) TO to_user(?)' 82 | Q 'CONNECT comments(?) TO from_user(?)' 83 | Q 'CONNECT comments(?) TO item(?)' 84 | end 85 | -------------------------------------------------------------------------------- /spec/query_graph_spec.rb: -------------------------------------------------------------------------------- 1 | module NoSE 2 | module QueryGraph 3 | describe QueryGraph do 4 | include_context 'entities' 5 | 6 | context 'when comparing with an isomorphic graph' do 7 | let(:graph) { Graph.new [], [user, tweet, user['Tweets']] } 8 | let(:graph2) { Graph.new [], [tweet, user, tweet['User']] } 9 | 10 | it 'compares equal with an isomorphic graph' do 11 | expect(graph).to eq graph2 12 | end 13 | 14 | it 'has the same unique edges' do 15 | expect(graph.unique_edges).to eq graph2.unique_edges 16 | end 17 | end 18 | 19 | context 'when producing subgraphs' do 20 | it 'produces only itself for a single entity graph' do 21 | graph = Graph.new([user]) 22 | expect(graph.subgraphs).to match_array [graph] 23 | end 24 | 25 | it 'produces single node graphs when splitting with two nodes' do 26 | graph = Graph.new [], [user, tweet, user['Tweets']] 27 | subgraphs = graph.subgraphs.to_a 28 | expect(subgraphs).to match_array [ 29 | graph, 30 | Graph.new([user]), 31 | Graph.new([tweet]) 32 | ] 33 | end 34 | 35 | it 'produces all paths when splitting a graph' do 36 | graph = Graph.new [], [user, tweet, user['Tweets']], 37 | [tweet, link, tweet['Link']] 38 | subgraphs = graph.subgraphs.to_a 39 | expect(subgraphs).to match_array [ 40 | graph, 41 | Graph.new([user]), 42 | Graph.new([tweet]), 43 | Graph.new([link]), 44 | Graph.new([], [user, tweet, user['Tweets']]), 45 | Graph.new([], [tweet, link, 
tweet['Link']]) 46 | ] 47 | end 48 | end 49 | 50 | context 'when converting to a path' do 51 | it 'can convert single node graphs' do 52 | graph = Graph.new [user] 53 | expect(graph.to_path(user)).to eq KeyPath.new([user.id_field]) 54 | end 55 | 56 | it 'can convert longer paths' do 57 | graph = Graph.new [], [user, tweet, user['Tweets']] 58 | expect(graph.to_path(user)).to eq KeyPath.new([user.id_field, 59 | user['Tweets']]) 60 | end 61 | end 62 | 63 | context 'when converting from a path' do 64 | it 'converts empty paths to empty graphs' do 65 | path = KeyPath.new 66 | expect(Graph.from_path(path)).to eq Graph.new 67 | end 68 | 69 | it 'converts single entity paths' do 70 | path = KeyPath.new [user.id_field] 71 | expect(Graph.from_path(path)).to eq Graph.new([user]) 72 | end 73 | 74 | it 'converts path with multiple entities' do 75 | path = KeyPath.new [user.id_field, user['Tweets']] 76 | expect(Graph.from_path(path)).to eq \ 77 | Graph.new([], [user, tweet, user['Tweets']]) 78 | end 79 | end 80 | 81 | it 'can find the longest path through a graph' do 82 | graph = Graph.new [], [user, tweet, user['Tweets']], 83 | [tweet, link, tweet['Link']] 84 | expect(graph.longest_path.entities).to eq([user, tweet, link]) 85 | end 86 | 87 | it 'can split graphs at a given entity' do 88 | graph = Graph.new [], [user, tweet, user['Tweets']], 89 | [tweet, link, tweet['Link']] 90 | expect(graph.split(tweet)).to eq [ 91 | Graph.new([user]), 92 | Graph.new([link]) 93 | ] 94 | end 95 | end 96 | end 97 | end 98 | -------------------------------------------------------------------------------- /spec/workload_spec.rb: -------------------------------------------------------------------------------- 1 | module NoSE 2 | describe Workload do 3 | subject(:workload) { Workload.new } 4 | let(:entity) { Entity.new('Foo') << field } 5 | let(:field) { Fields::IDField.new('Id') } 6 | 7 | before(:each) do 8 | workload.model.add_entity entity 9 | end 10 | 11 | context 'when adding items' do 12 | it 
'holds entities' do 13 | expect(workload.model.entities).to have(1).item 14 | expect(workload.model['Foo']).to be entity 15 | end 16 | 17 | it 'automatically parses queries' do 18 | valid_query = Statement.parse 'SELECT Foo.Id FROM Foo ' \ 19 | 'WHERE Foo.Id = ?', workload.model 20 | workload.add_statement valid_query 21 | 22 | expect(workload.queries).to have(1).item 23 | expect(workload.queries.first).to be_a Query 24 | end 25 | 26 | it 'only accepts entities and queries' do 27 | expect { workload << 3 }.to raise_error TypeError 28 | end 29 | end 30 | 31 | it 'can find statements with a given tag' do 32 | query = Statement.parse 'SELECT Foo.Id FROM Foo WHERE Foo.Id = ? -- foo', 33 | workload.model 34 | workload.add_statement query 35 | 36 | expect(workload.find_with_tag 'foo').to eq(query) 37 | end 38 | 39 | it 'can find fields on entities from queries' do 40 | expect(workload.model.find_field %w(Foo Id)).to be field 41 | end 42 | 43 | it 'can find fields which traverse foreign keys' do 44 | other_entity = Entity.new 'Bar' 45 | other_field = Fields::IDField.new 'Quux' 46 | other_entity << other_field 47 | workload.model.add_entity other_entity 48 | 49 | entity << Fields::ForeignKeyField.new('Baz', other_entity) 50 | 51 | expect(workload.model.find_field %w(Foo Baz Quux)).to be other_field 52 | end 53 | 54 | it 'raises an exception for nonexistent entities' do 55 | expect { workload.model['Bar'] }.to raise_error EntityNotFound 56 | end 57 | 58 | it 'can produce an image of itself' do 59 | expect_any_instance_of(GraphViz).to \ 60 | receive(:output).with(png: '/tmp/rubis.png') 61 | workload.model.output :png, '/tmp/rubis.png' 62 | end 63 | 64 | it 'can remove updates' do 65 | entity << Fields::IntegerField.new('Bar') 66 | 67 | valid_query = Statement.parse 'SELECT Foo.Id FROM Foo WHERE Foo.Id = ?', 68 | workload.model 69 | workload.add_statement valid_query 70 | update = Statement.parse 'UPDATE Foo SET Bar = ? 
WHERE Foo.Id = ?', 71 | workload.model 72 | workload.add_statement update 73 | 74 | workload.remove_updates 75 | expect(workload.queries).not_to be_empty 76 | expect(workload.updates).to be_empty 77 | end 78 | 79 | it 'can group statements' do 80 | query1 = 'SELECT Foo.Bar FROM Foo WHERE Foo.Id = ?' 81 | query2 = 'SELECT Foo.Baz FROM Foo WHERE Foo.Id = ?' 82 | 83 | workload = Workload.new do 84 | Entity 'Foo' do 85 | ID 'Id' 86 | String 'Bar' 87 | String 'Baz' 88 | end 89 | 90 | Group 'Test1', 0.5 do 91 | Q query1 92 | end 93 | 94 | Group 'Test2', 0.5 do 95 | Q query2 96 | end 97 | end 98 | 99 | expect(workload.statement_weights).to eq( 100 | Statement.parse(query1, workload.model) => 0.5, 101 | Statement.parse(query2, workload.model) => 0.5 102 | ) 103 | end 104 | end 105 | end 106 | -------------------------------------------------------------------------------- /lib/nose/loader/sql.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'sequel' 4 | 5 | module NoSE 6 | module Loader 7 | # Load data from a MySQL database into a backend 8 | class SqlLoader < LoaderBase 9 | def initialize(workload = nil, backend = nil) 10 | @logger = Logging.logger['nose::loader::sqlloader'] 11 | 12 | @workload = workload 13 | @backend = backend 14 | end 15 | 16 | # Load a generated set of indexes with data from MySQL 17 | def load(indexes, config, show_progress = false, limit = nil, 18 | skip_existing = true) 19 | indexes.map!(&:to_id_graph).uniq! 
if @backend.by_id_graph 20 | 21 | # XXX Assuming backend is thread-safe 22 | Parallel.each(indexes, in_threads: 2) do |index| 23 | client = new_client config 24 | 25 | # Skip this index if it's not empty 26 | if skip_existing && !@backend.index_empty?(index) 27 | @logger.info "Skipping index #{index.inspect}" if show_progress 28 | next 29 | end 30 | @logger.info index.inspect if show_progress 31 | 32 | query = index_sql client, index, limit 33 | 34 | result_chunk = [] 35 | query.each do |result| 36 | result = Hash[result.map { |k, v| [k.to_s, v] }] 37 | result_chunk.push result 38 | if result_chunk.length >= 100 39 | @backend.index_insert_chunk index, result_chunk 40 | result_chunk = [] 41 | end 42 | end 43 | @backend.index_insert_chunk index, result_chunk \ 44 | unless result_chunk.empty? 45 | end 46 | end 47 | 48 | private 49 | 50 | # Create a new client from the given configuration 51 | def new_client(config) 52 | Sequel.connect config[:uri] 53 | end 54 | 55 | # Get all the fields selected by this index 56 | # @return [Array] 57 | def index_sql_select(index) 58 | fields = index.hash_fields.to_a + index.order_fields + index.extra.to_a 59 | 60 | fields.map do |field| 61 | "#{field.parent.name}__#{field.name}___" \ 62 | "#{field.parent.name}_#{field.name}".to_sym 63 | end 64 | end 65 | 66 | # Get the list of tables along with the join condition 67 | # for a query to fetch index data 68 | def index_sql_tables(index) 69 | # Create JOIN statements 70 | tables = index.graph.entities.map { |entity| entity.name.to_sym } 71 | return [tables, []] if index.graph.size == 1 72 | 73 | keys = index.path.each_cons(2).map do |_prev_key, key| 74 | is_many = key.relationship == :many 75 | key = key.reverse if is_many 76 | fields = [key.entity.id_field.name.to_sym, key.name.to_sym] 77 | fields = fields.reverse if is_many 78 | Hash[[fields]] 79 | end 80 | 81 | [tables, keys] 82 | end 83 | 84 | # Construct a SQL statement to fetch the data to populate this index 85 | def 
index_sql(client, index, limit = nil) 86 | # Get all the necessary fields 87 | select = index_sql_select index 88 | 89 | # Construct the join condition 90 | tables, keys = index_sql_tables index 91 | 92 | query = client[tables.first] 93 | keys.map.with_index do |key, i| 94 | query = query.join tables[i + 1], key 95 | end 96 | 97 | query = query.select(*select) 98 | query = query.limit limit unless limit.nil? 99 | 100 | @logger.debug { query.sql } 101 | query 102 | end 103 | end 104 | end 105 | end 106 | -------------------------------------------------------------------------------- /experiments/rubis/README.md: -------------------------------------------------------------------------------- 1 | # RUBiS Experiments 2 | 3 | This directory contains instructions and various scripts for running a performance analysis on different RUBiS schemas. 4 | Currently these experiments are run against the Cassandra backend using the MySQL loader to populate the column families. 5 | You will need to configure a Cassandra cluster with a keyspace named `rubis` and a MySQL cluster with a database named `rubis`. 6 | Once this is done, initialize `nose.yml` in the root of the repository with the configuration below. 7 | Note that you will need to edit the configuration with the correct connection information for Cassandra and MySQL. 8 | 9 | ```yaml 10 | backend: 11 | name: cassandra 12 | hosts: 13 | - 10.0.0.2 14 | port: 9042 15 | keyspace: rubis 16 | cost_model: 17 | name: cassandra 18 | 19 | index_cost: 0.0078395645 20 | partition_cost: 0.0013692786 21 | row_cost: 1.17093638386496e-005 22 | delete_cost: 0.0013287903 23 | insert_cost: 0.013329108 24 | loader: 25 | name: mysql 26 | directory: /tmp/csv 27 | host: 127.0.0.1 28 | database: rubis 29 | username: root 30 | password: root 31 | ``` 32 | 33 | First create the RUBiS schema in MySQL. 
 34 | 35 | mysql -uroot -proot -Drubis < rubis-schema.sql 36 | mysql -uroot -proot -Drubis < rubis-update.sql 37 | 38 | To populate the MySQL database with some test data, we use the [mysql-faker](https://www.npmjs.com/package/mysql-faker) Node.js package. 39 | This package does not use the MySQL configuration in `nose.yml`, so it may need to be manually edited. 40 | Next, install mysql-faker and populate the database. 41 | 42 | npm install 43 | node fake.js 44 | 45 | Once this script finishes, we are ready to load data in Cassandra. 46 | At this point, you can use either one of the manually-defined schemas, `rubis_baseline` or `rubis_expert`, or use a JSON results file output by `nose search`. 47 | We refer to the choice of schema to use as `SCHEMA` for the remainder of the instructions. 48 | Now we can create the Cassandra column families and load the data from MySQL. 49 | This step may take several hours to complete. 50 | 51 | bundle exec nose create SCHEMA 52 | bundle exec nose load SCHEMA 53 | 54 | Since the experiments are destructive (i.e. they modify data in the database), it's a good idea to [take a snapshot](https://docs.datastax.com/en/cassandra/2.0/cassandra/operations/ops_backup_restore_c.html) before continuing. 55 | Finally, experiments can be run using `nose execute` for a manually-defined schema or `nose benchmark` for a schema generated with `nose search`. 56 | 57 | ## Running multiple experiments 58 | 59 | As mentioned above, experiments are destructive since updates modify the populated data. 60 | The easiest way to run multiple experiments is to take a snapshot after populating the data but before running the first experiment. 61 | 62 | nodetool snapshot rubis -t SNAPSHOT_NAME 63 | 64 | The script below will restore the snapshot, at which point you will be ready to run another experiment. 65 | Be sure to replace `SCHEMA` and `SNAPSHOT_NAME` with the appropriate values.
66 | 67 | ```bash 68 | # Drop and recreate all tables 69 | for cf in $(cqlsh 10.0.0.2 -k rubis -f <(echo 'DESCRIBE COLUMNFAMILIES') | tr ' ' '\n' | grep -Ev '^$'); do 70 | cqlsh 10.0.0.2 -k rubis -f <(echo "DROP TABLE $cf;") 71 | done 72 | 73 | bundle exec nose create SCHEMA 74 | 75 | # Restore snapshot 76 | for ssdir in $(find /ssd1/mmior/cassandra/data/rubis_big/ -wholename '*/snapshots/SNAPSHOT_NAME' -type d); do 77 | for file in $(find "$ssdir/" -type f | rev | cut -d/ -f1 | rev); do 78 | sudo ln "$ssdir/$file" "$ssdir/../../$file" 79 | done 80 | done 81 | 82 | # Refresh column families 83 | for cf in $(cqlsh 10.0.0.2 -k rubis -f <(echo 'DESCRIBE COLUMNFAMILIES') | tr ' ' '\n' | grep -Ev '^$'); do 84 | nodetool refresh rubis $cf 85 | done 86 | ``` 87 | -------------------------------------------------------------------------------- /lib/nose/loader/csv.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'formatador' 4 | require 'smarter_csv' 5 | require 'zlib' 6 | 7 | module NoSE 8 | module Loader 9 | # Load data into an index from a set of CSV files 10 | class CsvLoader < LoaderBase 11 | def initialize(workload = nil, backend = nil) 12 | super 13 | 14 | @logger = Logging.logger['nose::loader::csvloader'] 15 | end 16 | 17 | # Load data for all the indexes 18 | def load(indexes, config, show_progress = false, limit = nil, 19 | skip_existing = true) 20 | indexes.map!(&:to_id_graph).uniq! 
if @backend.by_id_graph 21 | 22 | simple_indexes = find_simple_indexes indexes, skip_existing 23 | simple_indexes.each do |entity, simple_index_list| 24 | filename = File.join config[:directory], "#{entity.name}.csv" 25 | total_rows = (limit || 0) - 1 # account for header row 26 | File.open(filename) { |file| file.each_line { total_rows += 1 } } 27 | 28 | progress = initialize_progress entity, simple_index_list, 29 | total_rows if show_progress 30 | load_file_indexes filename, entity, simple_index_list, progress 31 | end 32 | end 33 | 34 | private 35 | 36 | # Find the simple indexes we should populate 37 | # @return [Hash] 38 | def find_simple_indexes(indexes, skip_existing) 39 | simple_indexes = indexes.select do |index| 40 | index.graph.size == 1 && 41 | !(skip_existing && !@backend.index_empty?(index)) 42 | end 43 | 44 | simple_indexes.group_by do |index| 45 | index.hash_fields.first.parent 46 | end 47 | end 48 | 49 | # Initialize a progress bar to reporting loading results 50 | # @return [Formatador::ProgressBar] 51 | def initialize_progress(entity, simple_index_list, total_rows) 52 | @logger.info "Loading simple indexes for #{entity.name}" 53 | @logger.info simple_index_list.map(&:key).join(', ') 54 | 55 | Formatador.new.redisplay_progressbar 0, total_rows 56 | Formatador::ProgressBar.new total_rows, started_at: Time.now.utc 57 | end 58 | 59 | # Load all indexes for a given file 60 | # @return [void] 61 | def load_file_indexes(filename, entity, simple_index_list, progress) 62 | SmarterCSV.process(filename, 63 | downcase_header: false, 64 | chunk_size: 1000, 65 | convert_values_to_numeric: false) do |chunk| 66 | Parallel.each(chunk.each_slice(100), 67 | finish: (lambda do |_, _, _| 68 | next if progress.nil? 
69 | inc = [progress.total - progress.current, 100].min 70 | progress.increment inc 71 | end)) do |minichunk| 72 | load_simple_chunk minichunk, entity, simple_index_list 73 | end 74 | end 75 | end 76 | 77 | # Load a chunk of data from a simple entity index 78 | # @return [void] 79 | def load_simple_chunk(chunk, entity, indexes) 80 | # Prefix all hash keys with the entity name and convert values 81 | chunk.map! do |row| 82 | index_row = {} 83 | row.each_key do |key| 84 | field_class = entity[key.to_s].class 85 | value = field_class.value_from_string row[key] 86 | index_row["#{entity.name}_#{key}"] = value 87 | end 88 | 89 | index_row 90 | end 91 | 92 | # Insert the batch into the index 93 | indexes.each do |index| 94 | @backend.index_insert_chunk index, chunk 95 | end 96 | end 97 | end 98 | end 99 | end 100 | -------------------------------------------------------------------------------- /lib/nose/model/entity.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | # A representation of an object in the conceptual data model 5 | class Entity 6 | attr_reader :fields 7 | attr_reader :foreign_keys, :name 8 | attr_accessor :count 9 | 10 | def initialize(name, &block) 11 | @name = name 12 | @fields = {} 13 | @foreign_keys = {} 14 | @count = 1 15 | 16 | # Precompute the hash 17 | hash 18 | 19 | # Apply the DSL 20 | EntityDSL.new(self).instance_eval(&block) if block_given? 21 | end 22 | 23 | # :nocov: 24 | # @return [String] 25 | def to_color 26 | "[light_blue]#{@name}[/] [#{fields.each_key.map(&:to_color).join ', '}]" 27 | end 28 | # :nocov: 29 | 30 | # Compare by name 31 | # @return [Boolean] 32 | def ==(other) 33 | @name == other.instance_variable_get(:@name) 34 | end 35 | alias eql? 
== 36 | 37 | # The hash is based on the name of the entity and its fields 38 | # @return [Integer] 39 | def hash 40 | @hash ||= @name.hash 41 | end 42 | 43 | # Get the key fields for the entity 44 | # @return [Fields::IDField>] 45 | def id_field 46 | fields.each_value.find(&:primary_key?) 47 | end 48 | 49 | # Adds a {Fields::Field} to the entity 50 | # @return [self] the current entity to allow chaining 51 | def <<(field, freeze: true) 52 | if field.is_a? Fields::ForeignKeyField 53 | @foreign_keys[field.name] = field 54 | else 55 | @fields[field.name] = field 56 | end 57 | 58 | field.instance_variable_set(:@parent, self) 59 | field.hash 60 | field.freeze if freeze 61 | 62 | self 63 | end 64 | 65 | # Shortcut for {#count=} 66 | # @return [Entity] 67 | def *(other) 68 | fail TypeError, 'count must be an integer' unless other.is_a? Integer 69 | @count = other 70 | 71 | self 72 | end 73 | 74 | # Get the field on the entity with the given name 75 | # @return [Field] 76 | def [](field_name) 77 | field = @fields[field_name] || @foreign_keys[field_name] 78 | fail FieldNotFound if field.nil? 79 | field 80 | end 81 | 82 | # Return true if the entity contains a field with the given name 83 | def field?(field) 84 | @fields.key? 
field 85 | end 86 | 87 | # Generate a hash with random values for fields in the entity 88 | # @return [Hash] 89 | def random_entity(prefix_entity = true) 90 | Hash[@fields.map do |name, field| 91 | key = name 92 | key = "#{@name}_#{name}" if prefix_entity 93 | [key, field.random_value] 94 | end] 95 | end 96 | end 97 | 98 | # A helper class for DSL creation to avoid messing with {Entity} 99 | class EntityDSL 100 | def initialize(entity) 101 | @entity = entity 102 | end 103 | 104 | # rubocop:disable MethodName 105 | 106 | # Specify a list of field names for the primary key 107 | def PrimaryKey(*names) 108 | # Unset the old keys and set new ones, 109 | # we dup because the fields are frozen 110 | @entity.fields.each_value do |field| 111 | next unless field.primary_key? 112 | field = field.dup 113 | field.primary_key = false 114 | @entity.fields[field.name] = field 115 | field.freeze 116 | end 117 | 118 | names.each do |name| 119 | field = @entity[name].dup 120 | field.primary_key = true 121 | @entity.fields[name] = field 122 | field.freeze 123 | end 124 | end 125 | 126 | # rubocop:enable MethodName 127 | 128 | def etc(size = 1) 129 | @entity << Fields::HashField.new('**', size) 130 | end 131 | end 132 | 133 | # Raised when looking up a field on an entity which does not exist 134 | class FieldNotFound < StandardError 135 | end 136 | end 137 | -------------------------------------------------------------------------------- /spec/support/backend.rb: -------------------------------------------------------------------------------- 1 | module NoSE 2 | RSpec.shared_examples 'backend processing' do |tag| 3 | let(:plans) { Plans::ExecutionPlans.load 'ebay' } 4 | 5 | # Insert a new entity for testing purposes 6 | def direct_insert(index_key, values) 7 | backend.indexes_ddl(true, true, true).to_a 8 | 9 | index = plans.schema.indexes[index_key] 10 | index = index.to_id_graph if backend.by_id_graph 11 | inserted_ids = backend.index_insert_chunk index, [values] 12 | 
inserted_ids.first 13 | end 14 | 15 | # Get a record from a particular index 16 | # @return [Hash] 17 | def direct_query(index_key) 18 | index = plans.schema.indexes[index_key] 19 | index = index.to_id_graph if backend.by_id_graph 20 | 21 | backend.index_sample(index, 1).first 22 | end 23 | 24 | # Execute an insert statement against the backend 25 | # @return [void] 26 | def insert(group, values) 27 | modify group, values, {} 28 | end 29 | 30 | # Execute an update statement against the backend 31 | # @return [void] 32 | def update(group, settings, conditions) 33 | modify group, settings, conditions 34 | end 35 | 36 | # Execute a modification statement with 37 | # the given settings and conditions 38 | # @return [void] 39 | def modify(group, settings, conditions) 40 | backend.indexes_ddl(true, true, true).to_a 41 | 42 | update_plans = plans.groups[group] 43 | 44 | update_plans.each do |plan| 45 | # Decide which fields should be set 46 | plan_settings = settings.map do |field_id, value| 47 | field = plan.index.all_fields.find { |f| f.id == field_id } 48 | FieldSetting.new field, value 49 | end 50 | 51 | # Generate any missing IDs 52 | (plan.index.hash_fields + plan.index.order_fields).each do |field| 53 | setting = plan_settings.find { |s| s.field == field } 54 | next unless setting.nil? 55 | 56 | plan_settings << FieldSetting.new(field, backend.generate_id) \ 57 | if field.is_a? 
Fields::IDField 58 | end 59 | 60 | # Build the list of conditions 61 | plan_conditions = Hash[conditions.map do |field_id, value| 62 | field = plan.index.all_fields.find { |f| f.id == field_id } 63 | [field_id, Condition.new(field, :'=', value)] 64 | end] 65 | 66 | prepared = backend.prepare_update nil, [plan] 67 | prepared.each { |p| p.execute plan_settings, plan_conditions } 68 | end 69 | end 70 | 71 | # Execute a query against the backend and return the results 72 | # @return [Hash] 73 | def query(group, values) 74 | plan = plans.groups[group].first 75 | prepared = backend.prepare_query nil, plan.select_fields, plan.params, 76 | [plan.steps] 77 | 78 | prepared.execute Hash[values.map do |k, v| 79 | condition = plan.params[k] 80 | condition.instance_variable_set :@value, v 81 | [k, condition] 82 | end] 83 | end 84 | 85 | it 'can query for inserted entities', tag do 86 | id = direct_insert 'items_by_id', 'items_Title' => 'Foo', 87 | 'items_Desc' => 'A thing' 88 | id = id.first if id.is_a? Array 89 | 90 | result = query 'GetItem', 'items_ItemID' => id 91 | expect(result).to have(1).item 92 | expect(result.first['items_Title']).to eq('Foo') 93 | end 94 | 95 | it 'can insert new entities', tag do 96 | insert 'AddItem', 'items_Title' => 'Foo', 'items_Desc' => 'A thing' 97 | 98 | result = direct_query 'items_by_id' 99 | expect(result).to include 'items_Title' => 'Foo' 100 | end 101 | 102 | it 'can update entities', tag do 103 | id = direct_insert 'items_by_id', 'items_Title' => 'Foo', 104 | 'items_Desc' => 'A thing' 105 | id = id.first if id.is_a? 
Array 106 | 107 | update 'UpdateItemTitle', 108 | { 'items_Title' => 'Bar' }, 109 | 'items_ItemID' => id 110 | 111 | result = direct_query 'items_by_id' 112 | expect(result).to include 'items_Title' => 'Bar' 113 | end 114 | end 115 | end 116 | -------------------------------------------------------------------------------- /spec/enumerator_spec.rb: -------------------------------------------------------------------------------- 1 | module NoSE 2 | describe IndexEnumerator do 3 | include_context 'entities' 4 | 5 | subject(:enum) { IndexEnumerator.new workload } 6 | 7 | it 'produces a simple index for a filter' do 8 | query = Statement.parse 'SELECT User.Username FROM User ' \ 9 | 'WHERE User.City = ?', workload.model 10 | indexes = enum.indexes_for_query query 11 | 12 | expect(indexes.to_a).to include \ 13 | Index.new [user['City']], [user['UserId']], [user['Username']], 14 | QueryGraph::Graph.from_path([user.id_field]) 15 | end 16 | 17 | it 'produces a simple index for a foreign key join' do 18 | query = Statement.parse 'SELECT Tweet.Body FROM Tweet.User ' \ 19 | 'WHERE User.City = ?', workload.model 20 | indexes = enum.indexes_for_query query 21 | 22 | expect(indexes).to include \ 23 | Index.new [user['City']], [user['UserId'], tweet['TweetId']], 24 | [tweet['Body']], 25 | QueryGraph::Graph.from_path([user.id_field, 26 | user['Tweets']]) 27 | end 28 | 29 | it 'produces an index for intermediate query steps' do 30 | query = Statement.parse 'SELECT Link.URL FROM Link.Tweets.User ' \ 31 | 'WHERE User.Username = ?', workload.model 32 | indexes = enum.indexes_for_query query 33 | expect(indexes).to include \ 34 | Index.new [user['UserId']], [tweet['TweetId']], [], 35 | QueryGraph::Graph.from_path([tweet.id_field, 36 | tweet['User']]) 37 | end 38 | 39 | it 'produces a simple index for a filter within a workload' do 40 | query = Statement.parse 'SELECT User.Username FROM User ' \ 41 | 'WHERE User.City = ?', workload.model 42 | workload.add_statement query 43 | indexes = 
enum.indexes_for_workload 44 | 45 | expect(indexes.to_a).to include \ 46 | Index.new [user['City']], [user['UserId']], [user['Username']], 47 | QueryGraph::Graph.from_path([user.id_field]) 48 | end 49 | 50 | it 'does not produce empty indexes' do 51 | query = Statement.parse 'SELECT Tweet.Body FROM Tweet.User ' \ 52 | 'WHERE User.City = ?', workload.model 53 | workload.add_statement query 54 | indexes = enum.indexes_for_workload 55 | expect(indexes).to all(satisfy do |index| 56 | !index.order_fields.empty? || !index.extra.empty? 57 | end) 58 | end 59 | 60 | it 'includes no indexes for updates if nothing is updated' do 61 | # Use a fresh workload for this test 62 | model = workload.model 63 | workload = Workload.new model 64 | enum = IndexEnumerator.new workload 65 | update = Statement.parse 'UPDATE User SET Username = ? ' \ 66 | 'WHERE User.City = ?', model 67 | workload.add_statement update 68 | indexes = enum.indexes_for_workload 69 | 70 | expect(indexes).to be_empty 71 | end 72 | 73 | it 'includes indexes enumerated from queries generated from updates' do 74 | # Use a fresh workload for this test 75 | model = workload.model 76 | workload = Workload.new model 77 | enum = IndexEnumerator.new workload 78 | 79 | update = Statement.parse 'UPDATE User SET Username = ? 
' \ 80 | 'WHERE User.City = ?', model 81 | workload.add_statement update 82 | 83 | query = Statement.parse 'SELECT Tweet.Body FROM Tweet.User ' \ 84 | 'WHERE User.Username = ?', workload.model 85 | workload.add_statement query 86 | 87 | indexes = enum.indexes_for_workload 88 | 89 | expect(indexes.to_a).to include \ 90 | Index.new [user['City']], [user['UserId']], [], 91 | QueryGraph::Graph.from_path([user.id_field]) 92 | 93 | expect(indexes.to_a).to include \ 94 | Index.new [user['UserId']], [tweet['TweetId']], 95 | [tweet['Body']], 96 | QueryGraph::Graph.from_path([user.id_field, 97 | user['Tweets']]) 98 | end 99 | end 100 | end 101 | -------------------------------------------------------------------------------- /lib/nose/plans.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | # Statement planning and abstract models of execution steps 5 | module Plans 6 | # A single step in a statement plan 7 | class PlanStep 8 | include Supertype 9 | 10 | attr_accessor :state, :parent 11 | attr_reader :children, :cost, :fields 12 | 13 | def initialize 14 | @children = Set.new 15 | @parent = nil 16 | @fields = Set.new 17 | end 18 | 19 | # :nocov: 20 | def to_color 21 | # Split on capital letters and remove the last two parts (PlanStep) 22 | self.class.name.split('::').last.split(/(?=[A-Z])/)[0..-3] \ 23 | .map(&:downcase).join(' ').capitalize 24 | end 25 | # :nocov: 26 | 27 | # Set the children of the current plan step 28 | # @return [void] 29 | def children=(children) 30 | @children = children.to_set 31 | 32 | # Track the parent step of each step 33 | children.each do |child| 34 | child.instance_variable_set(:@parent, self) 35 | fields = child.instance_variable_get(:@fields) + self.fields 36 | child.instance_variable_set(:@fields, fields) 37 | end 38 | end 39 | 40 | # Mark the fields in this index as fetched 41 | # @return [void] 42 | def add_fields_from_index(index) 43 | @fields += 
index.all_fields 44 | end 45 | 46 | # Get the list of steps which led us here 47 | # If a cost model is not provided, statement plans using 48 | # this step cannot be evaluated on the basis of cost 49 | # 50 | # (this is to support PlanStep#parent_index which does not need cost) 51 | # @return [QueryPlan] 52 | def parent_steps(cost_model = nil) 53 | steps = nil 54 | 55 | if @parent.nil? 56 | steps = QueryPlan.new state.query, cost_model 57 | else 58 | steps = @parent.parent_steps cost_model 59 | steps << self 60 | end 61 | 62 | steps 63 | end 64 | 65 | # Find the closest index to this step 66 | # @return [PlanStep] 67 | def parent_index 68 | step = parent_steps.to_a.reverse_each.find do |parent_step| 69 | parent_step.is_a? IndexLookupPlanStep 70 | end 71 | step.index unless step.nil? 72 | end 73 | 74 | # Calculate the cost of executing this step in the plan 75 | # @return [Integer] 76 | def calculate_cost(cost_model) 77 | @cost = cost_model.method((subtype_name + '_cost').to_sym).call self 78 | end 79 | 80 | # Add the Subtype module to all step classes 81 | # @return [void] 82 | def self.inherited(child_class) 83 | child_class.send(:include, Subtype) 84 | end 85 | end 86 | 87 | # A dummy step used to inspect failed statement plans 88 | class PrunedPlanStep < PlanStep 89 | def state 90 | OpenStruct.new answered?: true 91 | end 92 | end 93 | 94 | # The root of a tree of statement plans used as a placeholder 95 | class RootPlanStep < PlanStep 96 | def initialize(state) 97 | super() 98 | @state = state 99 | @cost = 0 100 | end 101 | end 102 | 103 | # This superclass defines what is necessary for manually defined 104 | # and automatically generated plans to provide for execution 105 | class AbstractPlan 106 | attr_reader :group, :name, :weight 107 | 108 | # @abstract Subclasses should produce the steps for executing this query 109 | def steps 110 | fail NotImplementedError 111 | end 112 | 113 | # @abstract Subclasses should produce the fields selected by this plan 114 | 
def select_fields 115 | [] 116 | end 117 | 118 | # @abstract Subclasses should produce the parameters 119 | # necessary for this plan 120 | def params 121 | fail NotImplementedError 122 | end 123 | end 124 | end 125 | end 126 | 127 | require_relative 'plans/filter' 128 | require_relative 'plans/index_lookup' 129 | require_relative 'plans/limit' 130 | require_relative 'plans/sort' 131 | require_relative 'plans/update' 132 | 133 | require_relative 'plans/query_planner' 134 | require_relative 'plans/update_planner' 135 | require_relative 'plans/execution_plan' 136 | -------------------------------------------------------------------------------- /spec/loader/mysql_loader_spec.rb: -------------------------------------------------------------------------------- 1 | require 'nose/loader/mysql' 2 | 3 | module NoSE 4 | module Loader 5 | describe MysqlLoader do 6 | # Mock the client of a loader to return canned responses to SQL queries 7 | def mock_loader(responses, count) 8 | loader = MysqlLoader.new 9 | 10 | allow(loader).to receive(:new_client) do 11 | client = double('client') 12 | expect(client).to receive(:query) do |query| 13 | responses.each_pair.find { |k, _| k == query }.last 14 | end.exactly(count).times 15 | 16 | client 17 | end 18 | 19 | loader 20 | end 21 | 22 | it 'can generate a workload from a database' do 23 | # Simple Array subclass so we can use .each(as: :array) 24 | class EachArray < Array 25 | def each(*_args, **_options) 26 | super() 27 | end 28 | end 29 | 30 | loader = mock_loader( 31 | { 32 | 'SHOW TABLES' => EachArray.new([['Foo']]), 33 | 'SELECT COUNT(*) FROM Foo' => [{ 'COUNT()*)' => 10 }], 34 | 'DESCRIBE Foo' => EachArray.new( 35 | [ 36 | ['FooId', 'int(10) unsigned', 'NO', 'PRI', 'NULL', ''], 37 | ['Bar', 'int(10) unsigned', 'NO', '', 'NULL', ''], 38 | ['Baz', 'float', 'NO', '', 'NULL', ''], 39 | ['Quux', 'datetime', 'NO', '', 'NULL', ''], 40 | ['Corge', 'text', 'NO', '', 'NULL', ''], 41 | ['Garply', 'varchar(10)', 'NO', '', 'NULL', ''] 42 | ] 
43 | ) 44 | }, 3 45 | ) 46 | 47 | workload = loader.workload({}) 48 | expect(workload.model.entities).to have(1).item 49 | 50 | entity = workload.model.entities.values.first 51 | expect(entity.name).to eq 'Foo' 52 | expect(entity.fields).to have(6).items 53 | 54 | expect(entity.fields.values[0]).to be_a Fields::IDField 55 | expect(entity.fields.values[1]).to be_a Fields::IntegerField 56 | expect(entity.fields.values[2]).to be_a Fields::FloatField 57 | expect(entity.fields.values[3]).to be_a Fields::DateField 58 | expect(entity.fields.values[4]).to be_a Fields::StringField 59 | expect(entity.fields.values[5]).to be_a Fields::StringField 60 | end 61 | 62 | context 'when loading into a backend', mysql: true do 63 | let(:workload) { Workload.load 'rubis' } 64 | let(:backend) do 65 | dummy = double('backend') 66 | allow(dummy).to receive(:by_id_graph).and_return(false) 67 | allow(dummy).to receive(:index_empty?).and_return(true) 68 | 69 | dummy 70 | end 71 | 72 | let(:config) do 73 | { 74 | host: '127.0.0.1', 75 | username: 'root', 76 | database: 'nose' 77 | } 78 | end 79 | 80 | let(:loader) do 81 | MysqlLoader.new workload, backend 82 | end 83 | 84 | it 'can load a simple ID index', mysql: true do 85 | user = workload.model['users'] 86 | index = Index.new [user['id']], [], [user['nickname']], 87 | QueryGraph::Graph.from_path([user['id']]) 88 | expect(backend).to receive(:index_insert_chunk).with( 89 | index, [ 90 | { 91 | 'users_id' => 2, 92 | 'users_nickname' => '08ec962a-fc56-40a3-9e07-1fca0520253c' 93 | } 94 | ] 95 | ) 96 | loader.load([index], config, false, 1) 97 | end 98 | 99 | it 'can load an index across multiple entities', mysql: true do 100 | user = workload.model['users'] 101 | item = workload.model['items'] 102 | index = Index.new [user['id']], [item['id']], [item['name']], 103 | QueryGraph::Graph.from_path( 104 | [user['id'], user['items_sold']] 105 | ) 106 | expect(backend).to receive(:index_insert_chunk).with( 107 | index, [ 108 | { 109 | 'users_id' => 
1, 110 | 'items_id' => 45, 111 | 'items_name' => 'repellat alias consequatur' 112 | } 113 | ] 114 | ) 115 | loader.load([index], config, false, 1) 116 | end 117 | end 118 | end 119 | end 120 | end 121 | -------------------------------------------------------------------------------- /spec/search_spec.rb: -------------------------------------------------------------------------------- 1 | module NoSE 2 | module Search 3 | describe Search do 4 | include_context 'dummy cost model' 5 | include_context 'entities' 6 | 7 | it 'raises an exception if there is no space', solver: true do 8 | workload.add_statement 'SELECT Tweet.Body FROM Tweet ' \ 9 | 'WHERE Tweet.TweetId = ?' 10 | indexes = IndexEnumerator.new(workload).indexes_for_workload.to_a 11 | search = Search.new(workload, cost_model) 12 | expect do 13 | search.search_overlap(indexes, 1) 14 | end.to raise_error(NoSolutionException) 15 | end 16 | 17 | it 'produces a materialized view with sufficient space', solver: true do 18 | query = Statement.parse 'SELECT User.UserId FROM User WHERE ' \ 19 | 'User.City = ? 
ORDER BY User.Username', 20 | workload.model 21 | workload.add_statement query 22 | 23 | indexes = IndexEnumerator.new(workload).indexes_for_workload.to_a 24 | result = Search.new(workload, cost_model).search_overlap indexes 25 | indexes = result.indexes 26 | expect(indexes).to include query.materialize_view 27 | end 28 | 29 | it 'can perform multiple lookups on a path segment', solver: true do 30 | query = Statement.parse 'SELECT User.Username FROM User ' \ 31 | 'WHERE User.City = ?', workload.model 32 | workload.add_statement query 33 | 34 | indexes = [ 35 | Index.new([user['City']], [user['UserId']], [], 36 | QueryGraph::Graph.from_path([user.id_field])), 37 | Index.new([user['UserId']], [], [user['Username']], 38 | QueryGraph::Graph.from_path([user.id_field])) 39 | ] 40 | search = Search.new(workload, cost_model) 41 | expect do 42 | search.search_overlap(indexes, indexes.first.size).to_set 43 | end.to raise_error NoSolutionException 44 | end 45 | 46 | it 'does not denormalize heavily updated data', solver: true do 47 | workload.add_statement 'UPDATE User SET Username = ? 
' \ 48 | 'WHERE User.UserId = ?', 0.98 49 | workload.add_statement 'SELECT User.Username FROM User ' \ 50 | 'WHERE User.City = ?', 0.01 51 | workload.add_statement 'SELECT User.Username FROM User ' \ 52 | 'WHERE User.Country = ?', 0.01 53 | 54 | # Enumerate the indexes and select those actually used 55 | indexes = IndexEnumerator.new(workload).indexes_for_workload.to_a 56 | cost_model = Cost::EntityCountCost.new 57 | result = Search.new(workload, cost_model).search_overlap indexes 58 | indexes = result.indexes 59 | 60 | # Get the indexes actually used by the generated plans 61 | planner = Plans::QueryPlanner.new workload, indexes, cost_model 62 | plans = workload.queries.map { |query| planner.min_plan query } 63 | indexes = plans.flat_map(&:indexes).to_set 64 | 65 | expect(indexes).to match_array [ 66 | Index.new([user['Country']], [user['UserId']], [], 67 | QueryGraph::Graph.from_path([user.id_field])), 68 | Index.new([user['City']], [user['UserId']], [], 69 | QueryGraph::Graph.from_path([user.id_field])), 70 | Index.new([user['UserId']], [], [user['Username']], 71 | QueryGraph::Graph.from_path([user.id_field])) 72 | ] 73 | end 74 | 75 | it 'increases the total cost when an update is added' do 76 | query = Statement.parse 'SELECT User.UserId FROM User WHERE ' \ 77 | 'User.City = ? ORDER BY User.Username', workload.model 78 | 79 | workload.add_statement query 80 | indexes = IndexEnumerator.new(workload).indexes_for_workload.to_a 81 | result = Search.new(workload, cost_model).search_overlap indexes 82 | 83 | workload.add_statement 'UPDATE User SET Username = ? 
' \ 84 | 'WHERE User.UserId = ?', 0.98 85 | 86 | indexes_with_update = IndexEnumerator.new(workload).indexes_for_workload.to_a 87 | result_with_update = Search.new(workload, cost_model).search_overlap indexes_with_update 88 | 89 | # total cost should be increased due to additional update statement 90 | expect(result.total_cost).to be < result_with_update.total_cost 91 | end 92 | end 93 | end 94 | end 95 | -------------------------------------------------------------------------------- /spec/util_spec.rb: -------------------------------------------------------------------------------- 1 | require 'stringio' 2 | 3 | describe Enumerable do 4 | it 'can generate all possible prefixes' do 5 | expect([1, 2, 3].prefixes).to match_array [[1], [1, 2], [1, 2, 3]] 6 | end 7 | 8 | it 'generates an empty prefix list when there are no elements' do 9 | expect([].prefixes).to match_array [] 10 | end 11 | 12 | it 'can generate all partitionings of itself' do 13 | expect([1, 2, 3, 4].partitions).to match_array [ 14 | [[1, 2, 3, 4], []], 15 | [[1], [2, 3, 4]], 16 | [[1, 2], [3, 4]], 17 | [[1, 2, 3], [4]]] 18 | end 19 | 20 | it 'can compute the product based on a block' do 21 | expect([-1, 1].sum_by(&:abs)).to eq(2) 22 | end 23 | 24 | it 'can compute the product based on a block' do 25 | expect([-1, 1].product_by(&:abs)).to eq(1) 26 | end 27 | end 28 | 29 | describe Integer do 30 | it 'is finite' do 31 | expect(3.finite?).to be true 32 | end 33 | end 34 | 35 | describe Object do 36 | context 'for objects with a to_color method' do 37 | subject(:obj) do 38 | class Foo 39 | def to_color 40 | '[red]foo[/]' 41 | end 42 | end 43 | 44 | Foo.new 45 | end 46 | 47 | it 'should inspect colored output when stdout is a terminal' do 48 | old_stdout = STDOUT 49 | Object.instance_eval { remove_const 'STDOUT' } 50 | STDOUT = double('stdout', tty?: true, write: nil) 51 | 52 | expect(obj.inspect).to eq "\e[31mfoo\e[0m" 53 | 54 | Object.instance_eval { remove_const 'STDOUT' } 55 | STDOUT = old_stdout 56 
| end 57 | 58 | it 'should inspect uncolored output when stdout is not a terminal' do 59 | old_stdout = STDOUT 60 | Object.instance_eval { remove_const 'STDOUT' } 61 | STDOUT = StringIO.new 62 | 63 | Object.instance_eval { remove_const 'STDOUT' } 64 | STDOUT = old_stdout 65 | end 66 | end 67 | 68 | context 'for objects without a to_color method' do 69 | subject(:obj) do 70 | class Bar 71 | def to_s 72 | 'foo' 73 | end 74 | end 75 | 76 | Bar.new 77 | end 78 | 79 | it 'should use uncolored output' do 80 | expect(obj.to_s).to eq 'foo' 81 | expect(obj.to_color).to eq 'foo' 82 | end 83 | end 84 | 85 | describe 'Subtype' do 86 | subject(:obj) do 87 | class Foo 88 | end 89 | 90 | class BarBazFoo < Foo 91 | include Subtype 92 | end 93 | 94 | BarBazFoo.new 95 | end 96 | 97 | it 'can produce its name in snake case' do 98 | expect(obj.subtype_name).to eq 'bar_baz' 99 | end 100 | 101 | it 'can produce its name in camel case' do 102 | expect(obj.subtype_name(name_case: :camel)).to eq 'BarBaz' 103 | end 104 | end 105 | 106 | describe 'Supertype' do 107 | subject(:cls) do 108 | class Foo 109 | include Supertype 110 | end 111 | 112 | class BarBazFoo < Foo 113 | end 114 | 115 | Foo 116 | end 117 | 118 | it 'can produce a subclass from a name in snake case' do 119 | subclass = cls.subtype_class 'bar_baz' 120 | expect(subclass).to be_a Class 121 | expect(subclass.name).to eq 'BarBazFoo' 122 | end 123 | 124 | it 'can produce a subclass from a name in camel case' do 125 | subclass = cls.subtype_class 'BarBaz' 126 | expect(subclass).to be_a Class 127 | expect(subclass.name).to eq 'BarBazFoo' 128 | end 129 | end 130 | end 131 | 132 | describe Cardinality do 133 | include_context 'entities' 134 | 135 | it 'estimates one for a simple ID lookup' do 136 | cardinality = Cardinality.filter tweet.count, [tweet['TweetId']], nil 137 | 138 | expect(cardinality).to eq(1) 139 | end 140 | 141 | it 'correctly estimates based on field cardinality for equality' do 142 | cardinality = Cardinality.filter 
user.count, [user['City']], nil 143 | 144 | expect(cardinality).to eq(2) 145 | end 146 | 147 | it 'uses a static estimate for range filters' do 148 | cardinality = Cardinality.filter tweet.count, [tweet['Body']], 149 | tweet['Timestamp'] 150 | 151 | expect(cardinality).to eq(20) 152 | end 153 | end 154 | 155 | describe Listing do 156 | let(:superclass) do 157 | class Super 158 | include Listing 159 | end 160 | end 161 | 162 | let(:subclass) do 163 | class Sub < Super 164 | end 165 | 166 | Sub 167 | end 168 | 169 | it 'allows tracking of subclasses' do 170 | expect(superclass.subclasses).to eq({"Sub" => subclass}) 171 | end 172 | end 173 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to NoSE 2 | 3 | This is adapted from the [Node.js contributor guidelines](https://github.com/nodejs/node/blob/master/CONTRIBUTING.md). 4 | 5 | ## Issue Contributions 6 | 7 | When reporting an issue with NoSE, please provide as much context as possible. 8 | At minimum, try running the tests and post the output of any failed tests. 9 | Also be sure to provide the version of the Ruby interpreter you are using. 10 | 11 | ## Code Contributions 12 | 13 | This document will guide you through the contribution process. 14 | 15 | ### Step 1: Fork 16 | 17 | Fork the project [on GitHub](https://github.com/michaelmior/NoSE) and check out your 18 | copy locally. 19 | 20 | ```text 21 | $ git clone git@github.com:username/NoSE.git 22 | $ cd NoSE 23 | $ git remote add upstream git://github.com/michaelmior/NoSE.git 24 | ``` 25 | 26 | #### Which branch? 27 | 28 | For developing new features and bug fixes, the `master` branch should be pulled 29 | and built upon. 
30 | 31 | ### Step 2: Branch 32 | 33 | Create a feature branch and start hacking: 34 | 35 | ```text 36 | $ git checkout -b my-feature-branch -t origin/master 37 | ``` 38 | 39 | ### Step 3: Commit 40 | 41 | Make sure git knows your name and email address: 42 | 43 | ```text 44 | $ git config --global user.name "J. Random User" 45 | $ git config --global user.email "j.random.user@example.com" 46 | ``` 47 | 48 | Writing good commit logs is important. A commit log should describe what 49 | changed and why. Follow these guidelines when writing one: 50 | 51 | 1. The first line should be 50 characters or less and contain a short 52 | description of the change. 53 | 2. Keep the second line blank. 54 | 3. Wrap all other lines at 72 columns. 55 | 56 | A good commit log can look something like this: 57 | 58 | ``` 59 | explaining the commit in one line 60 | 61 | Body of commit message is a few lines of text, explaining things 62 | in more detail, possibly giving some background about the issue 63 | being fixed, etc. etc. 64 | 65 | The body of the commit message can be several paragraphs, and 66 | please do proper word-wrap and keep columns shorter than about 67 | 72 characters or so. That way `git log` will show things 68 | nicely even when it is indented. 69 | ``` 70 | 71 | The header line should be meaningful; it is what other people see when they 72 | run `git shortlog` or `git log --oneline`. 73 | 74 | ### Step 4: Rebase 75 | 76 | Use `git rebase` (not `git merge`) to sync your work from time to time. 77 | 78 | ```text 79 | $ git fetch upstream 80 | $ git rebase upstream/master 81 | ``` 82 | 83 | ### Step 5: Test 84 | 85 | Bug fixes and features **should come with tests**. Add your tests in the 86 | `spec` directory. Look at other tests to see how they should be 87 | structured. 88 | 89 | ```text 90 | $ bundle exec rspec 91 | ``` 92 | 93 | Make sure that all tests pass. Please, do not submit 94 | patches with failing tests. 
95 | 96 | If you are updating tests and just want to run a single test to check it, you 97 | can use this syntax: 98 | 99 | ```text 100 | $ bundle exec rspec spec/backend_spec.rb 101 | ``` 102 | 103 | ### Step 6: Push 104 | 105 | ```text 106 | $ git push origin my-feature-branch 107 | ``` 108 | 109 | Go to `https://github.com/yourusername/NoSE` and select your feature branch. 110 | Click the 'Pull Request' button and fill out the form. 111 | 112 | Pull requests are usually reviewed within a few days. If there are comments 113 | to address, apply your changes in a separate commit and push that to your 114 | feature branch. Post a comment in the pull request afterwards; GitHub does 115 | not send out notifications when you add commits. 116 | 117 | 118 | ## Developer's Certificate of Origin 1.1 119 | 120 | By making a contribution to this project, I certify that: 121 | 122 | * (a) The contribution was created in whole or in part by me and I 123 | have the right to submit it under the open source license 124 | indicated in the file; or 125 | 126 | * (b) The contribution is based upon previous work that, to the best 127 | of my knowledge, is covered under an appropriate open source 128 | license and I have the right under that license to submit that 129 | work with modifications, whether created in whole or in part 130 | by me, under the same open source license (unless I am 131 | permitted to submit under a different license), as indicated 132 | in the file; or 133 | 134 | * (c) The contribution was provided directly to me by some other 135 | person who certified (a), (b) or (c) and I have not modified 136 | it. 137 | 138 | * (d) I understand and agree that this project and the contribution 139 | are public and that a record of the contribution (including all 140 | personal information I submit with it, including my sign-off) is 141 | maintained indefinitely and may be redistributed consistent with 142 | this project or the open source license(s) involved. 
143 | -------------------------------------------------------------------------------- /spec/model_spec.rb: -------------------------------------------------------------------------------- 1 | module NoSE 2 | describe Entity do 3 | subject(:entity) { Entity.new('Foo') } 4 | 5 | it 'can store fields' do 6 | entity << Fields::IntegerField.new('Bar') 7 | entity << Fields::IntegerField.new('Baz') 8 | 9 | expect(entity.fields.keys).to match_array %w(Bar Baz) 10 | end 11 | 12 | it 'can have foreign keys' do 13 | other = entity * 100 14 | field = Fields::ForeignKeyField.new('other', other) 15 | entity << field 16 | 17 | expect(field.entity).to be(other) 18 | expect(field.class.subtype_name).to eq('foreign_key') 19 | expect(field.relationship).to eq(:one) 20 | expect(field.cardinality).to eq(100) 21 | end 22 | 23 | it 'can tell fields when they are added' do 24 | field = Fields::IntegerField.new('Bar') 25 | 26 | expect(field.parent).to be_nil 27 | 28 | entity << field 29 | 30 | expect(field.parent).to be(entity) 31 | end 32 | 33 | it 'can create entities using a DSL' do 34 | entity = Entity.new 'Foo' do 35 | ID 'Bar' 36 | Integer 'Baz' 37 | String 'Quux', 20 38 | etc 39 | end 40 | 41 | expect(entity.fields).to have(4).items 42 | expect(entity.fields['Quux'].size).to eq 20 43 | expect(entity['**'].class).to be Fields::HashField 44 | end 45 | 46 | it 'raises an exception for nonexistent fields' do 47 | expect { entity['Bar'] }.to raise_error FieldNotFound 48 | end 49 | 50 | it 'can generate random entities' do 51 | entity << Fields::IntegerField.new('Bar') 52 | expect(entity.random_entity).to be_a Hash 53 | expect(entity.random_entity.keys).to match_array ['Foo_Bar'] 54 | end 55 | end 56 | 57 | describe Fields::Field do 58 | subject(:field) do 59 | Fields::IDField.new 'Bar' 60 | end 61 | 62 | it 'has an ID based on the entity and name' do 63 | Entity.new('Foo') << field 64 | expect(field.id).to eq 'Foo_Bar' 65 | end 66 | 67 | it 'can have its cardinality updated by 
multiplication' do 68 | expect((field * 5).cardinality).to eq 5 69 | end 70 | end 71 | 72 | describe Fields::IntegerField do 73 | it 'can convert string literals' do 74 | expect(Fields::IntegerField.value_from_string '42').to eq 42 75 | end 76 | 77 | it 'can produce random integers' do 78 | field = Fields::IntegerField.new 'Foo', count: 10 79 | expect(field.random_value).to be_a Integer 80 | expect(field.random_value).to be_between(0, field.cardinality) 81 | end 82 | end 83 | 84 | describe Fields::FloatField do 85 | it 'can convert string literals' do 86 | expect(Fields::FloatField.value_from_string '3.14159').to eq 3.14159 87 | end 88 | 89 | it 'can produce random floats' do 90 | field = Fields::FloatField.new 'Foo', count: 10 91 | expect(field.random_value).to be_a Float 92 | expect(field.random_value).to be_between(0, field.cardinality) 93 | end 94 | end 95 | 96 | describe Fields::StringField do 97 | it 'can convert string literals' do 98 | expect(Fields::StringField.value_from_string 'pudding').to eq 'pudding' 99 | end 100 | 101 | it 'can produce random strings' do 102 | field = Fields::StringField.new 'Foo', 10 103 | expect(field.random_value).to be_a String 104 | expect(field.random_value).to have(10).characters 105 | end 106 | end 107 | 108 | describe Fields::DateField do 109 | it 'can convert string literals' do 110 | date = Fields::DateField.value_from_string '2001-02-03T04:05:06+07:00' 111 | expect(date).to eq DateTime.new(2001, 2, 3, 4, 5, 6, '+7').to_time 112 | end 113 | 114 | it 'can produce random dates' do 115 | field = Fields::DateField.new 'Foo' 116 | expect(field.random_value).to be_a Time 117 | end 118 | end 119 | 120 | describe Fields::BooleanField do 121 | it 'can convert boolean strings' do 122 | expect(Fields::BooleanField.value_from_string 'false').to be_falsey 123 | end 124 | 125 | it 'can convert integers in strings' do 126 | expect(Fields::BooleanField.value_from_string '1').to be_truthy 127 | end 128 | 129 | it 'can produce random 
booleans' do 130 | field = Fields::BooleanField.new 'Foo' 131 | expect(field.random_value).to satisfy { |v| [true, false].include? v } 132 | end 133 | end 134 | 135 | describe Model do 136 | let(:model) do 137 | Model.new do 138 | Entity 'Foo' do 139 | ID 'FooID' 140 | end 141 | 142 | Entity 'Bar' do 143 | ID 'BarID' 144 | end 145 | 146 | HasOne 'foo', 'bars', 147 | {'Bar' => 'Foo'} 148 | end 149 | end 150 | 151 | it 'can create a to-one relationship' do 152 | bar_key = model.entities['Bar']['foo'] 153 | expect(bar_key.relationship).to eq(:one) 154 | end 155 | 156 | it 'can create a to-many relationship' do 157 | foo_key = model.entities['Foo']['bars'] 158 | expect(foo_key.relationship).to eq(:many) 159 | end 160 | end 161 | end 162 | -------------------------------------------------------------------------------- /lib/nose/statements/insert.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | # A representation of an insert in the workload 5 | class Insert < Statement 6 | include StatementConditions 7 | include StatementSettings 8 | include StatementSupportQuery 9 | 10 | def initialize(params, text, group: nil, label: nil) 11 | super params, text, group: group, label: label 12 | 13 | @settings = params[:settings] 14 | fail InvalidStatementException, 'Must insert primary key' \ 15 | unless @settings.map(&:field).include?(entity.id_field) 16 | 17 | populate_conditions params 18 | end 19 | 20 | # Build a new insert from a provided parse tree 21 | # @return [Insert] 22 | def self.parse(tree, params, text, group: nil, label: nil) 23 | settings_from_tree tree, params 24 | conditions_from_tree tree, params 25 | 26 | Insert.new params, text, group: group, label: label 27 | end 28 | 29 | # Extract conditions from a parse tree 30 | # @return [Hash] 31 | def self.conditions_from_tree(tree, params) 32 | connections = tree[:connections] || [] 33 | connections = connections.map do 
|connection| 34 | field = params[:entity][connection[:target].to_s] 35 | value = connection[:target_pk] 36 | 37 | type = field.class.const_get 'TYPE' 38 | value = field.class.value_from_string(value.to_s) \ 39 | unless type.nil? || value.nil? 40 | 41 | connection.delete :value 42 | Condition.new field, :'=', value 43 | end 44 | 45 | params[:conditions] = Hash[connections.map do |connection| 46 | [connection.field.id, connection] 47 | end] 48 | end 49 | private_class_method :conditions_from_tree 50 | 51 | # Produce the SQL text corresponding to this insert 52 | # @return [String] 53 | def unparse 54 | insert = "INSERT INTO #{entity.name} " 55 | insert += settings_clause 56 | 57 | insert << ' AND CONNECT TO ' << @conditions.values.map do |condition| 58 | value = maybe_quote condition.value, condition.field 59 | "#{condition.field.name}(#{value})" 60 | end.join(', ') unless @conditions.empty? 61 | 62 | insert 63 | end 64 | 65 | def ==(other) 66 | other.is_a?(Insert) && 67 | @graph == other.graph && 68 | entity == other.entity && 69 | @settings == other.settings && 70 | @conditions == other.conditions 71 | end 72 | alias eql? == 73 | 74 | def hash 75 | @hash ||= [@graph, entity, @settings, @conditions].hash 76 | end 77 | 78 | # Determine if this insert modifies an index 79 | def modifies_index?(index) 80 | return true if modifies_single_entity_index?(index) 81 | return false if index.graph.size == 1 82 | return false unless index.graph.entities.include? entity 83 | 84 | # Check if the index crosses all of the connection keys 85 | keys = @conditions.each_value.map(&:field) 86 | index.graph.keys_from_entity(entity).all? { |k| keys.include? 
k } 87 | end 88 | 89 | # Specifies that inserts require insertion 90 | def requires_insert?(_index) 91 | true 92 | end 93 | 94 | # Support queries are required for index insertion with connection 95 | # to select attributes of the other related entities 96 | # @return [Array] 97 | def support_queries(index) 98 | return [] unless modifies_index?(index) && 99 | !modifies_single_entity_index?(index) 100 | 101 | # Get all fields which need to be selected by support queries 102 | select = index.all_fields - 103 | @settings.map(&:field).to_set - 104 | @conditions.each_value.map do |condition| 105 | condition.field.entity.id_field 106 | end.to_set 107 | return [] if select.empty? 108 | 109 | index.graph.split(entity).map do |graph| 110 | support_fields = select.select do |field| 111 | graph.entities.include? field.parent 112 | end.to_set 113 | 114 | # Build conditions by traversing the foreign keys 115 | conditions = @conditions.each_value.map do |c| 116 | next unless graph.entities.include? c.field.entity 117 | 118 | Condition.new c.field.entity.id_field, c.operator, c.value 119 | end.compact 120 | conditions = Hash[conditions.map do |condition| 121 | [condition.field.id, condition] 122 | end] 123 | 124 | split_entity = split_entity graph, index.graph, entity 125 | build_support_query split_entity, index, graph, support_fields, 126 | conditions 127 | end.compact 128 | end 129 | 130 | # The settings fields are provided with the insertion 131 | def given_fields 132 | @settings.map(&:field) + @conditions.each_value.map do |condition| 133 | condition.field.entity.id_field 134 | end 135 | end 136 | 137 | private 138 | 139 | # Check if the insert modifies a single entity index 140 | # @return [Boolean] 141 | def modifies_single_entity_index?(index) 142 | !(@settings.map(&:field).to_set & index.all_fields).empty? 
&& 143 | index.graph.size == 1 && index.graph.entities.first == entity 144 | end 145 | end 146 | end 147 | -------------------------------------------------------------------------------- /lib/nose/statements/query.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | # A representation of a query in the workload 5 | class Query < Statement 6 | include StatementConditions 7 | 8 | attr_reader :select, :order, :limit 9 | 10 | def initialize(params, text, group: nil, label: nil) 11 | super params, text, group: group, label: label 12 | 13 | populate_conditions params 14 | @select = params[:select] 15 | @order = params[:order] || [] 16 | 17 | fail InvalidStatementException, 'can\'t order by IDs' \ 18 | if @order.any? { |f| f.is_a? Fields::IDField } 19 | 20 | if join_order.first != @key_path.entities.first 21 | @key_path = @key_path.reverse 22 | end 23 | 24 | fail InvalidStatementException, 'must have an equality predicate' \ 25 | if @conditions.empty? || @conditions.values.all?(&:is_range) 26 | 27 | @limit = params[:limit] 28 | end 29 | 30 | # Build a new query from a provided parse tree 31 | # @return [Query] 32 | def self.parse(tree, params, text, group: nil, label: nil) 33 | conditions_from_tree tree, params 34 | fields_from_tree tree, params 35 | order_from_tree tree, params 36 | params[:limit] = tree[:limit].to_i if tree[:limit] 37 | 38 | new params, text, group: group, label: label 39 | end 40 | 41 | # Produce the SQL text corresponding to this query 42 | # @return [String] 43 | def unparse 44 | field_namer = -> (f) { field_path f } 45 | 46 | query = 'SELECT ' + @select.map(&field_namer).join(', ') 47 | query << " FROM #{from_path @graph.longest_path}" 48 | query << where_clause(field_namer) 49 | 50 | query << ' ORDER BY ' << @order.map(&field_namer).join(', ') \ 51 | unless @order.empty? 52 | query << " LIMIT #{@limit}" unless @limit.nil? 
53 | query << " -- #{@comment}" unless @comment.nil? 54 | 55 | query 56 | end 57 | 58 | def ==(other) 59 | other.is_a?(Query) && 60 | @graph == other.graph && 61 | @select == other.select && 62 | @conditions == other.conditions && 63 | @order == other.order && 64 | @limit == other.limit && 65 | @comment == other.comment 66 | end 67 | alias eql? == 68 | 69 | def hash 70 | @hash ||= [@graph, @select, @conditions, @order, @limit, @comment].hash 71 | end 72 | 73 | # The order entities should be joined according to the query graph 74 | # @return [Array] 75 | def join_order 76 | @graph.join_order(@eq_fields) 77 | end 78 | 79 | # Specifies that queries don't modify data 80 | def read_only? 81 | true 82 | end 83 | 84 | # All fields referenced anywhere in the query 85 | # @return [Set] 86 | def all_fields 87 | (@select + @conditions.each_value.map(&:field) + @order).to_set 88 | end 89 | 90 | # Extract fields to be selected from a parse tree 91 | # @return [Set] 92 | def self.fields_from_tree(tree, params) 93 | params[:select] = tree[:select].flat_map do |field| 94 | if field.last == '*' 95 | # Find the entity along the path 96 | entity = params[:key_path].entities[tree[:path].index(field.first)] 97 | entity.fields.values 98 | else 99 | field = add_field_with_prefix tree[:path], field, params 100 | 101 | fail InvalidStatementException, 'Foreign keys cannot be selected' \ 102 | if field.is_a? Fields::ForeignKeyField 103 | 104 | field 105 | end 106 | end.to_set 107 | end 108 | private_class_method :fields_from_tree 109 | 110 | # Extract ordering fields from a parse tree 111 | # @return [Array] 112 | def self.order_from_tree(tree, params) 113 | return params[:order] = [] if tree[:order].nil? 
114 | 115 | params[:order] = tree[:order][:fields].each_slice(2).map do |field| 116 | field = field.first if field.first.is_a?(Array) 117 | add_field_with_prefix tree[:path], field, params 118 | end 119 | end 120 | private_class_method :order_from_tree 121 | 122 | private 123 | 124 | def field_path(field) 125 | path = @graph.path_between @graph.longest_path.entities.first, 126 | field.parent 127 | path = path.drop_while { |k| @graph.longest_path.include? k } << path[-1] 128 | path = KeyPath.new(path) unless path.is_a?(KeyPath) 129 | 130 | from_path path, @graph.longest_path, field 131 | end 132 | end 133 | 134 | # A query required to support an update 135 | class SupportQuery < Query 136 | attr_reader :statement, :index, :entity 137 | 138 | def initialize(entity, params, text, group: nil, label: nil) 139 | super params, text, group: group, label: label 140 | 141 | @entity = entity 142 | end 143 | 144 | # Support queries must also have their statement and index checked 145 | def ==(other) 146 | other.is_a?(SupportQuery) && @statement == other.statement && 147 | @index == other.index && @comment == other.comment 148 | end 149 | alias eql? 
== 150 | 151 | def hash 152 | @hash ||= Zlib.crc32_combine super, @index.hash, @index.hash_str.length 153 | end 154 | 155 | # :nocov: 156 | def to_color 157 | super.to_color + ' for [magenta]' + @index.key + '[/]' 158 | end 159 | # :nocov: 160 | end 161 | end 162 | -------------------------------------------------------------------------------- /lib/nose/statements/connection.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | # Superclass for connect and disconnect statements 5 | class Connection < Statement 6 | include StatementSupportQuery 7 | 8 | attr_reader :source_pk, :target, :target_pk, :conditions 9 | alias source entity 10 | 11 | def initialize(params, text, group: nil, label: nil) 12 | super params, text, group: group, label: label 13 | fail InvalidStatementException, 'Incorrect connection initialization' \ 14 | unless text.split.first == self.class.name.split('::').last.upcase 15 | 16 | populate_conditions params 17 | end 18 | 19 | # Build a new disconnect from a provided parse tree 20 | # @return [Connection] 21 | def self.parse(tree, params, text, group: nil, label: nil) 22 | keys_from_tree tree, params 23 | 24 | new params, text, group: group, label: label 25 | end 26 | 27 | # @return[void] 28 | def self.keys_from_tree(tree, params) 29 | params[:source_pk] = tree[:source_pk] 30 | params[:target] = params[:entity].foreign_keys[tree[:target].to_s] 31 | params[:target_pk] = tree[:target_pk] 32 | end 33 | 34 | # Produce the SQL text corresponding to this connection 35 | # @return [String] 36 | def unparse 37 | "CONNECT #{source.name}(\"#{source_pk}\") TO " \ 38 | "#{target.name}(\"#{target_pk}\")" 39 | end 40 | 41 | def ==(other) 42 | self.class == other.class && 43 | @graph == other.graph && 44 | @source == other.source && 45 | @target == other.target && 46 | @conditions == other.conditions 47 | end 48 | alias eql? 
== 49 | 50 | def hash 51 | @hash ||= [@graph, @source, @target, @conditions].hash 52 | end 53 | 54 | # A connection modifies an index if the relationship is in the path 55 | def modifies_index?(index) 56 | index.path.include?(@target) || index.path.include?(@target.reverse) 57 | end 58 | 59 | # Get the support queries for updating an index 60 | def support_queries(index) 61 | return [] unless modifies_index?(index) 62 | 63 | select = index.all_fields - @conditions.each_value.map(&:field).to_set 64 | return [] if select.empty? 65 | 66 | index.graph.split(entity).map do |graph| 67 | support_fields = select.select do |field| 68 | graph.entities.include? field.parent 69 | end.to_set 70 | conditions = @conditions.select do |_, c| 71 | graph.entities.include? c.field.parent 72 | end 73 | 74 | split_entity = split_entity graph, index.graph, entity 75 | build_support_query split_entity, index, graph, support_fields, 76 | conditions 77 | end.compact 78 | end 79 | 80 | protected 81 | 82 | # The two key fields are provided with the connection 83 | def given_fields 84 | [@target.parent.id_field, @target.entity.id_field] 85 | end 86 | 87 | private 88 | 89 | # Validate the types of the primary keys 90 | # @return [void] 91 | def validate_keys 92 | # XXX Only works for non-composite PKs 93 | source_type = source.id_field.class.const_get 'TYPE' 94 | fail TypeError unless source_type.nil? || source_pk.is_a?(type) 95 | 96 | target_type = @target.class.const_get 'TYPE' 97 | fail TypeError unless target_type.nil? 
|| target_pk.is_a?(type) 98 | end 99 | 100 | # Populate the list of condition objects 101 | # @return [void] 102 | def populate_conditions(params) 103 | @source_pk = params[:source_pk] 104 | @target = params[:target] 105 | @target_pk = params[:target_pk] 106 | 107 | validate_keys 108 | 109 | # This is needed later when planning updates 110 | @eq_fields = [@target.parent.id_field, 111 | @target.entity.id_field] 112 | 113 | source_id = source.id_field 114 | target_id = @target.entity.id_field 115 | @conditions = { 116 | source_id.id => Condition.new(source_id, :'=', @source_pk), 117 | target_id.id => Condition.new(target_id, :'=', @target_pk) 118 | } 119 | end 120 | 121 | # Get the where clause for a support query over the given path 122 | # @return [String] 123 | def support_query_condition_for_path(path, reversed) 124 | key = (reversed ? target.entity : target.parent).id_field 125 | path = path.reverse if path.entities.last != key.entity 126 | eq_key = path.entries[-1] 127 | if eq_key.is_a? Fields::ForeignKeyField 128 | where = "WHERE #{eq_key.name}.#{eq_key.entity.id_field.name} = ?" 129 | else 130 | where = "WHERE #{eq_key.parent.name}." \ 131 | "#{eq_key.parent.id_field.name} = ?" 
132 | end 133 | 134 | where 135 | end 136 | end 137 | 138 | # A representation of a connect in the workload 139 | class Connect < Connection 140 | # Specifies that connections require insertion 141 | def requires_insert?(_index) 142 | true 143 | end 144 | end 145 | 146 | # A representation of a disconnect in the workload 147 | class Disconnect < Connection 148 | # Produce the SQL text corresponding to this disconnection 149 | # @return [String] 150 | def unparse 151 | "DISCONNECT #{source.name}(\"#{source_pk}\") FROM " \ 152 | "#{target.name}(\"#{target_pk}\")" 153 | end 154 | 155 | # Specifies that disconnections require deletion 156 | def requires_delete?(_index) 157 | true 158 | end 159 | end 160 | end 161 | -------------------------------------------------------------------------------- /lib/nose/search/constraints.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | module Search 5 | # Base class for constraints 6 | class Constraint 7 | # If this is not overridden, apply query-specific constraints 8 | # @return [void] 9 | def self.apply(problem) 10 | problem.queries.each_with_index do |query, q| 11 | apply_query query, q, problem 12 | end 13 | end 14 | 15 | # To be implemented in subclasses for query-specific constraints 16 | # @return [void] 17 | def self.apply_query(*_args) 18 | end 19 | end 20 | 21 | # Constraints which force indexes to be present to be used 22 | class IndexPresenceConstraints < Constraint 23 | # Add constraint for indices being present 24 | def self.apply(problem) 25 | problem.indexes.each do |index| 26 | problem.queries.each_with_index do |query, q| 27 | name = "q#{q}_#{index.key}_avail" if ENV['NOSE_LOG'] == 'debug' 28 | constr = MIPPeR::Constraint.new problem.query_vars[index][query] + 29 | problem.index_vars[index] * -1, 30 | :<=, 0, name 31 | problem.model << constr 32 | end 33 | end 34 | end 35 | end 36 | 37 | # The single constraint used to 
enforce a maximum storage cost
    class SpaceConstraint < Constraint
      # Add space constraint if needed
      # @return [void]
      def self.apply(problem)
        # No constraint is needed when storage is unbounded
        return unless problem.data[:max_space].finite?

        fail 'Space constraint not supported when grouping by ID graph' \
          if problem.data[:by_id_graph]

        space = problem.total_size
        # * 1.0 ensures the bound is passed to the solver as a float
        constr = MIPPeR::Constraint.new space, :<=,
                                        problem.data[:max_space] * 1.0,
                                        'max_space'
        problem.model << constr
      end
    end

    # Constraints that force each query to have an available plan
    class CompletePlanConstraints < Constraint
      # Add the discovered constraints to the problem
      # @return [void]
      def self.add_query_constraints(query, q, constraints, problem)
        constraints.each do |entities, constraint|
          # Named only in debug mode since naming is slow
          name = "q#{q}_#{entities.map(&:name).join '_'}" \
            if ENV['NOSE_LOG'] == 'debug'

          # If this is a support query, then we might not need a plan
          if query.is_a? SupportQuery
            # Find the index associated with the support query and make
            # the requirement of a plan conditional on this index
            index_var = if problem.data[:by_id_graph]
                          problem.index_vars[query.index.to_id_graph]
                        else
                          problem.index_vars[query.index]
                        end
            next if index_var.nil?

            # Plan steps must sum to the index variable: a plan exists
            # exactly when the supported index is selected
            constr = MIPPeR::Constraint.new constraint + index_var * -1.0,
                                            :==, 0, name
          else
            # Regular queries must always have exactly one complete plan
            constr = MIPPeR::Constraint.new constraint, :==, 1, name
          end

          problem.model << constr
        end
      end

      # Add complete query plan constraints
      # @return [void]
      def self.apply_query(query, q, problem)
        entities = query.join_order
        # One linear expression per edge of the join order
        query_constraints = Hash[entities.each_cons(2).map do |e, next_e|
          [[e, next_e], MIPPeR::LinExpr.new]
        end]

        # Add the sentinel entities at the end and beginning
        last = Entity.new '__LAST__'
        query_constraints[[entities.last, last]] = MIPPeR::LinExpr.new
        first = Entity.new '__FIRST__'
        query_constraints[[entities.first, first]] = MIPPeR::LinExpr.new

        problem.data[:costs][query].each do |index, (steps, _)|
          # All indexes should advance a step if possible unless
          # this is either the last step from IDs to entity
          # data or the first step going from data to IDs
          index_step = steps.first
          fail if entities.length > 1 && index.graph.size == 1 && \
                  !(steps.last.state.answered? ||
                    index_step.parent.is_a?(Plans::RootPlanStep))

          # Join each step in the query graph
          index_var = problem.query_vars[index][query]
          index_entities = index.graph.entities.sort_by do |entity|
            entities.index entity
          end
          index_entities.each_cons(2) do |entity, next_entity|
            # Make sure the constraints go in the correct direction
            if query_constraints.key?([entity, next_entity])
              query_constraints[[entity, next_entity]] += index_var
            else
              query_constraints[[next_entity, entity]] += index_var
            end
          end

          # If this query has been answered, add the jump to the last step
          query_constraints[[entities.last, last]] += index_var \
            if steps.last.state.answered?
          # If this index is the first step, add this index to the beginning
          query_constraints[[entities.first, first]] += index_var \
            if index_step.parent.is_a?(Plans::RootPlanStep)

          # Ensure the previous index is available
          parent_index = index_step.parent.parent_index
          next if parent_index.nil?

          # An index may only be used if its parent in the plan is also used
          parent_var = problem.query_vars[parent_index][query]
          name = "q#{q}_#{index.key}_parent" if ENV['NOSE_LOG'] == 'debug'
          constr = MIPPeR::Constraint.new index_var * 1.0 + parent_var * -1.0,
                                          :<=, 0, name
          problem.model << constr
        end

        # Ensure we have exactly one index on each component of the query graph
        add_query_constraints query, q, query_constraints, problem
      end
    end
  end
end

# ---- /spec/backend/cassandra_backend_spec.rb ----

require 'nose/backend/cassandra'

module NoSE
  module Backend
    # Shared setup for tests which do not need a live Cassandra cluster
    shared_context 'dummy Cassandra backend' do
      include_context 'dummy cost model'
      include_context 'entities'

      let(:backend) { CassandraBackend.new workload, [index], [], [], {} }
    end

    describe CassandraBackend do
      include_examples 'backend processing', cassandra: true do
        let(:config) do
          {
            name: 'cassandra',
            hosts: ['127.0.0.1'],
            port: 9042,
            keyspace: 'nose'
          }
        end

        let(:backend) do
          CassandraBackend.new plans.schema.model, plans.schema.indexes.values,
                               [], [], config
        end

        # Recreate the test keyspace before running against a live cluster
        before(:all) do
          next if RSpec.configuration.exclusion_filter[:cassandra]
          cluster = Cassandra.cluster hosts: ['127.0.0.1'], port: 9042,
                                      timeout: nil

          keyspace_definition = <<-KEYSPACE_CQL
            CREATE KEYSPACE "nose"
            WITH replication = {
              'class': 'SimpleStrategy',
              'replication_factor': 1
            }
          KEYSPACE_CQL

          session = cluster.connect

          # Drop any existing keyspace so each run starts clean
          keyspace = cluster.has_keyspace? 'nose'
          session.execute 'DROP KEYSPACE "nose"' if keyspace

          session.execute keyspace_definition
        end
      end

      it 'is a type of backend' do
        expect(CassandraBackend.subtype_name).to eq 'cassandra'
      end
    end

    describe CassandraBackend do
      context 'when not connected' do
        include_context 'dummy Cassandra backend'

        it 'can generate DDL for a simple index' do
          expect(backend.indexes_ddl).to match_array [
            'CREATE COLUMNFAMILY "TweetIndex" ("User_Username" text, ' \
            '"Tweet_Timestamp" timestamp, "User_UserId" uuid, '\
            '"Tweet_TweetId" uuid, ' \
            '"Tweet_Body" text, PRIMARY KEY(("User_Username"), ' \
            '"Tweet_Timestamp", "User_UserId", "Tweet_TweetId"));'
          ]
        end
      end
    end

    describe CassandraBackend::IndexLookupStatementStep do
      include_context 'dummy Cassandra backend'

      it 'can lookup data for an index based on a plan' do
        # Materialize a view for the given query
        query = Statement.parse 'SELECT Tweet.Body FROM Tweet.User ' \
                                'WHERE User.Username = "Bob" ' \
                                'ORDER BY Tweet.Timestamp LIMIT 10',
                                workload.model
        index = query.materialize_view
        planner = Plans::QueryPlanner.new workload.model, [index], cost_model
        step = planner.min_plan(query).first

        # Validate the expected CQL query
        client = double('client')
        backend_query = 'SELECT "Tweet_Body", "User_Username", ' \
                        '"Tweet_Timestamp" ' + "FROM \"#{index.key}\" " \
                        'WHERE "User_Username" = ? ' \
                        'ORDER BY "Tweet_Timestamp" LIMIT 10'
        expect(client).to receive(:prepare).with(backend_query) \
          .and_return(backend_query)

        # Define a simple array providing empty results
        results = []
        def results.last_page?
          true
        end
        expect(client).to receive(:execute) do |query, values|
          expect(query).to eq backend_query
          expect(values[:arguments][0]).to eq 'Bob'
        end.and_return(results)

        step_class = CassandraBackend::IndexLookupStatementStep
        prepared = step_class.new client, query.all_fields, query.conditions,
                                  step, nil, step.parent
        prepared.process query.conditions, nil
      end
    end

    describe CassandraBackend::InsertStatementStep do
      include_context 'dummy Cassandra backend'

      it 'can insert into an index' do
        client = double('client')
        index = link.simple_index
        # A nil ID should be populated with a generated UUID on insert
        values = [{
          'Link_LinkId' => nil,
          'Link_URL' => 'http://www.example.com/'
        }]
        backend_insert = "INSERT INTO #{index.key} (\"Link_LinkId\", " \
                         '"Link_URL") VALUES (?, ?)'
        expect(client).to receive(:prepare).with(backend_insert) \
          .and_return(backend_insert)
        expect(client).to receive(:execute) do |query, values|
          expect(query).to eq backend_insert
          expect(values[:arguments][0]).to be_a Cassandra::Uuid
          expect(values[:arguments][1]).to eq 'http://www.example.com/'
        end

        step_class = CassandraBackend::InsertStatementStep
        prepared = step_class.new client, index, [link['LinkId'], link['URL']]
        prepared.process values
      end
    end

    describe CassandraBackend::DeleteStatementStep do
      include_context 'dummy Cassandra backend'

      it 'can delete from an index' do
        client = double('client')
        index = link.simple_index
        backend_delete = "DELETE FROM #{index.key} WHERE \"Link_LinkId\" = ?"
        expect(client).to receive(:prepare).with(backend_delete) \
          .and_return(backend_delete)
        expect(client).to receive(:execute) do |query, values|
          expect(query).to eq backend_delete
          expect(values[:arguments][0]).to be_a Cassandra::Uuid
        end

        step_class = CassandraBackend::DeleteStatementStep
        prepared = step_class.new client, index
        prepared.process [links.first['Link_LinkId']]
      end
    end
  end
end

# ---- /spec/indexes_spec.rb ----

module NoSE
  describe Index do
    include_context 'entities'

    # Queries exercising equality, range, and ordering predicates
    let(:equality_query) do
      Statement.parse 'SELECT Tweet.Body FROM Tweet WHERE Tweet.TweetId = ?',
                      workload.model
    end
    let(:combo_query) do
      Statement.parse 'SELECT Tweet.Body FROM Tweet ' \
                      'WHERE Tweet.Timestamp > ? ' \
                      'AND Tweet.TweetId = ?', workload.model
    end
    let(:order_query) do
      Statement.parse 'SELECT Tweet.Body FROM Tweet WHERE Tweet.TweetId = ? ' \
                      'ORDER BY Tweet.Timestamp', workload.model
    end

    before(:each) do
      workload.add_statement equality_query
      workload.add_statement combo_query
      workload.add_statement order_query
    end

    it 'can return fields by field ID' do
      expect(index['Tweet_Body']).to eq(tweet['Body'])
    end

    it 'contains fields' do
      index = Index.new [tweet['TweetId']], [], [tweet['Body']],
                        QueryGraph::Graph.from_path([tweet.id_field])
      expect(index.contains_field? tweet['TweetId']).to be true
    end

    it 'can store additional fields' do
      index = Index.new [tweet['TweetId']], [], [tweet['Body']],
                        QueryGraph::Graph.from_path([tweet.id_field])
      expect(index.contains_field?
tweet['Body']).to be true
    end

    it 'can calculate its size' do
      index = Index.new [tweet['TweetId']], [], [tweet['Body']],
                        QueryGraph::Graph.from_path([tweet.id_field])
      # Total size is the per-entry size times the entity count
      entry_size = tweet['TweetId'].size + tweet['Body'].size
      expect(index.entry_size).to eq(entry_size)
      expect(index.size).to eq(entry_size * tweet.count)
    end

    context 'when materializing views' do
      it 'supports equality predicates' do
        index = equality_query.materialize_view
        expect(index.hash_fields).to eq([tweet['TweetId']].to_set)
      end

      it 'support range queries' do
        index = combo_query.materialize_view
        expect(index.order_fields).to eq([tweet['Timestamp']])
      end

      it 'supports multiple predicates' do
        index = combo_query.materialize_view
        expect(index.hash_fields).to eq([tweet['TweetId']].to_set)
        expect(index.order_fields).to eq([tweet['Timestamp']])
      end

      it 'supports order by' do
        index = order_query.materialize_view
        expect(index.order_fields).to eq([tweet['Timestamp']])
      end

      # Index keys are derived from content so must be stable across runs
      it 'keeps a static key' do
        index = combo_query.materialize_view
        expect(index.key).to eq 'i1632091596'
      end

      it 'includes only one entity in the hash fields' do
        query = Statement.parse 'SELECT Tweet.TweetId FROM Tweet.User ' \
                                'WHERE Tweet.Timestamp = ? 
AND User.City = ?',
                                workload.model
        index = query.materialize_view
        expect(index.hash_fields.map(&:parent).uniq).to have(1).item
      end
    end

    it 'can tell if it maps identities for a field' do
      index = Index.new [tweet['TweetId']], [], [tweet['Body']],
                        QueryGraph::Graph.from_path([tweet.id_field])
      expect(index.identity?).to be true
    end

    it 'can be created to map entity fields by id' do
      index = tweet.simple_index
      expect(index.hash_fields).to eq([tweet['TweetId']].to_set)
      expect(index.order_fields).to eq([])
      expect(index.extra).to eq([
        tweet['Body'],
        tweet['Timestamp'],
        tweet['Retweets']
      ].to_set)
      expect(index.key).to eq 'Tweet'
    end

    context 'when checking validity' do
      it 'cannot have empty hash fields' do
        expect do
          Index.new [], [], [tweet['TweetId']],
                    QueryGraph::Graph.from_path([tweet.id_field])
        end.to raise_error InvalidIndexException
      end

      it 'cannot have hash fields involving multiple entities' do
        expect do
          Index.new [tweet['Body'], user['City']],
                    [tweet.id_field, user.id_field], [],
                    QueryGraph::Graph.from_path([tweet.id_field,
                                                 tweet['User']])
        end.to raise_error InvalidIndexException
      end

      it 'must have fields at the start of the path' do
        expect do
          Index.new [tweet['TweetId']], [], [],
                    QueryGraph::Graph.from_path([tweet.id_field,
                                                 tweet['User']])
        end.to raise_error InvalidIndexException
      end

      it 'must have fields at the end of the path' do
        expect do
          Index.new [user['City']], [], [],
                    QueryGraph::Graph.from_path([tweet.id_field,
                                                 tweet['User']])
        end.to raise_error InvalidIndexException
      end
    end

    context 'when reducing to an ID graph' do
      it 'moves non-ID fields to extra data' do
        index = Index.new [user['City']], [user['UserId']], [],
QueryGraph::Graph.from_path([user.id_field])
        id_graph = index.to_id_graph

        expect(id_graph.hash_fields).to match_array [user['UserId']]
        expect(id_graph.order_fields).to be_empty
        expect(id_graph.extra).to match_array [user['City']]
      end

      it 'does not change indexes which are already ID paths' do
        index = Index.new [user['UserId']], [tweet['TweetId']],
                          [tweet['Body']], QueryGraph::Graph.from_path(
                            [user.id_field, user['Tweets']]
                          )
        id_graph = index.to_id_graph

        expect(id_graph).to eq(index)
      end
    end
  end
end

# ---- /lib/nose/backend/file.rb ----

# frozen_string_literal: true

module NoSE
  module Backend
    # Simple backend which persists data to a file
    class FileBackend < Backend
      include Subtype

      def initialize(model, indexes, plans, update_plans, config)
        super

        # Try to load data from file or start fresh
        @index_data = if !config[:file].nil? && File.file?(config[:file])
                        Marshal.load File.open(config[:file])
                      else
                        {}
                      end

        # Ensure the data is saved when we exit
        ObjectSpace.define_finalizer self, self.class.finalize(@index_data,
                                                               config[:file])
      end

      # Save data when the object is destroyed
      # Returns a proc suitable for ObjectSpace.define_finalizer; it
      # captures the data hash and file path, not the backend instance
      def self.finalize(index_data, file)
        proc do
          if !file.nil?
            Marshal.dump(index_data, File.open(file, 'w'))
          end
        end
      end

      # Check for an empty array for the data
      def index_empty?(index)
        !index_exists?(index) || @index_data[index.key].empty?
      end

      # Check if we have prepared space for this index
      def index_exists?(index)
        @index_data.key?
index.key
      end

      # @abstract Subclasses implement to allow inserting
      def index_insert_chunk(index, chunk)
        @index_data[index.key].concat chunk
      end

      # Generate a simple UUID
      def generate_id
        SecureRandom.uuid
      end

      # Allocate space for data on the new indexes
      def indexes_ddl(execute = false, skip_existing = false,
                      drop_existing = false)
        @indexes.each do |index|
          # Do the appropriate behaviour based on the flags passed in
          if index_exists?(index)
            next if skip_existing
            fail unless drop_existing
          end

          @index_data[index.key] = []
        end if execute

        # We just use the original index definition as DDL
        @indexes.map(&:inspect)
      end

      # Sample a number of values from the given index
      def index_sample(index, count)
        data = @index_data[index.key]
        data.nil? ? [] : data.sample(count)
      end

      # We just produce the data here which can be manipulated as needed
      # @return [Hash]
      def client
        @index_data
      end

      # Provide some helper functions which allow the matching of rows
      # based on a set of list of conditions
      module RowMatcher
        # Check if a row matches the given condition
        # @return [Boolean]
        def row_matches?(row, conditions)
          row_matches_eq?(row, conditions) &&
            row_matches_range?(row, conditions)
        end

        # Check if a row matches the given condition on equality predicates
        # @return [Boolean]
        def row_matches_eq?(row, conditions)
          @eq_fields.all? do |field|
            row[field.id] == conditions.find { |c| c.field == field }.value
          end
        end

        # Check if a row matches the given condition on the range predicate
        # @return [Boolean]
        def row_matches_range?(row, conditions)
          # Rows trivially match when there is no range predicate
          return true if @range_field.nil?
104 | 105 | range_cond = conditions.find { |c| c.field == @range_field } 106 | row[@range_field.id].send range_cond.operator, range_cond.value 107 | end 108 | end 109 | 110 | # Look up data on an index in the backend 111 | class IndexLookupStatementStep < Backend::IndexLookupStatementStep 112 | include RowMatcher 113 | 114 | # Filter all the rows in the specified index to those requested 115 | def process(conditions, results) 116 | # Get the set of conditions we need to process 117 | results = initial_results(conditions) if results.nil? 118 | condition_list = result_conditions conditions, results 119 | 120 | # Loop through all rows to find the matching ones 121 | rows = @client[@index.key] || [] 122 | selected = condition_list.flat_map do |condition| 123 | rows.select { |row| row_matches? row, condition } 124 | end.compact 125 | 126 | # Apply the limit and only return selected fields 127 | field_ids = Set.new @step.fields.map(&:id).to_set 128 | selected[0..(@step.limit.nil? ? -1 : @step.limit)].map do |row| 129 | row.select { |k, _| field_ids.include? k } 130 | end 131 | end 132 | end 133 | 134 | # Insert data into an index on the backend 135 | class InsertStatementStep < Backend::InsertStatementStep 136 | # Add new rows to the index 137 | def process(results) 138 | key_ids = (@index.hash_fields + @index.order_fields).map(&:id).to_set 139 | 140 | results.each do |row| 141 | # Pick out primary key fields we can use to match 142 | conditions = row.select do |field_id| 143 | key_ids.include? field_id 144 | end 145 | 146 | # If we have all the primary keys, check for a match 147 | if conditions.length == key_ids.length 148 | # Try to find a row with this ID and update it 149 | matching_row = @client[index.key].find do |index_row| 150 | index_row.merge(conditions) == index_row 151 | end 152 | 153 | unless matching_row.nil? 154 | matching_row.merge! 
 row
                next
              end
            end

            # Populate IDs as needed
            key_ids.each do |key_id|
              row[key_id] = SecureRandom.uuid if row[key_id].nil?
            end

            @client[index.key] << row
          end
        end
      end

      # Delete data from an index on the backend
      class DeleteStatementStep < Backend::DeleteStatementStep
        include RowMatcher

        # Remove rows matching the results from the dataset
        def process(results)
          # Loop over all rows
          @client[index.key].reject! do |row|
            # Check against all results
            results.any? do |result|
              # If all fields match, drop the row
              result.all? do |field, value|
                row[field] == value
              end
            end
          end
        end
      end
    end
  end
end

# ---- /lib/nose/loader/mysql.rb ----

# frozen_string_literal: true

# This is optional so other things can run under JRuby,
# however this loader won't work so we need to use MRI
begin
  require 'mysql2'
rescue LoadError
  require 'mysql'
end

module NoSE
  module Loader
    # Load data from a MySQL database into a backend
    class MysqlLoader < LoaderBase
      def initialize(workload = nil, backend = nil)
        @logger = Logging.logger['nose::loader::mysqlloader']

        @workload = workload
        @backend = backend
      end

      # Load a generated set of indexes with data from MySQL
      # @return [void]
      def load(indexes, config, show_progress = false, limit = nil,
               skip_existing = true)
        # Collapse to ID graphs if the backend stores them that way
        indexes.map!(&:to_id_graph).uniq! if @backend.by_id_graph

        # XXX Assuming backend is thread-safe
        Parallel.each(indexes, in_threads: 2) do |index|
          load_index index, config, show_progress, limit, skip_existing
        end
      end

      # Read all tables in the database and construct a workload object
      # @return [Workload]
      def workload(config)
        client = new_client config

        workload = Workload.new
        # @array_options is set by new_client depending on the driver
        results = if @array_options
                    client.query('SHOW TABLES').each(**@array_options)
                  else
                    client.query('SHOW TABLES').each
                  end

        results.each do |table, *|
          # TODO: Handle foreign keys
          workload << entity_for_table(client, table)
        end

        workload
      end

      private

      # Create a new client from the given configuration
      # Prefers the mysql2 driver; falls back to mysql (e.g. under JRuby)
      def new_client(config)
        if Object.const_defined?(:Mysql2)
          @query_options = { stream: true, cache_rows: false }
          @array_options = { as: :array }
          Mysql2::Client.new host: config[:host],
                             username: config[:username],
                             password: config[:password],
                             database: config[:database]
        else
          @query_options = false
          @array_options = false
          Mysql.connect config[:host], config[:username], config[:password],
                        config[:database]
        end
      end

      # Load a single index into the backend
      # @return [void]
      def load_index(index, config, show_progress, limit, skip_existing)
        client = new_client config

        # Skip this index if it's not empty
        if skip_existing && !@backend.index_empty?(index)
          @logger.info "Skipping index #{index.inspect}" if show_progress
          return
        end
        @logger.info index.inspect if show_progress

        sql, fields = index_sql index, limit
        results = if @query_options
                    client.query(sql, **@query_options)
                  else
                    client.query(sql).map { |row| hash_from_row row, fields }
                  end

        # Insert the results into the backend in chunks of 1000 rows
        result_chunk = []
        results.each do |result|
          result_chunk.push result
          next if result_chunk.length < 1000

          @backend.index_insert_chunk index, result_chunk
          result_chunk = []
        end
        # Flush any final partial chunk
        @backend.index_insert_chunk index, result_chunk \
          unless result_chunk.empty?
      end

      # Construct a hash from the given row returned by the client
      # @return [Hash]
      def hash_from_row(row, fields)
        row_hash = {}
        fields.each_with_index do |field, i|
          value = field.class.value_from_string row[i]
          row_hash[field.id] = value
        end

        row_hash
      end

      # Get all the fields selected by this index
      # Returns the field objects and their SQL select expressions
      def index_sql_select(index)
        fields = index.hash_fields.to_a + index.order_fields + index.extra.to_a

        [fields, fields.map do |field|
          "#{field.parent.name}.#{field.name} AS " \
          "#{field.parent.name}_#{field.name}"
        end]
      end

      # Get the list of tables along with the join condition
      # for a query to fetch index data
      # @return [String]
      def index_sql_tables(index)
        # Create JOIN statements
        tables = index.graph.entities.map(&:name).join ' JOIN '
        return tables if index.graph.size == 1

        tables << ' WHERE '
        tables << index.path.each_cons(2).map do |_prev_key, key|
          # Orient the key so the join condition reads parent-to-entity
          key = key.reverse if key.relationship == :many
          "#{key.parent.name}.#{key.name}=" \
          "#{key.entity.name}.#{key.entity.id_field.name}"
        end.join(' AND ')

        tables
      end

      # Construct a SQL statement to fetch the data to populate this index
      # @return [String]
      def index_sql(index, limit = nil)
        # Get all the necessary fields
        fields, select = index_sql_select index

        # Construct the join condition
        tables = index_sql_tables index

        query = "SELECT #{select.join ', '} FROM #{tables}"
        query += " LIMIT #{limit}" unless limit.nil?

        @logger.debug query
        [query, fields]
      end

      # Generate an entity definition from a given table
      # @return [Entity]
      def entity_for_table(client, table)
        entity = Entity.new table
        count = client.query("SELECT COUNT(*) FROM #{table}").first
        # mysql2 returns a Hash per row; mysql returns an array/scalar
        entity.count = count.is_a?(Hash) ? count.values.first : count

        describe = if @array_options
                     client.query("DESCRIBE #{table}").each(**@array_options)
                   else
                     client.query("DESCRIBE #{table}").each
                   end

        describe.each do |name, type, _, key|
          # Primary key columns always become IDFields
          field_class = key == 'PRI' ? Fields::IDField : field_class(type)
          entity << field_class.new(name)
        end

        entity
      end

      # Produce the Ruby class used to represent a MySQL type
      # Returns nil for unrecognized types
      # @return [Class]
      def field_class(type)
        case type
        when /datetime/
          Fields::DateField
        when /float/
          Fields::FloatField
        when /text/
          # TODO: Get length
          Fields::StringField
        when /varchar\(([0-9]+)\)/
          # TODO: Use length
          Fields::StringField
        when /(tiny)?int/
          Fields::IntegerField
        end
      end
    end
  end
end

# ---- /workloads/rubis.rb ----

# frozen_string_literal: true

NoSE::Workload.new do
  Model 'rubis'

  # Define queries and their relative weights, weights taken from below
  # http://rubis.ow2.org/results/SB-BMP/Bidding/JBoss-SB-BMP-Bi-1500/perf.html#run_stat
  # http://rubis.ow2.org/results/SB-BMP/Browsing/JBoss-SB-BMP-Br-1500/perf.html#run_stat
  DefaultMix :browsing

  Group 'BrowseCategories', browsing: 4.44,
                            bidding: 7.65,
                            write_medium: 7.65,
                            write_heavy: 7.65 do
    Q 'SELECT users.nickname, users.password FROM users WHERE users.id = ? 
-- 1'
    # XXX Must have at least one equality predicate
    Q 'SELECT categories.id, categories.name FROM categories WHERE ' \
      'categories.dummy = 1 -- 2'
  end

  Group 'ViewBidHistory', browsing: 2.38,
                          bidding: 1.54,
                          write_medium: 1.54,
                          write_heavy: 1.54 do
    Q 'SELECT items.name FROM items WHERE items.id = ? -- 3'
    Q 'SELECT users.id, users.nickname, bids.id, item.id, bids.qty, ' \
      'bids.bid, bids.date FROM users.bids.item WHERE item.id = ? ' \
      'ORDER BY bids.date -- 4'
  end

  Group 'ViewItem', browsing: 22.95,
                    bidding: 14.17,
                    write_medium: 14.17,
                    write_heavy: 14.17 do
    Q 'SELECT items.* FROM items WHERE items.id = ? -- 5'
    Q 'SELECT bids.* FROM items.bids WHERE items.id = ? -- 6'
  end

  Group 'SearchItemsByCategory', browsing: 27.77,
                                 bidding: 15.94,
                                 write_medium: 15.94,
                                 write_heavy: 15.94 do
    Q 'SELECT items.id, items.name, items.initial_price, items.max_bid, ' \
      'items.nb_of_bids, items.end_date FROM items.category WHERE ' \
      'category.id = ? AND items.end_date >= ? LIMIT 25 -- 7'
  end

  Group 'ViewUserInfo', browsing: 4.41,
                        bidding: 2.48,
                        write_medium: 2.48,
                        write_heavy: 2.48 do
    # XXX Not including region name below
    Q 'SELECT users.* FROM users WHERE users.id = ? -- 8'
    Q 'SELECT comments.id, comments.rating, comments.date, comments.comment ' \
      'FROM comments.to_user WHERE to_user.id = ? -- 9'
  end

  # Write transactions scale their weight by 10x in the write_medium mix and
  # 100x in the write_heavy mix (the multipliers below make this explicit)
  Group 'RegisterItem', bidding: 0.53,
                        write_medium: 0.53 * 10,
                        write_heavy: 0.53 * 100 do
    Q 'INSERT INTO items SET id=?, name=?, description=?, initial_price=?, ' \
      'quantity=?, reserve_price=?, buy_now=?, nb_of_bids=0, max_bid=0, ' \
      'start_date=?, end_date=? AND CONNECT TO category(?), seller(?) 
-- 10'
  end

  Group 'RegisterUser', bidding: 1.07,
                        write_medium: 1.07 * 10,
                        write_heavy: 1.07 * 100 do
    Q 'INSERT INTO users SET id=?, firstname=?, lastname=?, nickname=?, ' \
      'password=?, email=?, rating=0, balance=0, creation_date=? ' \
      'AND CONNECT TO region(?) -- 11'
  end

  Group 'BuyNow', bidding: 1.16,
                  write_medium: 1.16,
                  write_heavy: 1.16 do
    Q 'SELECT users.nickname FROM users WHERE users.id=? -- 12'
    Q 'SELECT items.* FROM items WHERE items.id=? -- 13'
  end

  Group 'StoreBuyNow', bidding: 1.10,
                       write_medium: 1.10 * 10,
                       write_heavy: 1.10 * 100 do
    Q 'SELECT items.quantity, items.nb_of_bids, items.end_date FROM items ' \
      'WHERE items.id=? -- 14'
    Q 'UPDATE items SET quantity=?, nb_of_bids=?, end_date=? WHERE items.id=? -- 15'
    Q 'INSERT INTO buynow SET id=?, qty=?, date=? ' \
      'AND CONNECT TO item(?), buyer(?) -- 16'
  end

  Group 'PutBid', bidding: 5.40,
                  write_medium: 5.40,
                  write_heavy: 5.40 do
    Q 'SELECT users.nickname, users.password FROM users WHERE users.id=? -- 17'
    Q 'SELECT items.* FROM items WHERE items.id=? -- 18'
    Q 'SELECT bids.qty, bids.date FROM bids.item WHERE item.id=? ' \
      'ORDER BY bids.bid LIMIT 2 -- 19'
  end

  Group 'StoreBid', bidding: 3.74,
                    write_medium: 3.74 * 10,
                    write_heavy: 3.74 * 100 do
    Q 'INSERT INTO bids SET id=?, qty=?, bid=?, date=? ' \
      'AND CONNECT TO item(?), user(?) -- 20'
    Q 'SELECT items.nb_of_bids, items.max_bid FROM items WHERE items.id=? -- 21'
    Q 'UPDATE items SET nb_of_bids=?, max_bid=? WHERE items.id=? -- 22'
  end

  Group 'PutComment', bidding: 0.46,
                      write_medium: 0.46,
                      write_heavy: 0.46 do
    Q 'SELECT users.nickname, users.password FROM users WHERE users.id=? -- 23'
    Q 'SELECT items.* FROM items WHERE items.id=? -- 24'
    Q 'SELECT users.* FROM users WHERE users.id=? 
-- 25'
  end

  Group 'StoreComment', bidding: 0.45,
                        write_medium: 0.45 * 10,
                        write_heavy: 0.45 * 100 do
    Q 'SELECT users.rating FROM users WHERE users.id=? -- 26'
    Q 'UPDATE users SET rating=? WHERE users.id=? -- 27'
    Q 'INSERT INTO comments SET id=?, rating=?, date=?, comment=? ' \
      'AND CONNECT TO to_user(?), from_user(?), item(?) -- 28'
  end

  Group 'AboutMe', bidding: 1.71,
                   write_medium: 1.71,
                   write_heavy: 1.71 do
    Q 'SELECT users.* FROM users WHERE users.id=? -- 29'
    Q 'SELECT comments_received.* FROM users.comments_received ' \
      'WHERE users.id = ? -- 30'
    Q 'SELECT from_user.nickname FROM comments.from_user WHERE comments.id = ? -- 31'
    Q 'SELECT bought_now.*, items.* FROM items.bought_now.buyer ' \
      'WHERE buyer.id = ? AND bought_now.date>=? -- 32'
    Q 'SELECT items.* FROM items.seller WHERE seller.id=? AND ' \
      'items.end_date >=? -- 33'
    Q 'SELECT items.* FROM items.bids.user WHERE user.id=? AND ' \
      'items.end_date>=? -- 34'
  end

  Group 'SearchItemsByRegion', browsing: 8.26,
                               bidding: 6.34,
                               write_medium: 6.34,
                               write_heavy: 6.34 do
    Q 'SELECT items.id, items.name, items.initial_price, items.max_bid, ' \
      'items.nb_of_bids, items.end_date FROM ' \
      'items.seller WHERE seller.region.id = ? AND items.category.id = ? ' \
      'AND items.end_date >= ? 
LIMIT 25 -- 35' 149 | end 150 | 151 | Group 'BrowseRegions', browsing: 3.21, 152 | bidding: 5.39, 153 | write_medium: 5.39, 154 | write_heavy: 5.39 do 155 | # XXX Must have at least one equality predicate 156 | Q 'SELECT regions.id, regions.name FROM regions ' \ 157 | 'WHERE regions.dummy = 1 -- 36' 158 | end 159 | end 160 | -------------------------------------------------------------------------------- /experiments/rubis/rubis-schema.sql: -------------------------------------------------------------------------------- 1 | -- MySQL dump 10.13 Distrib 5.7.13-6, for debian-linux-gnu (x86_64) 2 | -- 3 | -- Host: localhost Database: rubis_big 4 | -- ------------------------------------------------------ 5 | -- Server version 5.7.13-6 6 | 7 | /*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */; 8 | /*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */; 9 | /*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */; 10 | /*!40101 SET NAMES utf8 */; 11 | /*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */; 12 | /*!40103 SET TIME_ZONE='+00:00' */; 13 | /*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */; 14 | /*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */; 15 | /*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */; 16 | /*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */; 17 | 18 | -- 19 | -- Table structure for table `bids` 20 | -- 21 | 22 | DROP TABLE IF EXISTS `bids`; 23 | /*!40101 SET @saved_cs_client = @@character_set_client */; 24 | /*!40101 SET character_set_client = utf8 */; 25 | CREATE TABLE `bids` ( 26 | `id` int(10) unsigned NOT NULL AUTO_INCREMENT, 27 | `user_id` int(10) unsigned NOT NULL, 28 | `item_id` int(10) unsigned NOT NULL, 29 | `qty` int(10) unsigned NOT NULL, 30 | `bid` float unsigned NOT NULL, 31 | `max_bid` float unsigned NOT NULL, 32 | `date` datetime DEFAULT NULL, 33 | PRIMARY KEY (`id`), 34 | UNIQUE KEY `id` (`id`) 35 | ) ENGINE=InnoDB 
AUTO_INCREMENT=20000001 DEFAULT CHARSET=latin1; 36 | /*!40101 SET character_set_client = @saved_cs_client */; 37 | 38 | -- 39 | -- Table structure for table `buy_now` 40 | -- 41 | 42 | DROP TABLE IF EXISTS `buy_now`; 43 | /*!40101 SET @saved_cs_client = @@character_set_client */; 44 | /*!40101 SET character_set_client = utf8 */; 45 | CREATE TABLE `buy_now` ( 46 | `id` int(10) unsigned NOT NULL AUTO_INCREMENT, 47 | `buyer_id` int(10) unsigned NOT NULL, 48 | `item_id` int(10) unsigned NOT NULL, 49 | `qty` int(10) unsigned NOT NULL, 50 | `date` datetime DEFAULT NULL, 51 | PRIMARY KEY (`id`), 52 | UNIQUE KEY `id` (`id`) 53 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1; 54 | /*!40101 SET character_set_client = @saved_cs_client */; 55 | 56 | -- 57 | -- Table structure for table `categories` 58 | -- 59 | 60 | DROP TABLE IF EXISTS `categories`; 61 | /*!40101 SET @saved_cs_client = @@character_set_client */; 62 | /*!40101 SET character_set_client = utf8 */; 63 | CREATE TABLE `categories` ( 64 | `id` int(10) unsigned NOT NULL AUTO_INCREMENT, 65 | `name` varchar(50) DEFAULT NULL, 66 | PRIMARY KEY (`id`), 67 | UNIQUE KEY `id` (`id`) 68 | ) ENGINE=InnoDB AUTO_INCREMENT=51 DEFAULT CHARSET=latin1; 69 | /*!40101 SET character_set_client = @saved_cs_client */; 70 | 71 | -- 72 | -- Table structure for table `comments` 73 | -- 74 | 75 | DROP TABLE IF EXISTS `comments`; 76 | /*!40101 SET @saved_cs_client = @@character_set_client */; 77 | /*!40101 SET character_set_client = utf8 */; 78 | CREATE TABLE `comments` ( 79 | `id` int(10) unsigned NOT NULL AUTO_INCREMENT, 80 | `from_user_id` int(10) unsigned NOT NULL, 81 | `to_user_id` int(10) unsigned NOT NULL, 82 | `item_id` int(10) unsigned NOT NULL, 83 | `rating` int(11) DEFAULT NULL, 84 | `date` datetime DEFAULT NULL, 85 | `comment` text, 86 | PRIMARY KEY (`id`), 87 | UNIQUE KEY `id` (`id`) 88 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1; 89 | /*!40101 SET character_set_client = @saved_cs_client */; 90 | 91 | -- 92 | -- Table structure for table 
`items` 93 | -- 94 | 95 | DROP TABLE IF EXISTS `items`; 96 | /*!40101 SET @saved_cs_client = @@character_set_client */; 97 | /*!40101 SET character_set_client = utf8 */; 98 | CREATE TABLE `items` ( 99 | `id` int(10) unsigned NOT NULL AUTO_INCREMENT, 100 | `name` varchar(100) DEFAULT NULL, 101 | `description` text, 102 | `initial_price` float unsigned NOT NULL, 103 | `quantity` int(10) unsigned NOT NULL, 104 | `reserve_price` float unsigned DEFAULT '0', 105 | `buy_now` float unsigned DEFAULT '0', 106 | `nb_of_bids` int(10) unsigned DEFAULT '0', 107 | `max_bid` float unsigned DEFAULT '0', 108 | `start_date` datetime DEFAULT NULL, 109 | `end_date` datetime DEFAULT NULL, 110 | `seller` int(10) unsigned NOT NULL, 111 | `category` int(10) unsigned NOT NULL, 112 | PRIMARY KEY (`id`), 113 | UNIQUE KEY `id` (`id`) 114 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1; 115 | /*!40101 SET character_set_client = @saved_cs_client */; 116 | 117 | -- 118 | -- Table structure for table `old_items` 119 | -- 120 | 121 | DROP TABLE IF EXISTS `old_items`; 122 | /*!40101 SET @saved_cs_client = @@character_set_client */; 123 | /*!40101 SET character_set_client = utf8 */; 124 | CREATE TABLE `old_items` ( 125 | `id` int(10) unsigned NOT NULL, 126 | `name` varchar(100) DEFAULT NULL, 127 | `description` text, 128 | `initial_price` float unsigned NOT NULL, 129 | `quantity` int(10) unsigned NOT NULL, 130 | `reserve_price` float unsigned DEFAULT '0', 131 | `buy_now` float unsigned DEFAULT '0', 132 | `nb_of_bids` int(10) unsigned DEFAULT '0', 133 | `max_bid` float unsigned DEFAULT '0', 134 | `start_date` datetime DEFAULT NULL, 135 | `end_date` datetime DEFAULT NULL, 136 | `seller` int(10) unsigned NOT NULL, 137 | `category` int(10) unsigned NOT NULL, 138 | PRIMARY KEY (`id`), 139 | UNIQUE KEY `id` (`id`) 140 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1; 141 | /*!40101 SET character_set_client = @saved_cs_client */; 142 | 143 | -- 144 | -- Table structure for table `regions` 145 | -- 146 | 147 | DROP TABLE 
IF EXISTS `regions`; 148 | /*!40101 SET @saved_cs_client = @@character_set_client */; 149 | /*!40101 SET character_set_client = utf8 */; 150 | CREATE TABLE `regions` ( 151 | `id` int(10) unsigned NOT NULL AUTO_INCREMENT, 152 | `name` varchar(25) DEFAULT NULL, 153 | PRIMARY KEY (`id`), 154 | UNIQUE KEY `id` (`id`) 155 | ) ENGINE=InnoDB AUTO_INCREMENT=6 DEFAULT CHARSET=latin1; 156 | /*!40101 SET character_set_client = @saved_cs_client */; 157 | 158 | -- 159 | -- Table structure for table `users` 160 | -- 161 | 162 | DROP TABLE IF EXISTS `users`; 163 | /*!40101 SET @saved_cs_client = @@character_set_client */; 164 | /*!40101 SET character_set_client = utf8 */; 165 | CREATE TABLE `users` ( 166 | `id` int(10) unsigned NOT NULL AUTO_INCREMENT, 167 | `firstname` varchar(20) DEFAULT NULL, 168 | `lastname` varchar(20) DEFAULT NULL, 169 | `nickname` varchar(20) NOT NULL, 170 | `password` varchar(20) NOT NULL, 171 | `email` varchar(50) NOT NULL, 172 | `rating` int(11) DEFAULT NULL, 173 | `balance` float DEFAULT NULL, 174 | `creation_date` datetime DEFAULT NULL, 175 | `region` int(10) unsigned NOT NULL, 176 | PRIMARY KEY (`id`), 177 | UNIQUE KEY `id` (`id`) 178 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1; 179 | /*!40101 SET character_set_client = @saved_cs_client */; 180 | /*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */; 181 | 182 | /*!40101 SET SQL_MODE=@OLD_SQL_MODE */; 183 | /*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */; 184 | /*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */; 185 | /*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */; 186 | /*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */; 187 | /*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */; 188 | /*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */; 189 | 190 | -- Dump completed on 2016-08-20 23:00:10 191 | -------------------------------------------------------------------------------- /lib/nose/enumerator.rb: 
# frozen_string_literal: true

require 'logging'

module NoSE
  # Produces potential indices to be used in schemas
  class IndexEnumerator
    # @param workload [Workload] the workload whose queries and updates
    #   drive the enumeration
    def initialize(workload)
      @logger = Logging.logger['nose::enumerator']

      @workload = workload
    end

    # Produce all possible indices for a given query
    # @return [Array]
    def indexes_for_query(query)
      @logger.debug "Enumerating indexes for query #{query.text}"

      # Candidate ordering fields: the range predicate field (if present)
      # followed by the explicit ORDER BY fields
      range = if query.range_field.nil?
                query.order
              else
                [query.range_field] + query.order
              end

      # Group predicate fields by their parent entity; the default proc
      # makes lookups for entities with no predicates yield an empty list
      eq = query.eq_fields.group_by(&:parent)
      eq.default_proc = ->(*) { [] }

      range = range.group_by(&:parent)
      range.default_proc = ->(*) { [] }

      # Enumerate over every connected subgraph of the query graph and
      # always append the fully materialized view as a fallback plan
      query.graph.subgraphs.flat_map do |graph|
        indexes_for_graph graph, query.select, eq, range
      end.uniq << query.materialize_view
    end

    # Produce all possible indices for a given workload
    # @return [Array]
    def indexes_for_workload(additional_indexes = [], by_id_graph = false)
      queries = @workload.queries
      # Enumerate queries in parallel and merge with any caller-provided
      # seed indexes
      indexes = Parallel.map(queries) do |query|
        indexes_for_query(query).to_a
      end.inject(additional_indexes, &:+)

      # Add indexes generated for support queries; run a second pass so the
      # supporting indexes themselves also get their support indexes
      supporting = support_indexes indexes, by_id_graph
      supporting += support_indexes supporting, by_id_graph
      indexes += supporting

      # Deduplicate indexes, combine them and deduplicate again
      indexes.uniq!
      combine_indexes indexes
      indexes.uniq!

      @logger.debug do
        "Indexes for workload:\n" + indexes.map.with_index do |index, i|
          "#{i} #{index.inspect}"
        end.join("\n")
      end

      indexes
    end

    private

    # Produce the indexes necessary for support queries for these indexes
    # @return [Array]
    def support_indexes(indexes, by_id_graph)
      # If indexes are grouped by ID graph, convert them before updating
      # since other updates will be managed automatically by index maintenance
      indexes = indexes.map(&:to_id_graph).uniq if by_id_graph

      # Collect all possible support queries (every update in the workload
      # crossed with every index it may need to maintain)
      queries = indexes.flat_map do |index|
        @workload.updates.flat_map do |update|
          update.support_queries(index)
        end
      end

      # Enumerate indexes for each support query
      queries.uniq!
      queries.flat_map do |query|
        indexes_for_query(query).to_a
      end
    end

    # Combine the data of indices based on matching hash fields
    # Appends the combined indexes to the given list in place
    def combine_indexes(indexes)
      # Only indexes with no ordering can be merged, grouped by identical
      # hash fields over the same graph
      no_order_indexes = indexes.select do |index|
        index.order_fields.empty?
      end
      no_order_indexes = no_order_indexes.group_by do |index|
        [index.hash_fields, index.graph]
      end

      no_order_indexes.each do |(hash_fields, graph), hash_indexes|
        extra_choices = hash_indexes.map(&:extra).uniq

        # XXX More combos? (only pairwise unions are generated here)
        combos = extra_choices.combination(2)

        combos.map do |combo|
          indexes << Index.new(hash_fields, [], combo.inject(Set.new, &:+),
                               graph)
          @logger.debug "Enumerated combined index #{indexes.last.inspect}"
        end
      end
    end

    # Get all possible choices of fields to use for equality
    # @return [Array]
    def eq_choices(graph, eq)
      entity_choices = graph.entities.flat_map do |entity|
        # Get the fields for the entity and add in the IDs
        # NOTE(review): when the entity has equality predicates, << and
        # uniq! mutate the array stored inside eq; harmless across
        # subgraphs only because the append is idempotent — verify
        entity_fields = eq[entity] << entity.id_field
        entity_fields.uniq!

        # All orderings of every non-empty subset of this entity's fields
        1.upto(entity_fields.count).flat_map do |n|
          entity_fields.permutation(n).to_a
        end
      end

      # Cross entity choices across 2..N entities, plus the single-entity
      # choices themselves
      2.upto(graph.entities.length).flat_map do |n|
        entity_choices.permutation(n).map(&:flatten).to_a
      end + entity_choices
    end

    # Get fields which should be included in an index for the given graph
    # @return [Array]
    def extra_choices(graph, select, eq, range)
      choices = eq.values + range.values << select.to_a

      # NOTE(review): Array#select is non-destructive and its return value
      # is discarded here, so fields whose parent lies outside the graph
      # are never actually filtered out — likely intended select!; confirm
      choices.each do |choice|
        choice.select { |field| graph.entities.include?(field.parent) }
      end

      # Always offer the empty choice so an index need not carry extras
      choices.reject(&:empty?) << []
    end

    # Get all possible indices which jump a given piece of a query graph
    # @return [Array]
    def indexes_for_graph(graph, select, eq, range)
      eq_choices = eq_choices graph, eq
      range_fields = graph.entities.map { |entity| range[entity] }.reduce(&:+)
      range_fields.uniq!
      # Ordering candidates: every permutation of every prefix of the range
      # fields, plus the empty ordering
      order_choices = range_fields.prefixes.flat_map do |fields|
        fields.permutation.to_a
      end.uniq << []
      extra_choices = extra_choices graph, select, eq, range
      # Expand extras to unions of every subset of the basic choices
      extra_choices = 1.upto(extra_choices.length).flat_map do |n|
        extra_choices.combination(n).map(&:flatten).map(&:uniq)
      end.uniq

      # Generate all possible indices based on the field choices
      choices = eq_choices.product(extra_choices)
      indexes = choices.map! do |index, extra|
        indexes = []

        order_choices.each do |order|
          # Append the primary key of the entities in the graph if needed
          order += graph.entities.sort_by(&:name).map(&:id_field) -
                   (index + order)

          # Partition into the ordering portion
          index.partitions.each do |index_prefix, order_prefix|
            # Hash fields are the leading run of the prefix belonging to
            # the same entity as the first equality field
            hash_fields = index_prefix.take_while do |field|
              field.parent == index.first.parent
            end
            order_fields = index_prefix[hash_fields.length..-1] + \
                           order_prefix + order
            extra_fields = extra - hash_fields - order_fields
            # Skip degenerate indexes consisting of hash fields alone
            next if order_fields.empty? && extra_fields.empty?

            new_index = generate_index hash_fields, order_fields, extra_fields,
                                       graph
            indexes << new_index unless new_index.nil?
          end
        end

        indexes
      end.inject([], &:+)
      indexes.flatten!

      indexes
    end

    # Generate a new index and ignore if invalid
    # @return [Index]
    def generate_index(hash, order, extra, graph)
      begin
        index = Index.new hash, order.uniq, extra, graph
        @logger.debug { "Enumerated #{index.inspect}" }
      rescue InvalidIndexException
        # This combination of fields is not valid, that's ok
        index = nil
      end

      index
    end
  end
end