├── experiments └── rubis │ ├── .gitignore │ ├── package.json │ ├── rubis-truncate.sql │ ├── rubis-update.sql │ ├── fake.js │ ├── README.md │ └── rubis-schema.sql ├── spec ├── .rubocop.yml ├── support │ ├── dummy_cost_model.rb │ ├── entities.rb │ └── backend.rb ├── backend │ ├── mongo_backend_spec.rb │ └── cassandra_backend_spec.rb ├── loader │ ├── csv_loader_spec.rb │ └── mysql_loader_spec.rb ├── schema_spec.rb ├── results_spec.rb ├── serialize_spec.rb ├── random_spec.rb ├── spec_helper.rb ├── cost_spec.rb ├── backend_spec.rb ├── query_graph_spec.rb ├── workload_spec.rb ├── enumerator_spec.rb ├── search_spec.rb ├── util_spec.rb ├── model_spec.rb └── indexes_spec.rb ├── Gemfile ├── assets ├── NSERC-logo.png └── packagecloud-logo.png ├── .scrutinizer.yml ├── workloads ├── .rubocop.yml ├── ebay.rb ├── eac.rb ├── rubis_synthetic.rb └── rubis.rb ├── .rspec ├── .yardopts ├── .github ├── dependabot.yml └── workflows │ └── ci.yml ├── .simplecov ├── .editorconfig ├── Rakefile ├── .rubocop.yml ├── yard_extensions.rb ├── schemas ├── ebay.rb ├── rubis_baseline.rb └── rubis_expert.rb ├── .gitignore ├── lib ├── nose │ ├── cost │ │ ├── entity_count.rb │ │ ├── field_size.rb │ │ ├── request_count.rb │ │ └── cassandra.rb │ ├── loader.rb │ ├── plans │ │ ├── limit.rb │ │ ├── sort.rb │ │ ├── update.rb │ │ └── filter.rb │ ├── random │ │ ├── watts_strogatz.rb │ │ └── barbasi_albert.rb │ ├── debug.rb │ ├── loader │ │ ├── random.rb │ │ ├── sql.rb │ │ ├── csv.rb │ │ └── mysql.rb │ ├── cost.rb │ ├── timing.rb │ ├── schema.rb │ ├── statements │ │ ├── delete.rb │ │ ├── update.rb │ │ ├── insert.rb │ │ ├── query.rb │ │ └── connection.rb │ ├── proxy.rb │ ├── model.rb │ ├── model │ │ └── entity.rb │ ├── plans.rb │ ├── search │ │ └── constraints.rb │ ├── backend │ │ └── file.rb │ └── enumerator.rb └── nose.rb ├── CITATION.cff ├── models ├── ebay.rb ├── eac.rb └── rubis.rb ├── templates └── workload.erb ├── plans └── ebay.rb ├── README.md ├── nose.gemspec └── CONTRIBUTING.md 
/experiments/rubis/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | -------------------------------------------------------------------------------- /spec/.rubocop.yml: -------------------------------------------------------------------------------- 1 | Documentation: 2 | Enabled: false 3 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | source 'https://rubygems.org' 4 | 5 | gemspec 6 | -------------------------------------------------------------------------------- /assets/NSERC-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelmior/NoSE/HEAD/assets/NSERC-logo.png -------------------------------------------------------------------------------- /.scrutinizer.yml: -------------------------------------------------------------------------------- 1 | tools: 2 | external_code_coverage: 3 | timeout: 600 # 10 minute timeout 4 | -------------------------------------------------------------------------------- /workloads/.rubocop.yml: -------------------------------------------------------------------------------- 1 | GlobalVars: 2 | Enabled: false 3 | 4 | LineLength: 5 | Enabled: false 6 | -------------------------------------------------------------------------------- /.rspec: -------------------------------------------------------------------------------- 1 | --color 2 | --format documentation 3 | --require spec_helper 4 | --tag ~mongo 5 | --tag ~mysql 6 | -------------------------------------------------------------------------------- /assets/packagecloud-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/michaelmior/NoSE/HEAD/assets/packagecloud-logo.png 
-------------------------------------------------------------------------------- /.yardopts: -------------------------------------------------------------------------------- 1 | --private 2 | --query '!(docstring.blank? && [:inspect, :to_s, :to_color, :hash, :state, :call].include?(name))' 3 | - 4 | LICENSE.md 5 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: 'bundler' 4 | directory: '/' 5 | schedule: 6 | interval: 'weekly' 7 | -------------------------------------------------------------------------------- /.simplecov: -------------------------------------------------------------------------------- 1 | SimpleCov.formatters = [ 2 | SimpleCov::Formatter::HTMLFormatter 3 | ] 4 | 5 | SimpleCov.start do 6 | add_filter '/spec/' 7 | add_filter '/vendor/' 8 | end 9 | -------------------------------------------------------------------------------- /experiments/rubis/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "rubis-fake", 3 | "version": "0.0.1", 4 | "license": "MIT", 5 | "dependencies": { 6 | "mysql-faker": "0.0.5" 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # editorconfig.org 2 | 3 | root = true 4 | 5 | [*] 6 | indent_style = space 7 | indent_size = 2 8 | end_of_line = lf 9 | charset = utf-8 10 | trim_trailing_whitespace = true 11 | insert_final_newline = true 12 | -------------------------------------------------------------------------------- /experiments/rubis/rubis-truncate.sql: -------------------------------------------------------------------------------- 1 | TRUNCATE TABLE `bids`; 2 | TRUNCATE TABLE `buynow`; 3 | TRUNCATE TABLE `categories`; 4 | TRUNCATE 
TABLE `comments`; 5 | TRUNCATE TABLE `items`; 6 | TRUNCATE TABLE `old_items`; 7 | TRUNCATE TABLE `regions`; 8 | TRUNCATE TABLE `users`; 9 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'rspec/core/rake_task' 4 | require 'yard' 5 | require 'yard-thor' 6 | require_relative 'yard_extensions' 7 | 8 | # XXX: Patch OpenStruct for yard-thor 9 | class OpenStruct 10 | def delete(name) 11 | delete_field name 12 | end 13 | end 14 | 15 | RSpec::Core::RakeTask.new(:spec) 16 | YARD::Rake::YardocTask.new(:doc) 17 | 18 | task default: :spec 19 | -------------------------------------------------------------------------------- /.rubocop.yml: -------------------------------------------------------------------------------- 1 | Metrics/AbcSize: 2 | Max: 20 3 | 4 | Metrics/ClassLength: 5 | Max: 200 6 | 7 | Metrics/CyclomaticComplexity: 8 | Max: 10 9 | 10 | Metrics/MethodLength: 11 | Max: 20 12 | 13 | Metrics/ModuleLength: 14 | Exclude: 15 | - 'spec/**/*' 16 | 17 | Metrics/PerceivedComplexity: 18 | Max: 10 19 | 20 | Style/ClassAndModuleChildren: 21 | Exclude: 22 | - 'spec/**/*' 23 | 24 | Style/SingleLineBlockParams: 25 | Enabled: false 26 | 27 | Style/SignalException: 28 | EnforcedStyle: semantic 29 | -------------------------------------------------------------------------------- /yard_extensions.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | # Handler to add methods for Parslet rules 4 | class ParsletHandler < YARD::Handlers::Ruby::Base 5 | handles method_call(:rule) 6 | namespace_only 7 | 8 | # Add a method for each Parlset rule 9 | def process 10 | name = statement.parameters.first.jump(:tstring_content, :ident).source 11 | object = YARD::CodeObjects::MethodObject.new namespace, name 12 | register(object) 13 | 
parse_block(statement.last.last, owner: object) 14 | end 15 | end 16 | -------------------------------------------------------------------------------- /spec/support/dummy_cost_model.rb: -------------------------------------------------------------------------------- 1 | module NoSE 2 | module Cost 3 | RSpec.shared_examples 'dummy cost model' do 4 | let(:cost_model) do 5 | # Simple cost model which just counts the number of indexes 6 | class DummyCost < NoSE::Cost::Cost 7 | include Subtype 8 | 9 | def index_lookup_cost(_step) 10 | 1 11 | end 12 | 13 | def insert_cost(_step) 14 | 1 15 | end 16 | 17 | def delete_cost(_step) 18 | 1 19 | end 20 | end 21 | 22 | DummyCost.new 23 | end 24 | end 25 | end 26 | end 27 | -------------------------------------------------------------------------------- /spec/backend/mongo_backend_spec.rb: -------------------------------------------------------------------------------- 1 | require 'nose/backend/mongo' 2 | 3 | module NoSE 4 | module Backend 5 | describe MongoBackend do 6 | include_examples 'backend processing', mongo: true do 7 | let(:config) do 8 | { 9 | name: 'mongo', 10 | uri: 'mongodb://localhost:27017/', 11 | database: 'nose' 12 | } 13 | end 14 | 15 | let(:backend) do 16 | MongoBackend.new plans.schema.model, plans.schema.indexes.values, 17 | [], [], config 18 | end 19 | end 20 | 21 | it 'is a type of backend' do 22 | expect(MongoBackend.subtype_name).to eq 'mongo' 23 | end 24 | end 25 | end 26 | end 27 | -------------------------------------------------------------------------------- /schemas/ebay.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require_relative '../lib/nose.rb' 4 | 5 | NoSE::Schema.new do 6 | Model 'ebay' 7 | 8 | Index 'users_by_id' do 9 | Hash users.UserID 10 | Extra users['*'] 11 | Path users.UserID 12 | end 13 | 14 | Index 'items_by_id' do 15 | Hash items.ItemID 16 | Extra items['*'] 17 | Path items.ItemID 18 | end 19 | 20 | 
Index 'likes_by_user' do 21 | Hash users.UserID 22 | Ordered likes.LikedAt, likes.LikeID, items.ItemID 23 | Path users.UserID, users.likes, likes.item 24 | end 25 | 26 | Index 'likes_by_item' do 27 | Hash items.ItemID 28 | Ordered likes.LikedAt, likes.LikeID, users.UserID 29 | Path items.ItemID, items.likes, likes.user 30 | end 31 | end 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | *.rbc 3 | /.config 4 | /coverage/ 5 | /InstalledFiles 6 | /pkg/ 7 | /spec/reports/ 8 | /test/tmp/ 9 | /test/version_tmp/ 10 | /tmp/ 11 | 12 | ## Documentation cache and generated files: 13 | /.yardoc/ 14 | /_yardoc/ 15 | /doc/ 16 | /rdoc/ 17 | /man/ 18 | 19 | ## Environment normalisation: 20 | /.bundle/ 21 | /lib/bundler/man/ 22 | 23 | # for a library or gem, you might want to ignore these files since the code is 24 | # intended to run in multiple environments; otherwise, check them in: 25 | Gemfile.lock 26 | .ruby-version 27 | .ruby-gemset 28 | 29 | # unless supporting rvm < 1.11.0 or doing something fancy, ignore this: 30 | .rvmrc 31 | 32 | ## Configuration files 33 | *.yml 34 | !.scrutinizer.yml 35 | !.rubocop.yml 36 | !.github/** 37 | 38 | ## Log files 39 | gurobi.log 40 | -------------------------------------------------------------------------------- /lib/nose/cost/entity_count.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | module Cost 5 | # A cost model which estimates the number of entities transferred 6 | class EntityCountCost < Cost 7 | include Subtype 8 | 9 | # Rough cost estimate as the number of entities retrieved at each step 10 | # @return [Numeric] 11 | def index_lookup_cost(step) 12 | # Simply count the number of entities at each step 13 | step.state.cardinality 14 | end 15 | 16 | # Cost estimate as number of entities deleted 17 | 
def delete_cost(step) 18 | step.state.cardinality 19 | end 20 | 21 | # Cost estimate as number of entities inserted 22 | def insert_cost(step) 23 | step.state.cardinality 24 | end 25 | end 26 | end 27 | end 28 | -------------------------------------------------------------------------------- /workloads/ebay.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | # Insipired by the blog post below on data modeling in Cassandra 4 | # www.ebaytechblog.com/2012/07/16/cassandra-data-modeling-best-practices-part-1/ 5 | 6 | NoSE::Workload.new do 7 | Model 'ebay' 8 | 9 | # Define queries and their relative weights 10 | Q 'SELECT users.* FROM users WHERE users.UserID = ? -- 1' 11 | Q 'SELECT items.* FROM items WHERE items.ItemID = ?' 12 | Q 'SELECT items.* FROM items.likes.user WHERE user.UserID = ? ORDER BY likes.LikedAt' 13 | Q 'SELECT users.* FROM users.likes.item WHERE item.ItemID = ? ORDER BY likes.LikedAt' 14 | 15 | Q 'INSERT INTO items SET ItemID = ?, Title = ?, Desc = ?' 16 | Q 'INSERT INTO users SET UserID = ?, Name = ?, Email = ?' 17 | Q 'INSERT INTO likes SET LikeID = ?, LikedAt = ? 
AND CONNECT TO user(?), item(?)' 18 | end 19 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | # YAML 1.2 2 | --- 3 | version: 1.0.3 4 | authors: 5 | - 6 | affiliation: "University of Waterloo" 7 | family-names: Mior 8 | given-names: "Michael Joseph" 9 | orcid: "https://orcid.org/0000-0002-4057-8726" 10 | - 11 | affiliation: "University of Waterloo" 12 | family-names: Salem 13 | given-names: Kenneth 14 | - 15 | affiliation: "Qatar Computing Research Institute" 16 | family-names: Aboulnaga 17 | given-names: Ashraf 18 | - 19 | affiliation: "HP Vertica" 20 | family-names: Liu 21 | given-names: Rui 22 | cff-version: "1.0.3" 23 | date-released: 2016-09-11 24 | doi: "10.5281/zenodo.2578159" 25 | license: "GPL-3.0-only" 26 | message: "If you use this software, please cite it using these metadata." 27 | repository-code: "https://github.com/michaelmior/NoSE" 28 | title: "NoSE: Automated schema design for NoSQL applications" 29 | ... 
30 | -------------------------------------------------------------------------------- /models/ebay.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | # Insipired by the blog post below on data modeling in Cassandra 4 | # www.ebaytechblog.com/2012/07/16/cassandra-data-modeling-best-practices-part-1/ 5 | 6 | # rubocop:disable all 7 | 8 | NoSE::Model.new do 9 | # Define entities along with the size and cardinality of their fields 10 | # as well as an estimated number of each entity 11 | (Entity 'users' do 12 | ID 'UserID' 13 | String 'Name', 50 14 | String 'Email', 50 15 | end) * 100 16 | 17 | (Entity 'items' do 18 | ID 'ItemID' 19 | String 'Title', 50 20 | String 'Desc', 200 21 | end) * 1_000 22 | 23 | (Entity 'likes' do 24 | ID 'LikeID' 25 | Date 'LikedAt' 26 | end) * 10_000 27 | 28 | HasOne 'user', 'likes', 29 | {'likes' => 'users'} 30 | HasOne 'item', 'likes', 31 | {'likes' => 'items'} 32 | end 33 | -------------------------------------------------------------------------------- /lib/nose/cost/field_size.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | module Cost 5 | # A cost model which estimates the total size of data transferred 6 | class FieldSizeCost < Cost 7 | include Subtype 8 | 9 | # Rough cost estimate as the size of data returned 10 | # @return [Numeric] 11 | def index_lookup_cost(step) 12 | # If we have an answer to the query, we only need 13 | # to fetch the data fields which are selected 14 | fields = step.index.all_fields 15 | fields &= step.state.query.select if step.state.answered? 
16 | 17 | step.state.cardinality * fields.sum_by(&:size) 18 | end 19 | 20 | # Cost estimate as the size of an index entry 21 | def delete_cost(step) 22 | step.index.entry_size 23 | end 24 | 25 | # Cost estimate as the size of an index entry 26 | def insert_cost(step) 27 | step.index.entry_size 28 | end 29 | end 30 | end 31 | end 32 | -------------------------------------------------------------------------------- /lib/nose/cost/request_count.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | module Cost 5 | # A cost model which estimates the number of requests to the backend 6 | class RequestCountCost < Cost 7 | include Subtype 8 | 9 | # Rough cost estimate as the number of requests made 10 | # @return [Numeric] 11 | def index_lookup_cost(step) 12 | # We always start with a single lookup, then the number 13 | # of lookups is determined by the cardinality at the preceding step 14 | if step.parent.is_a?(Plans::RootPlanStep) 15 | 1 16 | else 17 | step.state.cardinality 18 | end 19 | end 20 | 21 | # Cost estimate as number of entities deleted 22 | def delete_cost(step) 23 | step.state.cardinality 24 | end 25 | 26 | # Cost estimate as number of entities inserted 27 | def insert_cost(step) 28 | step.state.cardinality 29 | end 30 | end 31 | end 32 | end 33 | -------------------------------------------------------------------------------- /experiments/rubis/rubis-update.sql: -------------------------------------------------------------------------------- 1 | alter table regions add column dummy tinyint(1); 2 | update regions set dummy=1; 3 | alter table comments change column from_user_id from_user int(10) unsigned not null, algorithm=inplace; 4 | alter table comments change column to_user_id to_user int(10) unsigned not null, algorithm=inplace; 5 | alter table categories add column dummy tinyint(1), algorithm=inplace; 6 | update categories set dummy=1; 7 | alter table comments 
change column item_id item int(10) unsigned not null, algorithm=inplace; 8 | alter table bids change column item_id item int(10) unsigned not null, algorithm=inplace; 9 | alter table bids change column user_id user int(10) unsigned not null, algorithm=inplace; 10 | rename table buy_now to buynow; 11 | alter table buynow change column buyer_id buyer int(10) unsigned not null, algorithm=inplace; 12 | alter table buynow change column item_id item int(10) unsigned not null, algorithm=inplace; 13 | alter table users change column nickname nickname varchar(50) not null, algorithm=inplace; 14 | -------------------------------------------------------------------------------- /lib/nose.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | # Namespace module for the whole project 4 | module NoSE 5 | end 6 | 7 | require_relative 'nose/util' 8 | 9 | require_relative 'nose/backend' 10 | require_relative 'nose/cost' 11 | require_relative 'nose/debug' 12 | require_relative 'nose/enumerator' 13 | require_relative 'nose/indexes' 14 | require_relative 'nose/loader' 15 | require_relative 'nose/model' 16 | require_relative 'nose/parser' 17 | require_relative 'nose/plans' 18 | require_relative 'nose/proxy' 19 | require_relative 'nose/query_graph' 20 | require_relative 'nose/random' 21 | require_relative 'nose/schema' 22 | require_relative 'nose/search' 23 | require_relative 'nose/statements' 24 | require_relative 'nose/timing' 25 | require_relative 'nose/workload' 26 | 27 | require_relative 'nose/serialize' 28 | 29 | # :nocov: 30 | require 'logging' 31 | 32 | logger = Logging.logger['nose'] 33 | logger.level = (ENV['NOSE_LOG'] || 'info').downcase.to_sym 34 | 35 | logger.add_appenders Logging.appenders.stderr 36 | logger = nil # rubocop:disable Lint/UselessAssignment 37 | # :nocov: 38 | -------------------------------------------------------------------------------- /lib/nose/cost/cassandra.rb: 
-------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | module Cost 5 | # A cost model which estimates the number of requests to the backend 6 | class CassandraCost < Cost 7 | include Subtype 8 | 9 | # Rough cost estimate as the number of requests made 10 | # @return [Numeric] 11 | def index_lookup_cost(step) 12 | return nil if step.state.nil? 13 | rows = step.state.cardinality 14 | parts = step.state.hash_cardinality 15 | 16 | @options[:index_cost] + parts * @options[:partition_cost] + 17 | rows * @options[:row_cost] 18 | end 19 | 20 | # Cost estimate as number of entities deleted 21 | def delete_cost(step) 22 | return nil if step.state.nil? 23 | step.state.cardinality * @options[:delete_cost] 24 | end 25 | 26 | # Cost estimate as number of entities inserted 27 | def insert_cost(step) 28 | return nil if step.state.nil? 29 | step.state.cardinality * @options[:insert_cost] 30 | end 31 | end 32 | end 33 | end 34 | -------------------------------------------------------------------------------- /lib/nose/loader.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | # Loaders which insert data into indexes from external sources 5 | module Loader 6 | # Superclass for all data loaders 7 | class LoaderBase 8 | def initialize(workload = nil, backend = nil) 9 | @workload = workload 10 | @backend = backend 11 | end 12 | 13 | # :nocov: 14 | # @abstract Subclasses should produce a workload 15 | # @return [void] 16 | def workload(_config) 17 | fail NotImplementedError 18 | end 19 | # :nocov: 20 | 21 | # :nocov: 22 | # @abstract Subclasses should load data for the given list of indexes 23 | # @return [void] 24 | def load(_indexes, _config, _show_progress = false, _limit = nil, 25 | _skip_existing = true) 26 | fail NotImplementedError 27 | end 28 | # :nocov: 29 | 30 | # @abstract Subclasses should generate a 
model from the external source 31 | # :nocov: 32 | def model(_config) 33 | fail NotImplementedError 34 | end 35 | # :nocov: 36 | end 37 | end 38 | end 39 | -------------------------------------------------------------------------------- /models/eac.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | # rubocop:disable all 3 | 4 | NoSE::Model.new do 5 | # Define entities along with the size and cardinality of their fields 6 | # as well as an estimated number of each entity 7 | 8 | (Entity 'Player' do 9 | ID 'PlayerID' 10 | String 'PlayerName' 11 | Integer 'PlayerFlags' 12 | Boolean 'IsAdmin' 13 | end) * 100_000 14 | 15 | (Entity 'Session' do 16 | ID 'SessionID' 17 | Date 'TimeStarted' 18 | Date 'TimeEnded' 19 | end) * 100_000 20 | 21 | (Entity 'PlayerState' do 22 | ID 'StateID' 23 | Float 'PosX' 24 | Float 'PosY' 25 | Float 'PosZ' 26 | Date 'ClientTimestamp' 27 | Date 'ServerTimestamp' 28 | end) * 4_000_000 29 | 30 | (Entity 'Server' do 31 | ID 'ServerID' 32 | String 'ServerIP' 33 | String 'ServerName' 34 | end) * 5_000 35 | 36 | HasOne 'player', 'sessions', 37 | {'Session' => 'Player'} 38 | 39 | HasOne 'server', 'sessions', 40 | {'Session' => 'Server'} 41 | 42 | HasOne 'session', 'states', 43 | {'PlayerState' => 'Session'} 44 | end 45 | -------------------------------------------------------------------------------- /lib/nose/plans/limit.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | module Plans 5 | # Limit results from a previous lookup 6 | # This should only ever occur at the end of a plan 7 | class LimitPlanStep < PlanStep 8 | attr_reader :limit 9 | 10 | def initialize(limit, state = nil) 11 | super() 12 | @limit = limit 13 | 14 | return if state.nil? 
15 | @state = state.dup 16 | @state.cardinality = @limit 17 | end 18 | 19 | # Two limit steps are equal if they have the same value for the limit 20 | def ==(other) 21 | other.instance_of?(self.class) && @limit == other.limit 22 | end 23 | alias eql? == 24 | 25 | def hash 26 | @limit 27 | end 28 | 29 | # Check if we can apply a limit 30 | # @return [LimitPlanStep] 31 | def self.apply(_parent, state) 32 | # TODO: Apply if have IDs of the last entity set 33 | # with no filter/sort needed 34 | 35 | return nil if state.query.limit.nil? 36 | return nil unless state.answered? check_limit: false 37 | 38 | LimitPlanStep.new state.query.limit, state 39 | end 40 | end 41 | end 42 | end 43 | -------------------------------------------------------------------------------- /templates/workload.erb: -------------------------------------------------------------------------------- 1 | <% foreign_keys = [] %> 2 | # rubocop:disable all 3 | 4 | workload = NoSE::Workload.new do 5 | # Define entities along with the size and cardinality of their fields 6 | # as well as an estimated number of each entity 7 | <% workload.model.entities.each_value do |entity| %> 8 | (Entity '<%= entity.name %>' do 9 | <% entity.fields.each_value do |field| %> 10 | <% 11 | if field.is_a? 
NoSE::Fields::ForeignKeyField 12 | foreign_keys << field 13 | next 14 | end 15 | %> 16 | <%= field.subtype_name name_case: :camel %> '<%= field.name %>',<%= 17 | case [field.class] 18 | when [NoSE::Fields::StringField] 19 | "#{field.size}, " 20 | else 21 | '' 22 | end 23 | %> count: <%= field.cardinality %> 24 | 25 | <% end %> 26 | end) * <%= entity.count %> 27 | 28 | 29 | <% end %> 30 | 31 | <% foreign_keys.each do |key| %> 32 | ForeignKey '<%= key.name %>', '<%= key.parent.name %>', '<%= key.entity.name %>', count: <%= key.cardinality %> 33 | 34 | <% end %> 35 | 36 | # Define queries and their relative weights 37 | <% workload.statement_weights.each do |statement, weight| %> 38 | Q '<%= statement.query %>', <%= weight %> 39 | 40 | <% end %> 41 | end 42 | # rubocop:enable all 43 | -------------------------------------------------------------------------------- /spec/loader/csv_loader_spec.rb: -------------------------------------------------------------------------------- 1 | require 'nose/loader/csv' 2 | 3 | module NoSE 4 | module Loader 5 | describe CsvLoader do 6 | include_context 'entities' 7 | include FakeFS::SpecHelpers 8 | 9 | before(:each) do 10 | FileUtils.mkdir_p '/tmp/csv' 11 | 12 | File.open '/tmp/csv/User.csv', 'w' do |file| 13 | file.puts <<-EOF.gsub(/^ {10}/, '') 14 | UserId,Username,City 15 | 1,Alice,Chicago 16 | EOF 17 | end 18 | end 19 | 20 | it 'can load data into a backend' do 21 | backend = instance_spy Backend::Backend 22 | allow(backend).to receive(:by_id_graph).and_return(false) 23 | 24 | index = Index.new [user['City']], [user['UserId']], 25 | [user['Username']], 26 | QueryGraph::Graph.from_path([user.id_field]) 27 | loader = CsvLoader.new workload, backend 28 | loader.load([index], directory: '/tmp/csv') 29 | 30 | expect(backend).to have_received(:index_insert_chunk).with( 31 | index, 32 | [{ 33 | 'User_UserId' => '1', 34 | 'User_Username' => 'Alice', 35 | 'User_City' => 'Chicago' 36 | }] 37 | ) 38 | end 39 | end 40 | end 41 | end 42 | 
-------------------------------------------------------------------------------- /spec/schema_spec.rb: -------------------------------------------------------------------------------- 1 | module NoSE 2 | describe Schema do 3 | it 'can model a simple index' do 4 | schema = Schema.new do 5 | Model 'rubis' 6 | 7 | Index 'users_by_id' do 8 | Hash users.id 9 | Extra users['*'] 10 | Path users.id 11 | end 12 | end 13 | 14 | model = schema.model 15 | users = model.entities['users'] 16 | 17 | expect(schema.indexes.values).to match_array [ 18 | Index.new([users['id']], [], users.fields.values, 19 | QueryGraph::Graph.from_path([users['id']])) 20 | ] 21 | end 22 | 23 | it 'can model an index over multiple entities' do 24 | schema = Schema.new do 25 | Model 'rubis' 26 | 27 | Index 'user_region' do 28 | Hash users.id 29 | Ordered regions.id 30 | Extra regions.name 31 | Path users.id, users.region 32 | end 33 | end 34 | 35 | model = schema.model 36 | users = model.entities['users'] 37 | regions = model.entities['regions'] 38 | 39 | expect(schema.indexes.values).to match_array [ 40 | Index.new([users['id']], [regions['id']], [regions['name']], 41 | QueryGraph::Graph.from_path([users['id'], users['region']])) 42 | ] 43 | end 44 | end 45 | end 46 | -------------------------------------------------------------------------------- /lib/nose/random/watts_strogatz.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | module Random 5 | # Generates a random graph using the Watts-Strogatz model 6 | class WattsStrogatzNetwork < Network 7 | def initialize(params = {}) 8 | super params 9 | 10 | @beta = params.fetch :beta, 0.5 11 | @node_degree = params.fetch :node_degree, 2 12 | @nodes = 0..(@nodes_nb - 1) 13 | 14 | @entities = @nodes.map do |node| 15 | create_entity node 16 | end 17 | 18 | build_initial_links 19 | rewire_links 20 | add_foreign_keys 21 | end 22 | 23 | private 24 | 25 | # Set up the 
initial links between all nodes 26 | # @return [void] 27 | def build_initial_links 28 | @nodes.each do |node| 29 | (@node_degree / 2).times do |i| 30 | add_link node, (node + i + 1) % @nodes_nb 31 | end 32 | end 33 | end 34 | 35 | # Rewire all links between nodes 36 | # @return [void] 37 | def rewire_links 38 | (@node_degree / 2).times do |i| 39 | @nodes.each do |node| 40 | next unless rand < @beta 41 | 42 | neighbour = (node + i + 1) % @nodes_nb 43 | remove_link node, neighbour 44 | add_link node, new_neighbour(node, neighbour) 45 | end 46 | end 47 | end 48 | end 49 | end 50 | end 51 | -------------------------------------------------------------------------------- /spec/results_spec.rb: -------------------------------------------------------------------------------- 1 | module NoSE 2 | module Search 3 | describe Results do 4 | include_context 'entities' 5 | 6 | let(:problem) do 7 | OpenStruct.new( 8 | objective_type: Objective::COST, 9 | query_vars: [] 10 | ) 11 | end 12 | 13 | let(:results) do 14 | r = Results.new problem 15 | r.workload = workload 16 | r.enumerated_indexes = [] 17 | r.indexes = [] 18 | r.plans = [] 19 | r.update_plans = [] 20 | r.indexes = [] 21 | r.total_cost = 0 22 | r.total_size = 0 23 | 24 | r 25 | end 26 | 27 | it 'can ensure only enumerated indexes are used' do 28 | index = Index.new [tweet['TweetId']], [], [tweet['Body']], 29 | QueryGraph::Graph.from_path([tweet.id_field]) 30 | 31 | results.indexes = [index] 32 | expect { results.validate }.to \ 33 | raise_error InvalidResultsException 34 | end 35 | 36 | it 'checks for the correct cost objective value' do 37 | results.total_cost = 1 38 | expect { results.validate }.to raise_error InvalidResultsException 39 | end 40 | 41 | it 'checks for the correct size objective value' do 42 | problem.objective_type = Objective::SPACE 43 | results.total_size = 1 44 | expect { results.validate }.to raise_error InvalidResultsException 45 | end 46 | end 47 | end 48 | end 49 | 
-------------------------------------------------------------------------------- /lib/nose/debug.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | # rubocop:disable Lint/HandleExceptions 3 | begin 4 | require 'binding_of_caller' 5 | require 'pry' 6 | rescue LoadError 7 | # Ignore in case we are not in development mode 8 | end 9 | # rubocop:enable Lint/HandleExceptions 10 | 11 | module NoSE 12 | # Various helpful debugging snippets 13 | module Debug 14 | # Convenience method to break in IndexLookupStep when 15 | # a particular set of indexes is reach when planning 16 | # @return [void] 17 | def self.break_on_indexes(*index_keys) 18 | apply = binding.of_caller(1) 19 | parent = apply.eval 'parent' 20 | index = apply.eval 'index' 21 | current_keys = parent.parent_steps.indexes.map(&:key) << index.key 22 | 23 | # rubocop:disable Lint/Debugger 24 | binding.pry if current_keys == index_keys 25 | # rubocop:enable Lint/Debugger 26 | end 27 | 28 | # Export entities in a model as global 29 | # variales for easier access when debugging 30 | # @return [void] 31 | def self.export_model(model) 32 | model.entities.each do |name, entity| 33 | # rubocop:disable Lint/Eval 34 | eval("$#{name} = entity") 35 | # rubocop:enable Lint/Eval 36 | 37 | entity.fields.merge(entity.foreign_keys).each do |field_name, field| 38 | entity.define_singleton_method field_name.to_sym, -> { field } 39 | end 40 | end 41 | 42 | nil 43 | end 44 | end 45 | end 46 | -------------------------------------------------------------------------------- /lib/nose/random/barbasi_albert.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | module Random 5 | # Generates a random graph using the Barbasi-Albert model 6 | class BarbasiAlbertNetwork < Network 7 | def initialize(params = {}) 8 | super params 9 | 10 | # We assume for now that m0 = m = 2 11 | 12 | 
# Check if an external sort can be used (if a sort is the last step)
Fields::IDField } 36 | resolved_predicates = state.eq.empty? && state.range.nil? 37 | can_order = !(state.order_by.to_set & parent.fields).empty? 38 | return nil unless fetched_all_ids && resolved_predicates && can_order 39 | 40 | new_state = state.dup 41 | new_state.order_by = [] 42 | new_step = SortPlanStep.new(state.order_by, new_state) 43 | new_step.state.freeze 44 | 45 | new_step 46 | end 47 | end 48 | end 49 | end 50 | -------------------------------------------------------------------------------- /lib/nose/loader/random.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | module Loader 5 | # Load some random data (mostly useful for testing) 6 | class RandomLoader < LoaderBase 7 | def initialize(workload = nil, backend = nil) 8 | @logger = Logging.logger['nose::loader::randomloader'] 9 | 10 | @workload = workload 11 | @backend = backend 12 | end 13 | 14 | # Load a generated set of indexes with data from MySQL 15 | # @return [void] 16 | def load(indexes, config, show_progress = false, limit = nil, 17 | skip_existing = true) 18 | limit = 1 if limit.nil? 19 | 20 | indexes.map!(&:to_id_graph).uniq! 
if @backend.by_id_graph 21 | indexes.uniq.each do |index| 22 | load_index index, config, show_progress, limit, skip_existing 23 | end 24 | end 25 | 26 | private 27 | 28 | # Load a single index into the backend 29 | # @return [void] 30 | def load_index(index, _config, show_progress, limit, skip_existing) 31 | # Skip this index if it's not empty 32 | if skip_existing && !@backend.index_empty?(index) 33 | @logger.info "Skipping index #{index.inspect}" if show_progress 34 | return 35 | end 36 | @logger.info index.inspect if show_progress 37 | 38 | chunk = Array.new(limit) do 39 | Hash[index.all_fields.map do |field| 40 | [field.id, field.random_value] 41 | end] 42 | end 43 | 44 | @backend.index_insert_chunk index, chunk 45 | end 46 | end 47 | end 48 | end 49 | -------------------------------------------------------------------------------- /workloads/eac.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | NoSE::Workload.new do 4 | Model 'eac' 5 | 6 | # Server session exists 7 | Q 'SELECT Server.ServerID FROM Server WHERE ' \ 8 | 'Server.ServerID = ?', 3 9 | 10 | # Get sessions by GUID 11 | Q 'SELECT Session.SessionID FROM ' \ 12 | 'Session.player WHERE player.PlayerID = ?', 3 13 | 14 | # Get player session 15 | Q 'SELECT states.PosX, states.PosY, states.PosZ, ' \ 16 | 'states.ServerTimestamp FROM ' \ 17 | 'Server.sessions.states WHERE Server.ServerID = ? AND ' \ 18 | 'sessions.player.PlayerID = ? ORDER BY states.ServerTimestamp', 6 19 | 20 | # Get new data 21 | Q 'SELECT states.PosX, states.PosY, states.PosZ, ' \ 22 | 'states.ServerTimestamp, sessions.player.PlayerID FROM ' \ 23 | 'Server.sessions.states WHERE sessions.player.IsAdmin = 0 AND ' \ 24 | 'Server.ServerID = ? AND states.ServerTimestamp > ? AND ' \ 25 | 'states.ServerTimestamp <= ? 
ORDER BY states.ServerTimestamp', 6 26 | 27 | # Get server information 28 | Q 'SELECT Server.ServerName, Server.ServerIP FROM ' \ 29 | 'Server WHERE Server.ServerID = ?', 2 30 | 31 | # Add new player 32 | Q 'INSERT INTO Player SET PlayerID=?, PlayerName=?, PlayerFlags=?, ' \ 33 | 'IsAdmin=?', 4 34 | 35 | # Record new state 36 | Q 'INSERT INTO PlayerState SET StateID=?, PosX=?, PosY=?, PosZ=?, ' \ 37 | 'ClientTimestamp=?, ServerTimestamp=? AND CONNECT TO session(?)', 71 38 | 39 | Q 'INSERT INTO Session SET SessionID=?, TimeStarted=?, TimeEnded=? ' \ 40 | 'AND CONNECT TO server(?), player(?)', 4 41 | 42 | Q 'INSERT INTO Server SET ServerID=?, ServerIP=?, ' \ 43 | 'ServerName=?', 1 44 | end 45 | -------------------------------------------------------------------------------- /lib/nose/plans/update.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | module Plans 5 | # A superclass for steps which modify indexes 6 | class UpdatePlanStep < PlanStep 7 | attr_reader :index 8 | attr_accessor :state 9 | 10 | def initialize(index, type, state = nil) 11 | super() 12 | @index = index 13 | @type = type 14 | 15 | return if state.nil? 16 | @state = state.dup 17 | @state.freeze 18 | end 19 | 20 | # :nocov: 21 | def to_color 22 | "#{super} #{@index.to_color} * #{@state.cardinality}" 23 | end 24 | # :nocov: 25 | 26 | # Two insert steps are equal if they use the same index 27 | def ==(other) 28 | other.instance_of?(self.class) && @index == other.index && \ 29 | @type == other.instance_variable_get(:@type) 30 | end 31 | alias eql? == 32 | 33 | def hash 34 | [@index, @type].hash 35 | end 36 | end 37 | 38 | # A step which inserts data into a given index 39 | class InsertPlanStep < UpdatePlanStep 40 | attr_reader :fields 41 | 42 | def initialize(index, state = nil, fields = Set.new) 43 | super index, :insert, state 44 | @fields = if fields.empty? 
# A step which deletes data from a given index
expect(StatementRepresenter.represent(query).to_hash).to eq(query.text) 51 | end 52 | end 53 | end 54 | end 55 | -------------------------------------------------------------------------------- /spec/random_spec.rb: -------------------------------------------------------------------------------- 1 | module NoSE 2 | module Random 3 | shared_examples 'a network' do 4 | it 'has a default of 10 entities' do 5 | expect(network.entities).to have(10).items 6 | end 7 | 8 | it 'generates entities with IDs' do 9 | expect(network.entities).to all(satisfy do |entity| 10 | entity.fields.values.any? { |field| field.is_a? Fields::IDField } 11 | end) 12 | end 13 | 14 | it 'does not generate disconnected entities' do 15 | expect(network.entities).to all(satisfy do |entity| 16 | connected = !entity.foreign_keys.empty? 17 | connected ||= network.entities.any? do |other| 18 | other.foreign_keys.each_value.map(&:entity).include? entity 19 | end 20 | 21 | connected 22 | end) 23 | end 24 | end 25 | 26 | describe BarbasiAlbertNetwork do 27 | let(:network) { BarbasiAlbertNetwork.new } 28 | it_behaves_like 'a network' 29 | end 30 | 31 | describe WattsStrogatzNetwork do 32 | let(:network) { BarbasiAlbertNetwork.new } 33 | it_behaves_like 'a network' 34 | end 35 | 36 | describe StatementGenerator do 37 | include_context 'entities' 38 | 39 | subject(:sgen) { StatementGenerator.new(workload.model) } 40 | 41 | before(:each) { ::Random.srand 0 } 42 | 43 | it 'generates valid insertions' do 44 | expect(sgen.random_insert).to be_a Insert 45 | end 46 | 47 | it 'generates valid updates' do 48 | expect(sgen.random_update).to be_a Update 49 | end 50 | 51 | it 'generates valid deletions' do 52 | expect(sgen.random_delete).to be_a Delete 53 | end 54 | 55 | it 'generates valid queries' do 56 | expect(sgen.random_query).to be_a Query 57 | end 58 | 59 | it 'generates random graphs' do 60 | expect(sgen.random_graph(4)).to be_a QueryGraph::Graph 61 | end 62 | end 63 | end 64 | end 65 | 
-------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | 2 | name: CI 3 | on: 4 | push: 5 | branches: 6 | - '*' 7 | tags: 8 | - 'v*' 9 | pull_request: 10 | branches: 11 | - main 12 | jobs: 13 | test: 14 | runs-on: ubuntu-latest 15 | strategy: 16 | matrix: 17 | ruby-version: ['3.1.4', '3.2.3', '3.3.0'] 18 | services: 19 | mysql: 20 | image: mysql:5.6 21 | env: 22 | MYSQL_ALLOW_EMPTY_PASSWORD: 1 23 | MYSQL_DATABASE: nose 24 | ports: 25 | - 3306:3306 26 | options: >- 27 | --health-cmd="mysqladmin ping" 28 | --health-interval=10s 29 | --health-timeout=5s 30 | --health-retries=5 31 | mongo: 32 | image: mongo:5 33 | ports: 34 | - 27017:27017 35 | options: >- 36 | --health-cmd=mongo 37 | --health-interval=10s 38 | --health-timeout=5s 39 | --health-retries=5 40 | cassandra: 41 | image: cassandra:3 42 | ports: 43 | - 9042:9042 44 | options: >- 45 | --health-cmd="cqlsh --debug" 46 | --health-start-period=30s 47 | --health-interval=10s 48 | --health-timeout=5s 49 | --health-retries=5 50 | steps: 51 | - name: Install required packages 52 | run: sudo apt update && sudo apt-get install coinor-cbc coinor-libcbc-dev 53 | - uses: actions/checkout@v4 54 | - uses: ruby/setup-ruby@v1 55 | with: 56 | ruby-version: ${{ matrix.ruby-version }} 57 | bundler-cache: true 58 | - name: Populate MySQL 59 | run: mysql -h 127.0.0.1 -uroot -Dnose < spec/support/data/mysql.sql 60 | - name: Test 61 | run: | 62 | sed -i '/^--tag/d' .rspec 63 | bundle exec rspec 64 | - name: Upload coverage to Codecov 65 | uses: codecov/codecov-action@v4 66 | with: 67 | fail_ci_if_error: true 68 | token: ${{ secrets.CODECOV_TOKEN }} 69 | -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | # This file was generated by the `rspec --init` command. 
Conventionally, all 2 | # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`. 3 | # Require this file using `require "spec_helper"` to ensure that it is only 4 | # loaded once. 5 | # 6 | # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration 7 | 8 | require 'simplecov' 9 | SimpleCov.start 10 | 11 | require 'simplecov-cobertura' 12 | SimpleCov.formatter = SimpleCov::Formatter::CoberturaFormatter 13 | 14 | require 'rspec/collection_matchers' 15 | require 'fakefs/safe' 16 | require 'fakefs/spec_helpers' 17 | 18 | Dir['./spec/support/**/*.rb'].sort.each { |f| require f } 19 | 20 | RSpec.configure do |config| 21 | config.expect_with :rspec do |expectations| 22 | expectations.syntax = :expect 23 | expectations.include_chain_clauses_in_custom_matcher_descriptions = true 24 | end 25 | 26 | # rspec-mocks config goes here. You can use an alternate test double 27 | # library (such as bogus or mocha) by changing the `mock_with` option here. 28 | config.mock_with :rspec do |mocks| 29 | # Prevents you from mocking or stubbing a method that does not exist on 30 | # a real object. This is generally recommended, and will default to 31 | # `true` in RSpec 4. 32 | mocks.verify_partial_doubles = true 33 | end 34 | 35 | config.run_all_when_everything_filtered = true 36 | 37 | # Run specs in random order to surface order dependencies. If you find an 38 | # order dependency and want to debug it, you can fix the order by providing 39 | # the seed, which is printed after each run. 40 | # --seed 1234 41 | config.order = 'random' 42 | 43 | # Seed global randomization in this process using the `--seed` CLI option. 44 | # Setting this allows you to use `--seed` to deterministically reproduce 45 | # test failures related to randomization by passing the same `--seed` value 46 | # as the one that triggered the failure. 
47 | Kernel.srand config.seed 48 | end 49 | 50 | require 'nose' 51 | 52 | # Disable parallelism for tests 53 | Parallel.instance_variable_set(:@processor_count, 0) 54 | -------------------------------------------------------------------------------- /plans/ebay.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | NoSE::Plans::ExecutionPlans.new do 4 | Schema 'ebay' 5 | 6 | Group 'GetUser' do 7 | Plan 'GetUser' do 8 | Select users['*'] 9 | Param users.UserID, :== 10 | Lookup 'users_by_id', [users.UserID, :==] 11 | end 12 | end 13 | 14 | Group 'GetItem' do 15 | Plan 'GetItem' do 16 | Select items['*'] 17 | Param items.ItemID, :== 18 | Lookup 'items_by_id', [items.ItemID, :==] 19 | end 20 | end 21 | 22 | Group 'GetUserLikes' do 23 | Plan 'GetUserLikes' do 24 | Select items['*'] 25 | Param users.UserID, :== 26 | Lookup 'likes_by_user', [users.UserID, :==] 27 | Lookup 'items_by_id', [items.ItemID, :==] 28 | end 29 | end 30 | 31 | Group 'GetItemLikes' do 32 | Plan 'GetItemLikes' do 33 | Select users['*'] 34 | Param items.ItemID, :== 35 | Lookup 'likes_by_item', [items.ItemID, :==] 36 | Lookup 'users_by_id', [users.UserID, :==] 37 | end 38 | end 39 | 40 | Group 'AddLike' do 41 | Plan 'AddItemLike' do 42 | Param items.ItemID, :== 43 | Param likes.LikeID, :== 44 | Param likes.LikedAt, :== 45 | Param users.UserID, :== 46 | Insert 'likes_by_item' 47 | end 48 | 49 | Plan 'AddUserLike' do 50 | Param users.UserID, :== 51 | Param likes.LikeID, :== 52 | Param likes.LikedAt, :== 53 | Param items.ItemID, :== 54 | Insert 'likes_by_user' 55 | end 56 | end 57 | 58 | Group 'AddUser' do 59 | Plan 'AddUser' do 60 | Param users.UserID, :== 61 | Param users.Name, :== 62 | Param users.Email, :== 63 | Insert 'users_by_id' 64 | end 65 | end 66 | 67 | Group 'AddItem' do 68 | Plan 'AddItem' do 69 | Param items.ItemID, :== 70 | Param items.Title, :== 71 | Param items.Desc, :== 72 | Insert 'items_by_id' 73 | end 74 | end 75 
# Cost models for steps of backend statement execution
debugging purposes because we need a cost 58 | # @return [Integer] 59 | def pruned_cost(_step) 60 | 0 61 | end 62 | end 63 | end 64 | end 65 | 66 | require_relative 'cost/cassandra' 67 | require_relative 'cost/entity_count' 68 | require_relative 'cost/field_size' 69 | require_relative 'cost/request_count' 70 | -------------------------------------------------------------------------------- /spec/support/entities.rb: -------------------------------------------------------------------------------- 1 | module NoSE 2 | RSpec.shared_examples 'entities' do 3 | let(:workload) do 4 | # rubocop:disable Lint/Void 5 | Workload.new do 6 | (Entity 'User' do 7 | ID 'UserId' 8 | String 'Username', 10 9 | String 'City', count: 5 10 | String 'Country' 11 | 12 | etc 13 | end) * 10 14 | 15 | (Entity 'Link' do 16 | ID 'LinkId' 17 | String 'URL' 18 | end) * 100 19 | 20 | (Entity 'Tweet' do 21 | ID 'TweetId' 22 | String 'Body', 140, count: 5 23 | Date 'Timestamp' 24 | Integer 'Retweets' 25 | end) * 1000 26 | 27 | HasOne 'User', 'Tweets', 28 | {'Tweet' => 'User'} 29 | 30 | HasOne 'Favourite', 'Favourited', 31 | {'User' => 'Tweet'} 32 | 33 | HasOne 'Link', 'Tweets', 34 | {'Tweet' => 'Link'} 35 | end 36 | end 37 | # rubocop:enable Lint/Void 38 | 39 | let(:tweet) { workload.model['Tweet'] } 40 | let(:user) { workload.model['User'] } 41 | let(:link) { workload.model['Link'] } 42 | let(:query) do 43 | Statement.parse 'SELECT Link.URL FROM Link.Tweets.User ' \ 44 | 'WHERE User.Username = ? 
LIMIT 5', workload.model 45 | end 46 | 47 | let(:index) do 48 | Index.new [user['Username']], 49 | [tweet['Timestamp'], user['UserId'], tweet['TweetId']], 50 | [tweet['Body']], 51 | QueryGraph::Graph.from_path([user.id_field, user['Tweets']]), 52 | saved_key: 'TweetIndex' 53 | end 54 | 55 | let(:users) do 56 | [{ 57 | 'User_UserId' => '18a9a155-c9c7-43b5-9ab0-5967c49f56e9', 58 | 'User_Username' => 'Bob' 59 | }] 60 | end 61 | 62 | let(:tweets) do 63 | [{ 64 | 'Tweet_Timestamp' => Time.now, 65 | 'Tweet_TweetId' => 'e2dee9ee-5297-4f91-a3f7-9dd169008407', 66 | 'Tweet_Body' => 'This is a test' 67 | }] 68 | end 69 | 70 | let(:links) do 71 | [{ 72 | 'Link_LinkId' => '4a5339d8-e619-4ad5-a1be-c0bbceb1cdab', 73 | 'Link_URL' => 'http://www.example.com/' 74 | }] 75 | end 76 | end 77 | end 78 | -------------------------------------------------------------------------------- /schemas/rubis_baseline.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require_relative '../lib/nose.rb' 4 | 5 | NoSE::Schema.new do 6 | Model 'rubis' 7 | 8 | SimpleIndex 'categories' 9 | SimpleIndex 'regions' 10 | SimpleIndex 'items' 11 | SimpleIndex 'comments' 12 | 13 | Index 'users_by_region' do 14 | Hash regions.id 15 | Ordered users.id 16 | Extra users.nickname 17 | Path regions.id, regions.users 18 | end 19 | 20 | Index 'users' do 21 | Hash users.id 22 | Ordered regions.id 23 | Extra users['*'] 24 | Path users.id, users.region 25 | end 26 | 27 | Index 'bids' do 28 | Hash bids.id 29 | Ordered users.id, items.id 30 | Extra bids['*'] 31 | Path users.id, users.bids, bids.item 32 | end 33 | 34 | Index 'buynow' do 35 | Hash buynow.id 36 | Ordered items.id 37 | Extra buynow['*'] 38 | Path buynow.id, buynow.item 39 | end 40 | 41 | Index 'all_categories' do 42 | Hash categories.dummy 43 | Ordered categories.id 44 | Path categories.id 45 | end 46 | 47 | Index 'all_regions' do 48 | Hash regions.dummy 49 | Ordered regions.id 50 | Path 
regions.id 51 | end 52 | 53 | Index 'bids_by_item' do 54 | Hash items.id 55 | Ordered bids.id 56 | Path items.id, items.bids 57 | end 58 | 59 | Index 'items_by_category' do 60 | Hash categories.id 61 | Ordered items.end_date, items.id 62 | Path categories.id, categories.items 63 | end 64 | 65 | Index 'items_by_region' do 66 | Hash regions.id 67 | Ordered categories.id, items.end_date, items.id, users.id 68 | Path regions.id, regions.users, users.items_sold, items.category 69 | end 70 | 71 | Index 'comments_by_user' do 72 | Hash users.id 73 | Ordered comments.id 74 | Path users.id, users.comments_received 75 | end 76 | 77 | Index 'user_items_sold' do 78 | Hash users.id 79 | Ordered items.end_date, items.id 80 | Path users.id, users.items_sold 81 | end 82 | 83 | Index 'buynow_by_user' do 84 | Hash users.id 85 | Ordered buynow.date, buynow.id 86 | Path users.id, users.bought_now 87 | end 88 | 89 | Index 'bids_by_user' do 90 | Hash users.id 91 | Ordered bids.date, bids.id 92 | Path users.id, users.bids 93 | end 94 | end 95 | -------------------------------------------------------------------------------- /spec/cost_spec.rb: -------------------------------------------------------------------------------- 1 | module NoSE 2 | module Cost 3 | describe Cost do 4 | it 'should register all subclasses' do 5 | expect(Cost.subclasses).to have_key 'NoSE::Cost::RequestCountCost' 6 | expect(Cost.subclasses).to have_key 'NoSE::Cost::EntityCountCost' 7 | expect(Cost.subclasses).to have_key 'NoSE::Cost::FieldSizeCost' 8 | end 9 | end 10 | 11 | describe RequestCountCost do 12 | include_context 'entities' 13 | 14 | it 'is a type of cost' do 15 | expect(RequestCountCost.subtype_name).to eq 'request_count' 16 | end 17 | 18 | it 'counts a single request for a single step plan' do 19 | planner = Plans::QueryPlanner.new workload.model, 20 | [tweet.simple_index], subject 21 | plan = planner.min_plan \ 22 | Statement.parse 'SELECT Tweet.* FROM Tweet ' \ 23 | 'WHERE Tweet.TweetId = ?', 
workload.model 24 | expect(plan.cost).to eq 1 25 | end 26 | end 27 | 28 | describe EntityCountCost do 29 | include_context 'entities' 30 | 31 | it 'is a type of cost' do 32 | expect(EntityCountCost.subtype_name).to eq 'entity_count' 33 | end 34 | 35 | it 'counts multiple requests when multiple entities are selected' do 36 | query = Statement.parse 'SELECT Tweet.* FROM Tweet.User ' \ 37 | 'WHERE User.UserId = ?', workload.model 38 | planner = Plans::QueryPlanner.new workload.model, 39 | [query.materialize_view], subject 40 | plan = planner.min_plan query 41 | expect(plan.cost).to eq 100 42 | end 43 | end 44 | 45 | describe FieldSizeCost do 46 | include_context 'entities' 47 | 48 | it 'is a type of cost' do 49 | expect(FieldSizeCost.subtype_name).to eq 'field_size' 50 | end 51 | 52 | it 'measures the size of the selected data' do 53 | index = tweet.simple_index 54 | planner = Plans::QueryPlanner.new workload.model, [index], subject 55 | plan = planner.min_plan \ 56 | Statement.parse 'SELECT Tweet.* FROM Tweet ' \ 57 | 'WHERE Tweet.TweetId = ?', workload.model 58 | expect(plan.cost).to eq index.all_fields.sum_by(&:size) 59 | end 60 | end 61 | end 62 | end 63 | -------------------------------------------------------------------------------- /lib/nose/timing.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | # Tracks the runtime of various functions and outputs a measurement 5 | class Timer 6 | # Start tracking function runtime 7 | # @return [void] 8 | def self.enable 9 | traced = { 10 | IndexEnumerator => [ 11 | :indexes_for_workload, 12 | :support_indexes, 13 | :combine_indexes 14 | ], 15 | Search::Search => [ 16 | :query_costs, 17 | :update_costs, 18 | :search_overlap, 19 | :solve_mipper 20 | ], 21 | Search::Problem => [ 22 | :setup_model, 23 | :add_variables, 24 | :add_constraints, 25 | :define_objective, 26 | :total_cost, 27 | :add_update_costs, 28 | :total_size, 29 | 
:total_indexes, 30 | :solve 31 | ], 32 | MIPPeR::CbcModel => [ 33 | :add_constraints, 34 | :add_variables, 35 | :update, 36 | :optimize 37 | ] 38 | } 39 | @old_methods = Hash.new { |h, k| h[k] = {} } 40 | 41 | # Redefine each method to capture timing information on each call 42 | traced.each do |cls, methods| 43 | methods.each do |method| 44 | old_method = cls.instance_method(method) 45 | cls.send(:define_method, method) do |*args| 46 | $stderr.puts "#{cls}##{method}\tSTART" 47 | 48 | start = Time.now.utc 49 | result = old_method.bind(self).call(*args) 50 | elapsed = Time.now.utc - start 51 | 52 | # Allow a block to be called with the timing results 53 | yield cls, method, elapsed if block_given? 54 | 55 | $stderr.puts "#{cls}##{method}\tEND\t#{elapsed}" 56 | 57 | result 58 | end 59 | 60 | # Save a copy of the old method for later 61 | @old_methods[cls][method] = old_method 62 | end 63 | end 64 | end 65 | 66 | # Stop tracking function runtime 67 | # @return [void] 68 | def self.disable 69 | @old_methods.each do |cls, methods| 70 | methods.each do |method, old_method| 71 | cls.send(:define_method, method, old_method) 72 | end 73 | end 74 | 75 | # Remove the saved method definitions 76 | @old_methods.clear 77 | end 78 | end 79 | end 80 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # NoSQL Schema Evaluator (NoSE) 2 | 3 | [![CI](https://github.com/michaelmior/NoSE/actions/workflows/ci.yml/badge.svg)](https://github.com/michaelmior/NoSE/actions/workflows/ci.yml) 4 | [![Depfu](https://badges.depfu.com/badges/69de42ee3415b077a040beadc8941f1e/overview.svg)](https://depfu.com/github/michaelmior/NoSE?project_id=6964) 5 | [![codecov](https://codecov.io/gh/michaelmior/NoSE/branch/main/graph/badge.svg?token=knALGf3kHn)](https://codecov.io/gh/michaelmior/NoSE) 6 | [![Docker Build Status](https://img.shields.io/docker/build/michaelmior/nose.svg)]() 
7 | 8 | This is a work in progress tool to provide automated physical schema design for NoSQL data stores. 9 | NoSE is licensed under the [GPLv3 license](LICENSE.md). 10 | 11 | ## Getting Started 12 | 13 | If you want to quickly try NoSE, you can get a shell with all necessary dependencies using [Docker](https://www.docker.com/) as follows 14 | 15 | docker run --interactive --tty --rm michaelmior/nose /bin/bash 16 | 17 | For continued use, installing a development version of the NoSE CLI is more flexible. 18 | Instructions can be found in the [nose-cli](https://github.com/michaelmior/nose-cli) repository. 19 | 20 | ## Publications 21 | 22 | Mior, M.J.; Kenneth Salem; Ashraf Aboulnaga; Rui Liu, [NoSE: Schema Design for NoSQL Applications](https://www.researchgate.net/publication/296485511_NoSE_Schema_Design_for_NoSQL_Applications), in Data Engineering (ICDE), July 2017. 23 | 24 | Mior, M.J.; Kenneth Salem; Ashraf Aboulnaga; Rui Liu, [NoSE: Schema Design for NoSQL Applications](https://www.researchgate.net/publication/318126769_NoSE_Schema_Design_for_NoSQL_Applications), Transactions on Knowledge and Data Engineering, 16-20 May 2016. 25 | 26 | ![ACM DL Author-ize service](http://dl.acm.org/images/oa.gif) Michael J. Mior. 2014. [Automated schema design for NoSQL databases](http://dl.acm.org/authorize?N71145). In Proceedings of the 2014 SIGMOD PhD symposium (SIGMOD'14 PhD Symposium). ACM, New York, NY, USA, 41-45. 27 | 28 | ## Acknowledgements 29 | 30 | This work was supported by the Natural Sciences and Engineering Research Council of Canada ([NSERC](http://nserc.gc.ca)). 31 | 32 | [![NSERC](assets/NSERC-logo.png)](http://nserc.gc.ca) 33 | 34 | Hosting of [Coin-OR packages](https://packagecloud.io/michaelmior/coinor/) is generously provided by packagecloud. 
35 | 36 | [![packagecloud](assets/packagecloud-logo.png)](https://packagecloud.io) 37 | -------------------------------------------------------------------------------- /schemas/rubis_expert.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require_relative '../lib/nose.rb' 4 | 5 | NoSE::Schema.new do 6 | Model 'rubis' 7 | 8 | Index 'users_by_region' do 9 | Hash regions.id 10 | Ordered users.id 11 | Extra users.nickname 12 | Path regions.id, regions.users 13 | end 14 | 15 | Index 'user_data' do 16 | Hash users.id 17 | Ordered regions.id 18 | Extra users['*'], regions.name 19 | Path users.id, users.region 20 | end 21 | 22 | Index 'user_buynow' do 23 | Hash users.id 24 | Ordered buynow.date, buynow.id, items.id 25 | Extra buynow.qty 26 | Path users.id, users.bought_now, buynow.item 27 | end 28 | 29 | Index 'user_items_bid_on' do 30 | Hash users.id 31 | Ordered items.end_date, bids.id, items.id 32 | Extra bids.qty 33 | Path users.id, users.bids, bids.item 34 | end 35 | 36 | Index 'user_items_sold' do 37 | Hash users.id 38 | Ordered items.end_date, items.id 39 | Path users.id, users.items_sold 40 | end 41 | 42 | Index 'user_comments_received' do 43 | Hash users.id 44 | Ordered comments.id, items.id 45 | Extra comments['*'] 46 | Path users.id, users.comments_received, comments.item 47 | end 48 | 49 | Index 'commenter' do 50 | Hash comments.id 51 | Ordered users.id 52 | Extra users.nickname 53 | Path comments.id, comments.from_user 54 | end 55 | 56 | Index 'items_with_category' do 57 | Hash items.id 58 | Ordered categories.id 59 | Extra items['*'] 60 | Path items.id, items.category 61 | end 62 | 63 | Index 'item_bids' do 64 | Hash items.id 65 | Ordered bids.id, users.id 66 | Extra items.max_bid, users.nickname, bids.qty, bids.bid, bids.date 67 | Path items.id, items.bids, bids.user 68 | end 69 | 70 | Index 'items_by_category' do 71 | Hash categories.id 72 | Ordered items.end_date, items.id 73 | 
Path categories.id, categories.items 74 | end 75 | 76 | Index 'category_list' do 77 | Hash categories.dummy 78 | Ordered categories.id 79 | Extra categories.name 80 | Path categories.id 81 | end 82 | 83 | Index 'region_list' do 84 | Hash regions.dummy 85 | Ordered regions.id 86 | Extra regions.name 87 | Path regions.id 88 | end 89 | 90 | Index 'regions' do 91 | Hash regions.id 92 | Extra regions.name 93 | Path regions.id 94 | end 95 | end 96 | -------------------------------------------------------------------------------- /experiments/rubis/fake.js: -------------------------------------------------------------------------------- 1 | var Table = require('mysql-faker').Table, 2 | insert = require('mysql-faker').insert; 3 | 4 | var categories = (new Table('categories', 500)); 5 | categories.lorem_words('name', 2); 6 | 7 | var regions = (new Table('regions', 50)); 8 | regions.lorem_words('name', 2); 9 | 10 | var users = (new Table('users', 200000)); 11 | users.name_firstName('firstname') 12 | .name_lastName('lastname') 13 | .random_uuid('nickname') 14 | .internet_password('password') 15 | .internet_email('email') 16 | .random_number('rating', {min: -50, max: 200}) 17 | .finance_amount('balance') 18 | .date_past('creation_date') 19 | .random_number('region', {min: 1, max: regions.count}); 20 | 21 | var items = (new Table('items', 2000000)); 22 | items.lorem_words('name') 23 | .lorem_paragraph('description') 24 | .finance_amount('initial_price') 25 | .random_number('quantity', {min: 0, max: 10}) 26 | .finance_amount('reserve_price') 27 | .finance_amount('buy_now') 28 | .random_number('nb_of_bids', {min: 0, max: 100}) 29 | .finance_amount('max_bid') 30 | .date_past('start_date') 31 | .date_past('end_date') 32 | .random_number('seller', {min: 1, max: users.count}) 33 | .random_number('category', {min: 1, max: categories.count}); 34 | 35 | var bids = (new Table('bids', 20000000)); 36 | bids.random_number('qty', {min: 1, max: 5}) 37 | .finance_amount('bid') 38 | 
.finance_amount('max_bid') 39 | .date_past('date') 40 | .random_number('user', {min: 1, max: users.count}) 41 | .random_number('item', {min: 1, max: items.count}); 42 | 43 | var comments = (new Table('comments', 10000000)); 44 | comments.random_number('rating', {min: -5, max: 5}) 45 | .date_past('date') 46 | .lorem_sentences('comment') 47 | .random_number('from_user', {min: 1, max: users.count}) 48 | .random_number('to_user', {min: 1, max: users.count}) 49 | .random_number('item', {min: 1, max: items.count}); 50 | 51 | var buy_now = (new Table('buynow', 2000000)); 52 | buy_now.random_number('qty', {min: 1, max: 3}) 53 | .date_past('date') 54 | .random_number('buyer', {min: 1, max: users.count}) 55 | .random_number('item', {min: 1, max: users.count}); 56 | 57 | insert([ 58 | categories, 59 | regions, 60 | users, 61 | items, 62 | bids, 63 | comments, 64 | buy_now 65 | ], { 66 | host: 'localhost', 67 | user: 'root', 68 | password: 'root', 69 | database: 'rubis' 70 | }, true); 71 | -------------------------------------------------------------------------------- /lib/nose/schema.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | # Simple DSL for constructing indexes 5 | class Schema 6 | attr_reader :model, :indexes 7 | 8 | def initialize(&block) 9 | @indexes = {} 10 | instance_eval(&block) if block_given? 
11 | end 12 | 13 | # Find the schema with the given name 14 | def self.load(name) 15 | filename = File.expand_path "../../../schemas/#{name}.rb", __FILE__ 16 | contents = File.read(filename) 17 | binding.eval contents, filename 18 | end 19 | 20 | # rubocop:disable MethodName 21 | 22 | # Set the model to be used by the schema 23 | # @return [void] 24 | def Model(name) 25 | @model = Model.load name 26 | NoSE::DSL.mixin_fields @model.entities, IndexDSL 27 | end 28 | 29 | # Add a simple index for an entity 30 | # @return [void] 31 | def SimpleIndex(entity) 32 | @indexes[entity] = @model[entity].simple_index 33 | end 34 | 35 | # Wrap commands for defining index attributes 36 | # @return [void] 37 | def Index(key, &block) 38 | # Apply the DSL 39 | dsl = IndexDSL.new(self) 40 | dsl.instance_eval(&block) if block_given? 41 | index = Index.new dsl.hash_fields, dsl.order_fields, dsl.extra, 42 | QueryGraph::Graph.from_path(dsl.path_keys), 43 | saved_key: key 44 | @indexes[index.key] = index 45 | end 46 | 47 | # rubocop:enable MethodName 48 | end 49 | 50 | # DSL for index creation within a schema 51 | class IndexDSL 52 | attr_reader :hash_fields, :order_fields, :extra, :path_keys 53 | 54 | def initialize(schema) 55 | @schema = schema 56 | @hash_fields = [] 57 | @order_fields = [] 58 | @extra = [] 59 | @path_keys = [] 60 | end 61 | 62 | # rubocop:disable MethodName 63 | 64 | # Define a list of hash fields 65 | # @return [void] 66 | def Hash(*fields) 67 | @hash_fields += fields.flatten 68 | end 69 | 70 | # Define a list of ordered fields 71 | # @return [void] 72 | def Ordered(*fields) 73 | @order_fields += fields.flatten 74 | end 75 | 76 | # Define a list of extra fields 77 | # @return [void] 78 | def Extra(*fields) 79 | @extra += fields.flatten 80 | end 81 | 82 | # Define the keys for the index path 83 | # @return [void] 84 | def Path(*keys) 85 | @path_keys += keys 86 | end 87 | 88 | # rubocop:enable MethodName 89 | end 90 | end 91 | 
-------------------------------------------------------------------------------- /models/rubis.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | # rubocop:disable all 3 | 4 | NoSE::Model.new do 5 | # Define entities along with the size and cardinality of their fields 6 | # as well as an estimated number of each entity 7 | 8 | (Entity 'categories' do 9 | ID 'id' 10 | String 'name', 20 11 | Integer 'dummy', count: 1 12 | end) * 50 13 | 14 | (Entity 'regions' do 15 | ID 'id' 16 | String 'name', 25 17 | Integer 'dummy', count: 1 18 | end) * 5 19 | 20 | (Entity 'users' do 21 | ID 'id' 22 | String 'firstname', 6 23 | String 'lastname', 7 24 | String 'nickname', 12 25 | String 'password', 15 26 | String 'email', 23 27 | Integer 'rating', count: 50 28 | Float 'balance', count: 10_000 29 | Date 'creation_date' 30 | end) * 2_000 31 | 32 | (Entity 'items' do 33 | ID 'id' 34 | String 'name', 19 35 | String 'description', 197 36 | Float 'initial_price' 37 | Integer 'quantity', count: 100 38 | Float 'reserve_price' 39 | Float 'buy_now' 40 | Integer 'nb_of_bids', count: 100 41 | Float 'max_bid' 42 | Date 'start_date' 43 | Date 'end_date' 44 | end) * 20_000 45 | 46 | (Entity 'bids' do 47 | ID 'id' 48 | Integer 'qty', count: 5 49 | Float 'bid' 50 | Date 'date' 51 | end) * 200_000 52 | 53 | (Entity 'comments' do 54 | ID 'id' 55 | Integer 'rating', count: 10 56 | Date 'date' 57 | String 'comment', 130 58 | end) * 100_000 59 | 60 | (Entity 'buynow' do 61 | ID 'id' 62 | Integer 'qty', count: 4 63 | Date 'date' 64 | end) * 40_000 65 | 66 | HasOne 'region', 'users', 67 | {'users' => 'regions'} 68 | 69 | HasOne 'seller', 'items_sold', 70 | {'items' => 'users'} 71 | 72 | HasOne 'category', 'items', 73 | {'items' => 'categories'} 74 | 75 | HasOne 'user', 'bids', 76 | {'bids' => 'users'} 77 | 78 | HasOne 'item', 'bids', 79 | {'bids' => 'items'} 80 | 81 | HasOne 'from_user', 'comments_sent', 82 | {'comments' => 'users'} 
83 | 84 | HasOne 'to_user', 'comments_received', 85 | {'comments' => 'users'} 86 | 87 | HasOne 'item', 'comments', 88 | {'comments' => 'items'} 89 | 90 | HasOne 'buyer', 'bought_now', 91 | {'buynow' => 'users'} 92 | 93 | HasOne 'item', 'bought_now', 94 | {'buynow' => 'items'} 95 | end 96 | 97 | # rubocop:enable all 98 | -------------------------------------------------------------------------------- /lib/nose/statements/delete.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | # A representation of a delete in the workload 5 | class Delete < Statement 6 | include StatementConditions 7 | include StatementSupportQuery 8 | 9 | def initialize(params, text, group: nil, label: nil) 10 | super params, text, group: group, label: label 11 | 12 | populate_conditions params 13 | end 14 | 15 | # Build a new delete from a provided parse tree 16 | # @return [Delete] 17 | def self.parse(tree, params, text, group: nil, label: nil) 18 | conditions_from_tree tree, params 19 | 20 | Delete.new params, text, group: group, label: label 21 | end 22 | 23 | # Produce the SQL text corresponding to this delete 24 | # @return [String] 25 | def unparse 26 | delete = "DELETE #{entity.name} " 27 | delete += "FROM #{from_path @key_path}" 28 | delete << where_clause 29 | 30 | delete 31 | end 32 | 33 | def ==(other) 34 | other.is_a?(Delete) && 35 | @graph == other.graph && 36 | entity == other.entity && 37 | @conditions == other.conditions 38 | end 39 | alias eql? == 40 | 41 | def hash 42 | @hash ||= [@graph, entity, @conditions].hash 43 | end 44 | 45 | # Index contains the entity to be deleted 46 | def modifies_index?(index) 47 | index.graph.entities.include? 
entity 48 | end 49 | 50 | # Specifies that deletes require deletion 51 | def requires_delete?(_index) 52 | true 53 | end 54 | 55 | # Get the support queries for deleting from an index 56 | def support_queries(index) 57 | return [] unless modifies_index? index 58 | select = (index.hash_fields + index.order_fields.to_set) - 59 | @conditions.each_value.map(&:field).to_set 60 | return [] if select.empty? 61 | 62 | support_queries = [] 63 | 64 | # Build a support query which gets the IDs of the entities being deleted 65 | graph = @graph.dup 66 | support_fields = select.select do |field| 67 | field.parent == entity 68 | end.to_set 69 | support_fields << entity.id_field \ 70 | unless @conditions.each_value.map(&:field).include? entity.id_field 71 | conditions = Hash[@conditions.map { |k, v| [k.dup, v.dup] }] 72 | 73 | support_queries << build_support_query(entity, index, graph, 74 | support_fields, conditions) 75 | support_queries.compact + support_queries_for_entity(index, select) 76 | end 77 | 78 | # The condition fields are provided with the deletion 79 | def given_fields 80 | @conditions.each_value.map(&:field) 81 | end 82 | end 83 | end 84 | -------------------------------------------------------------------------------- /nose.gemspec: -------------------------------------------------------------------------------- 1 | Gem::Specification.new do |s| 2 | s.name = 'nose' 3 | s.version = '0.1.4' 4 | s.license = 'GPL-3.0' 5 | s.summary = 'Schema design for NoSQL applications' 6 | s.author = 'Michael Mior' 7 | s.email = 'mmior@uwaterloo.ca' 8 | s.files = Dir['lib/**/*'] + 9 | Dir['templates/*'] + 10 | Dir['models/*'] + 11 | Dir['workloads/*'] + 12 | Dir['plans/*'] + 13 | Dir['schemas/*'] + 14 | Dir['data/**/*'] 15 | s.homepage = 'https://michael.mior.ca/projects/NoSE/' 16 | 17 | s.add_dependency 'faker', '~> 2.16.0', '>= 1.7.0' 18 | s.add_dependency 'formatador', '~> 0.2.5' 19 | s.add_dependency 'json-schema', '~> 2.8.0', '>= 2.8.0' 20 | s.add_dependency 'logging', '>= 
2.2', '< 2.4' 21 | s.add_dependency 'mipper', '~> 0.1.0' 22 | s.add_dependency 'parallel', '>= 1.20.1', '< 1.25.0' 23 | s.add_dependency 'parslet', '>= 1.8', '< 2.1' 24 | s.add_dependency 'pickup', '~> 0.0.11' 25 | s.add_dependency 'pry', '~> 0.13.1' 26 | s.add_dependency 'rake', '~> 12.3.2', '>= 12.0.0' 27 | s.add_dependency 'representable', '~> 3.2.0', '>= 3.0.0' 28 | s.add_dependency 'ruby-graphviz', '~> 1.2.2', '>= 1.2.0' 29 | s.add_dependency 'ruby-mysql', '~> 2.9.14', '>= 2.9.0' # for the proxy because it's pure Ruby 30 | s.add_dependency 'sequel', '>= 5.41', '< 5.80' 31 | s.add_dependency 'smarter_csv', '1.10.2' 32 | s.add_dependency 'sorted_set', '~> 1.0', '>= 1.0.3' 33 | 34 | # Required for Cassandra backend 35 | s.add_dependency 'cassandra-driver', '~> 3.2.5', '>= 3.1.0' 36 | 37 | # Required for MongoDB backend 38 | s.add_dependency 'mongo', '>= 2.14', '< 2.21' 39 | 40 | s.add_development_dependency 'fakefs', '~> 2.5.0' 41 | s.add_development_dependency 'memory_profiler', '~> 1.0.0' 42 | s.add_development_dependency 'pry-byebug', '~> 3.9.0' 43 | s.add_development_dependency 'pry-doc', '~> 1.5.0' 44 | s.add_development_dependency 'pry-stack_explorer', '~> 0.5.1' 45 | s.add_development_dependency 'ronn', '~> 0.7.3' 46 | s.add_development_dependency 'rspec', '~> 3.10.0' 47 | s.add_development_dependency 'rspec-core', '~> 3.10.0' 48 | s.add_development_dependency 'rspec-collection_matchers', '~> 1.2.0', '>= 1.1.0' 49 | s.add_development_dependency 'ruby-prof', '~> 1.4.2' 50 | s.add_development_dependency 'simplecov', '~> 0.22.0' 51 | s.add_development_dependency 'simplecov-cobertura', '~> 2.1' 52 | s.add_development_dependency 'yard', '~> 0.9.4' 53 | 54 | # Below for MRI only (TODO JRuby gemspec) 55 | s.add_dependency 'rbtree', '~> 0.4.2' # for more efficient SortedSet implementation 56 | s.add_dependency 'mysql2', '~> 0.5.2' # this is used for the loader for performance 57 | s.add_development_dependency 'pry-rescue', '~> 1.6.0' 58 | 
s.add_development_dependency 'binding_of_caller', '~> 0.8.0' 59 | end 60 | -------------------------------------------------------------------------------- /spec/backend_spec.rb: -------------------------------------------------------------------------------- 1 | module NoSE 2 | module Backend 3 | describe Backend::SortStatementStep do 4 | include_context 'entities' 5 | 6 | it 'can sort a list of results' do 7 | results = [ 8 | { 'User_Username' => 'Bob' }, 9 | { 'User_Username' => 'Alice' } 10 | ] 11 | step = Plans::SortPlanStep.new [user['Username']] 12 | 13 | step_class = Backend::SortStatementStep 14 | prepared = step_class.new nil, [], {}, step, nil, nil 15 | results = prepared.process nil, results 16 | 17 | expect(results).to eq [ 18 | { 'User_Username' => 'Alice' }, 19 | { 'User_Username' => 'Bob' } 20 | ] 21 | end 22 | end 23 | 24 | describe Backend::FilterStatementStep do 25 | include_context 'entities' 26 | 27 | it 'can filter results by an equality predicate' do 28 | results = [ 29 | { 'User_Username' => 'Alice' }, 30 | { 'User_Username' => 'Bob' } 31 | ] 32 | step = Plans::FilterPlanStep.new [user['Username']], nil 33 | query = Statement.parse 'SELECT User.* FROM User ' \ 34 | 'WHERE User.Username = "Bob"', workload.model 35 | 36 | step_class = Backend::FilterStatementStep 37 | prepared = step_class.new nil, [], {}, step, nil, nil 38 | results = prepared.process query.conditions, results 39 | 40 | expect(results).to eq [ 41 | { 'User_Username' => 'Bob' } 42 | ] 43 | end 44 | 45 | it 'can filter results by a range predicate' do 46 | results = [ 47 | { 'User_Username' => 'Alice' }, 48 | { 'User_Username' => 'Bob' } 49 | ] 50 | step = Plans::FilterPlanStep.new [], [user['Username']] 51 | query = Statement.parse 'SELECT User.* FROM User WHERE ' \ 52 | 'User.Username < "B" AND ' \ 53 | 'User.City = "New York"', workload.model 54 | 55 | step_class = Backend::FilterStatementStep 56 | prepared = step_class.new nil, [], {}, step, nil, nil 57 | results = 
prepared.process query.conditions, results 58 | 59 | expect(results).to eq [ 60 | { 'User_Username' => 'Alice' } 61 | ] 62 | end 63 | end 64 | 65 | describe Backend::FilterStatementStep do 66 | include_context 'entities' 67 | 68 | it 'can limit results' do 69 | results = [ 70 | { 'User_Username' => 'Alice' }, 71 | { 'User_Username' => 'Bob' } 72 | ] 73 | step = Plans::LimitPlanStep.new 1 74 | step_class = Backend::LimitStatementStep 75 | prepared = step_class.new nil, [], {}, step, nil, nil 76 | results = prepared.process({}, results) 77 | 78 | expect(results).to eq [ 79 | { 'User_Username' => 'Alice' } 80 | ] 81 | end 82 | end 83 | end 84 | end 85 | -------------------------------------------------------------------------------- /lib/nose/proxy.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | # Query processing proxies to transparently execute queries against a backend 5 | module Proxy 6 | # A proxy server to interpret our query language and implement query plans 7 | class ProxyBase 8 | attr_reader :logger 9 | def initialize(config, result, backend) 10 | @logger = Logging.logger['nose::proxy'] 11 | 12 | @result = result 13 | @backend = backend 14 | @config = config 15 | 16 | @continue = true 17 | end 18 | 19 | # Start the proxy server 20 | # @return [void] 21 | def start 22 | @logger.info "Starting server on port #{@config[:port]}" 23 | 24 | server_socket = TCPServer.new('127.0.0.1', @config[:port]) 25 | server_socket.listen(100) 26 | 27 | @read_sockets = [server_socket] 28 | @write_sockets = [] 29 | loop do 30 | break unless @continue && select_connection(server_socket) 31 | end 32 | end 33 | 34 | # @abstract Subclasses should process a new connection 35 | # on the given socket 36 | # :nocov: 37 | # @return [void] 38 | def handle_connection(_socket) 39 | fail NotImplementedError 40 | end 41 | # :nocov: 42 | 43 | # @abstract Subclasses should dispose of state associated 
with the socket 44 | # :nocov: 45 | # @return [void] 46 | def remove_connection(_socket) 47 | fail NotImplementedError 48 | end 49 | # :nocov: 50 | 51 | # Stop accepting connections 52 | # @return [void] 53 | def stop 54 | @continue = false 55 | end 56 | 57 | private 58 | 59 | # Select sockets which are available to be processed 60 | # @return [void] 61 | def select_connection(server_socket) 62 | read, write, error = IO.select @read_sockets, @write_sockets, 63 | @read_sockets + @write_sockets, 5 64 | return true if read.nil? 65 | 66 | # Check if we have a new incoming connection 67 | if read.include? server_socket 68 | accept_connection server_socket 69 | read.delete server_socket 70 | elsif error.include? server_socket 71 | @logger.error 'Server socket died' 72 | return false 73 | end 74 | 75 | # Remove all sockets which have errors 76 | error.each { |socket| remove_connection socket } 77 | @read_sockets -= error 78 | @write_sockets -= error 79 | 80 | # Handle connections on each available socket 81 | process_connections read + write 82 | end 83 | 84 | # Accept the new connection 85 | # @return [void] 86 | def accept_connection(server_socket) 87 | client_socket, = server_socket.accept 88 | @read_sockets << client_socket 89 | @write_sockets << client_socket 90 | end 91 | 92 | # Process all pending connections 93 | # @return [void] 94 | def process_connections(sockets) 95 | sockets.each do |socket| 96 | @write_sockets.delete socket 97 | @read_sockets.delete socket unless handle_connection socket 98 | end 99 | end 100 | end 101 | end 102 | end 103 | -------------------------------------------------------------------------------- /lib/nose/plans/filter.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | module Plans 5 | # A query plan performing a filter without an index 6 | class FilterPlanStep < PlanStep 7 | attr_reader :eq, :range 8 | 9 | def initialize(eq, range, state = 
nil) 10 | @eq = eq 11 | @range = range 12 | super() 13 | 14 | return if state.nil? 15 | @state = state.dup 16 | update_state 17 | @state.freeze 18 | end 19 | 20 | # Two filtering steps are equal if they filter on the same fields 21 | # @return [Boolean] 22 | def ==(other) 23 | other.instance_of?(self.class) && \ 24 | @eq == other.eq && @range == other.range 25 | end 26 | 27 | def hash 28 | [@eq.map(&:id), @range.nil? ? nil : @range.id].hash 29 | end 30 | 31 | # :nocov: 32 | def to_color 33 | "#{super} #{@eq.to_color} #{@range.to_color} " + 34 | begin 35 | "#{@parent.state.cardinality} " \ 36 | "-> #{state.cardinality}" 37 | rescue NoMethodError 38 | '' 39 | end 40 | end 41 | # :nocov: 42 | 43 | # Check if filtering can be done (we have all the necessary fields) 44 | def self.apply(parent, state) 45 | # Get fields and check for possible filtering 46 | filter_fields, eq_filter, range_filter = filter_fields parent, state 47 | return nil if filter_fields.empty? 48 | 49 | FilterPlanStep.new eq_filter, range_filter, state \ 50 | if required_fields?(filter_fields, parent) 51 | end 52 | 53 | # Get the fields we can possibly filter on 54 | def self.filter_fields(parent, state) 55 | eq_filter = state.eq.select { |field| parent.fields.include? field } 56 | filter_fields = eq_filter.dup 57 | if state.range && parent.fields.include?(state.range) 58 | range_filter = state.range 59 | filter_fields << range_filter 60 | else 61 | range_filter = nil 62 | end 63 | 64 | [filter_fields, eq_filter, range_filter] 65 | end 66 | private_class_method :filter_fields 67 | 68 | # Check that we have all the fields we are filtering 69 | # @return [Boolean] 70 | def self.required_fields?(filter_fields, parent) 71 | filter_fields.map do |field| 72 | next true if parent.fields.member? field 73 | 74 | # We can also filter if we have a foreign key 75 | # XXX for now we assume this value is the same 76 | next unless field.is_a? IDField 77 | parent.fields.any? 
do |pfield| 78 | pfield.is_a?(ForeignKeyField) && pfield.entity == field.parent 79 | end 80 | end.all? 81 | end 82 | private_class_method :required_fields? 83 | 84 | private 85 | 86 | # Apply the filters and perform a uniform estimate on the cardinality 87 | # @return [void] 88 | def update_state 89 | @state.eq -= @eq 90 | @state.cardinality *= @eq.map { |field| 1.0 / field.cardinality } \ 91 | .inject(1.0, &:*) 92 | return unless @range 93 | 94 | @state.range = nil 95 | @state.cardinality *= 0.1 96 | end 97 | end 98 | end 99 | end 100 | -------------------------------------------------------------------------------- /lib/nose/statements/update.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | # A representation of an update in the workload 5 | class Update < Statement 6 | include StatementConditions 7 | include StatementSettings 8 | include StatementSupportQuery 9 | 10 | def initialize(params, text, group: nil, label: nil) 11 | super params, text, group: group, label: label 12 | 13 | populate_conditions params 14 | @settings = params[:settings] 15 | end 16 | 17 | # Build a new update from a provided parse tree 18 | # @return [Update] 19 | def self.parse(tree, params, text, group: nil, label: nil) 20 | conditions_from_tree tree, params 21 | settings_from_tree tree, params 22 | 23 | Update.new params, text, group: group, label: label 24 | end 25 | 26 | # Produce the SQL text corresponding to this update 27 | # @return [String] 28 | def unparse 29 | update = "UPDATE #{entity.name} " 30 | update += "FROM #{from_path @key_path} " 31 | update << settings_clause 32 | update << where_clause 33 | 34 | update 35 | end 36 | 37 | def ==(other) 38 | other.is_a?(Update) && 39 | @graph == other.graph && 40 | entity == other.entity && 41 | @settings == other.settings && 42 | @conditions == other.conditions 43 | end 44 | alias eql? 
== 45 | 46 | def hash 47 | @hash ||= [@graph, entity, @settings, @conditions].hash 48 | end 49 | 50 | # Specifies that updates require insertion 51 | def requires_insert?(_index) 52 | true 53 | end 54 | 55 | # Specifies that updates require deletion 56 | def requires_delete?(index) 57 | !(settings.map(&:field).to_set & 58 | (index.hash_fields + index.order_fields.to_set)).empty? 59 | end 60 | 61 | # Get the support queries for updating an index 62 | # @return [Array] 63 | def support_queries(index) 64 | return [] unless modifies_index? index 65 | 66 | # Get the updated fields and check if an update is necessary 67 | set_fields = settings.map(&:field).to_set 68 | 69 | # We only need to fetch all the fields if we're updating a key 70 | updated_key = !(set_fields & 71 | (index.hash_fields + index.order_fields)).empty? 72 | 73 | select = if updated_key 74 | index.all_fields 75 | else 76 | index.hash_fields + index.order_fields 77 | end - set_fields - @conditions.each_value.map(&:field) 78 | return [] if select.empty? 79 | 80 | support_queries = [] 81 | 82 | graph = @graph.dup 83 | support_fields = select.select do |field| 84 | field.parent == entity 85 | end.to_set 86 | support_fields << entity.id_field \ 87 | unless @conditions.each_value.map(&:field).include? 
entity.id_field 88 | 89 | support_queries << build_support_query(entity, index, graph, 90 | support_fields, conditions) 91 | support_queries.compact + support_queries_for_entity(index, select) 92 | end 93 | 94 | # The condition fields are provided with the update 95 | # Note that we don't include the settings here because we 96 | # care about the previously existing values in the database 97 | def given_fields 98 | @conditions.each_value.map(&:field) 99 | end 100 | end 101 | end 102 | -------------------------------------------------------------------------------- /lib/nose/model.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require_relative 'model/entity' 4 | require_relative 'model/fields' 5 | 6 | require 'graphviz' 7 | 8 | module NoSE 9 | # A conceptual data model of a set of entities 10 | class Model 11 | # The subdirectory models are loaded from 12 | LOAD_PATH = 'models' 13 | include Loader 14 | 15 | attr_reader :entities 16 | 17 | def initialize(&block) 18 | @entities = {} 19 | 20 | # Apply the DSL 21 | WorkloadDSL.new(self).instance_eval(&block) if block_given? 22 | end 23 | 24 | # Compare all entities 25 | # @return [Boolean] 26 | def ==(other) 27 | other.is_a?(Model) && @entities = other.entities 28 | end 29 | alias eql? == 30 | 31 | # Retrieve an entity by name 32 | # @return [Entity] 33 | def [](name) 34 | return @entities[name] if @entities.key? name 35 | fail EntityNotFound 36 | end 37 | 38 | # Add an {Entity} to the workload 39 | # @return [Entity] 40 | def add_entity(entity) 41 | fail InvalidEntity, 'no primary key defined' if entity.id_field.nil? 
42 | @entities[entity.name] = entity 43 | end 44 | 45 | # Find a field given an +Enumerable+ of identifiers 46 | # @return [Field] 47 | def find_field(field) 48 | if field.count > 2 49 | find_field_chain field 50 | else 51 | find_entity_field(*field) 52 | end 53 | end 54 | 55 | # Output a PNG representation of entities in the model 56 | def output(format, filename, include_fields = false) 57 | graph = GraphViz.new :G, type: :digraph 58 | nodes = add_graph_nodes graph, include_fields 59 | add_graph_edges graph, nodes 60 | 61 | graph.output(**{ format => filename }) 62 | end 63 | 64 | private 65 | 66 | # Add the nodes (entities) to a GraphViz object 67 | def add_graph_nodes(graph, include_fields) 68 | Hash[@entities.each_value.map do |entity| 69 | label = "#{entity.name}\n" 70 | if include_fields 71 | label += entity.fields.each_value.map do |field| 72 | type = field.class.name.sub(/^NoSE::(.*?)(Field)?$/, '\1') 73 | "#{field.name}: #{type}" 74 | end.join("\n") 75 | end 76 | 77 | [entity.name, graph.add_nodes(label)] 78 | end] 79 | end 80 | 81 | # Add the edges (foreign keys) to a GraphViz object 82 | def add_graph_edges(graph, nodes) 83 | @entities.each_value do |entity| 84 | entity.foreign_keys.each_value do |key| 85 | graph.add_edges nodes[entity.name], nodes[key.entity.name] 86 | end 87 | end 88 | end 89 | 90 | # Find a field in an entity where the entity may be a string or an object 91 | def find_field_chain(field) 92 | # Do a foreign key lookup 93 | field = field.dup 94 | key_field = @entities[field[0]][field[1]] 95 | field[0..1] = key_field ? 
key_field.entity.name : field[1] 96 | find_field field 97 | end 98 | 99 | # Find a field in an entity where the entity may be a string or an object 100 | def find_entity_field(entity, field) 101 | entity = entities[entity] if entity.is_a?(String) 102 | entity[field] 103 | end 104 | end 105 | 106 | # Raised when looking up an entity in the workload which does not exist 107 | class EntityNotFound < StandardError 108 | end 109 | 110 | # Raised when attempting to add an invalid entity to a workload 111 | class InvalidEntity < StandardError 112 | end 113 | end 114 | -------------------------------------------------------------------------------- /workloads/rubis_synthetic.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | NoSE::Workload.new do 4 | Model 'rubis' 5 | 6 | # Define queries and their relative weights 7 | 8 | Q 'SELECT comments.date, comments.comment FROM comments.item ' \ 9 | 'WHERE item.id = ? ORDER BY comments.date' 10 | # 1. SELECT item_id as E_item, date as O_date, from_user_id, date, comment 11 | # FROM comments; 12 | # I2227598752 13 | 14 | Q 'SELECT users.id, users.nickname, users.rating FROM users.region ' \ 15 | 'WHERE region.id = ? ORDER BY users.rating LIMIT 50' 16 | # 2. SELECT region as E_region, rating as O_rating, id, nickname, rating 17 | # FROM users; 18 | # I1083340549 19 | 20 | Q 'SELECT items.id, items.name, items.description, items.max_bid ' \ 21 | 'FROM items.seller.region WHERE region.id = ? LIMIT 50' 22 | # 3. SELECT region as E_region, items.id, name, description, max_bid FROM 23 | # items join users on items.seller=users.id WHERE items.seller.region; 24 | # I4186334592 25 | 26 | Q 'SELECT comments.date, comments.comment FROM ' \ 27 | 'comments.item.seller.region WHERE item.quantity = ? AND region.id = ? ' \ 28 | 'LIMIT 50' 29 | # 4. 
SELECT category AS E_category, region as E_region, from_user_id, date, 30 | # comment FROM comments join items on comments.item_id=items.id join 31 | # users on items.seller=users.id; 32 | # I3254083673 33 | 34 | Q 'SELECT bids.bid, bids.date FROM bids.item.seller.region WHERE ' \ 35 | 'region.id = ? AND item.quantity = ? AND ' \ 36 | 'item.end_date < "2015-06-11T14:00:00-04:00"' 37 | # 5. SELECT region as E_region, category as E_category, 38 | # end_date as O_end_date, bids.id as O_id, bid, date FROM bids join 39 | # items on bids.item_id=items.id join users on items.seller=users.id 40 | # I1184534160 41 | 42 | Q 'SELECT comments.comment, comments.date FROM comments.item.seller ' \ 43 | 'WHERE seller.id = ?' 44 | # 6. SELECT seller AS E_seller, comments.id AS O_id, from_user_id, comment, 45 | # date FROM comments join items on comments.item_id=items.id; 46 | # I638854407 47 | 48 | Q 'SELECT items.id, items.name FROM items.category WHERE category.id = ? ' \ 49 | 'LIMIT 1000' 50 | # 7. SELECT category as E_category, id, name FROM items; 51 | # I3358488952 52 | 53 | Q 'SELECT comments.comment FROM comments.item.category ' \ 54 | 'WHERE category.id = ? ORDER BY comments.date LIMIT 100' 55 | # 8. SELECT category AS E_category, date AS O_date, comment FROM comments 56 | # join items ON comments.item_id=items.id; 57 | # I127205473 58 | 59 | # RegisterItem 60 | Q 'INSERT INTO items SET id=?, name=?, description=?, initial_price=?, ' \ 61 | 'quantity=?, reserve_price=?, buy_now=?, nb_of_bids=0, max_bid=0, ' \ 62 | 'start_date=?, end_date=?' 63 | Q 'CONNECT items(?) TO category(?)' 64 | Q 'CONNECT items(?) TO seller(?)' 65 | 66 | # RegisterUser 67 | Q 'INSERT INTO users SET id=?, firstname=?, lastname=?, nickname=?, ' \ 68 | 'password=?, email=?, rating=0, balance=0, creation_date=?' 69 | Q 'CONNECT users(?) TO region(?)' 70 | 71 | # StoreBid 72 | Q 'INSERT INTO bids SET id=?, qty=?, bid=?, date=?' 73 | Q 'CONNECT bids(?) TO item(?)' 74 | Q 'CONNECT bids(?) 
TO user(?)' 75 | Q 'SELECT items.nb_of_bids FROM items WHERE items.id=?' 76 | Q 'UPDATE items SET nb_of_bids=? WHERE items.id=?' 77 | 78 | # StoreComment 79 | Q 'UPDATE users SET rating=? WHERE users.id=?' 80 | Q 'INSERT INTO comments SET id=?, rating=?, date=?, comment=?' 81 | # Q 'CONNECT comments(?) TO to_user(?)' 82 | Q 'CONNECT comments(?) TO from_user(?)' 83 | Q 'CONNECT comments(?) TO item(?)' 84 | end 85 | -------------------------------------------------------------------------------- /spec/query_graph_spec.rb: -------------------------------------------------------------------------------- 1 | module NoSE 2 | module QueryGraph 3 | describe QueryGraph do 4 | include_context 'entities' 5 | 6 | context 'when comparing with an isomorphic graph' do 7 | let(:graph) { Graph.new [], [user, tweet, user['Tweets']] } 8 | let(:graph2) { Graph.new [], [tweet, user, tweet['User']] } 9 | 10 | it 'compares equal with an isomorphic graph' do 11 | expect(graph).to eq graph2 12 | end 13 | 14 | it 'has the same unique edges' do 15 | expect(graph.unique_edges).to eq graph2.unique_edges 16 | end 17 | end 18 | 19 | context 'when producing subgraphs' do 20 | it 'produces only itself for a single entity graph' do 21 | graph = Graph.new([user]) 22 | expect(graph.subgraphs).to match_array [graph] 23 | end 24 | 25 | it 'produces single node graphs when splitting with two nodes' do 26 | graph = Graph.new [], [user, tweet, user['Tweets']] 27 | subgraphs = graph.subgraphs.to_a 28 | expect(subgraphs).to match_array [ 29 | graph, 30 | Graph.new([user]), 31 | Graph.new([tweet]) 32 | ] 33 | end 34 | 35 | it 'produces all paths when splitting a graph' do 36 | graph = Graph.new [], [user, tweet, user['Tweets']], 37 | [tweet, link, tweet['Link']] 38 | subgraphs = graph.subgraphs.to_a 39 | expect(subgraphs).to match_array [ 40 | graph, 41 | Graph.new([user]), 42 | Graph.new([tweet]), 43 | Graph.new([link]), 44 | Graph.new([], [user, tweet, user['Tweets']]), 45 | Graph.new([], [tweet, link, 
tweet['Link']]) 46 | ] 47 | end 48 | end 49 | 50 | context 'when converting to a path' do 51 | it 'can convert single node graphs' do 52 | graph = Graph.new [user] 53 | expect(graph.to_path(user)).to eq KeyPath.new([user.id_field]) 54 | end 55 | 56 | it 'can convert longer paths' do 57 | graph = Graph.new [], [user, tweet, user['Tweets']] 58 | expect(graph.to_path(user)).to eq KeyPath.new([user.id_field, 59 | user['Tweets']]) 60 | end 61 | end 62 | 63 | context 'when converting from a path' do 64 | it 'converts empty paths to empty graphs' do 65 | path = KeyPath.new 66 | expect(Graph.from_path(path)).to eq Graph.new 67 | end 68 | 69 | it 'converts single entity paths' do 70 | path = KeyPath.new [user.id_field] 71 | expect(Graph.from_path(path)).to eq Graph.new([user]) 72 | end 73 | 74 | it 'converts path with multiple entities' do 75 | path = KeyPath.new [user.id_field, user['Tweets']] 76 | expect(Graph.from_path(path)).to eq \ 77 | Graph.new([], [user, tweet, user['Tweets']]) 78 | end 79 | end 80 | 81 | it 'can find the longest path through a graph' do 82 | graph = Graph.new [], [user, tweet, user['Tweets']], 83 | [tweet, link, tweet['Link']] 84 | expect(graph.longest_path.entities).to eq([user, tweet, link]) 85 | end 86 | 87 | it 'can split graphs at a given entity' do 88 | graph = Graph.new [], [user, tweet, user['Tweets']], 89 | [tweet, link, tweet['Link']] 90 | expect(graph.split(tweet)).to eq [ 91 | Graph.new([user]), 92 | Graph.new([link]) 93 | ] 94 | end 95 | end 96 | end 97 | end 98 | -------------------------------------------------------------------------------- /spec/workload_spec.rb: -------------------------------------------------------------------------------- 1 | module NoSE 2 | describe Workload do 3 | subject(:workload) { Workload.new } 4 | let(:entity) { Entity.new('Foo') << field } 5 | let(:field) { Fields::IDField.new('Id') } 6 | 7 | before(:each) do 8 | workload.model.add_entity entity 9 | end 10 | 11 | context 'when adding items' do 12 | it 
'holds entities' do 13 | expect(workload.model.entities).to have(1).item 14 | expect(workload.model['Foo']).to be entity 15 | end 16 | 17 | it 'automatically parses queries' do 18 | valid_query = Statement.parse 'SELECT Foo.Id FROM Foo ' \ 19 | 'WHERE Foo.Id = ?', workload.model 20 | workload.add_statement valid_query 21 | 22 | expect(workload.queries).to have(1).item 23 | expect(workload.queries.first).to be_a Query 24 | end 25 | 26 | it 'only accepts entities and queries' do 27 | expect { workload << 3 }.to raise_error TypeError 28 | end 29 | end 30 | 31 | it 'can find statements with a given tag' do 32 | query = Statement.parse 'SELECT Foo.Id FROM Foo WHERE Foo.Id = ? -- foo', 33 | workload.model 34 | workload.add_statement query 35 | 36 | expect(workload.find_with_tag 'foo').to eq(query) 37 | end 38 | 39 | it 'can find fields on entities from queries' do 40 | expect(workload.model.find_field %w(Foo Id)).to be field 41 | end 42 | 43 | it 'can find fields which traverse foreign keys' do 44 | other_entity = Entity.new 'Bar' 45 | other_field = Fields::IDField.new 'Quux' 46 | other_entity << other_field 47 | workload.model.add_entity other_entity 48 | 49 | entity << Fields::ForeignKeyField.new('Baz', other_entity) 50 | 51 | expect(workload.model.find_field %w(Foo Baz Quux)).to be other_field 52 | end 53 | 54 | it 'raises an exception for nonexistent entities' do 55 | expect { workload.model['Bar'] }.to raise_error EntityNotFound 56 | end 57 | 58 | it 'can produce an image of itself' do 59 | expect_any_instance_of(GraphViz).to \ 60 | receive(:output).with(png: '/tmp/rubis.png') 61 | workload.model.output :png, '/tmp/rubis.png' 62 | end 63 | 64 | it 'can remove updates' do 65 | entity << Fields::IntegerField.new('Bar') 66 | 67 | valid_query = Statement.parse 'SELECT Foo.Id FROM Foo WHERE Foo.Id = ?', 68 | workload.model 69 | workload.add_statement valid_query 70 | update = Statement.parse 'UPDATE Foo SET Bar = ? 
WHERE Foo.Id = ?', 71 | workload.model 72 | workload.add_statement update 73 | 74 | workload.remove_updates 75 | expect(workload.queries).not_to be_empty 76 | expect(workload.updates).to be_empty 77 | end 78 | 79 | it 'can group statements' do 80 | query1 = 'SELECT Foo.Bar FROM Foo WHERE Foo.Id = ?' 81 | query2 = 'SELECT Foo.Baz FROM Foo WHERE Foo.Id = ?' 82 | 83 | workload = Workload.new do 84 | Entity 'Foo' do 85 | ID 'Id' 86 | String 'Bar' 87 | String 'Baz' 88 | end 89 | 90 | Group 'Test1', 0.5 do 91 | Q query1 92 | end 93 | 94 | Group 'Test2', 0.5 do 95 | Q query2 96 | end 97 | end 98 | 99 | expect(workload.statement_weights).to eq( 100 | Statement.parse(query1, workload.model) => 0.5, 101 | Statement.parse(query2, workload.model) => 0.5 102 | ) 103 | end 104 | end 105 | end 106 | -------------------------------------------------------------------------------- /lib/nose/loader/sql.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'sequel' 4 | 5 | module NoSE 6 | module Loader 7 | # Load data from a MySQL database into a backend 8 | class SqlLoader < LoaderBase 9 | def initialize(workload = nil, backend = nil) 10 | @logger = Logging.logger['nose::loader::sqlloader'] 11 | 12 | @workload = workload 13 | @backend = backend 14 | end 15 | 16 | # Load a generated set of indexes with data from MySQL 17 | def load(indexes, config, show_progress = false, limit = nil, 18 | skip_existing = true) 19 | indexes.map!(&:to_id_graph).uniq! 
if @backend.by_id_graph 20 | 21 | # XXX Assuming backend is thread-safe 22 | Parallel.each(indexes, in_threads: 2) do |index| 23 | client = new_client config 24 | 25 | # Skip this index if it's not empty 26 | if skip_existing && !@backend.index_empty?(index) 27 | @logger.info "Skipping index #{index.inspect}" if show_progress 28 | next 29 | end 30 | @logger.info index.inspect if show_progress 31 | 32 | query = index_sql client, index, limit 33 | 34 | result_chunk = [] 35 | query.each do |result| 36 | result = Hash[result.map { |k, v| [k.to_s, v] }] 37 | result_chunk.push result 38 | if result_chunk.length >= 100 39 | @backend.index_insert_chunk index, result_chunk 40 | result_chunk = [] 41 | end 42 | end 43 | @backend.index_insert_chunk index, result_chunk \ 44 | unless result_chunk.empty? 45 | end 46 | end 47 | 48 | private 49 | 50 | # Create a new client from the given configuration 51 | def new_client(config) 52 | Sequel.connect config[:uri] 53 | end 54 | 55 | # Get all the fields selected by this index 56 | # @return [Array] 57 | def index_sql_select(index) 58 | fields = index.hash_fields.to_a + index.order_fields + index.extra.to_a 59 | 60 | fields.map do |field| 61 | "#{field.parent.name}__#{field.name}___" \ 62 | "#{field.parent.name}_#{field.name}".to_sym 63 | end 64 | end 65 | 66 | # Get the list of tables along with the join condition 67 | # for a query to fetch index data 68 | def index_sql_tables(index) 69 | # Create JOIN statements 70 | tables = index.graph.entities.map { |entity| entity.name.to_sym } 71 | return [tables, []] if index.graph.size == 1 72 | 73 | keys = index.path.each_cons(2).map do |_prev_key, key| 74 | is_many = key.relationship == :many 75 | key = key.reverse if is_many 76 | fields = [key.entity.id_field.name.to_sym, key.name.to_sym] 77 | fields = fields.reverse if is_many 78 | Hash[[fields]] 79 | end 80 | 81 | [tables, keys] 82 | end 83 | 84 | # Construct a SQL statement to fetch the data to populate this index 85 | def 
index_sql(client, index, limit = nil) 86 | # Get all the necessary fields 87 | select = index_sql_select index 88 | 89 | # Construct the join condition 90 | tables, keys = index_sql_tables index 91 | 92 | query = client[tables.first] 93 | keys.map.with_index do |key, i| 94 | query = query.join tables[i + 1], key 95 | end 96 | 97 | query = query.select(*select) 98 | query = query.limit limit unless limit.nil? 99 | 100 | @logger.debug { query.sql } 101 | query 102 | end 103 | end 104 | end 105 | end 106 | -------------------------------------------------------------------------------- /experiments/rubis/README.md: -------------------------------------------------------------------------------- 1 | # RUBiS Experiments 2 | 3 | This directory contains instructions and various scripts for running a performance analysis on different RUBiS schemas. 4 | Currently these experiments are run against the Cassandra backend using the MySQL loader to populate the column families. 5 | You will need to configure a Cassandra cluster with a keyspace named `rubis` and a MySQL cluster with a database named `rubis`. 6 | Once this is done, initialize `nose.yml` in the root of the repository with the configuration below. 7 | Note that you will need to edit the configuration with the correct connection information for Cassandra and MySQL. 8 | 9 | ```yaml 10 | backend: 11 | name: cassandra 12 | hosts: 13 | - 10.0.0.2 14 | port: 9042 15 | keyspace: rubis 16 | cost_model: 17 | name: cassandra 18 | 19 | index_cost: 0.0078395645 20 | partition_cost: 0.0013692786 21 | row_cost: 1.17093638386496e-005 22 | delete_cost: 0.0013287903 23 | insert_cost: 0.013329108 24 | loader: 25 | name: mysql 26 | directory: /tmp/csv 27 | host: 127.0.0.1 28 | database: rubis 29 | username: root 30 | password: root 31 | ``` 32 | 33 | First create the RUBiS schema in MySQL. 
 34 | 35 | mysql -uroot -proot -Drubis < rubis-schema.sql 36 | mysql -uroot -proot -Drubis < rubis-update.sql 37 | 38 | To populate the MySQL database with some test data, we use the [mysql-faker](https://www.npmjs.com/package/mysql-faker) Node.js package. 39 | This package does not use the MySQL configuration in `nose.yml`, so it may need to be manually edited. 40 | Next, install mysql-faker and populate the database. 41 | 42 | npm install 43 | node fake.js 44 | 45 | Once this script finishes, we are ready to load data in Cassandra. 46 | At this point, you can use either one of the manually-defined schemas, `rubis_baseline` or `rubis_expert`, or use a JSON results file output by `nose search`. 47 | We refer to the choice of schema to use as `SCHEMA` for the remainder of the instructions. 48 | Now we can create the Cassandra column families and load the data from MySQL. 49 | This step may take several hours to complete. 50 | 51 | bundle exec nose create SCHEMA 52 | bundle exec nose load SCHEMA 53 | 54 | Since the experiments are destructive (i.e. they modify data in the database), it's a good idea to [take a snapshot](https://docs.datastax.com/en/cassandra/2.0/cassandra/operations/ops_backup_restore_c.html) before continuing. 55 | Finally, experiments can be run using `nose execute` for a manually-defined schema or `nose benchmark` for a schema generated with `nose search`. 56 | 57 | ## Running multiple experiments 58 | 59 | As mentioned above, experiments are destructive since updates modify the populated data. 60 | The easiest way to run multiple experiments is to take a snapshot after populating the data but before running the first experiment. 61 | 62 | nodetool snapshot rubis -t SNAPSHOT_NAME 63 | 64 | The script below will restore the snapshot, at which point you will be ready to run another experiment. 65 | Be sure to replace `SCHEMA` and `SNAPSHOT_NAME` with the appropriate values.
66 | 67 | ```bash 68 | # Drop and recreate all tables 69 | for cf in $(cqlsh 10.0.0.2 -k rubis -f <(echo 'DESCRIBE COLUMNFAMILIES') | tr ' ' '\n' | grep -Ev '^$'); do 70 | cqlsh 10.0.0.2 -k rubis -f <(echo "DROP TABLE $cf;") 71 | done 72 | 73 | bundle exec nose create SCHEMA 74 | 75 | # Restore snapshot 76 | for ssdir in $(find /ssd1/mmior/cassandra/data/rubis_big/ -wholename '*/snapshots/SNAPSHOT_NAME' -type d); do 77 | for file in $(find "$ssdir/" -type f | rev | cut -d/ -f1 | rev); do 78 | sudo ln "$ssdir/$file" "$ssdir/../../$file" 79 | done 80 | done 81 | 82 | # Refresh column families 83 | for cf in $(cqlsh 10.0.0.2 -k rubis -f <(echo 'DESCRIBE COLUMNFAMILIES') | tr ' ' '\n' | grep -Ev '^$'); do 84 | nodetool refresh rubis $cf 85 | done 86 | ``` 87 | -------------------------------------------------------------------------------- /lib/nose/loader/csv.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'formatador' 4 | require 'smarter_csv' 5 | require 'zlib' 6 | 7 | module NoSE 8 | module Loader 9 | # Load data into an index from a set of CSV files 10 | class CsvLoader < LoaderBase 11 | def initialize(workload = nil, backend = nil) 12 | super 13 | 14 | @logger = Logging.logger['nose::loader::csvloader'] 15 | end 16 | 17 | # Load data for all the indexes 18 | def load(indexes, config, show_progress = false, limit = nil, 19 | skip_existing = true) 20 | indexes.map!(&:to_id_graph).uniq! 
if @backend.by_id_graph 21 | 22 | simple_indexes = find_simple_indexes indexes, skip_existing 23 | simple_indexes.each do |entity, simple_index_list| 24 | filename = File.join config[:directory], "#{entity.name}.csv" 25 | total_rows = (limit || 0) - 1 # account for header row 26 | File.open(filename) { |file| file.each_line { total_rows += 1 } } 27 | 28 | progress = initialize_progress entity, simple_index_list, 29 | total_rows if show_progress 30 | load_file_indexes filename, entity, simple_index_list, progress 31 | end 32 | end 33 | 34 | private 35 | 36 | # Find the simple indexes we should populate 37 | # @return [Hash] 38 | def find_simple_indexes(indexes, skip_existing) 39 | simple_indexes = indexes.select do |index| 40 | index.graph.size == 1 && 41 | !(skip_existing && !@backend.index_empty?(index)) 42 | end 43 | 44 | simple_indexes.group_by do |index| 45 | index.hash_fields.first.parent 46 | end 47 | end 48 | 49 | # Initialize a progress bar to reporting loading results 50 | # @return [Formatador::ProgressBar] 51 | def initialize_progress(entity, simple_index_list, total_rows) 52 | @logger.info "Loading simple indexes for #{entity.name}" 53 | @logger.info simple_index_list.map(&:key).join(', ') 54 | 55 | Formatador.new.redisplay_progressbar 0, total_rows 56 | Formatador::ProgressBar.new total_rows, started_at: Time.now.utc 57 | end 58 | 59 | # Load all indexes for a given file 60 | # @return [void] 61 | def load_file_indexes(filename, entity, simple_index_list, progress) 62 | SmarterCSV.process(filename, 63 | downcase_header: false, 64 | chunk_size: 1000, 65 | convert_values_to_numeric: false) do |chunk| 66 | Parallel.each(chunk.each_slice(100), 67 | finish: (lambda do |_, _, _| 68 | next if progress.nil? 
69 | inc = [progress.total - progress.current, 100].min 70 | progress.increment inc 71 | end)) do |minichunk| 72 | load_simple_chunk minichunk, entity, simple_index_list 73 | end 74 | end 75 | end 76 | 77 | # Load a chunk of data from a simple entity index 78 | # @return [void] 79 | def load_simple_chunk(chunk, entity, indexes) 80 | # Prefix all hash keys with the entity name and convert values 81 | chunk.map! do |row| 82 | index_row = {} 83 | row.each_key do |key| 84 | field_class = entity[key.to_s].class 85 | value = field_class.value_from_string row[key] 86 | index_row["#{entity.name}_#{key}"] = value 87 | end 88 | 89 | index_row 90 | end 91 | 92 | # Insert the batch into the index 93 | indexes.each do |index| 94 | @backend.index_insert_chunk index, chunk 95 | end 96 | end 97 | end 98 | end 99 | end 100 | -------------------------------------------------------------------------------- /lib/nose/model/entity.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | # A representation of an object in the conceptual data model 5 | class Entity 6 | attr_reader :fields 7 | attr_reader :foreign_keys, :name 8 | attr_accessor :count 9 | 10 | def initialize(name, &block) 11 | @name = name 12 | @fields = {} 13 | @foreign_keys = {} 14 | @count = 1 15 | 16 | # Precompute the hash 17 | hash 18 | 19 | # Apply the DSL 20 | EntityDSL.new(self).instance_eval(&block) if block_given? 21 | end 22 | 23 | # :nocov: 24 | # @return [String] 25 | def to_color 26 | "[light_blue]#{@name}[/] [#{fields.each_key.map(&:to_color).join ', '}]" 27 | end 28 | # :nocov: 29 | 30 | # Compare by name 31 | # @return [Boolean] 32 | def ==(other) 33 | @name == other.instance_variable_get(:@name) 34 | end 35 | alias eql? 
== 36 | 37 | # The hash is based on the name of the entity and its fields 38 | # @return [Integer] 39 | def hash 40 | @hash ||= @name.hash 41 | end 42 | 43 | # Get the key fields for the entity 44 | # @return [Fields::IDField>] 45 | def id_field 46 | fields.each_value.find(&:primary_key?) 47 | end 48 | 49 | # Adds a {Fields::Field} to the entity 50 | # @return [self] the current entity to allow chaining 51 | def <<(field, freeze: true) 52 | if field.is_a? Fields::ForeignKeyField 53 | @foreign_keys[field.name] = field 54 | else 55 | @fields[field.name] = field 56 | end 57 | 58 | field.instance_variable_set(:@parent, self) 59 | field.hash 60 | field.freeze if freeze 61 | 62 | self 63 | end 64 | 65 | # Shortcut for {#count=} 66 | # @return [Entity] 67 | def *(other) 68 | fail TypeError, 'count must be an integer' unless other.is_a? Integer 69 | @count = other 70 | 71 | self 72 | end 73 | 74 | # Get the field on the entity with the given name 75 | # @return [Field] 76 | def [](field_name) 77 | field = @fields[field_name] || @foreign_keys[field_name] 78 | fail FieldNotFound if field.nil? 79 | field 80 | end 81 | 82 | # Return true if the entity contains a field with the given name 83 | def field?(field) 84 | @fields.key? 
field 85 | end 86 | 87 | # Generate a hash with random values for fields in the entity 88 | # @return [Hash] 89 | def random_entity(prefix_entity = true) 90 | Hash[@fields.map do |name, field| 91 | key = name 92 | key = "#{@name}_#{name}" if prefix_entity 93 | [key, field.random_value] 94 | end] 95 | end 96 | end 97 | 98 | # A helper class for DSL creation to avoid messing with {Entity} 99 | class EntityDSL 100 | def initialize(entity) 101 | @entity = entity 102 | end 103 | 104 | # rubocop:disable MethodName 105 | 106 | # Specify a list of field names for the primary key 107 | def PrimaryKey(*names) 108 | # Unset the old keys and set new ones, 109 | # we dup because the fields are frozen 110 | @entity.fields.each_value do |field| 111 | next unless field.primary_key? 112 | field = field.dup 113 | field.primary_key = false 114 | @entity.fields[field.name] = field 115 | field.freeze 116 | end 117 | 118 | names.each do |name| 119 | field = @entity[name].dup 120 | field.primary_key = true 121 | @entity.fields[name] = field 122 | field.freeze 123 | end 124 | end 125 | 126 | # rubocop:enable MethodName 127 | 128 | def etc(size = 1) 129 | @entity << Fields::HashField.new('**', size) 130 | end 131 | end 132 | 133 | # Raised when looking up a field on an entity which does not exist 134 | class FieldNotFound < StandardError 135 | end 136 | end 137 | -------------------------------------------------------------------------------- /spec/support/backend.rb: -------------------------------------------------------------------------------- 1 | module NoSE 2 | RSpec.shared_examples 'backend processing' do |tag| 3 | let(:plans) { Plans::ExecutionPlans.load 'ebay' } 4 | 5 | # Insert a new entity for testing purposes 6 | def direct_insert(index_key, values) 7 | backend.indexes_ddl(true, true, true).to_a 8 | 9 | index = plans.schema.indexes[index_key] 10 | index = index.to_id_graph if backend.by_id_graph 11 | inserted_ids = backend.index_insert_chunk index, [values] 12 | 
inserted_ids.first 13 | end 14 | 15 | # Get a record from a particular index 16 | # @return [Hash] 17 | def direct_query(index_key) 18 | index = plans.schema.indexes[index_key] 19 | index = index.to_id_graph if backend.by_id_graph 20 | 21 | backend.index_sample(index, 1).first 22 | end 23 | 24 | # Execute an insert statement against the backend 25 | # @return [void] 26 | def insert(group, values) 27 | modify group, values, {} 28 | end 29 | 30 | # Execute an update statement against the backend 31 | # @return [void] 32 | def update(group, settings, conditions) 33 | modify group, settings, conditions 34 | end 35 | 36 | # Execute a modification statement with 37 | # the given settings and conditions 38 | # @return [void] 39 | def modify(group, settings, conditions) 40 | backend.indexes_ddl(true, true, true).to_a 41 | 42 | update_plans = plans.groups[group] 43 | 44 | update_plans.each do |plan| 45 | # Decide which fields should be set 46 | plan_settings = settings.map do |field_id, value| 47 | field = plan.index.all_fields.find { |f| f.id == field_id } 48 | FieldSetting.new field, value 49 | end 50 | 51 | # Generate any missing IDs 52 | (plan.index.hash_fields + plan.index.order_fields).each do |field| 53 | setting = plan_settings.find { |s| s.field == field } 54 | next unless setting.nil? 55 | 56 | plan_settings << FieldSetting.new(field, backend.generate_id) \ 57 | if field.is_a? 
Fields::IDField 58 | end 59 | 60 | # Build the list of conditions 61 | plan_conditions = Hash[conditions.map do |field_id, value| 62 | field = plan.index.all_fields.find { |f| f.id == field_id } 63 | [field_id, Condition.new(field, :'=', value)] 64 | end] 65 | 66 | prepared = backend.prepare_update nil, [plan] 67 | prepared.each { |p| p.execute plan_settings, plan_conditions } 68 | end 69 | end 70 | 71 | # Execute a query against the backend and return the results 72 | # @return [Hash] 73 | def query(group, values) 74 | plan = plans.groups[group].first 75 | prepared = backend.prepare_query nil, plan.select_fields, plan.params, 76 | [plan.steps] 77 | 78 | prepared.execute Hash[values.map do |k, v| 79 | condition = plan.params[k] 80 | condition.instance_variable_set :@value, v 81 | [k, condition] 82 | end] 83 | end 84 | 85 | it 'can query for inserted entities', tag do 86 | id = direct_insert 'items_by_id', 'items_Title' => 'Foo', 87 | 'items_Desc' => 'A thing' 88 | id = id.first if id.is_a? Array 89 | 90 | result = query 'GetItem', 'items_ItemID' => id 91 | expect(result).to have(1).item 92 | expect(result.first['items_Title']).to eq('Foo') 93 | end 94 | 95 | it 'can insert new entities', tag do 96 | insert 'AddItem', 'items_Title' => 'Foo', 'items_Desc' => 'A thing' 97 | 98 | result = direct_query 'items_by_id' 99 | expect(result).to include 'items_Title' => 'Foo' 100 | end 101 | 102 | it 'can update entities', tag do 103 | id = direct_insert 'items_by_id', 'items_Title' => 'Foo', 104 | 'items_Desc' => 'A thing' 105 | id = id.first if id.is_a? 
Array 106 | 107 | update 'UpdateItemTitle', 108 | { 'items_Title' => 'Bar' }, 109 | 'items_ItemID' => id 110 | 111 | result = direct_query 'items_by_id' 112 | expect(result).to include 'items_Title' => 'Bar' 113 | end 114 | end 115 | end 116 | -------------------------------------------------------------------------------- /spec/enumerator_spec.rb: -------------------------------------------------------------------------------- 1 | module NoSE 2 | describe IndexEnumerator do 3 | include_context 'entities' 4 | 5 | subject(:enum) { IndexEnumerator.new workload } 6 | 7 | it 'produces a simple index for a filter' do 8 | query = Statement.parse 'SELECT User.Username FROM User ' \ 9 | 'WHERE User.City = ?', workload.model 10 | indexes = enum.indexes_for_query query 11 | 12 | expect(indexes.to_a).to include \ 13 | Index.new [user['City']], [user['UserId']], [user['Username']], 14 | QueryGraph::Graph.from_path([user.id_field]) 15 | end 16 | 17 | it 'produces a simple index for a foreign key join' do 18 | query = Statement.parse 'SELECT Tweet.Body FROM Tweet.User ' \ 19 | 'WHERE User.City = ?', workload.model 20 | indexes = enum.indexes_for_query query 21 | 22 | expect(indexes).to include \ 23 | Index.new [user['City']], [user['UserId'], tweet['TweetId']], 24 | [tweet['Body']], 25 | QueryGraph::Graph.from_path([user.id_field, 26 | user['Tweets']]) 27 | end 28 | 29 | it 'produces an index for intermediate query steps' do 30 | query = Statement.parse 'SELECT Link.URL FROM Link.Tweets.User ' \ 31 | 'WHERE User.Username = ?', workload.model 32 | indexes = enum.indexes_for_query query 33 | expect(indexes).to include \ 34 | Index.new [user['UserId']], [tweet['TweetId']], [], 35 | QueryGraph::Graph.from_path([tweet.id_field, 36 | tweet['User']]) 37 | end 38 | 39 | it 'produces a simple index for a filter within a workload' do 40 | query = Statement.parse 'SELECT User.Username FROM User ' \ 41 | 'WHERE User.City = ?', workload.model 42 | workload.add_statement query 43 | indexes = 
enum.indexes_for_workload 44 | 45 | expect(indexes.to_a).to include \ 46 | Index.new [user['City']], [user['UserId']], [user['Username']], 47 | QueryGraph::Graph.from_path([user.id_field]) 48 | end 49 | 50 | it 'does not produce empty indexes' do 51 | query = Statement.parse 'SELECT Tweet.Body FROM Tweet.User ' \ 52 | 'WHERE User.City = ?', workload.model 53 | workload.add_statement query 54 | indexes = enum.indexes_for_workload 55 | expect(indexes).to all(satisfy do |index| 56 | !index.order_fields.empty? || !index.extra.empty? 57 | end) 58 | end 59 | 60 | it 'includes no indexes for updates if nothing is updated' do 61 | # Use a fresh workload for this test 62 | model = workload.model 63 | workload = Workload.new model 64 | enum = IndexEnumerator.new workload 65 | update = Statement.parse 'UPDATE User SET Username = ? ' \ 66 | 'WHERE User.City = ?', model 67 | workload.add_statement update 68 | indexes = enum.indexes_for_workload 69 | 70 | expect(indexes).to be_empty 71 | end 72 | 73 | it 'includes indexes enumerated from queries generated from updates' do 74 | # Use a fresh workload for this test 75 | model = workload.model 76 | workload = Workload.new model 77 | enum = IndexEnumerator.new workload 78 | 79 | update = Statement.parse 'UPDATE User SET Username = ? 
' \ 80 | 'WHERE User.City = ?', model 81 | workload.add_statement update 82 | 83 | query = Statement.parse 'SELECT Tweet.Body FROM Tweet.User ' \ 84 | 'WHERE User.Username = ?', workload.model 85 | workload.add_statement query 86 | 87 | indexes = enum.indexes_for_workload 88 | 89 | expect(indexes.to_a).to include \ 90 | Index.new [user['City']], [user['UserId']], [], 91 | QueryGraph::Graph.from_path([user.id_field]) 92 | 93 | expect(indexes.to_a).to include \ 94 | Index.new [user['UserId']], [tweet['TweetId']], 95 | [tweet['Body']], 96 | QueryGraph::Graph.from_path([user.id_field, 97 | user['Tweets']]) 98 | end 99 | end 100 | end 101 | -------------------------------------------------------------------------------- /lib/nose/plans.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | # Statement planning and abstract models of execution steps 5 | module Plans 6 | # A single step in a statement plan 7 | class PlanStep 8 | include Supertype 9 | 10 | attr_accessor :state, :parent 11 | attr_reader :children, :cost, :fields 12 | 13 | def initialize 14 | @children = Set.new 15 | @parent = nil 16 | @fields = Set.new 17 | end 18 | 19 | # :nocov: 20 | def to_color 21 | # Split on capital letters and remove the last two parts (PlanStep) 22 | self.class.name.split('::').last.split(/(?=[A-Z])/)[0..-3] \ 23 | .map(&:downcase).join(' ').capitalize 24 | end 25 | # :nocov: 26 | 27 | # Set the children of the current plan step 28 | # @return [void] 29 | def children=(children) 30 | @children = children.to_set 31 | 32 | # Track the parent step of each step 33 | children.each do |child| 34 | child.instance_variable_set(:@parent, self) 35 | fields = child.instance_variable_get(:@fields) + self.fields 36 | child.instance_variable_set(:@fields, fields) 37 | end 38 | end 39 | 40 | # Mark the fields in this index as fetched 41 | # @return [void] 42 | def add_fields_from_index(index) 43 | @fields += 
index.all_fields 44 | end 45 | 46 | # Get the list of steps which led us here 47 | # If a cost model is not provided, statement plans using 48 | # this step cannot be evaluated on the basis of cost 49 | # 50 | # (this is to support PlanStep#parent_index which does not need cost) 51 | # @return [QueryPlan] 52 | def parent_steps(cost_model = nil) 53 | steps = nil 54 | 55 | if @parent.nil? 56 | steps = QueryPlan.new state.query, cost_model 57 | else 58 | steps = @parent.parent_steps cost_model 59 | steps << self 60 | end 61 | 62 | steps 63 | end 64 | 65 | # Find the closest index to this step 66 | # @return [PlanStep] 67 | def parent_index 68 | step = parent_steps.to_a.reverse_each.find do |parent_step| 69 | parent_step.is_a? IndexLookupPlanStep 70 | end 71 | step.index unless step.nil? 72 | end 73 | 74 | # Calculate the cost of executing this step in the plan 75 | # @return [Integer] 76 | def calculate_cost(cost_model) 77 | @cost = cost_model.method((subtype_name + '_cost').to_sym).call self 78 | end 79 | 80 | # Add the Subtype module to all step classes 81 | # @return [void] 82 | def self.inherited(child_class) 83 | child_class.send(:include, Subtype) 84 | end 85 | end 86 | 87 | # A dummy step used to inspect failed statement plans 88 | class PrunedPlanStep < PlanStep 89 | def state 90 | OpenStruct.new answered?: true 91 | end 92 | end 93 | 94 | # The root of a tree of statement plans used as a placeholder 95 | class RootPlanStep < PlanStep 96 | def initialize(state) 97 | super() 98 | @state = state 99 | @cost = 0 100 | end 101 | end 102 | 103 | # This superclass defines what is necessary for manually defined 104 | # and automatically generated plans to provide for execution 105 | class AbstractPlan 106 | attr_reader :group, :name, :weight 107 | 108 | # @abstract Subclasses should produce the steps for executing this query 109 | def steps 110 | fail NotImplementedError 111 | end 112 | 113 | # @abstract Subclasses should produce the fields selected by this plan 114 | 
def select_fields 115 | [] 116 | end 117 | 118 | # @abstract Subclasses should produce the parameters 119 | # necessary for this plan 120 | def params 121 | fail NotImplementedError 122 | end 123 | end 124 | end 125 | end 126 | 127 | require_relative 'plans/filter' 128 | require_relative 'plans/index_lookup' 129 | require_relative 'plans/limit' 130 | require_relative 'plans/sort' 131 | require_relative 'plans/update' 132 | 133 | require_relative 'plans/query_planner' 134 | require_relative 'plans/update_planner' 135 | require_relative 'plans/execution_plan' 136 | -------------------------------------------------------------------------------- /spec/loader/mysql_loader_spec.rb: -------------------------------------------------------------------------------- 1 | require 'nose/loader/mysql' 2 | 3 | module NoSE 4 | module Loader 5 | describe MysqlLoader do 6 | # Mock the client of a loader to return canned responses to SQL queries 7 | def mock_loader(responses, count) 8 | loader = MysqlLoader.new 9 | 10 | allow(loader).to receive(:new_client) do 11 | client = double('client') 12 | expect(client).to receive(:query) do |query| 13 | responses.each_pair.find { |k, _| k == query }.last 14 | end.exactly(count).times 15 | 16 | client 17 | end 18 | 19 | loader 20 | end 21 | 22 | it 'can generate a workload from a database' do 23 | # Simple Array subclass so we can use .each(as: :array) 24 | class EachArray < Array 25 | def each(*_args, **_options) 26 | super() 27 | end 28 | end 29 | 30 | loader = mock_loader( 31 | { 32 | 'SHOW TABLES' => EachArray.new([['Foo']]), 33 | 'SELECT COUNT(*) FROM Foo' => [{ 'COUNT()*)' => 10 }], 34 | 'DESCRIBE Foo' => EachArray.new( 35 | [ 36 | ['FooId', 'int(10) unsigned', 'NO', 'PRI', 'NULL', ''], 37 | ['Bar', 'int(10) unsigned', 'NO', '', 'NULL', ''], 38 | ['Baz', 'float', 'NO', '', 'NULL', ''], 39 | ['Quux', 'datetime', 'NO', '', 'NULL', ''], 40 | ['Corge', 'text', 'NO', '', 'NULL', ''], 41 | ['Garply', 'varchar(10)', 'NO', '', 'NULL', ''] 42 | ] 
43 | ) 44 | }, 3 45 | ) 46 | 47 | workload = loader.workload({}) 48 | expect(workload.model.entities).to have(1).item 49 | 50 | entity = workload.model.entities.values.first 51 | expect(entity.name).to eq 'Foo' 52 | expect(entity.fields).to have(6).items 53 | 54 | expect(entity.fields.values[0]).to be_a Fields::IDField 55 | expect(entity.fields.values[1]).to be_a Fields::IntegerField 56 | expect(entity.fields.values[2]).to be_a Fields::FloatField 57 | expect(entity.fields.values[3]).to be_a Fields::DateField 58 | expect(entity.fields.values[4]).to be_a Fields::StringField 59 | expect(entity.fields.values[5]).to be_a Fields::StringField 60 | end 61 | 62 | context 'when loading into a backend', mysql: true do 63 | let(:workload) { Workload.load 'rubis' } 64 | let(:backend) do 65 | dummy = double('backend') 66 | allow(dummy).to receive(:by_id_graph).and_return(false) 67 | allow(dummy).to receive(:index_empty?).and_return(true) 68 | 69 | dummy 70 | end 71 | 72 | let(:config) do 73 | { 74 | host: '127.0.0.1', 75 | username: 'root', 76 | database: 'nose' 77 | } 78 | end 79 | 80 | let(:loader) do 81 | MysqlLoader.new workload, backend 82 | end 83 | 84 | it 'can load a simple ID index', mysql: true do 85 | user = workload.model['users'] 86 | index = Index.new [user['id']], [], [user['nickname']], 87 | QueryGraph::Graph.from_path([user['id']]) 88 | expect(backend).to receive(:index_insert_chunk).with( 89 | index, [ 90 | { 91 | 'users_id' => 2, 92 | 'users_nickname' => '08ec962a-fc56-40a3-9e07-1fca0520253c' 93 | } 94 | ] 95 | ) 96 | loader.load([index], config, false, 1) 97 | end 98 | 99 | it 'can load an index across multiple entities', mysql: true do 100 | user = workload.model['users'] 101 | item = workload.model['items'] 102 | index = Index.new [user['id']], [item['id']], [item['name']], 103 | QueryGraph::Graph.from_path( 104 | [user['id'], user['items_sold']] 105 | ) 106 | expect(backend).to receive(:index_insert_chunk).with( 107 | index, [ 108 | { 109 | 'users_id' => 
1, 110 | 'items_id' => 45, 111 | 'items_name' => 'repellat alias consequatur' 112 | } 113 | ] 114 | ) 115 | loader.load([index], config, false, 1) 116 | end 117 | end 118 | end 119 | end 120 | end 121 | -------------------------------------------------------------------------------- /spec/search_spec.rb: -------------------------------------------------------------------------------- 1 | module NoSE 2 | module Search 3 | describe Search do 4 | include_context 'dummy cost model' 5 | include_context 'entities' 6 | 7 | it 'raises an exception if there is no space', solver: true do 8 | workload.add_statement 'SELECT Tweet.Body FROM Tweet ' \ 9 | 'WHERE Tweet.TweetId = ?' 10 | indexes = IndexEnumerator.new(workload).indexes_for_workload.to_a 11 | search = Search.new(workload, cost_model) 12 | expect do 13 | search.search_overlap(indexes, 1) 14 | end.to raise_error(NoSolutionException) 15 | end 16 | 17 | it 'produces a materialized view with sufficient space', solver: true do 18 | query = Statement.parse 'SELECT User.UserId FROM User WHERE ' \ 19 | 'User.City = ? 
ORDER BY User.Username', 20 | workload.model 21 | workload.add_statement query 22 | 23 | indexes = IndexEnumerator.new(workload).indexes_for_workload.to_a 24 | result = Search.new(workload, cost_model).search_overlap indexes 25 | indexes = result.indexes 26 | expect(indexes).to include query.materialize_view 27 | end 28 | 29 | it 'can perform multiple lookups on a path segment', solver: true do 30 | query = Statement.parse 'SELECT User.Username FROM User ' \ 31 | 'WHERE User.City = ?', workload.model 32 | workload.add_statement query 33 | 34 | indexes = [ 35 | Index.new([user['City']], [user['UserId']], [], 36 | QueryGraph::Graph.from_path([user.id_field])), 37 | Index.new([user['UserId']], [], [user['Username']], 38 | QueryGraph::Graph.from_path([user.id_field])) 39 | ] 40 | search = Search.new(workload, cost_model) 41 | expect do 42 | search.search_overlap(indexes, indexes.first.size).to_set 43 | end.to raise_error NoSolutionException 44 | end 45 | 46 | it 'does not denormalize heavily updated data', solver: true do 47 | workload.add_statement 'UPDATE User SET Username = ? 
' \ 48 | 'WHERE User.UserId = ?', 0.98 49 | workload.add_statement 'SELECT User.Username FROM User ' \ 50 | 'WHERE User.City = ?', 0.01 51 | workload.add_statement 'SELECT User.Username FROM User ' \ 52 | 'WHERE User.Country = ?', 0.01 53 | 54 | # Enumerate the indexes and select those actually used 55 | indexes = IndexEnumerator.new(workload).indexes_for_workload.to_a 56 | cost_model = Cost::EntityCountCost.new 57 | result = Search.new(workload, cost_model).search_overlap indexes 58 | indexes = result.indexes 59 | 60 | # Get the indexes actually used by the generated plans 61 | planner = Plans::QueryPlanner.new workload, indexes, cost_model 62 | plans = workload.queries.map { |query| planner.min_plan query } 63 | indexes = plans.flat_map(&:indexes).to_set 64 | 65 | expect(indexes).to match_array [ 66 | Index.new([user['Country']], [user['UserId']], [], 67 | QueryGraph::Graph.from_path([user.id_field])), 68 | Index.new([user['City']], [user['UserId']], [], 69 | QueryGraph::Graph.from_path([user.id_field])), 70 | Index.new([user['UserId']], [], [user['Username']], 71 | QueryGraph::Graph.from_path([user.id_field])) 72 | ] 73 | end 74 | 75 | it 'increases the total cost when an update is added' do 76 | query = Statement.parse 'SELECT User.UserId FROM User WHERE ' \ 77 | 'User.City = ? ORDER BY User.Username', workload.model 78 | 79 | workload.add_statement query 80 | indexes = IndexEnumerator.new(workload).indexes_for_workload.to_a 81 | result = Search.new(workload, cost_model).search_overlap indexes 82 | 83 | workload.add_statement 'UPDATE User SET Username = ? 
' \ 84 | 'WHERE User.UserId = ?', 0.98 85 | 86 | indexes_with_update = IndexEnumerator.new(workload).indexes_for_workload.to_a 87 | result_with_update = Search.new(workload, cost_model).search_overlap indexes_with_update 88 | 89 | # total cost should be increased due to additional update statement 90 | expect(result.total_cost).to be < result_with_update.total_cost 91 | end 92 | end 93 | end 94 | end 95 | -------------------------------------------------------------------------------- /spec/util_spec.rb: -------------------------------------------------------------------------------- 1 | require 'stringio' 2 | 3 | describe Enumerable do 4 | it 'can generate all possible prefixes' do 5 | expect([1, 2, 3].prefixes).to match_array [[1], [1, 2], [1, 2, 3]] 6 | end 7 | 8 | it 'generates an empty prefix list when there are no elements' do 9 | expect([].prefixes).to match_array [] 10 | end 11 | 12 | it 'can generate all partitionings of itself' do 13 | expect([1, 2, 3, 4].partitions).to match_array [ 14 | [[1, 2, 3, 4], []], 15 | [[1], [2, 3, 4]], 16 | [[1, 2], [3, 4]], 17 | [[1, 2, 3], [4]]] 18 | end 19 | 20 | it 'can compute the product based on a block' do 21 | expect([-1, 1].sum_by(&:abs)).to eq(2) 22 | end 23 | 24 | it 'can compute the product based on a block' do 25 | expect([-1, 1].product_by(&:abs)).to eq(1) 26 | end 27 | end 28 | 29 | describe Integer do 30 | it 'is finite' do 31 | expect(3.finite?).to be true 32 | end 33 | end 34 | 35 | describe Object do 36 | context 'for objects with a to_color method' do 37 | subject(:obj) do 38 | class Foo 39 | def to_color 40 | '[red]foo[/]' 41 | end 42 | end 43 | 44 | Foo.new 45 | end 46 | 47 | it 'should inspect colored output when stdout is a terminal' do 48 | old_stdout = STDOUT 49 | Object.instance_eval { remove_const 'STDOUT' } 50 | STDOUT = double('stdout', tty?: true, write: nil) 51 | 52 | expect(obj.inspect).to eq "\e[31mfoo\e[0m" 53 | 54 | Object.instance_eval { remove_const 'STDOUT' } 55 | STDOUT = old_stdout 56 
| end 57 | 58 | it 'should inspect uncolored output when stdout is not a terminal' do 59 | old_stdout = STDOUT 60 | Object.instance_eval { remove_const 'STDOUT' } 61 | STDOUT = StringIO.new 62 | 63 | Object.instance_eval { remove_const 'STDOUT' } 64 | STDOUT = old_stdout 65 | end 66 | end 67 | 68 | context 'for objects without a to_color method' do 69 | subject(:obj) do 70 | class Bar 71 | def to_s 72 | 'foo' 73 | end 74 | end 75 | 76 | Bar.new 77 | end 78 | 79 | it 'should use uncolored output' do 80 | expect(obj.to_s).to eq 'foo' 81 | expect(obj.to_color).to eq 'foo' 82 | end 83 | end 84 | 85 | describe 'Subtype' do 86 | subject(:obj) do 87 | class Foo 88 | end 89 | 90 | class BarBazFoo < Foo 91 | include Subtype 92 | end 93 | 94 | BarBazFoo.new 95 | end 96 | 97 | it 'can produce its name in snake case' do 98 | expect(obj.subtype_name).to eq 'bar_baz' 99 | end 100 | 101 | it 'can produce its name in camel case' do 102 | expect(obj.subtype_name(name_case: :camel)).to eq 'BarBaz' 103 | end 104 | end 105 | 106 | describe 'Supertype' do 107 | subject(:cls) do 108 | class Foo 109 | include Supertype 110 | end 111 | 112 | class BarBazFoo < Foo 113 | end 114 | 115 | Foo 116 | end 117 | 118 | it 'can produce a subclass from a name in snake case' do 119 | subclass = cls.subtype_class 'bar_baz' 120 | expect(subclass).to be_a Class 121 | expect(subclass.name).to eq 'BarBazFoo' 122 | end 123 | 124 | it 'can produce a subclass from a name in camel case' do 125 | subclass = cls.subtype_class 'BarBaz' 126 | expect(subclass).to be_a Class 127 | expect(subclass.name).to eq 'BarBazFoo' 128 | end 129 | end 130 | end 131 | 132 | describe Cardinality do 133 | include_context 'entities' 134 | 135 | it 'estimates one for a simple ID lookup' do 136 | cardinality = Cardinality.filter tweet.count, [tweet['TweetId']], nil 137 | 138 | expect(cardinality).to eq(1) 139 | end 140 | 141 | it 'correctly estimates based on field cardinality for equality' do 142 | cardinality = Cardinality.filter 
user.count, [user['City']], nil 143 | 144 | expect(cardinality).to eq(2) 145 | end 146 | 147 | it 'uses a static estimate for range filters' do 148 | cardinality = Cardinality.filter tweet.count, [tweet['Body']], 149 | tweet['Timestamp'] 150 | 151 | expect(cardinality).to eq(20) 152 | end 153 | end 154 | 155 | describe Listing do 156 | let(:superclass) do 157 | class Super 158 | include Listing 159 | end 160 | end 161 | 162 | let(:subclass) do 163 | class Sub < Super 164 | end 165 | 166 | Sub 167 | end 168 | 169 | it 'allows tracking of subclasses' do 170 | expect(superclass.subclasses).to eq({"Sub" => subclass}) 171 | end 172 | end 173 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to NoSE 2 | 3 | This is adapted from the [Node.js contributor guidelines](https://github.com/nodejs/node/blob/master/CONTRIBUTING.md). 4 | 5 | ## Issue Contributions 6 | 7 | When reporting an issue with NoSE, please provide as much context as possible. 8 | At minimum, try running the tests and post the output of any failed tests. 9 | Also be sure to provide the version of the Ruby interpreter you are using. 10 | 11 | ## Code Contributions 12 | 13 | This document will guide you through the contribution process. 14 | 15 | ### Step 1: Fork 16 | 17 | Fork the project [on GitHub](https://github.com/michaelmior/NoSE) and check out your 18 | copy locally. 19 | 20 | ```text 21 | $ git clone git@github.com:username/NoSE.git 22 | $ cd NoSE 23 | $ git remote add upstream git://github.com/michaelmior/NoSE.git 24 | ``` 25 | 26 | #### Which branch? 27 | 28 | For developing new features and bug fixes, the `master` branch should be pulled 29 | and built upon. 
30 | 31 | ### Step 2: Branch 32 | 33 | Create a feature branch and start hacking: 34 | 35 | ```text 36 | $ git checkout -b my-feature-branch -t origin/master 37 | ``` 38 | 39 | ### Step 3: Commit 40 | 41 | Make sure git knows your name and email address: 42 | 43 | ```text 44 | $ git config --global user.name "J. Random User" 45 | $ git config --global user.email "j.random.user@example.com" 46 | ``` 47 | 48 | Writing good commit logs is important. A commit log should describe what 49 | changed and why. Follow these guidelines when writing one: 50 | 51 | 1. The first line should be 50 characters or less and contain a short 52 | description of the change. 53 | 2. Keep the second line blank. 54 | 3. Wrap all other lines at 72 columns. 55 | 56 | A good commit log can look something like this: 57 | 58 | ``` 59 | explaining the commit in one line 60 | 61 | Body of commit message is a few lines of text, explaining things 62 | in more detail, possibly giving some background about the issue 63 | being fixed, etc. etc. 64 | 65 | The body of the commit message can be several paragraphs, and 66 | please do proper word-wrap and keep columns shorter than about 67 | 72 characters or so. That way `git log` will show things 68 | nicely even when it is indented. 69 | ``` 70 | 71 | The header line should be meaningful; it is what other people see when they 72 | run `git shortlog` or `git log --oneline`. 73 | 74 | ### Step 4: Rebase 75 | 76 | Use `git rebase` (not `git merge`) to sync your work from time to time. 77 | 78 | ```text 79 | $ git fetch upstream 80 | $ git rebase upstream/master 81 | ``` 82 | 83 | ### Step 5: Test 84 | 85 | Bug fixes and features **should come with tests**. Add your tests in the 86 | `spec` directory. Look at other tests to see how they should be 87 | structured. 88 | 89 | ```text 90 | $ bundle exec rspec 91 | ``` 92 | 93 | Make sure that all tests pass. Please, do not submit 94 | patches with failing tests. 
95 | 96 | If you are updating tests and just want to run a single test to check it, you 97 | can use this syntax: 98 | 99 | ```text 100 | $ bundle exec rspec spec/backend_spec.rb 101 | ``` 102 | 103 | ### Step 6: Push 104 | 105 | ```text 106 | $ git push origin my-feature-branch 107 | ``` 108 | 109 | Go to `https://github.com/yourusername/NoSE` and select your feature branch. 110 | Click the 'Pull Request' button and fill out the form. 111 | 112 | Pull requests are usually reviewed within a few days. If there are comments 113 | to address, apply your changes in a separate commit and push that to your 114 | feature branch. Post a comment in the pull request afterwards; GitHub does 115 | not send out notifications when you add commits. 116 | 117 | 118 | ## Developer's Certificate of Origin 1.1 119 | 120 | By making a contribution to this project, I certify that: 121 | 122 | * (a) The contribution was created in whole or in part by me and I 123 | have the right to submit it under the open source license 124 | indicated in the file; or 125 | 126 | * (b) The contribution is based upon previous work that, to the best 127 | of my knowledge, is covered under an appropriate open source 128 | license and I have the right under that license to submit that 129 | work with modifications, whether created in whole or in part 130 | by me, under the same open source license (unless I am 131 | permitted to submit under a different license), as indicated 132 | in the file; or 133 | 134 | * (c) The contribution was provided directly to me by some other 135 | person who certified (a), (b) or (c) and I have not modified 136 | it. 137 | 138 | * (d) I understand and agree that this project and the contribution 139 | are public and that a record of the contribution (including all 140 | personal information I submit with it, including my sign-off) is 141 | maintained indefinitely and may be redistributed consistent with 142 | this project or the open source license(s) involved. 
143 | -------------------------------------------------------------------------------- /spec/model_spec.rb: -------------------------------------------------------------------------------- 1 | module NoSE 2 | describe Entity do 3 | subject(:entity) { Entity.new('Foo') } 4 | 5 | it 'can store fields' do 6 | entity << Fields::IntegerField.new('Bar') 7 | entity << Fields::IntegerField.new('Baz') 8 | 9 | expect(entity.fields.keys).to match_array %w(Bar Baz) 10 | end 11 | 12 | it 'can have foreign keys' do 13 | other = entity * 100 14 | field = Fields::ForeignKeyField.new('other', other) 15 | entity << field 16 | 17 | expect(field.entity).to be(other) 18 | expect(field.class.subtype_name).to eq('foreign_key') 19 | expect(field.relationship).to eq(:one) 20 | expect(field.cardinality).to eq(100) 21 | end 22 | 23 | it 'can tell fields when they are added' do 24 | field = Fields::IntegerField.new('Bar') 25 | 26 | expect(field.parent).to be_nil 27 | 28 | entity << field 29 | 30 | expect(field.parent).to be(entity) 31 | end 32 | 33 | it 'can create entities using a DSL' do 34 | entity = Entity.new 'Foo' do 35 | ID 'Bar' 36 | Integer 'Baz' 37 | String 'Quux', 20 38 | etc 39 | end 40 | 41 | expect(entity.fields).to have(4).items 42 | expect(entity.fields['Quux'].size).to eq 20 43 | expect(entity['**'].class).to be Fields::HashField 44 | end 45 | 46 | it 'raises an exception for nonexistent fields' do 47 | expect { entity['Bar'] }.to raise_error FieldNotFound 48 | end 49 | 50 | it 'can generate random entities' do 51 | entity << Fields::IntegerField.new('Bar') 52 | expect(entity.random_entity).to be_a Hash 53 | expect(entity.random_entity.keys).to match_array ['Foo_Bar'] 54 | end 55 | end 56 | 57 | describe Fields::Field do 58 | subject(:field) do 59 | Fields::IDField.new 'Bar' 60 | end 61 | 62 | it 'has an ID based on the entity and name' do 63 | Entity.new('Foo') << field 64 | expect(field.id).to eq 'Foo_Bar' 65 | end 66 | 67 | it 'can have its cardinality updated by 
multiplication' do 68 | expect((field * 5).cardinality).to eq 5 69 | end 70 | end 71 | 72 | describe Fields::IntegerField do 73 | it 'can convert string literals' do 74 | expect(Fields::IntegerField.value_from_string '42').to eq 42 75 | end 76 | 77 | it 'can produce random integers' do 78 | field = Fields::IntegerField.new 'Foo', count: 10 79 | expect(field.random_value).to be_a Integer 80 | expect(field.random_value).to be_between(0, field.cardinality) 81 | end 82 | end 83 | 84 | describe Fields::FloatField do 85 | it 'can convert string literals' do 86 | expect(Fields::FloatField.value_from_string '3.14159').to eq 3.14159 87 | end 88 | 89 | it 'can produce random floats' do 90 | field = Fields::FloatField.new 'Foo', count: 10 91 | expect(field.random_value).to be_a Float 92 | expect(field.random_value).to be_between(0, field.cardinality) 93 | end 94 | end 95 | 96 | describe Fields::StringField do 97 | it 'can convert string literals' do 98 | expect(Fields::StringField.value_from_string 'pudding').to eq 'pudding' 99 | end 100 | 101 | it 'can produce random strings' do 102 | field = Fields::StringField.new 'Foo', 10 103 | expect(field.random_value).to be_a String 104 | expect(field.random_value).to have(10).characters 105 | end 106 | end 107 | 108 | describe Fields::DateField do 109 | it 'can convert string literals' do 110 | date = Fields::DateField.value_from_string '2001-02-03T04:05:06+07:00' 111 | expect(date).to eq DateTime.new(2001, 2, 3, 4, 5, 6, '+7').to_time 112 | end 113 | 114 | it 'can produce random dates' do 115 | field = Fields::DateField.new 'Foo' 116 | expect(field.random_value).to be_a Time 117 | end 118 | end 119 | 120 | describe Fields::BooleanField do 121 | it 'can convert boolean strings' do 122 | expect(Fields::BooleanField.value_from_string 'false').to be_falsey 123 | end 124 | 125 | it 'can convert integers in strings' do 126 | expect(Fields::BooleanField.value_from_string '1').to be_truthy 127 | end 128 | 129 | it 'can produce random 
booleans' do 130 | field = Fields::BooleanField.new 'Foo' 131 | expect(field.random_value).to satisfy { |v| [true, false].include? v } 132 | end 133 | end 134 | 135 | describe Model do 136 | let(:model) do 137 | Model.new do 138 | Entity 'Foo' do 139 | ID 'FooID' 140 | end 141 | 142 | Entity 'Bar' do 143 | ID 'BarID' 144 | end 145 | 146 | HasOne 'foo', 'bars', 147 | {'Bar' => 'Foo'} 148 | end 149 | end 150 | 151 | it 'can create a to-one relationship' do 152 | bar_key = model.entities['Bar']['foo'] 153 | expect(bar_key.relationship).to eq(:one) 154 | end 155 | 156 | it 'can create a to-many relationship' do 157 | foo_key = model.entities['Foo']['bars'] 158 | expect(foo_key.relationship).to eq(:many) 159 | end 160 | end 161 | end 162 | -------------------------------------------------------------------------------- /lib/nose/statements/insert.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | # A representation of an insert in the workload 5 | class Insert < Statement 6 | include StatementConditions 7 | include StatementSettings 8 | include StatementSupportQuery 9 | 10 | def initialize(params, text, group: nil, label: nil) 11 | super params, text, group: group, label: label 12 | 13 | @settings = params[:settings] 14 | fail InvalidStatementException, 'Must insert primary key' \ 15 | unless @settings.map(&:field).include?(entity.id_field) 16 | 17 | populate_conditions params 18 | end 19 | 20 | # Build a new insert from a provided parse tree 21 | # @return [Insert] 22 | def self.parse(tree, params, text, group: nil, label: nil) 23 | settings_from_tree tree, params 24 | conditions_from_tree tree, params 25 | 26 | Insert.new params, text, group: group, label: label 27 | end 28 | 29 | # Extract conditions from a parse tree 30 | # @return [Hash] 31 | def self.conditions_from_tree(tree, params) 32 | connections = tree[:connections] || [] 33 | connections = connections.map do 
|connection| 34 | field = params[:entity][connection[:target].to_s] 35 | value = connection[:target_pk] 36 | 37 | type = field.class.const_get 'TYPE' 38 | value = field.class.value_from_string(value.to_s) \ 39 | unless type.nil? || value.nil? 40 | 41 | connection.delete :value 42 | Condition.new field, :'=', value 43 | end 44 | 45 | params[:conditions] = Hash[connections.map do |connection| 46 | [connection.field.id, connection] 47 | end] 48 | end 49 | private_class_method :conditions_from_tree 50 | 51 | # Produce the SQL text corresponding to this insert 52 | # @return [String] 53 | def unparse 54 | insert = "INSERT INTO #{entity.name} " 55 | insert += settings_clause 56 | 57 | insert << ' AND CONNECT TO ' << @conditions.values.map do |condition| 58 | value = maybe_quote condition.value, condition.field 59 | "#{condition.field.name}(#{value})" 60 | end.join(', ') unless @conditions.empty? 61 | 62 | insert 63 | end 64 | 65 | def ==(other) 66 | other.is_a?(Insert) && 67 | @graph == other.graph && 68 | entity == other.entity && 69 | @settings == other.settings && 70 | @conditions == other.conditions 71 | end 72 | alias eql? == 73 | 74 | def hash 75 | @hash ||= [@graph, entity, @settings, @conditions].hash 76 | end 77 | 78 | # Determine if this insert modifies an index 79 | def modifies_index?(index) 80 | return true if modifies_single_entity_index?(index) 81 | return false if index.graph.size == 1 82 | return false unless index.graph.entities.include? entity 83 | 84 | # Check if the index crosses all of the connection keys 85 | keys = @conditions.each_value.map(&:field) 86 | index.graph.keys_from_entity(entity).all? { |k| keys.include? 
k } 87 | end 88 | 89 | # Specifies that inserts require insertion 90 | def requires_insert?(_index) 91 | true 92 | end 93 | 94 | # Support queries are required for index insertion with connection 95 | # to select attributes of the other related entities 96 | # @return [Array] 97 | def support_queries(index) 98 | return [] unless modifies_index?(index) && 99 | !modifies_single_entity_index?(index) 100 | 101 | # Get all fields which need to be selected by support queries 102 | select = index.all_fields - 103 | @settings.map(&:field).to_set - 104 | @conditions.each_value.map do |condition| 105 | condition.field.entity.id_field 106 | end.to_set 107 | return [] if select.empty? 108 | 109 | index.graph.split(entity).map do |graph| 110 | support_fields = select.select do |field| 111 | graph.entities.include? field.parent 112 | end.to_set 113 | 114 | # Build conditions by traversing the foreign keys 115 | conditions = @conditions.each_value.map do |c| 116 | next unless graph.entities.include? c.field.entity 117 | 118 | Condition.new c.field.entity.id_field, c.operator, c.value 119 | end.compact 120 | conditions = Hash[conditions.map do |condition| 121 | [condition.field.id, condition] 122 | end] 123 | 124 | split_entity = split_entity graph, index.graph, entity 125 | build_support_query split_entity, index, graph, support_fields, 126 | conditions 127 | end.compact 128 | end 129 | 130 | # The settings fields are provided with the insertion 131 | def given_fields 132 | @settings.map(&:field) + @conditions.each_value.map do |condition| 133 | condition.field.entity.id_field 134 | end 135 | end 136 | 137 | private 138 | 139 | # Check if the insert modifies a single entity index 140 | # @return [Boolean] 141 | def modifies_single_entity_index?(index) 142 | !(@settings.map(&:field).to_set & index.all_fields).empty? 
&& 143 | index.graph.size == 1 && index.graph.entities.first == entity 144 | end 145 | end 146 | end 147 | -------------------------------------------------------------------------------- /lib/nose/statements/query.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | # A representation of a query in the workload 5 | class Query < Statement 6 | include StatementConditions 7 | 8 | attr_reader :select, :order, :limit 9 | 10 | def initialize(params, text, group: nil, label: nil) 11 | super params, text, group: group, label: label 12 | 13 | populate_conditions params 14 | @select = params[:select] 15 | @order = params[:order] || [] 16 | 17 | fail InvalidStatementException, 'can\'t order by IDs' \ 18 | if @order.any? { |f| f.is_a? Fields::IDField } 19 | 20 | if join_order.first != @key_path.entities.first 21 | @key_path = @key_path.reverse 22 | end 23 | 24 | fail InvalidStatementException, 'must have an equality predicate' \ 25 | if @conditions.empty? || @conditions.values.all?(&:is_range) 26 | 27 | @limit = params[:limit] 28 | end 29 | 30 | # Build a new query from a provided parse tree 31 | # @return [Query] 32 | def self.parse(tree, params, text, group: nil, label: nil) 33 | conditions_from_tree tree, params 34 | fields_from_tree tree, params 35 | order_from_tree tree, params 36 | params[:limit] = tree[:limit].to_i if tree[:limit] 37 | 38 | new params, text, group: group, label: label 39 | end 40 | 41 | # Produce the SQL text corresponding to this query 42 | # @return [String] 43 | def unparse 44 | field_namer = -> (f) { field_path f } 45 | 46 | query = 'SELECT ' + @select.map(&field_namer).join(', ') 47 | query << " FROM #{from_path @graph.longest_path}" 48 | query << where_clause(field_namer) 49 | 50 | query << ' ORDER BY ' << @order.map(&field_namer).join(', ') \ 51 | unless @order.empty? 52 | query << " LIMIT #{@limit}" unless @limit.nil? 
53 | query << " -- #{@comment}" unless @comment.nil? 54 | 55 | query 56 | end 57 | 58 | def ==(other) 59 | other.is_a?(Query) && 60 | @graph == other.graph && 61 | @select == other.select && 62 | @conditions == other.conditions && 63 | @order == other.order && 64 | @limit == other.limit && 65 | @comment == other.comment 66 | end 67 | alias eql? == 68 | 69 | def hash 70 | @hash ||= [@graph, @select, @conditions, @order, @limit, @comment].hash 71 | end 72 | 73 | # The order entities should be joined according to the query graph 74 | # @return [Array] 75 | def join_order 76 | @graph.join_order(@eq_fields) 77 | end 78 | 79 | # Specifies that queries don't modify data 80 | def read_only? 81 | true 82 | end 83 | 84 | # All fields referenced anywhere in the query 85 | # @return [Set] 86 | def all_fields 87 | (@select + @conditions.each_value.map(&:field) + @order).to_set 88 | end 89 | 90 | # Extract fields to be selected from a parse tree 91 | # @return [Set] 92 | def self.fields_from_tree(tree, params) 93 | params[:select] = tree[:select].flat_map do |field| 94 | if field.last == '*' 95 | # Find the entity along the path 96 | entity = params[:key_path].entities[tree[:path].index(field.first)] 97 | entity.fields.values 98 | else 99 | field = add_field_with_prefix tree[:path], field, params 100 | 101 | fail InvalidStatementException, 'Foreign keys cannot be selected' \ 102 | if field.is_a? Fields::ForeignKeyField 103 | 104 | field 105 | end 106 | end.to_set 107 | end 108 | private_class_method :fields_from_tree 109 | 110 | # Extract ordering fields from a parse tree 111 | # @return [Array] 112 | def self.order_from_tree(tree, params) 113 | return params[:order] = [] if tree[:order].nil? 
114 | 115 | params[:order] = tree[:order][:fields].each_slice(2).map do |field| 116 | field = field.first if field.first.is_a?(Array) 117 | add_field_with_prefix tree[:path], field, params 118 | end 119 | end 120 | private_class_method :order_from_tree 121 | 122 | private 123 | 124 | def field_path(field) 125 | path = @graph.path_between @graph.longest_path.entities.first, 126 | field.parent 127 | path = path.drop_while { |k| @graph.longest_path.include? k } << path[-1] 128 | path = KeyPath.new(path) unless path.is_a?(KeyPath) 129 | 130 | from_path path, @graph.longest_path, field 131 | end 132 | end 133 | 134 | # A query required to support an update 135 | class SupportQuery < Query 136 | attr_reader :statement, :index, :entity 137 | 138 | def initialize(entity, params, text, group: nil, label: nil) 139 | super params, text, group: group, label: label 140 | 141 | @entity = entity 142 | end 143 | 144 | # Support queries must also have their statement and index checked 145 | def ==(other) 146 | other.is_a?(SupportQuery) && @statement == other.statement && 147 | @index == other.index && @comment == other.comment 148 | end 149 | alias eql? 
== 150 | 151 | def hash 152 | @hash ||= Zlib.crc32_combine super, @index.hash, @index.hash_str.length 153 | end 154 | 155 | # :nocov: 156 | def to_color 157 | super.to_color + ' for [magenta]' + @index.key + '[/]' 158 | end 159 | # :nocov: 160 | end 161 | end 162 | -------------------------------------------------------------------------------- /lib/nose/statements/connection.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | # Superclass for connect and disconnect statements 5 | class Connection < Statement 6 | include StatementSupportQuery 7 | 8 | attr_reader :source_pk, :target, :target_pk, :conditions 9 | alias source entity 10 | 11 | def initialize(params, text, group: nil, label: nil) 12 | super params, text, group: group, label: label 13 | fail InvalidStatementException, 'Incorrect connection initialization' \ 14 | unless text.split.first == self.class.name.split('::').last.upcase 15 | 16 | populate_conditions params 17 | end 18 | 19 | # Build a new disconnect from a provided parse tree 20 | # @return [Connection] 21 | def self.parse(tree, params, text, group: nil, label: nil) 22 | keys_from_tree tree, params 23 | 24 | new params, text, group: group, label: label 25 | end 26 | 27 | # @return[void] 28 | def self.keys_from_tree(tree, params) 29 | params[:source_pk] = tree[:source_pk] 30 | params[:target] = params[:entity].foreign_keys[tree[:target].to_s] 31 | params[:target_pk] = tree[:target_pk] 32 | end 33 | 34 | # Produce the SQL text corresponding to this connection 35 | # @return [String] 36 | def unparse 37 | "CONNECT #{source.name}(\"#{source_pk}\") TO " \ 38 | "#{target.name}(\"#{target_pk}\")" 39 | end 40 | 41 | def ==(other) 42 | self.class == other.class && 43 | @graph == other.graph && 44 | @source == other.source && 45 | @target == other.target && 46 | @conditions == other.conditions 47 | end 48 | alias eql? 
== 49 | 50 | def hash 51 | @hash ||= [@graph, @source, @target, @conditions].hash 52 | end 53 | 54 | # A connection modifies an index if the relationship is in the path 55 | def modifies_index?(index) 56 | index.path.include?(@target) || index.path.include?(@target.reverse) 57 | end 58 | 59 | # Get the support queries for updating an index 60 | def support_queries(index) 61 | return [] unless modifies_index?(index) 62 | 63 | select = index.all_fields - @conditions.each_value.map(&:field).to_set 64 | return [] if select.empty? 65 | 66 | index.graph.split(entity).map do |graph| 67 | support_fields = select.select do |field| 68 | graph.entities.include? field.parent 69 | end.to_set 70 | conditions = @conditions.select do |_, c| 71 | graph.entities.include? c.field.parent 72 | end 73 | 74 | split_entity = split_entity graph, index.graph, entity 75 | build_support_query split_entity, index, graph, support_fields, 76 | conditions 77 | end.compact 78 | end 79 | 80 | protected 81 | 82 | # The two key fields are provided with the connection 83 | def given_fields 84 | [@target.parent.id_field, @target.entity.id_field] 85 | end 86 | 87 | private 88 | 89 | # Validate the types of the primary keys 90 | # @return [void] 91 | def validate_keys 92 | # XXX Only works for non-composite PKs 93 | source_type = source.id_field.class.const_get 'TYPE' 94 | fail TypeError unless source_type.nil? || source_pk.is_a?(type) 95 | 96 | target_type = @target.class.const_get 'TYPE' 97 | fail TypeError unless target_type.nil? 
|| target_pk.is_a?(type) 98 | end 99 | 100 | # Populate the list of condition objects 101 | # @return [void] 102 | def populate_conditions(params) 103 | @source_pk = params[:source_pk] 104 | @target = params[:target] 105 | @target_pk = params[:target_pk] 106 | 107 | validate_keys 108 | 109 | # This is needed later when planning updates 110 | @eq_fields = [@target.parent.id_field, 111 | @target.entity.id_field] 112 | 113 | source_id = source.id_field 114 | target_id = @target.entity.id_field 115 | @conditions = { 116 | source_id.id => Condition.new(source_id, :'=', @source_pk), 117 | target_id.id => Condition.new(target_id, :'=', @target_pk) 118 | } 119 | end 120 | 121 | # Get the where clause for a support query over the given path 122 | # @return [String] 123 | def support_query_condition_for_path(path, reversed) 124 | key = (reversed ? target.entity : target.parent).id_field 125 | path = path.reverse if path.entities.last != key.entity 126 | eq_key = path.entries[-1] 127 | if eq_key.is_a? Fields::ForeignKeyField 128 | where = "WHERE #{eq_key.name}.#{eq_key.entity.id_field.name} = ?" 129 | else 130 | where = "WHERE #{eq_key.parent.name}." \ 131 | "#{eq_key.parent.id_field.name} = ?" 
132 | end 133 | 134 | where 135 | end 136 | end 137 | 138 | # A representation of a connect in the workload 139 | class Connect < Connection 140 | # Specifies that connections require insertion 141 | def requires_insert?(_index) 142 | true 143 | end 144 | end 145 | 146 | # A representation of a disconnect in the workload 147 | class Disconnect < Connection 148 | # Produce the SQL text corresponding to this disconnection 149 | # @return [String] 150 | def unparse 151 | "DISCONNECT #{source.name}(\"#{source_pk}\") FROM " \ 152 | "#{target.name}(\"#{target_pk}\")" 153 | end 154 | 155 | # Specifies that disconnections require deletion 156 | def requires_delete?(_index) 157 | true 158 | end 159 | end 160 | end 161 | -------------------------------------------------------------------------------- /lib/nose/search/constraints.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module NoSE 4 | module Search 5 | # Base class for constraints 6 | class Constraint 7 | # If this is not overridden, apply query-specific constraints 8 | # @return [void] 9 | def self.apply(problem) 10 | problem.queries.each_with_index do |query, q| 11 | apply_query query, q, problem 12 | end 13 | end 14 | 15 | # To be implemented in subclasses for query-specific constraints 16 | # @return [void] 17 | def self.apply_query(*_args) 18 | end 19 | end 20 | 21 | # Constraints which force indexes to be present to be used 22 | class IndexPresenceConstraints < Constraint 23 | # Add constraint for indices being present 24 | def self.apply(problem) 25 | problem.indexes.each do |index| 26 | problem.queries.each_with_index do |query, q| 27 | name = "q#{q}_#{index.key}_avail" if ENV['NOSE_LOG'] == 'debug' 28 | constr = MIPPeR::Constraint.new problem.query_vars[index][query] + 29 | problem.index_vars[index] * -1, 30 | :<=, 0, name 31 | problem.model << constr 32 | end 33 | end 34 | end 35 | end 36 | 37 | # The single constraint used to 
enforce a maximum storage cost
    class SpaceConstraint < Constraint
      # Add space constraint if needed
      # @return [void]
      def self.apply(problem)
        # No constraint is needed when storage is unbounded
        return unless problem.data[:max_space].finite?

        fail 'Space constraint not supported when grouping by ID graph' \
          if problem.data[:by_id_graph]

        space = problem.total_size
        # * 1.0 ensures the bound is passed to the solver as a float
        constr = MIPPeR::Constraint.new space, :<=,
                                        problem.data[:max_space] * 1.0,
                                        'max_space'
        problem.model << constr
      end
    end

    # Constraints that force each query to have an available plan
    class CompletePlanConstraints < Constraint
      # Add the discovered constraints to the problem
      # @return [void]
      def self.add_query_constraints(query, q, constraints, problem)
        constraints.each do |entities, constraint|
          # Named only in debug mode since naming is slow
          name = "q#{q}_#{entities.map(&:name).join '_'}" \
            if ENV['NOSE_LOG'] == 'debug'

          # If this is a support query, then we might not need a plan
          if query.is_a? SupportQuery
            # Find the index associated with the support query and make
            # the requirement of a plan conditional on this index
            index_var = if problem.data[:by_id_graph]
                          problem.index_vars[query.index.to_id_graph]
                        else
                          problem.index_vars[query.index]
                        end
            next if index_var.nil?

            # Plan steps must sum to the index variable: a plan exists
            # exactly when the supported index is selected
            constr = MIPPeR::Constraint.new constraint + index_var * -1.0,
                                            :==, 0, name
          else
            # Regular queries must always have exactly one complete plan
            constr = MIPPeR::Constraint.new constraint, :==, 1, name
          end

          problem.model << constr
        end
      end

      # Add complete query plan constraints
      # @return [void]
      def self.apply_query(query, q, problem)
        entities = query.join_order
        # One linear expression per edge of the join order
        query_constraints = Hash[entities.each_cons(2).map do |e, next_e|
          [[e, next_e], MIPPeR::LinExpr.new]
        end]

        # Add the sentinel entities at the end and beginning
        last = Entity.new '__LAST__'
        query_constraints[[entities.last, last]] = MIPPeR::LinExpr.new
        first = Entity.new '__FIRST__'
        query_constraints[[entities.first, first]] = MIPPeR::LinExpr.new

        problem.data[:costs][query].each do |index, (steps, _)|
          # All indexes should advance a step if possible unless
          # this is either the last step from IDs to entity
          # data or the first step going from data to IDs
          index_step = steps.first
          fail if entities.length > 1 && index.graph.size == 1 && \
                  !(steps.last.state.answered? ||
                    index_step.parent.is_a?(Plans::RootPlanStep))

          # Join each step in the query graph
          index_var = problem.query_vars[index][query]
          index_entities = index.graph.entities.sort_by do |entity|
            entities.index entity
          end
          index_entities.each_cons(2) do |entity, next_entity|
            # Make sure the constraints go in the correct direction
            if query_constraints.key?([entity, next_entity])
              query_constraints[[entity, next_entity]] += index_var
            else
              query_constraints[[next_entity, entity]] += index_var
            end
          end

          # If this query has been answered, add the jump to the last step
          query_constraints[[entities.last, last]] += index_var \
            if steps.last.state.answered?
          # If this index is the first step, add this index to the beginning
          query_constraints[[entities.first, first]] += index_var \
            if index_step.parent.is_a?(Plans::RootPlanStep)

          # Ensure the previous index is available
          parent_index = index_step.parent.parent_index
          next if parent_index.nil?

          # An index may only be used if its parent in the plan is also used
          parent_var = problem.query_vars[parent_index][query]
          name = "q#{q}_#{index.key}_parent" if ENV['NOSE_LOG'] == 'debug'
          constr = MIPPeR::Constraint.new index_var * 1.0 + parent_var * -1.0,
                                          :<=, 0, name
          problem.model << constr
        end

        # Ensure we have exactly one index on each component of the query graph
        add_query_constraints query, q, query_constraints, problem
      end
    end
  end
end

# ---- /spec/backend/cassandra_backend_spec.rb ----

require 'nose/backend/cassandra'

module NoSE
  module Backend
    # Shared setup for tests which do not need a live Cassandra cluster
    shared_context 'dummy Cassandra backend' do
      include_context 'dummy cost model'
      include_context 'entities'

      let(:backend) { CassandraBackend.new workload, [index], [], [], {} }
    end

    describe CassandraBackend do
      include_examples 'backend processing', cassandra: true do
        let(:config) do
          {
            name: 'cassandra',
            hosts: ['127.0.0.1'],
            port: 9042,
            keyspace: 'nose'
          }
        end

        let(:backend) do
          CassandraBackend.new plans.schema.model, plans.schema.indexes.values,
                               [], [], config
        end

        # Recreate the test keyspace before running against a live cluster
        before(:all) do
          next if RSpec.configuration.exclusion_filter[:cassandra]
          cluster = Cassandra.cluster hosts: ['127.0.0.1'], port: 9042,
                                      timeout: nil

          keyspace_definition = <<-KEYSPACE_CQL
            CREATE KEYSPACE "nose"
            WITH replication = {
              'class': 'SimpleStrategy',
              'replication_factor': 1
            }
          KEYSPACE_CQL

          session = cluster.connect

          # Drop any existing keyspace so each run starts clean
          keyspace = cluster.has_keyspace? 'nose'
          session.execute 'DROP KEYSPACE "nose"' if keyspace

          session.execute keyspace_definition
        end
      end

      it 'is a type of backend' do
        expect(CassandraBackend.subtype_name).to eq 'cassandra'
      end
    end

    describe CassandraBackend do
      context 'when not connected' do
        include_context 'dummy Cassandra backend'

        it 'can generate DDL for a simple index' do
          expect(backend.indexes_ddl).to match_array [
            'CREATE COLUMNFAMILY "TweetIndex" ("User_Username" text, ' \
            '"Tweet_Timestamp" timestamp, "User_UserId" uuid, '\
            '"Tweet_TweetId" uuid, ' \
            '"Tweet_Body" text, PRIMARY KEY(("User_Username"), ' \
            '"Tweet_Timestamp", "User_UserId", "Tweet_TweetId"));'
          ]
        end
      end
    end

    describe CassandraBackend::IndexLookupStatementStep do
      include_context 'dummy Cassandra backend'

      it 'can lookup data for an index based on a plan' do
        # Materialize a view for the given query
        query = Statement.parse 'SELECT Tweet.Body FROM Tweet.User ' \
                                'WHERE User.Username = "Bob" ' \
                                'ORDER BY Tweet.Timestamp LIMIT 10',
                                workload.model
        index = query.materialize_view
        planner = Plans::QueryPlanner.new workload.model, [index], cost_model
        step = planner.min_plan(query).first

        # Validate the expected CQL query
        client = double('client')
        backend_query = 'SELECT "Tweet_Body", "User_Username", ' \
                        '"Tweet_Timestamp" ' + "FROM \"#{index.key}\" " \
                        'WHERE "User_Username" = ? ' \
                        'ORDER BY "Tweet_Timestamp" LIMIT 10'
        expect(client).to receive(:prepare).with(backend_query) \
          .and_return(backend_query)

        # Define a simple array providing empty results
        results = []
        def results.last_page?
          true
        end
        expect(client).to receive(:execute) do |query, values|
          expect(query).to eq backend_query
          expect(values[:arguments][0]).to eq 'Bob'
        end.and_return(results)

        step_class = CassandraBackend::IndexLookupStatementStep
        prepared = step_class.new client, query.all_fields, query.conditions,
                                  step, nil, step.parent
        prepared.process query.conditions, nil
      end
    end

    describe CassandraBackend::InsertStatementStep do
      include_context 'dummy Cassandra backend'

      it 'can insert into an index' do
        client = double('client')
        index = link.simple_index
        # A nil ID should be populated with a generated UUID on insert
        values = [{
          'Link_LinkId' => nil,
          'Link_URL' => 'http://www.example.com/'
        }]
        backend_insert = "INSERT INTO #{index.key} (\"Link_LinkId\", " \
                         '"Link_URL") VALUES (?, ?)'
        expect(client).to receive(:prepare).with(backend_insert) \
          .and_return(backend_insert)
        expect(client).to receive(:execute) do |query, values|
          expect(query).to eq backend_insert
          expect(values[:arguments][0]).to be_a Cassandra::Uuid
          expect(values[:arguments][1]).to eq 'http://www.example.com/'
        end

        step_class = CassandraBackend::InsertStatementStep
        prepared = step_class.new client, index, [link['LinkId'], link['URL']]
        prepared.process values
      end
    end

    describe CassandraBackend::DeleteStatementStep do
      include_context 'dummy Cassandra backend'

      it 'can delete from an index' do
        client = double('client')
        index = link.simple_index
        backend_delete = "DELETE FROM #{index.key} WHERE \"Link_LinkId\" = ?"
        expect(client).to receive(:prepare).with(backend_delete) \
          .and_return(backend_delete)
        expect(client).to receive(:execute) do |query, values|
          expect(query).to eq backend_delete
          expect(values[:arguments][0]).to be_a Cassandra::Uuid
        end

        step_class = CassandraBackend::DeleteStatementStep
        prepared = step_class.new client, index
        prepared.process [links.first['Link_LinkId']]
      end
    end
  end
end

# ---- /spec/indexes_spec.rb ----

module NoSE
  describe Index do
    include_context 'entities'

    # Queries exercising equality, range, and ordering predicates
    let(:equality_query) do
      Statement.parse 'SELECT Tweet.Body FROM Tweet WHERE Tweet.TweetId = ?',
                      workload.model
    end
    let(:combo_query) do
      Statement.parse 'SELECT Tweet.Body FROM Tweet ' \
                      'WHERE Tweet.Timestamp > ? ' \
                      'AND Tweet.TweetId = ?', workload.model
    end
    let(:order_query) do
      Statement.parse 'SELECT Tweet.Body FROM Tweet WHERE Tweet.TweetId = ? ' \
                      'ORDER BY Tweet.Timestamp', workload.model
    end

    before(:each) do
      workload.add_statement equality_query
      workload.add_statement combo_query
      workload.add_statement order_query
    end

    it 'can return fields by field ID' do
      expect(index['Tweet_Body']).to eq(tweet['Body'])
    end

    it 'contains fields' do
      index = Index.new [tweet['TweetId']], [], [tweet['Body']],
                        QueryGraph::Graph.from_path([tweet.id_field])
      expect(index.contains_field? tweet['TweetId']).to be true
    end

    it 'can store additional fields' do
      index = Index.new [tweet['TweetId']], [], [tweet['Body']],
                        QueryGraph::Graph.from_path([tweet.id_field])
      expect(index.contains_field?
tweet['Body']).to be true
    end

    it 'can calculate its size' do
      index = Index.new [tweet['TweetId']], [], [tweet['Body']],
                        QueryGraph::Graph.from_path([tweet.id_field])
      # Total size is the per-entry size times the entity count
      entry_size = tweet['TweetId'].size + tweet['Body'].size
      expect(index.entry_size).to eq(entry_size)
      expect(index.size).to eq(entry_size * tweet.count)
    end

    context 'when materializing views' do
      it 'supports equality predicates' do
        index = equality_query.materialize_view
        expect(index.hash_fields).to eq([tweet['TweetId']].to_set)
      end

      it 'support range queries' do
        index = combo_query.materialize_view
        expect(index.order_fields).to eq([tweet['Timestamp']])
      end

      it 'supports multiple predicates' do
        index = combo_query.materialize_view
        expect(index.hash_fields).to eq([tweet['TweetId']].to_set)
        expect(index.order_fields).to eq([tweet['Timestamp']])
      end

      it 'supports order by' do
        index = order_query.materialize_view
        expect(index.order_fields).to eq([tweet['Timestamp']])
      end

      # Index keys are derived from content so must be stable across runs
      it 'keeps a static key' do
        index = combo_query.materialize_view
        expect(index.key).to eq 'i1632091596'
      end

      it 'includes only one entity in the hash fields' do
        query = Statement.parse 'SELECT Tweet.TweetId FROM Tweet.User ' \
                                'WHERE Tweet.Timestamp = ? 
AND User.City = ?',
                                workload.model
        index = query.materialize_view
        expect(index.hash_fields.map(&:parent).uniq).to have(1).item
      end
    end

    it 'can tell if it maps identities for a field' do
      index = Index.new [tweet['TweetId']], [], [tweet['Body']],
                        QueryGraph::Graph.from_path([tweet.id_field])
      expect(index.identity?).to be true
    end

    it 'can be created to map entity fields by id' do
      index = tweet.simple_index
      expect(index.hash_fields).to eq([tweet['TweetId']].to_set)
      expect(index.order_fields).to eq([])
      expect(index.extra).to eq([
        tweet['Body'],
        tweet['Timestamp'],
        tweet['Retweets']
      ].to_set)
      expect(index.key).to eq 'Tweet'
    end

    context 'when checking validity' do
      it 'cannot have empty hash fields' do
        expect do
          Index.new [], [], [tweet['TweetId']],
                    QueryGraph::Graph.from_path([tweet.id_field])
        end.to raise_error InvalidIndexException
      end

      it 'cannot have hash fields involving multiple entities' do
        expect do
          Index.new [tweet['Body'], user['City']],
                    [tweet.id_field, user.id_field], [],
                    QueryGraph::Graph.from_path([tweet.id_field,
                                                 tweet['User']])
        end.to raise_error InvalidIndexException
      end

      it 'must have fields at the start of the path' do
        expect do
          Index.new [tweet['TweetId']], [], [],
                    QueryGraph::Graph.from_path([tweet.id_field,
                                                 tweet['User']])
        end.to raise_error InvalidIndexException
      end

      it 'must have fields at the end of the path' do
        expect do
          Index.new [user['City']], [], [],
                    QueryGraph::Graph.from_path([tweet.id_field,
                                                 tweet['User']])
        end.to raise_error InvalidIndexException
      end
    end

    context 'when reducing to an ID graph' do
      it 'moves non-ID fields to extra data' do
        index = Index.new [user['City']], [user['UserId']], [],
QueryGraph::Graph.from_path([user.id_field])
        id_graph = index.to_id_graph

        expect(id_graph.hash_fields).to match_array [user['UserId']]
        expect(id_graph.order_fields).to be_empty
        expect(id_graph.extra).to match_array [user['City']]
      end

      it 'does not change indexes which are already ID paths' do
        index = Index.new [user['UserId']], [tweet['TweetId']],
                          [tweet['Body']], QueryGraph::Graph.from_path(
                            [user.id_field, user['Tweets']]
                          )
        id_graph = index.to_id_graph

        expect(id_graph).to eq(index)
      end
    end
  end
end

# ---- /lib/nose/backend/file.rb ----

# frozen_string_literal: true

module NoSE
  module Backend
    # Simple backend which persists data to a file
    class FileBackend < Backend
      include Subtype

      def initialize(model, indexes, plans, update_plans, config)
        super

        # Try to load data from file or start fresh
        @index_data = if !config[:file].nil? && File.file?(config[:file])
                        Marshal.load File.open(config[:file])
                      else
                        {}
                      end

        # Ensure the data is saved when we exit
        ObjectSpace.define_finalizer self, self.class.finalize(@index_data,
                                                               config[:file])
      end

      # Save data when the object is destroyed
      # Returns a proc suitable for ObjectSpace.define_finalizer; it
      # captures the data hash and file path, not the backend instance
      def self.finalize(index_data, file)
        proc do
          if !file.nil?
            Marshal.dump(index_data, File.open(file, 'w'))
          end
        end
      end

      # Check for an empty array for the data
      def index_empty?(index)
        !index_exists?(index) || @index_data[index.key].empty?
      end

      # Check if we have prepared space for this index
      def index_exists?(index)
        @index_data.key?
index.key
      end

      # @abstract Subclasses implement to allow inserting
      def index_insert_chunk(index, chunk)
        @index_data[index.key].concat chunk
      end

      # Generate a simple UUID
      def generate_id
        SecureRandom.uuid
      end

      # Allocate space for data on the new indexes
      def indexes_ddl(execute = false, skip_existing = false,
                      drop_existing = false)
        @indexes.each do |index|
          # Do the appropriate behaviour based on the flags passed in
          if index_exists?(index)
            next if skip_existing
            fail unless drop_existing
          end

          @index_data[index.key] = []
        end if execute

        # We just use the original index definition as DDL
        @indexes.map(&:inspect)
      end

      # Sample a number of values from the given index
      def index_sample(index, count)
        data = @index_data[index.key]
        data.nil? ? [] : data.sample(count)
      end

      # We just produce the data here which can be manipulated as needed
      # @return [Hash]
      def client
        @index_data
      end

      # Provide some helper functions which allow the matching of rows
      # based on a set of list of conditions
      module RowMatcher
        # Check if a row matches the given condition
        # @return [Boolean]
        def row_matches?(row, conditions)
          row_matches_eq?(row, conditions) &&
            row_matches_range?(row, conditions)
        end

        # Check if a row matches the given condition on equality predicates
        # @return [Boolean]
        def row_matches_eq?(row, conditions)
          @eq_fields.all? do |field|
            row[field.id] == conditions.find { |c| c.field == field }.value
          end
        end

        # Check if a row matches the given condition on the range predicate
        # @return [Boolean]
        def row_matches_range?(row, conditions)
          # Rows trivially match when there is no range predicate
          return true if @range_field.nil?
104 | 105 | range_cond = conditions.find { |c| c.field == @range_field } 106 | row[@range_field.id].send range_cond.operator, range_cond.value 107 | end 108 | end 109 | 110 | # Look up data on an index in the backend 111 | class IndexLookupStatementStep < Backend::IndexLookupStatementStep 112 | include RowMatcher 113 | 114 | # Filter all the rows in the specified index to those requested 115 | def process(conditions, results) 116 | # Get the set of conditions we need to process 117 | results = initial_results(conditions) if results.nil? 118 | condition_list = result_conditions conditions, results 119 | 120 | # Loop through all rows to find the matching ones 121 | rows = @client[@index.key] || [] 122 | selected = condition_list.flat_map do |condition| 123 | rows.select { |row| row_matches? row, condition } 124 | end.compact 125 | 126 | # Apply the limit and only return selected fields 127 | field_ids = Set.new @step.fields.map(&:id).to_set 128 | selected[0..(@step.limit.nil? ? -1 : @step.limit)].map do |row| 129 | row.select { |k, _| field_ids.include? k } 130 | end 131 | end 132 | end 133 | 134 | # Insert data into an index on the backend 135 | class InsertStatementStep < Backend::InsertStatementStep 136 | # Add new rows to the index 137 | def process(results) 138 | key_ids = (@index.hash_fields + @index.order_fields).map(&:id).to_set 139 | 140 | results.each do |row| 141 | # Pick out primary key fields we can use to match 142 | conditions = row.select do |field_id| 143 | key_ids.include? field_id 144 | end 145 | 146 | # If we have all the primary keys, check for a match 147 | if conditions.length == key_ids.length 148 | # Try to find a row with this ID and update it 149 | matching_row = @client[index.key].find do |index_row| 150 | index_row.merge(conditions) == index_row 151 | end 152 | 153 | unless matching_row.nil? 154 | matching_row.merge! 
 row
                next
              end
            end

            # Populate IDs as needed
            key_ids.each do |key_id|
              row[key_id] = SecureRandom.uuid if row[key_id].nil?
            end

            @client[index.key] << row
          end
        end
      end

      # Delete data from an index on the backend
      class DeleteStatementStep < Backend::DeleteStatementStep
        include RowMatcher

        # Remove rows matching the results from the dataset
        def process(results)
          # Loop over all rows
          @client[index.key].reject! do |row|
            # Check against all results
            results.any? do |result|
              # If all fields match, drop the row
              result.all? do |field, value|
                row[field] == value
              end
            end
          end
        end
      end
    end
  end
end

# ---- /lib/nose/loader/mysql.rb ----

# frozen_string_literal: true

# This is optional so other things can run under JRuby,
# however this loader won't work so we need to use MRI
begin
  require 'mysql2'
rescue LoadError
  require 'mysql'
end

module NoSE
  module Loader
    # Load data from a MySQL database into a backend
    class MysqlLoader < LoaderBase
      def initialize(workload = nil, backend = nil)
        @logger = Logging.logger['nose::loader::mysqlloader']

        @workload = workload
        @backend = backend
      end

      # Load a generated set of indexes with data from MySQL
      # @return [void]
      def load(indexes, config, show_progress = false, limit = nil,
               skip_existing = true)
        # Collapse to ID graphs if the backend stores them that way
        indexes.map!(&:to_id_graph).uniq! if @backend.by_id_graph

        # XXX Assuming backend is thread-safe
        Parallel.each(indexes, in_threads: 2) do |index|
          load_index index, config, show_progress, limit, skip_existing
        end
      end

      # Read all tables in the database and construct a workload object
      # @return [Workload]
      def workload(config)
        client = new_client config

        workload = Workload.new
        # @array_options is set by new_client depending on the driver
        results = if @array_options
                    client.query('SHOW TABLES').each(**@array_options)
                  else
                    client.query('SHOW TABLES').each
                  end

        results.each do |table, *|
          # TODO: Handle foreign keys
          workload << entity_for_table(client, table)
        end

        workload
      end

      private

      # Create a new client from the given configuration
      # Prefers the mysql2 driver; falls back to mysql (e.g. under JRuby)
      def new_client(config)
        if Object.const_defined?(:Mysql2)
          @query_options = { stream: true, cache_rows: false }
          @array_options = { as: :array }
          Mysql2::Client.new host: config[:host],
                             username: config[:username],
                             password: config[:password],
                             database: config[:database]
        else
          @query_options = false
          @array_options = false
          Mysql.connect config[:host], config[:username], config[:password],
                        config[:database]
        end
      end

      # Load a single index into the backend
      # @return [void]
      def load_index(index, config, show_progress, limit, skip_existing)
        client = new_client config

        # Skip this index if it's not empty
        if skip_existing && !@backend.index_empty?(index)
          @logger.info "Skipping index #{index.inspect}" if show_progress
          return
        end
        @logger.info index.inspect if show_progress

        sql, fields = index_sql index, limit
        results = if @query_options
                    client.query(sql, **@query_options)
                  else
                    client.query(sql).map { |row| hash_from_row row, fields }
                  end

        # Insert the results into the backend in chunks of 1000 rows
        result_chunk = []
        results.each do |result|
          result_chunk.push result
          next if result_chunk.length < 1000

          @backend.index_insert_chunk index, result_chunk
          result_chunk = []
        end
        # Flush any final partial chunk
        @backend.index_insert_chunk index, result_chunk \
          unless result_chunk.empty?
      end

      # Construct a hash from the given row returned by the client
      # @return [Hash]
      def hash_from_row(row, fields)
        row_hash = {}
        fields.each_with_index do |field, i|
          value = field.class.value_from_string row[i]
          row_hash[field.id] = value
        end

        row_hash
      end

      # Get all the fields selected by this index
      # Returns the field objects and their SQL select expressions
      def index_sql_select(index)
        fields = index.hash_fields.to_a + index.order_fields + index.extra.to_a

        [fields, fields.map do |field|
          "#{field.parent.name}.#{field.name} AS " \
          "#{field.parent.name}_#{field.name}"
        end]
      end

      # Get the list of tables along with the join condition
      # for a query to fetch index data
      # @return [String]
      def index_sql_tables(index)
        # Create JOIN statements
        tables = index.graph.entities.map(&:name).join ' JOIN '
        return tables if index.graph.size == 1

        tables << ' WHERE '
        tables << index.path.each_cons(2).map do |_prev_key, key|
          # Orient the key so the join condition reads parent-to-entity
          key = key.reverse if key.relationship == :many
          "#{key.parent.name}.#{key.name}=" \
          "#{key.entity.name}.#{key.entity.id_field.name}"
        end.join(' AND ')

        tables
      end

      # Construct a SQL statement to fetch the data to populate this index
      # @return [String]
      def index_sql(index, limit = nil)
        # Get all the necessary fields
        fields, select = index_sql_select index

        # Construct the join condition
        tables = index_sql_tables index

        query = "SELECT #{select.join ', '} FROM #{tables}"
        query += " LIMIT #{limit}" unless limit.nil?

        @logger.debug query
        [query, fields]
      end

      # Generate an entity definition from a given table
      # @return [Entity]
      def entity_for_table(client, table)
        entity = Entity.new table
        count = client.query("SELECT COUNT(*) FROM #{table}").first
        # mysql2 returns a Hash per row; mysql returns an array/scalar
        entity.count = count.is_a?(Hash) ? count.values.first : count

        describe = if @array_options
                     client.query("DESCRIBE #{table}").each(**@array_options)
                   else
                     client.query("DESCRIBE #{table}").each
                   end

        describe.each do |name, type, _, key|
          # Primary key columns always become IDFields
          field_class = key == 'PRI' ? Fields::IDField : field_class(type)
          entity << field_class.new(name)
        end

        entity
      end

      # Produce the Ruby class used to represent a MySQL type
      # Returns nil for unrecognized types
      # @return [Class]
      def field_class(type)
        case type
        when /datetime/
          Fields::DateField
        when /float/
          Fields::FloatField
        when /text/
          # TODO: Get length
          Fields::StringField
        when /varchar\(([0-9]+)\)/
          # TODO: Use length
          Fields::StringField
        when /(tiny)?int/
          Fields::IntegerField
        end
      end
    end
  end
end

# ---- /workloads/rubis.rb ----

# frozen_string_literal: true

NoSE::Workload.new do
  Model 'rubis'

  # Define queries and their relative weights, weights taken from below
  # http://rubis.ow2.org/results/SB-BMP/Bidding/JBoss-SB-BMP-Bi-1500/perf.html#run_stat
  # http://rubis.ow2.org/results/SB-BMP/Browsing/JBoss-SB-BMP-Br-1500/perf.html#run_stat
  DefaultMix :browsing

  Group 'BrowseCategories', browsing: 4.44,
                            bidding: 7.65,
                            write_medium: 7.65,
                            write_heavy: 7.65 do
    Q 'SELECT users.nickname, users.password FROM users WHERE users.id = ? 
-- 1'
    # XXX Must have at least one equality predicate
    Q 'SELECT categories.id, categories.name FROM categories WHERE ' \
      'categories.dummy = 1 -- 2'
  end

  Group 'ViewBidHistory', browsing: 2.38,
                          bidding: 1.54,
                          write_medium: 1.54,
                          write_heavy: 1.54 do
    Q 'SELECT items.name FROM items WHERE items.id = ? -- 3'
    Q 'SELECT users.id, users.nickname, bids.id, item.id, bids.qty, ' \
      'bids.bid, bids.date FROM users.bids.item WHERE item.id = ? ' \
      'ORDER BY bids.date -- 4'
  end

  Group 'ViewItem', browsing: 22.95,
                    bidding: 14.17,
                    write_medium: 14.17,
                    write_heavy: 14.17 do
    Q 'SELECT items.* FROM items WHERE items.id = ? -- 5'
    Q 'SELECT bids.* FROM items.bids WHERE items.id = ? -- 6'
  end

  Group 'SearchItemsByCategory', browsing: 27.77,
                                 bidding: 15.94,
                                 write_medium: 15.94,
                                 write_heavy: 15.94 do
    Q 'SELECT items.id, items.name, items.initial_price, items.max_bid, ' \
      'items.nb_of_bids, items.end_date FROM items.category WHERE ' \
      'category.id = ? AND items.end_date >= ? LIMIT 25 -- 7'
  end

  Group 'ViewUserInfo', browsing: 4.41,
                        bidding: 2.48,
                        write_medium: 2.48,
                        write_heavy: 2.48 do
    # XXX Not including region name below
    Q 'SELECT users.* FROM users WHERE users.id = ? -- 8'
    Q 'SELECT comments.id, comments.rating, comments.date, comments.comment ' \
      'FROM comments.to_user WHERE to_user.id = ? -- 9'
  end

  # Write transactions scale their weight by 10x in the write_medium mix and
  # 100x in the write_heavy mix (the multipliers below make this explicit)
  Group 'RegisterItem', bidding: 0.53,
                        write_medium: 0.53 * 10,
                        write_heavy: 0.53 * 100 do
    Q 'INSERT INTO items SET id=?, name=?, description=?, initial_price=?, ' \
      'quantity=?, reserve_price=?, buy_now=?, nb_of_bids=0, max_bid=0, ' \
      'start_date=?, end_date=? AND CONNECT TO category(?), seller(?) 
-- 10'
  end

  Group 'RegisterUser', bidding: 1.07,
                        write_medium: 1.07 * 10,
                        write_heavy: 1.07 * 100 do
    Q 'INSERT INTO users SET id=?, firstname=?, lastname=?, nickname=?, ' \
      'password=?, email=?, rating=0, balance=0, creation_date=? ' \
      'AND CONNECT TO region(?) -- 11'
  end

  Group 'BuyNow', bidding: 1.16,
                  write_medium: 1.16,
                  write_heavy: 1.16 do
    Q 'SELECT users.nickname FROM users WHERE users.id=? -- 12'
    Q 'SELECT items.* FROM items WHERE items.id=? -- 13'
  end

  Group 'StoreBuyNow', bidding: 1.10,
                       write_medium: 1.10 * 10,
                       write_heavy: 1.10 * 100 do
    Q 'SELECT items.quantity, items.nb_of_bids, items.end_date FROM items ' \
      'WHERE items.id=? -- 14'
    Q 'UPDATE items SET quantity=?, nb_of_bids=?, end_date=? WHERE items.id=? -- 15'
    Q 'INSERT INTO buynow SET id=?, qty=?, date=? ' \
      'AND CONNECT TO item(?), buyer(?) -- 16'
  end

  Group 'PutBid', bidding: 5.40,
                  write_medium: 5.40,
                  write_heavy: 5.40 do
    Q 'SELECT users.nickname, users.password FROM users WHERE users.id=? -- 17'
    Q 'SELECT items.* FROM items WHERE items.id=? -- 18'
    Q 'SELECT bids.qty, bids.date FROM bids.item WHERE item.id=? ' \
      'ORDER BY bids.bid LIMIT 2 -- 19'
  end

  Group 'StoreBid', bidding: 3.74,
                    write_medium: 3.74 * 10,
                    write_heavy: 3.74 * 100 do
    Q 'INSERT INTO bids SET id=?, qty=?, bid=?, date=? ' \
      'AND CONNECT TO item(?), user(?) -- 20'
    Q 'SELECT items.nb_of_bids, items.max_bid FROM items WHERE items.id=? -- 21'
    Q 'UPDATE items SET nb_of_bids=?, max_bid=? WHERE items.id=? -- 22'
  end

  Group 'PutComment', bidding: 0.46,
                      write_medium: 0.46,
                      write_heavy: 0.46 do
    Q 'SELECT users.nickname, users.password FROM users WHERE users.id=? -- 23'
    Q 'SELECT items.* FROM items WHERE items.id=? -- 24'
    Q 'SELECT users.* FROM users WHERE users.id=? 
-- 25'
  end

  Group 'StoreComment', bidding: 0.45,
                        write_medium: 0.45 * 10,
                        write_heavy: 0.45 * 100 do
    Q 'SELECT users.rating FROM users WHERE users.id=? -- 26'
    Q 'UPDATE users SET rating=? WHERE users.id=? -- 27'
    Q 'INSERT INTO comments SET id=?, rating=?, date=?, comment=? ' \
      'AND CONNECT TO to_user(?), from_user(?), item(?) -- 28'
  end

  Group 'AboutMe', bidding: 1.71,
                   write_medium: 1.71,
                   write_heavy: 1.71 do
    Q 'SELECT users.* FROM users WHERE users.id=? -- 29'
    Q 'SELECT comments_received.* FROM users.comments_received ' \
      'WHERE users.id = ? -- 30'
    Q 'SELECT from_user.nickname FROM comments.from_user WHERE comments.id = ? -- 31'
    Q 'SELECT bought_now.*, items.* FROM items.bought_now.buyer ' \
      'WHERE buyer.id = ? AND bought_now.date>=? -- 32'
    Q 'SELECT items.* FROM items.seller WHERE seller.id=? AND ' \
      'items.end_date >=? -- 33'
    Q 'SELECT items.* FROM items.bids.user WHERE user.id=? AND ' \
      'items.end_date>=? -- 34'
  end

  Group 'SearchItemsByRegion', browsing: 8.26,
                               bidding: 6.34,
                               write_medium: 6.34,
                               write_heavy: 6.34 do
    Q 'SELECT items.id, items.name, items.initial_price, items.max_bid, ' \
      'items.nb_of_bids, items.end_date FROM ' \
      'items.seller WHERE seller.region.id = ? AND items.category.id = ? ' \
      'AND items.end_date >= ? 
LIMIT 25 -- 35' 149 | end 150 | 151 | Group 'BrowseRegions', browsing: 3.21, 152 | bidding: 5.39, 153 | write_medium: 5.39, 154 | write_heavy: 5.39 do 155 | # XXX Must have at least one equality predicate 156 | Q 'SELECT regions.id, regions.name FROM regions ' \ 157 | 'WHERE regions.dummy = 1 -- 36' 158 | end 159 | end 160 | -------------------------------------------------------------------------------- /experiments/rubis/rubis-schema.sql: -------------------------------------------------------------------------------- 1 | -- MySQL dump 10.13 Distrib 5.7.13-6, for debian-linux-gnu (x86_64) 2 | -- 3 | -- Host: localhost Database: rubis_big 4 | -- ------------------------------------------------------ 5 | -- Server version 5.7.13-6 6 | 7 | /*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */; 8 | /*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */; 9 | /*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */; 10 | /*!40101 SET NAMES utf8 */; 11 | /*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */; 12 | /*!40103 SET TIME_ZONE='+00:00' */; 13 | /*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */; 14 | /*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */; 15 | /*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */; 16 | /*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */; 17 | 18 | -- 19 | -- Table structure for table `bids` 20 | -- 21 | 22 | DROP TABLE IF EXISTS `bids`; 23 | /*!40101 SET @saved_cs_client = @@character_set_client */; 24 | /*!40101 SET character_set_client = utf8 */; 25 | CREATE TABLE `bids` ( 26 | `id` int(10) unsigned NOT NULL AUTO_INCREMENT, 27 | `user_id` int(10) unsigned NOT NULL, 28 | `item_id` int(10) unsigned NOT NULL, 29 | `qty` int(10) unsigned NOT NULL, 30 | `bid` float unsigned NOT NULL, 31 | `max_bid` float unsigned NOT NULL, 32 | `date` datetime DEFAULT NULL, 33 | PRIMARY KEY (`id`), 34 | UNIQUE KEY `id` (`id`) 35 | ) ENGINE=InnoDB 
AUTO_INCREMENT=20000001 DEFAULT CHARSET=latin1; 36 | /*!40101 SET character_set_client = @saved_cs_client */; 37 | 38 | -- 39 | -- Table structure for table `buy_now` 40 | -- 41 | 42 | DROP TABLE IF EXISTS `buy_now`; 43 | /*!40101 SET @saved_cs_client = @@character_set_client */; 44 | /*!40101 SET character_set_client = utf8 */; 45 | CREATE TABLE `buy_now` ( 46 | `id` int(10) unsigned NOT NULL AUTO_INCREMENT, 47 | `buyer_id` int(10) unsigned NOT NULL, 48 | `item_id` int(10) unsigned NOT NULL, 49 | `qty` int(10) unsigned NOT NULL, 50 | `date` datetime DEFAULT NULL, 51 | PRIMARY KEY (`id`), 52 | UNIQUE KEY `id` (`id`) 53 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1; 54 | /*!40101 SET character_set_client = @saved_cs_client */; 55 | 56 | -- 57 | -- Table structure for table `categories` 58 | -- 59 | 60 | DROP TABLE IF EXISTS `categories`; 61 | /*!40101 SET @saved_cs_client = @@character_set_client */; 62 | /*!40101 SET character_set_client = utf8 */; 63 | CREATE TABLE `categories` ( 64 | `id` int(10) unsigned NOT NULL AUTO_INCREMENT, 65 | `name` varchar(50) DEFAULT NULL, 66 | PRIMARY KEY (`id`), 67 | UNIQUE KEY `id` (`id`) 68 | ) ENGINE=InnoDB AUTO_INCREMENT=51 DEFAULT CHARSET=latin1; 69 | /*!40101 SET character_set_client = @saved_cs_client */; 70 | 71 | -- 72 | -- Table structure for table `comments` 73 | -- 74 | 75 | DROP TABLE IF EXISTS `comments`; 76 | /*!40101 SET @saved_cs_client = @@character_set_client */; 77 | /*!40101 SET character_set_client = utf8 */; 78 | CREATE TABLE `comments` ( 79 | `id` int(10) unsigned NOT NULL AUTO_INCREMENT, 80 | `from_user_id` int(10) unsigned NOT NULL, 81 | `to_user_id` int(10) unsigned NOT NULL, 82 | `item_id` int(10) unsigned NOT NULL, 83 | `rating` int(11) DEFAULT NULL, 84 | `date` datetime DEFAULT NULL, 85 | `comment` text, 86 | PRIMARY KEY (`id`), 87 | UNIQUE KEY `id` (`id`) 88 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1; 89 | /*!40101 SET character_set_client = @saved_cs_client */; 90 | 91 | -- 92 | -- Table structure for table 
`items` 93 | -- 94 | 95 | DROP TABLE IF EXISTS `items`; 96 | /*!40101 SET @saved_cs_client = @@character_set_client */; 97 | /*!40101 SET character_set_client = utf8 */; 98 | CREATE TABLE `items` ( 99 | `id` int(10) unsigned NOT NULL AUTO_INCREMENT, 100 | `name` varchar(100) DEFAULT NULL, 101 | `description` text, 102 | `initial_price` float unsigned NOT NULL, 103 | `quantity` int(10) unsigned NOT NULL, 104 | `reserve_price` float unsigned DEFAULT '0', 105 | `buy_now` float unsigned DEFAULT '0', 106 | `nb_of_bids` int(10) unsigned DEFAULT '0', 107 | `max_bid` float unsigned DEFAULT '0', 108 | `start_date` datetime DEFAULT NULL, 109 | `end_date` datetime DEFAULT NULL, 110 | `seller` int(10) unsigned NOT NULL, 111 | `category` int(10) unsigned NOT NULL, 112 | PRIMARY KEY (`id`), 113 | UNIQUE KEY `id` (`id`) 114 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1; 115 | /*!40101 SET character_set_client = @saved_cs_client */; 116 | 117 | -- 118 | -- Table structure for table `old_items` 119 | -- 120 | 121 | DROP TABLE IF EXISTS `old_items`; 122 | /*!40101 SET @saved_cs_client = @@character_set_client */; 123 | /*!40101 SET character_set_client = utf8 */; 124 | CREATE TABLE `old_items` ( 125 | `id` int(10) unsigned NOT NULL, 126 | `name` varchar(100) DEFAULT NULL, 127 | `description` text, 128 | `initial_price` float unsigned NOT NULL, 129 | `quantity` int(10) unsigned NOT NULL, 130 | `reserve_price` float unsigned DEFAULT '0', 131 | `buy_now` float unsigned DEFAULT '0', 132 | `nb_of_bids` int(10) unsigned DEFAULT '0', 133 | `max_bid` float unsigned DEFAULT '0', 134 | `start_date` datetime DEFAULT NULL, 135 | `end_date` datetime DEFAULT NULL, 136 | `seller` int(10) unsigned NOT NULL, 137 | `category` int(10) unsigned NOT NULL, 138 | PRIMARY KEY (`id`), 139 | UNIQUE KEY `id` (`id`) 140 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1; 141 | /*!40101 SET character_set_client = @saved_cs_client */; 142 | 143 | -- 144 | -- Table structure for table `regions` 145 | -- 146 | 147 | DROP TABLE 
IF EXISTS `regions`; 148 | /*!40101 SET @saved_cs_client = @@character_set_client */; 149 | /*!40101 SET character_set_client = utf8 */; 150 | CREATE TABLE `regions` ( 151 | `id` int(10) unsigned NOT NULL AUTO_INCREMENT, 152 | `name` varchar(25) DEFAULT NULL, 153 | PRIMARY KEY (`id`), 154 | UNIQUE KEY `id` (`id`) 155 | ) ENGINE=InnoDB AUTO_INCREMENT=6 DEFAULT CHARSET=latin1; 156 | /*!40101 SET character_set_client = @saved_cs_client */; 157 | 158 | -- 159 | -- Table structure for table `users` 160 | -- 161 | 162 | DROP TABLE IF EXISTS `users`; 163 | /*!40101 SET @saved_cs_client = @@character_set_client */; 164 | /*!40101 SET character_set_client = utf8 */; 165 | CREATE TABLE `users` ( 166 | `id` int(10) unsigned NOT NULL AUTO_INCREMENT, 167 | `firstname` varchar(20) DEFAULT NULL, 168 | `lastname` varchar(20) DEFAULT NULL, 169 | `nickname` varchar(20) NOT NULL, 170 | `password` varchar(20) NOT NULL, 171 | `email` varchar(50) NOT NULL, 172 | `rating` int(11) DEFAULT NULL, 173 | `balance` float DEFAULT NULL, 174 | `creation_date` datetime DEFAULT NULL, 175 | `region` int(10) unsigned NOT NULL, 176 | PRIMARY KEY (`id`), 177 | UNIQUE KEY `id` (`id`) 178 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1; 179 | /*!40101 SET character_set_client = @saved_cs_client */; 180 | /*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */; 181 | 182 | /*!40101 SET SQL_MODE=@OLD_SQL_MODE */; 183 | /*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */; 184 | /*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */; 185 | /*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */; 186 | /*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */; 187 | /*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */; 188 | /*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */; 189 | 190 | -- Dump completed on 2016-08-20 23:00:10 191 | -------------------------------------------------------------------------------- /lib/nose/enumerator.rb: 
# frozen_string_literal: true

require 'logging'

module NoSE
  # Produces potential indices to be used in schemas
  class IndexEnumerator
    # @param workload [Workload] the workload whose queries and updates
    #   drive the enumeration
    def initialize(workload)
      @logger = Logging.logger['nose::enumerator']

      @workload = workload
    end

    # Produce all possible indices for a given query
    # @return [Array]
    def indexes_for_query(query)
      @logger.debug "Enumerating indexes for query #{query.text}"

      # Candidate ordering fields: the range predicate field (if present)
      # followed by the explicit ORDER BY fields
      range = if query.range_field.nil?
                query.order
              else
                [query.range_field] + query.order
              end

      # Group predicate fields by their parent entity; the default proc
      # makes lookups for entities with no predicates yield an empty list
      eq = query.eq_fields.group_by(&:parent)
      eq.default_proc = ->(*) { [] }

      range = range.group_by(&:parent)
      range.default_proc = ->(*) { [] }

      # Enumerate over every connected subgraph of the query graph and
      # always append the fully materialized view as a fallback plan
      query.graph.subgraphs.flat_map do |graph|
        indexes_for_graph graph, query.select, eq, range
      end.uniq << query.materialize_view
    end

    # Produce all possible indices for a given workload
    # @return [Array]
    def indexes_for_workload(additional_indexes = [], by_id_graph = false)
      queries = @workload.queries
      # Enumerate queries in parallel and merge with any caller-provided
      # seed indexes
      indexes = Parallel.map(queries) do |query|
        indexes_for_query(query).to_a
      end.inject(additional_indexes, &:+)

      # Add indexes generated for support queries; run a second pass so the
      # supporting indexes themselves also get their support indexes
      supporting = support_indexes indexes, by_id_graph
      supporting += support_indexes supporting, by_id_graph
      indexes += supporting

      # Deduplicate indexes, combine them and deduplicate again
      indexes.uniq!
      combine_indexes indexes
      indexes.uniq!

      @logger.debug do
        "Indexes for workload:\n" + indexes.map.with_index do |index, i|
          "#{i} #{index.inspect}"
        end.join("\n")
      end

      indexes
    end

    private

    # Produce the indexes necessary for support queries for these indexes
    # @return [Array]
    def support_indexes(indexes, by_id_graph)
      # If indexes are grouped by ID graph, convert them before updating
      # since other updates will be managed automatically by index maintenance
      indexes = indexes.map(&:to_id_graph).uniq if by_id_graph

      # Collect all possible support queries (every update in the workload
      # crossed with every index it may need to maintain)
      queries = indexes.flat_map do |index|
        @workload.updates.flat_map do |update|
          update.support_queries(index)
        end
      end

      # Enumerate indexes for each support query
      queries.uniq!
      queries.flat_map do |query|
        indexes_for_query(query).to_a
      end
    end

    # Combine the data of indices based on matching hash fields
    # Appends the combined indexes to the given list in place
    def combine_indexes(indexes)
      # Only indexes with no ordering can be merged, grouped by identical
      # hash fields over the same graph
      no_order_indexes = indexes.select do |index|
        index.order_fields.empty?
      end
      no_order_indexes = no_order_indexes.group_by do |index|
        [index.hash_fields, index.graph]
      end

      no_order_indexes.each do |(hash_fields, graph), hash_indexes|
        extra_choices = hash_indexes.map(&:extra).uniq

        # XXX More combos? (only pairwise unions are generated here)
        combos = extra_choices.combination(2)

        combos.map do |combo|
          indexes << Index.new(hash_fields, [], combo.inject(Set.new, &:+),
                               graph)
          @logger.debug "Enumerated combined index #{indexes.last.inspect}"
        end
      end
    end

    # Get all possible choices of fields to use for equality
    # @return [Array]
    def eq_choices(graph, eq)
      entity_choices = graph.entities.flat_map do |entity|
        # Get the fields for the entity and add in the IDs
        # NOTE(review): when the entity has equality predicates, << and
        # uniq! mutate the array stored inside eq; harmless across
        # subgraphs only because the append is idempotent — verify
        entity_fields = eq[entity] << entity.id_field
        entity_fields.uniq!

        # All orderings of every non-empty subset of this entity's fields
        1.upto(entity_fields.count).flat_map do |n|
          entity_fields.permutation(n).to_a
        end
      end

      # Cross entity choices across 2..N entities, plus the single-entity
      # choices themselves
      2.upto(graph.entities.length).flat_map do |n|
        entity_choices.permutation(n).map(&:flatten).to_a
      end + entity_choices
    end

    # Get fields which should be included in an index for the given graph
    # @return [Array]
    def extra_choices(graph, select, eq, range)
      choices = eq.values + range.values << select.to_a

      # NOTE(review): Array#select is non-destructive and its return value
      # is discarded here, so fields whose parent lies outside the graph
      # are never actually filtered out — likely intended select!; confirm
      choices.each do |choice|
        choice.select { |field| graph.entities.include?(field.parent) }
      end

      # Always offer the empty choice so an index need not carry extras
      choices.reject(&:empty?) << []
    end

    # Get all possible indices which jump a given piece of a query graph
    # @return [Array]
    def indexes_for_graph(graph, select, eq, range)
      eq_choices = eq_choices graph, eq
      range_fields = graph.entities.map { |entity| range[entity] }.reduce(&:+)
      range_fields.uniq!
      # Ordering candidates: every permutation of every prefix of the range
      # fields, plus the empty ordering
      order_choices = range_fields.prefixes.flat_map do |fields|
        fields.permutation.to_a
      end.uniq << []
      extra_choices = extra_choices graph, select, eq, range
      # Expand extras to unions of every subset of the basic choices
      extra_choices = 1.upto(extra_choices.length).flat_map do |n|
        extra_choices.combination(n).map(&:flatten).map(&:uniq)
      end.uniq

      # Generate all possible indices based on the field choices
      choices = eq_choices.product(extra_choices)
      indexes = choices.map! do |index, extra|
        indexes = []

        order_choices.each do |order|
          # Append the primary key of the entities in the graph if needed
          order += graph.entities.sort_by(&:name).map(&:id_field) -
                   (index + order)

          # Partition into the ordering portion
          index.partitions.each do |index_prefix, order_prefix|
            # Hash fields are the leading run of the prefix belonging to
            # the same entity as the first equality field
            hash_fields = index_prefix.take_while do |field|
              field.parent == index.first.parent
            end
            order_fields = index_prefix[hash_fields.length..-1] + \
                           order_prefix + order
            extra_fields = extra - hash_fields - order_fields
            # Skip degenerate indexes consisting of hash fields alone
            next if order_fields.empty? && extra_fields.empty?

            new_index = generate_index hash_fields, order_fields, extra_fields,
                                       graph
            indexes << new_index unless new_index.nil?
          end
        end

        indexes
      end.inject([], &:+)
      indexes.flatten!

      indexes
    end

    # Generate a new index and ignore if invalid
    # @return [Index]
    def generate_index(hash, order, extra, graph)
      begin
        index = Index.new hash, order.uniq, extra, graph
        @logger.debug { "Enumerated #{index.inspect}" }
      rescue InvalidIndexException
        # This combination of fields is not valid, that's ok
        index = nil
      end

      index
    end
  end
end