├── .gitignore
├── README.md
├── TODO.md
├── exps
│   ├── .gitignore
│   ├── README.md
│   ├── __init__.py
│   ├── api
│   │   ├── __init__.py
│   │   ├── abstractcoordinator.py
│   │   ├── abstractworker.py
│   │   ├── directchannel.py
│   │   ├── message.py
│   │   ├── messageprocessor.py
│   │   ├── mongostat.py
│   │   └── results.py
│   ├── benchmark.py
│   ├── benchmarks
│   │   ├── __init__.py
│   │   ├── blog
│   │   │   ├── __init__.py
│   │   │   ├── blogcoordinator.py
│   │   │   ├── blogworker.py
│   │   │   ├── constants.py
│   │   │   ├── maxnumofcomments.py
│   │   │   └── util
│   │   │       ├── __init__.py
│   │   │       ├── rand.py
│   │   │       └── zipf.py
│   │   ├── replay
│   │   │   ├── __init__.py
│   │   │   ├── dbcombiner.py
│   │   │   ├── dbdenormalizer.py
│   │   │   ├── dbmigrator.py
│   │   │   ├── denormalizer.py
│   │   │   ├── replaycoordinator.py
│   │   │   ├── replayworker.py
│   │   │   └── unittest
│   │   │       └── test_combiner.py
│   │   └── tpcc
│   │       ├── __init__.py
│   │       ├── constants.py
│   │       ├── drivers
│   │       │   ├── __init__.py
│   │       │   ├── abstractdriver.py
│   │       │   └── mongodbdriver.py
│   │       ├── runtime
│   │       │   ├── __init__.py
│   │       │   ├── executor.py
│   │       │   ├── loader.py
│   │       │   ├── nurand.py
│   │       │   ├── rand.py
│   │       │   └── scaleparameters.py
│   │       ├── tpcc.sql
│   │       ├── tpcccoordinator.py
│   │       └── tpccworker.py
│   └── tools
│       ├── __init__.py
│       ├── dba-export.py
│       ├── design_deserializer.py
│       ├── dump-csv.py
│       ├── duplicator.py
│       ├── load-csv.py
│       └── spencerdesign2json.py
├── libs
│   ├── argparse
│   │   ├── __init__.py
│   │   └── argparse.py
│   ├── mongokit
│   │   ├── __init__.py
│   │   ├── auth.py
│   │   ├── collection.py
│   │   ├── connection.py
│   │   ├── cursor.py
│   │   ├── database.py
│   │   ├── document.py
│   │   ├── grid.py
│   │   ├── helpers.py
│   │   ├── master_slave_connection.py
│   │   ├── migration.py
│   │   ├── mongo_exceptions.py
│   │   ├── operators.py
│   │   ├── schema_document.py
│   │   └── versioned_document.py
│   └── sqlparse
│       ├── __init__.py
│       ├── engine
│       │   ├── __init__.py
│       │   ├── filter.py
│       │   └── grouping.py
│       ├── exceptions.py
│       ├── filters.py
│       ├── formatter.py
│       ├── functions.py
│       ├── keywords.py
│       ├── lexer.py
│       ├── pipeline.py
│       ├── sql.py
│       ├── tokens.py
│       └── utils.py
├── src
│   ├── OVERVIEW.md
│   ├── README.md
│   ├── catalog
│   │   ├── __init__.py
│   │   ├── collection.py
│   │   └── utilmethods.py
│   ├── costmodel
│   │   ├── __init__.py
│   │   ├── abstractcostcomponent.py
│   │   ├── costmodel.py
│   │   ├── disk
│   │   │   ├── __init__.py
│   │   │   ├── diskcostcomponent.py
│   │   │   ├── fastlrubuffer.py
│   │   │   ├── fastlrubufferusingwindow.py
│   │   │   └── lrubuffer.py
│   │   ├── network
│   │   │   ├── __init__.py
│   │   │   └── networkcostcomponent.py
│   │   ├── nodeestimator.py
│   │   ├── skew
│   │   │   ├── __init__.py
│   │   │   └── skewcostcomponent.py
│   │   └── state.py
│   ├── d4.py
│   ├── inputs
│   │   ├── __init__.py
│   │   ├── abstractconverter.py
│   │   ├── mongodb
│   │   │   ├── README
│   │   │   ├── __init__.py
│   │   │   ├── dependencyfinder.py
│   │   │   ├── mongosniffconverter.py
│   │   │   ├── normalizer.py
│   │   │   ├── parser.py
│   │   │   ├── reconstructor.py
│   │   │   ├── salt_crack.py
│   │   │   ├── sample.txt
│   │   │   ├── samplecreator.py
│   │   │   ├── sessionizer.py
│   │   │   └── workload_info.py
│   │   └── mysql
│   │       ├── __init__.py
│   │       ├── mysqlconverter.py
│   │       ├── sql2mongo.py
│   │       └── utilmethods.py
│   ├── multithreaded
│   │   ├── __init__.py
│   │   ├── message.py
│   │   ├── messageprocessor.py
│   │   ├── multi_search.py
│   │   ├── multi_search_coordinator.py
│   │   └── multi_search_worker.py
│   ├── sanitizer
│   │   ├── __init__.py
│   │   ├── anonymize.py
│   │   ├── anonymized-sample.txt
│   │   ├── out.txt
│   │   ├── sample-anonymize.txt
│   │   └── sample.dat
│   ├── search
│   │   ├── __init__.py
│   │   ├── abstractdesigner.py
│   │   ├── bbsearch.py
│   │   ├── design.py
│   │   ├── designcandidates.py
│   │   ├── designer.py
│   │   ├── initialdesigner.py
│   │   ├── lnsdesigner.py
│   │   ├── randomdesigner.py
│   │   └── utilmethods.py
│   ├── util
│   │   ├── __init__.py
│   │   ├── configutil.py
│   │   ├── constants.py
│   │   ├── histogram.py
│   │   ├── mathutil.py
│   │   ├── termcolor.py
│   │   └── utilmethods.py
│   └── workload
│       ├── __init__.py
│       ├── ophasher.py
│       ├── session.py
│       ├── utilmethods.py
│       └── workloadcombiner.py
└── tests
    ├── README
    ├── __init__.py
    ├── api
    │   └── unittest_results.py
    ├── catalog
    │   └── unittest_utilmethods.py
    ├── costmodel
    │   ├── costmodeltestcase.py
    │   ├── costmodeltestcase_guessIndex.py
    │   ├── costmodeltestcase_index.py
    │   ├── costmodeltestcase_index_withprojection.py
    │   ├── disk
    │   │   ├── unittest_diskcostcomponent_guessIndex.py
    │   │   ├── unittest_diskcostcomponent_indexinsertionpenalty.py
    │   │   ├── unittest_diskcostcomponentindexes.py
    │   │   ├── unittest_diskcostcomponentindexes_withprojection.py
    │   │   └── unittest_fastlrubuffer.py
    │   ├── network
    │   │   ├── unittest_networkcostcomponent.py
    │   │   └── unittest_networkcostcomponenttpcc.py
    │   ├── skew
    │   │   └── unittest_skewcostcomponent.py
    │   ├── unittest_costmodel.py
    │   ├── unittest_costmodel_denormalization.py
    │   ├── unittest_lrubuffer.py
    │   └── unittest_nodeestimator.py
    ├── exps
    │   ├── replay
    │   │   ├── unittest_denormalizer.py
    │   │   └── workloadgenerator.py
    │   └── tools
    │       └── unittest_design_deserializer.py
    ├── inputs
    │   ├── mongodb
    │   │   └── unittest_reconstructor.py
    │   ├── mysql
    │   │   └── unittest_sql2mongo.py
    │   └── unittest_abstractconverter.py
    ├── mongodbtestcase.py
    ├── runTests.sh
    ├── sanitizer
    │   ├── trace-anon.out
    │   ├── trace-clean.out
    │   └── unittest_sanitizer.py
    ├── search
    │   ├── bbsearch-test.py
    │   ├── unittest_bbsearch.py
    │   ├── unittest_bbsearch_CompoundKeyIterator.py
    │   ├── unittest_bbsearch_ShardKeyIterator.py
    │   ├── unittest_design.py
    │   ├── unittest_findExpectedDesign.py
    │   ├── unittest_initialdesigner.py
    │   ├── unittest_lnsdesigner.py
    │   └── unittest_utilmethods.py
    ├── tpcctestcase.py
    ├── util
    │   ├── unittest_configutil.py
    │   ├── unittest_histogram.py
    │   ├── unittest_mathutil.py
    │   └── unittest_utilmethods.py
    └── workload
        ├── unittest_ophasher.py
        ├── unittest_utilmethods.py
        ├── unittest_workloadcombiner.py
        ├── unittest_workloadcombinerwithtpcc.py
        └── workloadcombinersetup.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.pyo
3 | .#*
4 | *.kate-swp
5 | *.config
6 | .idea
7 | nosetests.xml
8 | src/*.png
9 | *~
10 | *.log
11 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # mongodb-d4
2 |
3 | **D4** is an automated tool for generating **d**istributed **d**ocument **d**atabase **d**esigns for applications
4 | running on MongoDB. This tool specifically targets applications running highly concurrent workloads, and thus its
5 | designs are tailored to the unique properties of large-scale, Web-based applications. It can also be used to assist
6 | in porting MySQL-based applications to MongoDB.
7 |
8 | Using a sample workload trace from either a document-oriented or relational database application, **D4** will compute
9 | the best database design that optimizes the throughput and latency of a document DBMS. The three design elements that
10 | D4 can select for an application are:
11 |
12 | + Sharding Keys
13 | + Indexes
14 | + Collection (De)normalization
15 |
16 | For More Information:
17 |
18 | ## Dependencies
19 | + python-pymongo
20 | + python-yaml
21 | + python-MySQLdb (optional)
22 |
23 | ## Authors
24 | + [Andy Pavlo](http://www.cs.brown.edu/~pavlo)
25 | + [Yang Zou](http://www.cs.brown.edu/~yang)
26 | + [Michail Michailidis](http://www.cs.brown.edu/~mmichail)
27 | + [Stan Zdonik](http://www.cs.brown.edu/~sbz)
28 |
29 | ## Past Contributors
30 | + [Christopher Keith](http://www.linkedin.com/pub/christopher-keith/38/882/81a)
31 | + [Emanuel Buzek](http://www.linkedin.com/pub/emanuel-buzek/2/655/b04)
32 |
33 | ## Acknowledgements
34 | This work is supported (in part) by an [Amazon AWS Research Grant](http://aws.amazon.com/education/).
35 | Additional assistance is also provided by [10gen, Inc.](http://10gen.com)
--------------------------------------------------------------------------------
/TODO.md:
--------------------------------------------------------------------------------
1 | ## What Has Jian Done?
2 |
3 | * Computation of touched node for range shard keys
4 |
5 | First, during the workload analysis stage, every field is partitioned into ranges according to its distinct values. For example, if a field has distinct values [1,2,3,4,5,6,7,8] and we have 4 shards, the ranges array will be [1,3,5,7]. Each number in the ranges array indicates the minimum value of its range.
6 |
7 | Then we use the ranges information generated in the workload analysis stage to compute the touched nodes for range shard keys. If a query contains an equality predicate on key A with value 6 in the above example, the query will access shard number 2 (shard numbering starts from 0).
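A minimal sketch of this lookup, assuming `ranges` holds each shard's minimum value as described above:

    import bisect

    def touched_shard(ranges, value):
        # the number of range minimums <= value, minus one, is the 0-based shard index
        return bisect.bisect_right(ranges, value) - 1

    assert touched_shard([1, 3, 5, 7], 6) == 2  # value 6 -> shard 2, as in the example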
8 |
9 |
10 | * Candidate generation for shard keys
11 |
12 | We only choose shard keys with high cardinality and a high referenced count. From these two statistics, d4 generates a score for each key, sorts all keys by score, and applies a threshold to filter out low-scoring keys.
13 |
14 | When iterating over combinations of shard keys, compound keys containing more keys have higher priority and are evaluated first (see the sketch below).
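An illustrative ranking sketch; the 50/50 weighting and the threshold are placeholders, not d4's actual values:

    def rank_shard_keys(stats, threshold=0.5):
        # stats maps key name -> (cardinality, referenced count),
        # both assumed to be normalized to [0, 1]
        scored = [(0.5 * card + 0.5 * refs, key)
                  for key, (card, refs) in stats.items()]
        # higher-scoring keys come first; low scorers are filtered out
        return [key for score, key in sorted(scored, reverse=True)
                if score >= threshold]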
15 |
16 |
17 | * Estimation of number of shards
18 |
19 | Although the number of shards is set by the user, not every collection can use all of them. For example, a collection sharded on a low-cardinality key, or a collection whose total data is small, will only be spread across a subset of the shards. So for each design we estimate the number of shards that each collection actually occupies, and use that number when calculating the cost.
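One plausible reading of the cardinality constraint (illustrative only, not d4's actual estimator):

    def estimate_touched_shards(num_shards, key_cardinality):
        # a shard key with fewer distinct values than shards
        # cannot spread a collection across every shard
        return min(num_shards, key_cardinality)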
20 |
21 |
22 | * Latencies report for replay framework
23 |
24 | Added a latency report to the replay framework; it also outputs the slowest queries to aid debugging.
25 |
26 | * Lots of bug fixes
27 |
28 | Fixed bugs in the input module, search algorithms, cost models, and benchmark modules.
29 |
30 | ## Future Work:
31 |
32 | * [Issue 37](https://github.com/cmu-db/mongodb-d4/issues/37)
33 | * [Issue 38](https://github.com/cmu-db/mongodb-d4/issues/38)
34 | * [Issue 39](https://github.com/cmu-db/mongodb-d4/issues/39)
35 |
--------------------------------------------------------------------------------
/exps/.gitignore:
--------------------------------------------------------------------------------
1 | mongostat
--------------------------------------------------------------------------------
/exps/README.md:
--------------------------------------------------------------------------------
1 | # MongoDB Benchmark Framework
2 |
3 | This framework is able to run different benchmarks using MongoDB. It was originally based
4 | on my TPC-C benchmark framework that I used in my NoSQL course in spring 2011. It was then
5 | forked by one of my students in the summer of 2011. I then grabbed his changes and modified
6 | it further to support the different types of experiments that we will need for this work.
7 |
8 | **TLDR:**
9 | This code is based on https://github.com/yanglu/BigBenchmark
10 | which was originally based on: https://github.com/apavlo/py-tpcc
11 |
12 |
13 | ## Dependencies:
14 | + python-execnet
15 |
16 | ## Example Usage
17 |
18 | 1. Create a configuration file for the benchmark that you are going to run.
19 | For this example, we will use the `blog` benchmark.
20 |
21 | ./benchmark.py --print-config blog > blog.config
22 |
23 | Modify the configuration file to change the parameters according to your environment setup.
24 |
25 | 2. Load in the benchmark database into MongoDB. The `--no-execute` option will prevent
26 | the framework from executing the workload portion of the benchmark, while the `--reset` option
27 | will clear out the contents of the database if it already exists.
28 |
29 | ./benchmark.py --config=blog.config --no-execute --reset blog
30 |
31 | 3. Now execute the workload driver to perform the experiment. The final throughput results
32 | will be printed at the end. Note here that the `--no-load` option will prevent the framework
33 | from repeating the loading step.
34 |
35 | ./benchmark.py --config=blog.config --no-load blog
36 |
37 |
38 | ## Configuration
39 |
40 | + **logfile**:
41 | This controls where the worker threads write their log messages.
42 | The file is not overwritten on each invocation.
--------------------------------------------------------------------------------
/exps/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | __all__ = ["benchmark"]
--------------------------------------------------------------------------------
/exps/api/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | __all__ = [ "messageprocessor", "message", "results" ]
--------------------------------------------------------------------------------
/exps/api/directchannel.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # -----------------------------------------------------------------------
3 | # Copyright (C) 2012
4 | # Andy Pavlo - http://www.cs.brown.edu/~pavlo/
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining
7 | # a copy of this software and associated documentation files (the
8 | # "Software"), to deal in the Software without restriction, including
9 | # without limitation the rights to use, copy, modify, merge, publish,
10 | # distribute, sublicense, and/or sell copies of the Software, and to
11 | # permit persons to whom the Software is furnished to do so, subject to
12 | # the following conditions:
13 | #
14 | # The above copyright notice and this permission notice shall be
15 | # included in all copies or substantial portions of the Software.
16 | #
17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
20 | # IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21 | # OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22 | # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23 | # OTHER DEALINGS IN THE SOFTWARE.
24 | # -----------------------------------------------------------------------
25 | import logging
26 | import pickle
27 | from messageprocessor import *
28 | from message import *
29 |
30 | LOG = logging.getLogger(__name__)
31 |
32 | class DirectChannel:
33 |
34 | def __init__(self):
35 | self.gateway = None # Needed by message.py
36 | self.queue = [ ]
37 | self.processor = MessageProcessor(self)
38 |
39 | m = Message(MSG_NOOP, True)
40 | self.defaultResponse = pickle.dumps(m, -1)
41 | self.response = None
42 |
43 | pass
44 |
45 | def __iter__(self):
46 | return self
47 |
48 | def next(self):
49 | if len(self.queue) == 0:
50 | raise StopIteration
51 | return self.queue.pop(0)
52 |
53 | def send(self, msg):
54 | m = getMessage(msg)
55 | if m.header in [ MSG_INIT_COMPLETED, MSG_LOAD_COMPLETED, MSG_EXECUTE_COMPLETED ]:
56 | self.response = msg
57 | else:
58 | self.queue.append(msg)
59 | self.processor.processMessage()
60 | ## DEF
61 |
62 | def receive(self):
63 | r = None
64 | if self.response is not None:
65 | r = self.response
66 | self.response = None
67 | else:
68 | r = self.defaultResponse
69 | return r
70 | ## CLASS
71 |
72 |
--------------------------------------------------------------------------------
/exps/api/mongostat.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # -----------------------------------------------------------------------
3 | # Copyright (C) 2012
4 | # Andy Pavlo - http://www.cs.brown.edu/~pavlo/
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining
7 | # a copy of this software and associated documentation files (the
8 | # "Software"), to deal in the Software without restriction, including
9 | # without limitation the rights to use, copy, modify, merge, publish,
10 | # distribute, sublicense, and/or sell copies of the Software, and to
11 | # permit persons to whom the Software is furnished to do so, subject to
12 | # the following conditions:
13 | #
14 | # The above copyright notice and this permission notice shall be
15 | # included in all copies or substantial portions of the Software.
16 | #
17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
20 | # IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21 | # OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22 | # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23 | # OTHER DEALINGS IN THE SOFTWARE.
24 | # -----------------------------------------------------------------------
25 | import os
26 | import threading
27 | import subprocess
28 | import shlex
29 | import logging
30 |
31 | LOG = logging.getLogger(__name__)
32 |
33 | class MongoStatCollector(threading.Thread):
34 |
35 | def __init__(self, host, outputFile, outputInterval=10, showAll=True):
36 | threading.Thread.__init__(self)
37 | self.host = host
38 | self.outputFile = outputFile
39 | self.outputInterval = outputInterval
40 | self.showAll = showAll
41 | self.daemon = True
42 | self.process = None
43 | self.record = False
44 | self.stopThread = False
45 | ## DEF
46 |
47 | def startRecording(self):
48 | LOG.info("Starting stat data collection [%s]", self.outputFile)
49 | self.record = True
50 |
51 | def stopRecording(self):
52 | LOG.info("Stopping stat data collection [%s]", self.outputFile)
53 | self.record = False
54 |
55 | def run(self):
56 | command = "mongostat --host %s" % self.host
57 | if self.showAll: command += " --all"
58 | command += " %d" % self.outputInterval
59 |
60 | args = shlex.split(command)
61 | LOG.info("Forking command: %s" % args)
62 | self.process = subprocess.Popen(args,
63 | stdout=subprocess.PIPE,
64 | stderr=subprocess.STDOUT,
65 | shell=False,
66 | )
67 | LOG.info("Writing MongoStat output to '%s'" % self.outputFile)
68 | header = None
69 | headerHash = None
70 | writeHeader = True
71 | with open(self.outputFile, "w") as fd:
72 | while not self.stopThread:
73 | self.process.stdout.flush()
74 | line = self.process.stdout.readline()
75 | if header is None and line.find("flushes") != -1:
76 | header = line
77 | headerHash = hash(header)
78 | if self.record:
79 | if writeHeader and header is not None:
80 | fd.write(header)
81 | writeHeader = False
82 | if hash(line) != headerHash:
83 | fd.write(line)
84 | fd.flush()
85 | # WHILE
86 | LOG.debug("MongoStatCollection thread is stopping")
87 | ## DEF
88 |
89 | def stop(self):
90 | if self.process is not None:
91 | LOG.debug("Killing MongoStatCollection process %d [%s]", self.process.pid, self.outputFile)
92 | self.stopThread = True
93 | self.process.kill()
94 | ## DEF
95 |
96 | ## CLASS
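# A usage sketch (host and output path are hypothetical):
#
#   collector = MongoStatCollector("localhost:27017", "/tmp/mongostat.log")
#   collector.start()           # fork mongostat and begin tailing its output
#   collector.startRecording()  # lines are written to the file from this point
#   ...                         # run the experiment
#   collector.stopRecording()
#   collector.stop()            # kill the mongostat process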
97 |
--------------------------------------------------------------------------------
/exps/benchmarks/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 |
--------------------------------------------------------------------------------
/exps/benchmarks/blog/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | __all__ = ["blogcoordinator", "blogworker"]
4 |
--------------------------------------------------------------------------------
/exps/benchmarks/blog/constants.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # -----------------------------------------------------------------------
3 | # Copyright (C) 2012
4 | # Andy Pavlo - http://www.cs.brown.edu/~pavlo/
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining
7 | # a copy of this software and associated documentation files (the
8 | # "Software"), to deal in the Software without restriction, including
9 | # without limitation the rights to use, copy, modify, merge, publish,
10 | # distribute, sublicense, and/or sell copies of the Software, and to
11 | # permit persons to whom the Software is furnished to do so, subject to
12 | # the following conditions:
13 | #
14 | # The above copyright notice and this permission notice shall be
15 | # included in all copies or substantial portions of the Software.
16 | #
17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
20 | # IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21 | # OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22 | # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23 | # OTHER DEALINGS IN THE SOFTWARE.
24 | # -----------------------------------------------------------------------
25 |
26 | from datetime import datetime
27 |
28 | #DB_NAME = 'microblog'
29 | ARTICLE_COLL = 'articles'
30 | COMMENT_COLL = 'comments'
31 |
32 | NUM_AUTHORS = 1024
33 | NUM_TAGS = 6000
34 | NUM_TAGS_PER_ARTICLE = 40
35 |
36 | ARTICLE_TITLE_SIZE = 200
37 | ARTICLE_CONTENT_SIZE = 4096
38 | COMMENT_CONTENT_SIZE = 512
39 | MAX_COMMENT_RATING = 100
40 | NUM_ARTICLES = 10000 # this is multiplied by the scale factor
41 | NUMBER_OF_DATE_SUBRANGES = 8 # this breaks the interval between START_DATE and STOP_DATE in X segments
42 |
43 | # Special atomic counter
44 | NEXT_ARTICLE_CTR_ID = -9999
45 | NEXT_ARTICLE_CTR_KEY = "nextArticleId"
46 |
47 | #deprecated
48 | #AUTHOR_NAME_SIZE = 20
49 | #MAX_AUTHOR_SIZE = 20
50 | #MAX_TITLE_SIZE = 200
51 | #MAX_CONTENT_SIZE = 102400
52 | #MAX_COMMENT_SIZE = 1024
53 | #MAX_NUM_COMMENTS = 100
54 |
55 |
56 |
57 |
58 | WORKLOAD_READ_PERCENT = 90
59 | WORKLOAD_WRITE_PERCENT = 10
60 | assert (WORKLOAD_READ_PERCENT+WORKLOAD_WRITE_PERCENT) == 100
61 |
62 | START_DATE = datetime.strptime('11/1/2011 1:30 PM', '%m/%d/%Y %I:%M %p')
63 | STOP_DATE = datetime.strptime('1/1/2012 1:30 PM', '%m/%d/%Y %I:%M %p')
64 |
65 | # Experiment Type Codes
66 | EXP_SHARDING = "sharding"
67 | EXP_DENORMALIZATION = "denormalization"
68 | EXP_INDEXING = "indexing"
69 | EXP_ALL = [ EXP_SHARDING, EXP_DENORMALIZATION, EXP_INDEXING ]
70 |
71 | # Sharding Config Types
72 | SHARDEXP_RANGE = 0
73 | SHARDEXP_HASH = 1
74 | SHARDEXP_ALL = [SHARDEXP_RANGE, SHARDEXP_HASH]
75 |
76 | # Indexing Config Types
77 |
78 | INDEXEXP_8020 = 0 # 80% reads / 20% writes
79 | INDEXEXP_9010 = 1 # 90% reads / 10% writes
80 | INDEXEXP_ALL = [INDEXEXP_8020, INDEXEXP_9010]
81 |
--------------------------------------------------------------------------------
/exps/benchmarks/blog/maxnumofcomments.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | import sys
3 | import os
4 | import string
5 | import re
6 | import logging
7 | import traceback
8 | import pymongo
9 | import constants
10 | from util import *
11 | from pprint import pprint, pformat
12 |
13 |
14 | # quick and dirty
15 |
16 | def test():
17 | LOG = logging.getLogger(__name__)
18 | conn = None
19 | targetHost = "bronze.cs.brown.edu"
20 | targetPort = 27017
21 | try:
22 | conn = pymongo.Connection(targetHost, targetPort)
23 | except:
24 | LOG.error("Failed to connect to target MongoDB at %s:%s" % (targetHost, targetPort))
25 | raise
26 | #assert conn
27 | db = conn["test"]
28 | titleSize = 150
29 | contentSize = 6000
30 | numComments = 100000000
31 | articleId = 1
32 | articleDate = randomDate(constants.START_DATE, constants.STOP_DATE)
33 | title = randomString(titleSize)
34 | slug = list(title.replace(" ", ""))
35 | if len(slug) > 64: slug = slug[:64]
36 | for idx in xrange(0, len(slug)):
37 | if random.randint(0, 10) == 0:
38 | slug[idx] = "-"
39 | ## FOR
40 | slug = "".join(slug)
41 | article = {
42 | "id": articleId,
43 | "title": title,
44 | "date": articleDate,
45 | "author": 1,
46 | "slug": slug,
47 | "content": randomString(contentSize),
48 | "numComments": numComments,
49 | }
50 | db[constants.ARTICLE_COLL].insert(article)
51 | print("perasa");
52 | commentCtr=0
53 | lastDate = articleDate
54 | for ii in xrange(0, numComments):
55 | lastDate = randomDate(lastDate, constants.STOP_DATE)
56 | commentAuthor = randomString(15)
57 | commentSize = 1024
58 | commentContent = randomString(commentSize)
59 |
60 | comment = {
61 | "id": commentCtr,
62 | "article": articleId,
63 | "date": lastDate,
64 | "author": commentAuthor,
65 | "comment": commentContent,
66 | "rating": 100
67 | }
68 | commentCtr += 1
69 | db[constants.ARTICLE_COLL].update({"id": articleId},{"$push":{"comments":comment}},safe=True)
70 | if commentCtr==1 or commentCtr%1000==0:
71 | print(commentCtr)
72 | # def
73 | if __name__ == '__main__':
74 | # executed as a script: run the comment-insertion stress test
75 | # against the hard-coded target MongoDB host
76 | test()
77 |
--------------------------------------------------------------------------------
/exps/benchmarks/blog/util/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from rand import *
4 | from zipf import *
--------------------------------------------------------------------------------
/exps/benchmarks/blog/util/rand.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # -----------------------------------------------------------------------
3 | # Copyright (C) 2012
4 | # Andy Pavlo - http://www.cs.brown.edu/~pavlo/
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining
7 | # a copy of this software and associated documentation files (the
8 | # "Software"), to deal in the Software without restriction, including
9 | # without limitation the rights to use, copy, modify, merge, publish,
10 | # distribute, sublicense, and/or sell copies of the Software, and to
11 | # permit persons to whom the Software is furnished to do so, subject to
12 | # the following conditions:
13 | #
14 | # The above copyright notice and this permission notice shall be
15 | # included in all copies or substantial portions of the Software.
16 | #
17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
20 | # IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21 | # OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22 | # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23 | # OTHER DEALINGS IN THE SOFTWARE.
24 | # -----------------------------------------------------------------------
25 |
26 | import random
27 | import string
28 | from datetime import timedelta
29 |
30 | def randomString(size, chars=string.ascii_uppercase + string.digits):
31 | return ''.join(random.choice(chars) for x in range(size))
32 | ## DEF
33 |
34 | #Discrete Date Generator
35 | def randomDate(start, end):
36 | """This returns a random datetime between two datetime objects but the time is the same."""
37 | delta = end - start
38 | #int_delta = (delta.days * 24 * 60 * 60) + delta.seconds
39 | #random_second = random.randrange(int_delta)
40 | random_days = random.randrange(delta.days)
41 | return (start + timedelta(days=random_days))
42 | ## DEF
--------------------------------------------------------------------------------
/exps/benchmarks/blog/util/zipf.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import math
4 | import random
5 | import bisect
6 | import numpy as np
7 |
8 |
9 | ## -----------------------------------------------------
10 | ## Zipfian Distribution Generator
11 | ## -----------------------------------------------------
12 | class ZipfGenerator:
13 |
14 | def __init__(self, n, skewin = 0.8):
15 | #if alpha <= 1.000:
16 | # self.alph = 1.001
17 | #else:
18 | # self.alph = alpha
19 | self.skew = skewin
20 | self.num = n #expected returned numbers 0...31 (e.g for n=32 authors)
21 |
22 | def next(self):
23 | #while 1:
24 | # tobereturned = np.random.zipf(self.alph)
25 | # if tobereturned <= self.num:
26 | # break
27 | #return tobereturned - 1;
28 | randomnum = random.random()
29 | if self.skew == 1.0:
30 | return 0
31 | if randomnum >= (1-self.skew): #80% of
32 | selected = random.randrange(1, int((1-self.skew)*self.num))
33 | returnnum = selected * (1/(1-self.skew))
34 | #print("80%=>"+str(int((1-self.skew)*self.num)))
35 | elif randomnum < (1-self.skew): #20% of times
36 | selected = random.randrange(1,int(self.skew*self.num))
37 | returnnum = selected * (1/self.skew)
38 | #print("20%=>"+str(int(self.skew*self.num)))
39 | return int(returnnum-1)
40 | ## CLASS
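# A usage sketch: draws skewed indexes in roughly 0..n-1, where with the
# default skewin=0.8 about 80% of the draws land on about 20% of the values:
#
#   gen = ZipfGenerator(32)
#   picks = [gen.next() for _ in xrange(1000)]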
41 |
42 |
--------------------------------------------------------------------------------
/exps/benchmarks/replay/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | __all__ = ["replaycoordinator", "replayworker"]
4 |
--------------------------------------------------------------------------------
/exps/benchmarks/replay/dbmigrator.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import logging
4 | from pprint import pformat
5 | import time
6 | import copy
7 |
8 | # Third-Party Dependencies
9 | basedir = os.getcwd()
10 | sys.path.append(os.path.join(basedir, "../../../libs"))
11 |
12 | # mongodb-d4
13 | sys.path.append(os.path.join(basedir, "../../../src"))
14 | sys.path.append(os.path.join(basedir, "../../tools"))
15 |
16 | from util import Histogram
17 | from util import constants
18 |
19 |
20 | LOG = logging.getLogger(__name__)
21 |
22 | class DBMigrator:
23 | def __init__(self, ori_db, new_db):
24 | self.debug = LOG.isEnabledFor(logging.DEBUG)
25 |
26 | self.ori_db = ori_db
27 | self.new_db = new_db
28 | ## DEF
29 |
30 |
31 | def copyData(self, doc, cur_name, parent_keys, docs=[]):
32 | '''
33 | Recursively copy `doc` (a dict): embedded child documents are split out, given their parents' foreign keys, and appended to `docs` as flattened documents.
34 | '''
35 | #self.new_db[cur_name].insert(doc)
36 | #docs = self.new_db[cur_name].find().sort('_id',-1).limit(1)
37 | #for tmp in docs:
38 | # doc = tmp
39 |
40 | for key in doc.keys():
41 | # Insert into new collection and add the parent's id
42 | if isinstance(doc[key], dict) and not parent_keys[key] is None and not parent_keys[key][cur_name] is None:
43 |
44 | ## For
45 | # set the foreign key of the child doc
46 | for f_id in parent_keys[key][cur_name]:
47 | doc[key][f_id] = doc[parent_keys[key][cur_name][f_id]]
48 | ## END FOR
49 |
50 | self.copyData(doc[key], str(key), parent_keys, docs)
51 | del doc[key]
52 | elif isinstance(doc[key], list):
53 | for obj in doc[key]:
54 | if isinstance(obj, dict) and not parent_keys[key] is None and not parent_keys[key][cur_name] is None:
55 | ## FOR
56 | # set the foreign key of the child doc
57 | for f_id in parent_keys[key][cur_name]:
58 | obj[f_id] = doc[parent_keys[key][cur_name][f_id]]
59 | self.copyData(obj, str(key), parent_keys, docs)
60 | ## END FOR
61 |
62 | newlist = [x for x in doc[key] if not isinstance(x, dict)]
63 | doc[key] = newlist
64 | if len(doc[key]) == 0:
65 | del doc[key]
66 |
67 | docs.append(doc)
68 | ## DEF
69 |
70 |
71 | def migrate(self, parent_keys):
72 | # Normalization
73 | LOG.info("Migrating data from old db to new db")
74 | # TOFIX: collection_names(False):cannot take two arguments?
75 | for col_name in self.ori_db.collection_names():
76 | if col_name == 'system.indexes':
77 | continue
78 | col = self.ori_db[col_name]
79 | cnt = 1
80 | docs = []
81 | for doc in col.find({},{'_id':False}, timeout=False):
82 | #if cnt == 1000:
83 | # break
84 | self.copyData(doc, col_name, parent_keys, docs)
85 | if cnt % 1000 == 0:
86 | self.new_db[col_name].insert(docs)
87 | docs = []
88 | cnt += 1
89 | if len(docs) != 0:
90 | self.new_db[col_name].insert(docs)
91 |
92 |
--------------------------------------------------------------------------------
/exps/benchmarks/replay/unittest/test_combiner.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import pymongo
4 |
5 | basedir = os.getcwd()
6 |
7 | # Third-party Dependencies
8 | sys.path.append(os.path.join(basedir, "../../../../libs"))
9 | sys.path.append(os.path.join(basedir, "../../../../src"))
10 | sys.path.append(os.path.join(basedir, "../../../tools"))
11 | sys.path.append(os.path.join(basedir, "../../../../src/search"))
12 |
13 | # mongo-d4-benchmark-replay
14 | sys.path.append(os.path.join(basedir, ".."))
15 |
16 | from dbcombiner import DBCombiner
17 | from dbdenormalizer import DBDenormalizer
18 | from design_deserializer import Deserializer
19 | from design import Design
20 |
21 | def test_combine_deletes(combiner, operations):
22 | return combiner.combineDeletes(operations)
23 |
24 | if __name__=="__main__":
25 | design_path = r"/home/ruiz1/mongodb-d4/exps/tpcc_design"
26 | print design_path
27 | deserializer = Deserializer()
28 | deserializer.loadDesignFile(design_path)
29 | design = Design()
30 | design.data = deserializer.json_doc
31 | print design.data
32 |
33 | dm = DBDenormalizer(None, None, None, None, design)
34 | graph = dm.constructGraph()
35 | dm.metadata_db = pymongo.Connection('localhost:27017')['tpcc_meta']
36 | parent_keys = dm.readSchema('schema')
37 |
38 | combiner = DBCombiner(None, design, graph, parent_keys)
39 |
40 | operations = []
41 | for i in range(5):
42 | op = dict()
43 | op['query_content'] = []
44 | op['query_fields'] = None
45 | op['collection'] = 'order_line'
46 | op['query_content'].append({'ol_o_id':i,'ol_id':i+1})
47 | op['predicates'] = {'ol_o_id':'eq','ol_id':'eq'}
48 | operations.append(op)
49 |
50 | for i in range(3):
51 | op = dict()
52 | op['query_content'] = []
53 | op['query_fields'] = None
54 | op['collection'] = 'oorder'
55 | op['query_content'].append({'o_id':i})
56 | op['predicates'] = {'o_id':'eq'}
57 | operations.append(op)
58 |
59 |
60 | print "---Test combining deletes---"
61 | print "----------------------------"
62 | ret, error, updates = test_combine_deletes(combiner, operations)
63 | print ret
64 | print "----------------------------"
65 | print updates
66 | print "----------------------------"
67 |
--------------------------------------------------------------------------------
/exps/benchmarks/tpcc/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | __all__ = ["tpcccoordinator", "tpccworker"]
4 |
5 | import runtime
--------------------------------------------------------------------------------
/exps/benchmarks/tpcc/drivers/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 |
--------------------------------------------------------------------------------
/exps/benchmarks/tpcc/drivers/abstractdriver.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # -----------------------------------------------------------------------
3 | # Copyright (C) 2011
4 | # Andy Pavlo
5 | # http://www.cs.brown.edu/~pavlo/
6 | #
7 | # Permission is hereby granted, free of charge, to any person obtaining
8 | # a copy of this software and associated documentation files (the
9 | # "Software"), to deal in the Software without restriction, including
10 | # without limitation the rights to use, copy, modify, merge, publish,
11 | # distribute, sublicense, and/or sell copies of the Software, and to
12 | # permit persons to whom the Software is furnished to do so, subject to
13 | # the following conditions:
14 | #
15 | # The above copyright notice and this permission notice shall be
16 | # included in all copies or substantial portions of the Software.
17 | #
18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
21 | # IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
22 | # OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
23 | # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24 | # OTHER DEALINGS IN THE SOFTWARE.
25 | # -----------------------------------------------------------------------
26 |
27 | from datetime import datetime
28 |
29 | import constants
30 |
31 | ## ==============================================
32 | ## AbstractDriver
33 | ## ==============================================
34 | class AbstractDriver(object):
35 | def __init__(self, name, ddl):
36 | self.name = name
37 | self.driver_name = "%sDriver" % self.name.title()
38 | self.ddl = ddl
39 |
40 | def __str__(self):
41 | return self.driver_name
42 |
43 | def loadStart(self):
44 | """Optional callback to indicate to the driver that the data loading phase is about to begin."""
45 | return None
46 |
47 | def loadFinish(self):
48 | """Optional callback to indicate to the driver that the data loading phase is finished."""
49 | return None
50 |
51 | def loadFinishItem(self):
52 | """Optional callback to indicate to the driver that the ITEM data has been passed to the driver."""
53 | return None
54 |
55 | def loadFinishWarehouse(self, w_id):
56 | """Optional callback to indicate to the driver that the data for the given warehouse is finished."""
57 | return None
58 |
59 | def loadFinishDistrict(self, w_id, d_id):
60 | """Optional callback to indicate to the driver that the data for the given district is finished."""
61 | return None
62 |
63 | def loadTuples(self, tableName, tuples):
64 | """Load a list of tuples into the target table"""
65 | raise NotImplementedError("%s does not implement loadTuples" % (self.driver_name))
66 |
67 | def executeStart(self):
68 | """Optional callback before the execution phase starts"""
69 | return None
70 |
71 | def executeFinish(self):
72 | """Callback after the execution phase finishes"""
73 | return None
74 | ## CLASS
--------------------------------------------------------------------------------
/exps/benchmarks/tpcc/runtime/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | __all__ = ["executor", "loader"]
4 |
--------------------------------------------------------------------------------
/exps/benchmarks/tpcc/runtime/nurand.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # -----------------------------------------------------------------------
3 | # Copyright (C) 2011
4 | # Andy Pavlo
5 | # http://www.cs.brown.edu/~pavlo/
6 | #
7 | # Original Java Version:
8 | # Copyright (C) 2008
9 | # Evan Jones
10 | # Massachusetts Institute of Technology
11 | #
12 | # Permission is hereby granted, free of charge, to any person obtaining
13 | # a copy of this software and associated documentation files (the
14 | # "Software"), to deal in the Software without restriction, including
15 | # without limitation the rights to use, copy, modify, merge, publish,
16 | # distribute, sublicense, and/or sell copies of the Software, and to
17 | # permit persons to whom the Software is furnished to do so, subject to
18 | # the following conditions:
19 | #
20 | # The above copyright notice and this permission notice shall be
21 | # included in all copies or substantial portions of the Software.
22 | #
23 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
26 | # IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
27 | # OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
28 | # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
29 | # OTHER DEALINGS IN THE SOFTWARE.
30 | # -----------------------------------------------------------------------
31 |
32 | import rand
33 |
34 | def makeForLoad():
35 | """Create random NURand constants, appropriate for loading the database."""
36 | cLast = rand.number(0, 255)
37 | cId = rand.number(0, 1023)
38 | orderLineItemId = rand.number(0, 8191)
39 | return NURandC(cLast, cId, orderLineItemId)
40 |
41 | def validCRun(cRun, cLoad):
42 | """Returns true if the cRun value is valid for running. See TPC-C 2.1.6.1 (page 20)"""
43 | cDelta = abs(cRun - cLoad)
44 | return 65 <= cDelta and cDelta <= 119 and cDelta != 96 and cDelta != 112
45 |
46 | def makeForRun(loadC):
47 | """Create random NURand constants for running TPC-C. TPC-C 2.1.6.1. (page 20) specifies the valid range for these constants."""
48 | cRun = rand.number(0, 255)
49 | while not validCRun(cRun, loadC.cLast):
50 | cRun = rand.number(0, 255)
51 | assert validCRun(cRun, loadC.cLast)
52 |
53 | cId = rand.number(0, 1023)
54 | orderLineItemId = rand.number(0, 8191)
55 | return NURandC(cRun, cId, orderLineItemId)
56 |
57 | class NURandC:
58 | def __init__(self, cLast, cId, orderLineItemId):
59 | self.cLast = cLast
60 | self.cId = cId
61 | self.orderLineItemId = orderLineItemId
62 |
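# Typical pairing, per the docstrings above:
#
#   cLoad = makeForLoad()     # constants used while loading the database
#   cRun = makeForRun(cLoad)  # constants valid for running, per TPC-C 2.1.6.1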
--------------------------------------------------------------------------------
/exps/benchmarks/tpcc/runtime/scaleparameters.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # -----------------------------------------------------------------------
4 | # Copyright (C) 2011
5 | # Andy Pavlo
6 | # http://www.cs.brown.edu/~pavlo/
7 | #
8 | # Original Java Version:
9 | # Copyright (C) 2008
10 | # Evan Jones
11 | # Massachusetts Institute of Technology
12 | #
13 | # Permission is hereby granted, free of charge, to any person obtaining
14 | # a copy of this software and associated documentation files (the
15 | # "Software"), to deal in the Software without restriction, including
16 | # without limitation the rights to use, copy, modify, merge, publish,
17 | # distribute, sublicense, and/or sell copies of the Software, and to
18 | # permit persons to whom the Software is furnished to do so, subject to
19 | # the following conditions:
20 | #
21 | # The above copyright notice and this permission notice shall be
22 | # included in all copies or substantial portions of the Software.
23 | #
24 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
27 | # IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
28 | # OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
29 | # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
30 | # OTHER DEALINGS IN THE SOFTWARE.
31 | # -----------------------------------------------------------------------
32 |
33 | import constants
34 |
35 | def makeDefault(warehouses):
36 | return ScaleParameters(constants.NUM_ITEMS, \
37 | warehouses, \
38 | constants.DISTRICTS_PER_WAREHOUSE, \
39 | constants.CUSTOMERS_PER_DISTRICT, \
40 | constants.INITIAL_NEW_ORDERS_PER_DISTRICT)
41 | ## DEF
42 |
43 | def makeWithScaleFactor(warehouses, scaleFactor):
44 | items = int(constants.NUM_ITEMS*scaleFactor)
45 | if items <= 0: items = 1
46 | districts = int(max(constants.DISTRICTS_PER_WAREHOUSE, 1))
47 | customers = int(max(constants.CUSTOMERS_PER_DISTRICT*scaleFactor, 1))
48 | newOrders = int(max(constants.INITIAL_NEW_ORDERS_PER_DISTRICT*scaleFactor, 0))
49 |
50 | return ScaleParameters(items, warehouses, districts, customers, newOrders)
51 | ## DEF
52 |
53 | class ScaleParameters:
54 |
55 | def __init__(self, items, warehouses, districtsPerWarehouse, customersPerDistrict, newOrdersPerDistrict):
56 | assert 1 <= items and items <= constants.NUM_ITEMS
57 | self.items = items
58 | assert warehouses > 0
59 | self.warehouses = warehouses
60 | self.starting_warehouse = 1
61 | assert 1 <= districtsPerWarehouse and districtsPerWarehouse <= constants.DISTRICTS_PER_WAREHOUSE
62 | self.districtsPerWarehouse = districtsPerWarehouse
63 | assert 1 <= customersPerDistrict and customersPerDistrict <= constants.CUSTOMERS_PER_DISTRICT
64 | self.customersPerDistrict = customersPerDistrict
65 | assert 0 <= newOrdersPerDistrict and newOrdersPerDistrict <= constants.CUSTOMERS_PER_DISTRICT
66 | assert newOrdersPerDistrict <= constants.INITIAL_NEW_ORDERS_PER_DISTRICT
67 | self.newOrdersPerDistrict = newOrdersPerDistrict
68 | self.ending_warehouse = (self.warehouses + self.starting_warehouse - 1)
69 | ## DEF
70 |
71 | def __str__(self):
72 | out = "%d items\n" % self.items
73 | out += "%d warehouses\n" % self.warehouses
74 | out += "%d districts/warehouse\n" % self.districtsPerWarehouse
75 | out += "%d customers/district\n" % self.customersPerDistrict
76 | out += "%d initial new orders/district" % self.newOrdersPerDistrict
77 | return out
78 | ## DEF
79 |
80 | ## CLASS
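# A usage sketch: the scale factor multiplies the TPC-C defaults, so 0.1 loads
# 10% of the items, customers per district, and initial new orders per district
# (the number of districts per warehouse is not scaled):
#
#   params = makeWithScaleFactor(4, 0.1)
#   print params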
--------------------------------------------------------------------------------
/exps/benchmarks/tpcc/tpcccoordinator.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # -----------------------------------------------------------------------
4 | # Copyright (C) 2011
5 | # Andy Pavlo & Yang Lu
6 | # http://www.cs.brown.edu/~pavlo/
7 | #
8 | # Permission is hereby granted, free of charge, to any person obtaining
9 | # a copy of this software and associated documentation files (the
10 | # "Software"), to deal in the Software without restriction, including
11 | # without limitation the rights to use, copy, modify, merge, publish,
12 | # distribute, sublicense, and/or sell copies of the Software, and to
13 | # permit persons to whom the Software is furnished to do so, subject to
14 | # the following conditions:
15 | #
16 | # The above copyright notice and this permission notice shall be
17 | # included in all copies or substantial portions of the Software.
18 | #
19 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
22 | # IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
23 | # OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
24 | # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25 | # OTHER DEALINGS IN THE SOFTWARE.
26 | # -----------------------------------------------------------------------
27 |
28 | import sys
29 | import os
30 | import string
31 | import re
32 | import glob
33 | import time
34 | import execnet
35 | import logging
36 | from pprint import pprint, pformat
37 |
38 | from api.abstractcoordinator import AbstractCoordinator
39 | from api.message import *
40 |
41 | import drivers
42 | from runtime import scaleparameters
43 |
44 | LOG = logging.getLogger(__name__)
45 |
46 | class TpccCoordinator(AbstractCoordinator):
47 | DEFAULT_CONFIG = [
48 | ("warehouses", "The number of warehouses to use in the benchmark run", 4),
49 | ("denormalize", "If set to true, then the CUSTOMER data will be denormalized into a single document", True),
50 | ]
51 |
52 | def benchmarkConfigImpl(self):
53 | return self.DEFAULT_CONFIG
54 | ## DEF
55 |
56 | def initImpl(self, config, channels):
57 | ## Create our ScaleParameter stuff that we're going to need
58 | num_warehouses = int(config[self.name]['warehouses'])
59 | self.scaleParameters = scaleparameters.makeWithScaleFactor(num_warehouses, config['default']["scalefactor"])
60 | return dict([(channels[i], None) for i in xrange(len(channels))])
61 | ## DEF
62 |
63 | def loadImpl(self, config, channels):
64 | '''divide loading to several clients'''
65 | procs = len(channels)
66 | w_ids = map(lambda x:[], range(procs))
67 | for w_id in range(self.scaleParameters.starting_warehouse, self.scaleParameters.ending_warehouse+1):
68 | idx = w_id % procs
69 | w_ids[idx].append(w_id)
70 | messages = dict([(channels[i], w_ids[i]) for i in xrange(procs)])
71 | LOG.debug("TPC-C Load Messages:\n%s", pformat(messages))
72 | return messages
73 | ## DEF
74 |
75 | def executeImpl(self, config, channels):
76 | return None
77 |
78 | ## CLASS
79 |
--------------------------------------------------------------------------------
/exps/tools/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | __all__ = ["design_deserializer"]
--------------------------------------------------------------------------------
/exps/tools/design_deserializer.py:
--------------------------------------------------------------------------------
1 |
2 | import sys
3 | import json
4 | import os
5 |
6 | basedir = os.path.realpath(os.path.dirname(__file__))
7 | sys.path.append(os.path.join(basedir, "../../src/search"))
8 |
9 | from design import Design
10 |
11 | class Deserializer:
12 | def __init__(self, json_string=None):
13 | if not json_string:
14 | self.json_doc = None
15 | else:
16 | self.json_doc = json.loads(json_string)
17 | ## DEF
18 |
19 | def loadDesignFile(self, file_path):
20 | f = open(file_path, 'r')
21 | content = f.read()
22 | f.close()
23 |
24 | self.json_doc = json.loads(content)
25 | ## DEF
26 |
27 | def Deserialize(self):
28 | d = Design()
29 | self.__deserialize__(self.json_doc, d)
30 | return d
31 | ## DEF
32 |
33 | def __deserialize__(self, doc, design):
34 | """
35 | Just populate the given data into a design instance
36 | """
37 | for key, value in doc.iteritems():
38 | design.addCollection(key)
39 | for index in value['indexes']:
40 | design.addIndex(key, index)
41 | design.addShardKey(key, value['shardKeys'])
42 | design.setDenormalizationParent(key, value['denorm'])
43 | ## FOR
44 |
45 | ## CLASS
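# A usage sketch (the design file path is hypothetical):
#
#   d = Deserializer()
#   d.loadDesignFile("/path/to/design.json")
#   design = d.Deserialize()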
46 |
--------------------------------------------------------------------------------
/exps/tools/dump-csv.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # -----------------------------------------------------------------------
4 | # Copyright (C) 2012 by Brown University
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining
7 | # a copy of this software and associated documentation files (the
8 | # "Software"), to deal in the Software without restriction, including
9 | # without limitation the rights to use, copy, modify, merge, publish,
10 | # distribute, sublicense, and/or sell copies of the Software, and to
11 | # permit persons to whom the Software is furnished to do so, subject to
12 | # the following conditions:
13 | #
14 | # The above copyright notice and this permission notice shall be
15 | # included in all copies or substantial portions of the Software.
16 | #
17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
20 | # IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21 | # OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22 | # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23 | # OTHER DEALINGS IN THE SOFTWARE.
24 | # -----------------------------------------------------------------------
25 | from __future__ import division
26 | from __future__ import with_statement
27 |
28 | import os, sys
29 | import re
30 | import subprocess
31 |
32 | ## ==============================================
33 | ## main
34 | ## ==============================================
35 | if __name__ == '__main__':
36 | if len(sys.argv) != 2:
37 | raise Exception("ERROR: Missing database name")
38 |
39 | db_name = sys.argv[1]
40 | cmd = "mongo %s --eval 'db.getCollectionNames()'" % db_name
41 | output = subprocess.check_output(cmd, shell=True)
42 |
43 | collections = set()
44 | for line in output.strip().split("\n"):
45 | if line.find("system.indexes") != -1:
46 | map(collections.add, line.split(","))
47 | collections.remove("system.indexes")
48 |
49 | os.mkdir(db_name)
50 | for c in collections:
51 | output = os.path.join(db_name, "%s.json" % c)
52 | cmd = "mongoexport --db %s --collection %s --out %s" % (db_name, c, output)
53 | subprocess.check_call(cmd, shell=True)
54 | print output
55 | ## IF
--------------------------------------------------------------------------------
/exps/tools/duplicator.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # -----------------------------------------------------------------------
4 | # Copyright (C) 2012 by Brown University
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining
7 | # a copy of this software and associated documentation files (the
8 | # "Software"), to deal in the Software without restriction, including
9 | # without limitation the rights to use, copy, modify, merge, publish,
10 | # distribute, sublicense, and/or sell copies of the Software, and to
11 | # permit persons to whom the Software is furnished to do so, subject to
12 | # the following conditions:
13 | #
14 | # The above copyright notice and this permission notice shall be
15 | # included in all copies or substantial portions of the Software.
16 | #
17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
20 | # IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21 | # OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22 | # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23 | # OTHER DEALINGS IN THE SOFTWARE.
24 | # -----------------------------------------------------------------------
25 | from __future__ import division
26 | from __future__ import with_statement
27 |
28 | import os, sys
29 | import logging
30 | import random
31 | import re
32 | import string
33 | import json
34 | import glob
35 | import codecs
36 | from pprint import pformat
37 | from ConfigParser import RawConfigParser
38 |
39 | # Third-Party Dependencies
40 | basedir = os.path.realpath(os.path.dirname(__file__))
41 | sys.path.append(os.path.join(basedir, "../../src"))
42 | sys.path.append(os.path.join(basedir, "../../libs"))
43 | import mongokit
44 | import argparse
45 |
46 | # mongodb-d4
47 | import catalog
48 | import workload
49 | from search import Designer
50 | from util import configutil
51 | from util import constants
52 | from util.histogram import Histogram
53 |
54 | logging.basicConfig(
55 | level = logging.INFO,
56 | format="%(asctime)s [%(filename)s:%(lineno)03d] %(levelname)-5s: %(message)s",
57 | datefmt="%m-%d-%Y %H:%M:%S",
58 | stream = sys.stdout
59 | )
60 |
61 | LOG = logging.getLogger(__name__)
62 |
63 | ## ==============================================
64 | ## main
65 | ## ==============================================
66 | if __name__ == '__main__':
67 | aparser = argparse.ArgumentParser(description="CSV File Duplicator")
68 | aparser.add_argument('input', help='CSV Input Data Dump Directory')
69 | aparser.add_argument('output', help='CSV Output Data Dump Directory')
70 | aparser.add_argument('multiplier', type=int, help='Data Duplicator Multiplier')
71 | aparser.add_argument('--debug', action='store_true', help='Enable debug log messages.')
72 | args = vars(aparser.parse_args())
73 | if args['debug']: LOG.setLevel(logging.DEBUG)
74 |
75 | if not os.path.exists(args["output"]):
76 | os.mkdir(args["output"])
77 | for dataFile in glob.glob(os.path.join(args["input"], "*.json")):
78 | newDataFile = os.path.join(args["output"], os.path.basename(dataFile))
79 | with codecs.open(newDataFile, encoding='utf-8', mode='w+') as out:
80 | with codecs.open(dataFile, encoding='utf-8') as fd:
81 | new_ctr = 0
82 | orig_ctr = 0
83 | for line in fd:
84 | try:
85 | row = json.loads(line.encode('utf-8'))
86 | except:
87 | LOG.error("Failed to parse line: %s", line)
88 | raise
89 | id = row["_id"]["$oid"]
90 | orig_ctr += 1
91 | new_ctr += 1
92 | for i in xrange(args['multiplier']):
93 | # Just update the _id field
94 | new_id = '%04x%s' % (i, id[4:])
95 | # print id, "->", new_id
96 | out.write(line.replace(id, new_id))
97 | new_ctr += 1
98 | ## FOR
99 | ## FOR
100 | ## WITH
101 | LOG.info("DUPLICATED %s -> ORIG:%d / NEW:%d", newDataFile, orig_ctr, new_ctr)
102 | ## WITH
103 | ## FOR
104 |
105 |
106 | ## MAIN
107 |
--------------------------------------------------------------------------------
/exps/tools/load-csv.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # -----------------------------------------------------------------------
4 | # Copyright (C) 2012 by Brown University
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining
7 | # a copy of this software and associated documentation files (the
8 | # "Software"), to deal in the Software without restriction, including
9 | # without limitation the rights to use, copy, modify, merge, publish,
10 | # distribute, sublicense, and/or sell copies of the Software, and to
11 | # permit persons to whom the Software is furnished to do so, subject to
12 | # the following conditions:
13 | #
14 | # The above copyright notice and this permission notice shall be
15 | # included in all copies or substantial portions of the Software.
16 | #
17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 | # IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21 | # OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22 | # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23 | # OTHER DEALINGS IN THE SOFTWARE.
24 | # -----------------------------------------------------------------------
25 |
26 | import os, sys
27 | import subprocess
28 | import logging
29 | import glob
30 | from ConfigParser import RawConfigParser
31 |
32 | # Third-Party Dependencies
33 | basedir = os.path.realpath(os.path.dirname(__file__))
34 | sys.path.append(os.path.join(basedir, "../../src"))
35 | sys.path.append(os.path.join(basedir, "../../libs"))
36 | import argparse
37 |
38 | from util import constants
39 | from util import configutil
40 |
41 | logging.basicConfig(
42 | level = logging.INFO,
43 | format="%(asctime)s [%(filename)s:%(lineno)03d] %(levelname)-5s: %(message)s",
44 | datefmt="%m-%d-%Y %H:%M:%S",
45 | stream = sys.stdout
46 | )
47 | LOG = logging.getLogger(__name__)
48 |
49 | ## ==============================================
50 | ## main
51 | ## ==============================================
52 | if __name__ == '__main__':
53 | aparser = argparse.ArgumentParser(description="CSV File Loader")
54 | aparser.add_argument('input', help='CSV Input Data Dump Directory')
55 | aparser.add_argument('--config', type=file, help='Path to %s configuration file' % constants.PROJECT_NAME)
56 | aparser.add_argument('--debug', action='store_true', help='Enable debug log messages.')
57 | args = vars(aparser.parse_args())
58 | if args['debug']: LOG.setLevel(logging.DEBUG)
59 |
60 | if not args['config']:
61 | LOG.error("Missing configuration file")
62 | print
63 | aparser.print_usage()
64 | sys.exit(1)
65 | LOG.debug("Loading configuration file '%s'" % args['config'])
66 | config = RawConfigParser()
67 | configutil.setDefaultValues(config)
68 | config.read(os.path.realpath(args['config'].name))
69 |
70 | db_host = config.get(configutil.SECT_MONGODB, 'host')
71 | db_name = config.get(configutil.SECT_MONGODB, 'dataset_db')
72 | for dataFile in glob.glob(os.path.join(args["input"], "*.json")):
73 |         collection = os.path.basename(dataFile).replace(".json", "")
74 | cmd = "mongoimport --host=%s --db %s --collection %s --file %s --type json" % (db_host, db_name, collection, dataFile)
75 | subprocess.check_call(cmd, shell=True)
76 | LOG.info("Loaded %s.%s", db_name, collection)
77 | ## FOR
78 | ## IF
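# Example (illustrative values) of the command string built above for an
# input file dumps/users.json, host "localhost", and dataset_db "dataset":
#
#   mongoimport --host=localhost --db dataset --collection users \
#       --file dumps/users.json --type json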
--------------------------------------------------------------------------------
/libs/argparse/__init__.py:
--------------------------------------------------------------------------------
1 | from argparse import *
--------------------------------------------------------------------------------
/libs/mongokit/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # Copyright (c) 2009-2011, Nicolas Clairon
4 | # All rights reserved.
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # * Redistributions of source code must retain the above copyright
9 | # notice, this list of conditions and the following disclaimer.
10 | # * Redistributions in binary form must reproduce the above copyright
11 | # notice, this list of conditions and the following disclaimer in the
12 | # documentation and/or other materials provided with the distribution.
13 | # * Neither the name of the University of California, Berkeley nor the
14 | # names of its contributors may be used to endorse or promote products
15 | # derived from this software without specific prior written permission.
16 | #
17 | # THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
18 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 | # DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
21 | # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 |
28 | __version__ = "0.8.1"
29 |
30 | from bson.dbref import DBRef
31 | from cursor import Cursor
32 | from operators import *
33 | from schema_document import *
34 | from mongo_exceptions import *
35 | from document import Document, ObjectId
36 | from versioned_document import VersionedDocument
37 | from database import Database
38 | from collection import Collection
39 | from connection import Connection
40 | from master_slave_connection import MasterSlaveConnection
41 | from pymongo import ASCENDING as INDEX_ASCENDING,\
42 | DESCENDING as INDEX_DESCENDING,\
43 | ALL as INDEX_ALL,\
44 | GEO2D as INDEX_GEO2D,\
45 | OFF as INDEX_OFF
46 | from migration import DocumentMigration
47 |
48 |
--------------------------------------------------------------------------------
/libs/mongokit/auth.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # Copyright (c) 2009-2011, Nicolas Clairon
4 | # All rights reserved.
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # * Redistributions of source code must retain the above copyright
9 | # notice, this list of conditions and the following disclaimer.
10 | # * Redistributions in binary form must reproduce the above copyright
11 | # notice, this list of conditions and the following disclaimer in the
12 | # documentation and/or other materials provided with the distribution.
13 | # * Neither the name of the University of California, Berkeley nor the
14 | # names of its contributors may be used to endorse or promote products
15 | # derived from this software without specific prior written permission.
16 | #
17 | # THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
18 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 | # DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
21 | # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 |
28 | from mongokit import Document
29 | import hashlib, os
30 |
31 | class User(Document):
32 | structure = {
33 | "_id":unicode,
34 | "user":{
35 | "login":unicode,
36 | "password":unicode, # TODO validator
37 | "email":unicode,
38 | }
39 | }
40 | required_fields = ['user.password', 'user.email'] # what if openid ? password is None
41 |
42 | def set_login(self, login):
43 | self['_id'] = login
44 | self['user']['login'] = login
45 |
46 | def get_login(self):
47 | return self['_id']
48 |
49 | def del_login(self):
50 | self['_id'] = None
51 | self['user']['login'] = None
52 |
53 | login = property(get_login, set_login, del_login)
54 |
55 | def set_password(self, password):
56 | """ Hash password on the fly """
57 | if isinstance(password, unicode):
58 | password = password.encode('utf-8')
59 | password_salt = hashlib.sha1(os.urandom(60)).hexdigest()
60 | crypt = hashlib.sha1(password + password_salt).hexdigest()
61 | self['user']['password'] = unicode(password_salt + crypt, 'utf-8')
62 |
63 | def get_password(self):
64 | """ Return the password hashed """
65 | return self['user']['password']
66 |
67 | def del_password(self):
68 | self['user']['password'] = None
69 |
70 | password = property(get_password, set_password, del_password)
71 |
72 | def verify_password(self, password):
73 | """ Check the password against existing credentials """
74 | if isinstance(password, unicode):
75 | password = password.encode('utf-8')
76 | password_salt = self['user']['password'][:40]
77 | crypt_pass = hashlib.sha1(password + password_salt).hexdigest()
78 | if crypt_pass == self['user']['password'][40:]:
79 | return True
80 | else:
81 | return False
82 |
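    # Explanatory note (not part of the original mongokit source): the stored
    # password field is an 80-character hex string where [0:40] is the
    # sha1(os.urandom(60)) salt digest and [40:80] is sha1(password + salt).
    # verify_password() above recomputes the second half from the candidate
    # password and the stored salt:
    #
    #   salt  = stored[:40]
    #   crypt = hashlib.sha1(candidate + salt).hexdigest()
    #   ok    = (crypt == stored[40:])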
83 | def get_email(self):
84 | return self['user']['email']
85 |
86 | def set_email(self, email):
87 | # TODO check if it's a well formated email
88 | self['user']['email'] = email
89 |
90 | def del_email(self):
91 | self['user']['email'] = None
92 |
93 | email = property(get_email, set_email, del_email)
94 |
95 | def save(self, *args, **kwargs):
96 | assert self['_id'] == self['user']['login']
97 | super(User, self).save(*args, **kwargs)
98 |
--------------------------------------------------------------------------------
/libs/mongokit/cursor.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # Copyright (c) 2009-2011, Nicolas Clairon
4 | # All rights reserved.
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # * Redistributions of source code must retain the above copyright
9 | # notice, this list of conditions and the following disclaimer.
10 | # * Redistributions in binary form must reproduce the above copyright
11 | # notice, this list of conditions and the following disclaimer in the
12 | # documentation and/or other materials provided with the distribution.
13 | # * Neither the name of the University of California, Berkeley nor the
14 | # names of its contributors may be used to endorse or promote products
15 | # derived from this software without specific prior written permission.
16 | #
17 | # THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
18 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 | # DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
21 | # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 |
28 | from pymongo.cursor import Cursor as PymongoCursor
29 | from collections import deque
30 |
31 | class Cursor(PymongoCursor):
32 | def __init__(self, *args, **kwargs):
33 | self.__wrap = None
34 | if kwargs:
35 | self.__wrap = kwargs.pop('wrap', None)
36 | super(Cursor, self).__init__(*args, **kwargs)
37 |
38 | def next(self):
39 | if self._Cursor__empty:
40 | raise StopIteration
41 | db = self._Cursor__collection.database
42 | if len(self.__data) or self._refresh():
43 | if isinstance(self._Cursor__data, deque):
44 | item = self._Cursor__data.popleft()
45 | else:
46 | item = self._Cursor__data.pop(0)
47 | if self._Cursor__manipulate:
48 | son = db._fix_outgoing(item, self._Cursor__collection)
49 | else:
50 | son = item
51 | if self.__wrap is not None:
52 | return self.__wrap(son, collection=self._Cursor__collection)
53 | else:
54 | return son
55 | else:
56 | raise StopIteration
57 |
58 | def __getitem__(self, index):
59 | obj = super(Cursor, self).__getitem__(index)
60 | if (self.__wrap is not None) and isinstance(obj, dict):
61 | return self.__wrap(obj)
62 | return obj
63 |
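# Usage sketch (illustrative; 'MyDocument' is a hypothetical wrapper class):
# the extra 'wrap' kwarg makes the cursor hand back wrapped documents instead
# of plain dicts:
#
#   cursor = Cursor(collection, wrap=MyDocument)
#   for doc in cursor:
#       ...  # each doc is MyDocument(son, collection=...), not a raw dict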
--------------------------------------------------------------------------------
/libs/mongokit/database.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # Copyright (c) 2009-2011, Nicolas Clairon
4 | # All rights reserved.
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # * Redistributions of source code must retain the above copyright
9 | # notice, this list of conditions and the following disclaimer.
10 | # * Redistributions in binary form must reproduce the above copyright
11 | # notice, this list of conditions and the following disclaimer in the
12 | # documentation and/or other materials provided with the distribution.
13 | # * Neither the name of the University of California, Berkeley nor the
14 | # names of its contributors may be used to endorse or promote products
15 | # derived from this software without specific prior written permission.
16 | #
17 | # THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
18 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 | # DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
21 | # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 |
28 | from pymongo.database import Database as PymongoDatabase
29 | from bson.dbref import DBRef
30 | from mongokit.document import Document
31 | from collection import Collection
32 |
33 | class Database(PymongoDatabase):
34 |
35 | def __init__(self, *args, **kwargs):
36 | self._collections = {}
37 | super(Database, self).__init__(*args, **kwargs)
38 |
39 | def __getattr__(self, key):
40 | if key in self.connection._registered_documents:
41 | document = self.connection._registered_documents[key]
42 | return getattr(self[document.__collection__], key)
43 | else:
44 | if not key in self._collections:
45 | self._collections[key] = Collection(self, key)
46 | return self._collections[key]
47 |
48 | def dereference(self, dbref, model = None):
49 | if model is None:
50 | return super(Database, self).dereference(dbref)
51 | if not isinstance(dbref, DBRef):
52 | raise TypeError("first argument must be a DBRef")
53 | if dbref.database is not None and dbref.database != self.name:
54 | raise ValueError("trying to dereference a DBRef that points to "
55 | "another database (%r not %r)" % (dbref.database, self._Database__name))
56 | if not issubclass(model, Document):
57 | raise TypeError("second argument must be a Document")
58 | return getattr(self[dbref.collection], model.__name__).one({'_id': dbref.id})
59 |
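# Usage sketch (illustrative; 'User' stands for any registered Document
# subclass): passing a model makes dereference() return a wrapped document
# instead of the plain dict that pymongo would return:
#
#   ref = DBRef(collection='users', id=user_id)
#   user = db.dereference(ref, model=User)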
--------------------------------------------------------------------------------
/libs/mongokit/master_slave_connection.py:
--------------------------------------------------------------------------------
1 | """
2 | Master-Slave integration for MongoKit
3 | Andreas Jung, info@zopyx.com
4 | (same license as Mongokit)
5 | """
6 |
7 | from pymongo.master_slave_connection import MasterSlaveConnection as PymongoMasterSlaveConnection
8 | from pymongo import Connection as PyMongoConnection
9 |
10 | from mongokit.database import Database
11 | from mongokit.connection import CallableMixin, _iterables
12 |
13 | class MasterSlaveConnection(PymongoMasterSlaveConnection):
14 | """ Master-Slave support for MongoKit """
15 |
16 | def __init__(self, master, slaves=[]):
17 | """ The MasterSlaveConnection is a wrapper around the
18 | pymongo.master_slave_connection implementation. The constructor accepts
19 | the connection parameter for the master MongoDB server and a non-empty
20 | list of connection parameters for one or more slaves. The connection
21 | parameters are expressed as a dictionary where the keys match the
22 | signature of the constructor of a standard
23 | pymongo.connection.Connection instance ('host', 'port' etc.). For the
24 | 'slaves' it is not necessary to specify the 'slave_okay' parameter
25 | (will be added internally automatically).
26 |
27 | The purpose of the MasterSlaveConnection is to hide a master-slave
28 | setup with one master and several slave servers. The slave
29 |         server(s) will be used for reads, and writes will be made to the
30 |         master (and re-synced to the slaves automatically as part of the
31 | master-slave setup).
32 | """
33 |
34 | self._databases = {}
35 | self._registered_documents = {}
36 |
37 | # I am the master
38 | if not isinstance(master, dict):
39 | raise TypeError('"master" must be a dict containing pymongo.Connection parameters')
40 | master_connection = PyMongoConnection(**master)
41 |
42 |         # Validate and open the slave connections
43 | if not slaves:
44 | raise ValueError('You must specify at least one slave connection')
45 |
46 | slave_connections = list()
47 | for slave in slaves:
48 | if not isinstance(slave, dict):
49 | raise TypeError('"slaves" must be list of dicts containing pymongo.Connection parameters')
50 | slave['slave_okay'] = True
51 | slave_connections.append(PyMongoConnection(**slave))
52 |
53 | super(MasterSlaveConnection, self).__init__(master_connection, slave_connections)
54 |
55 | def register(self, obj_list):
56 | decorator = None
57 | if not isinstance(obj_list, _iterables):
58 | # we assume that the user used this as a decorator
59 | # using @register syntax or using conn.register(SomeDoc)
60 | # we stock the class object in order to return it later
61 | decorator = obj_list
62 | obj_list = [obj_list]
63 | # cleanup
64 | for dbname, db in self._databases.items():
65 | for colname, col in db._collections.items():
66 | for docname, doc in col._documents.items():
67 | del col._documents[docname]
68 | for obj_name in [obj.__name__ for obj in obj_list]:
69 | if obj_name in col._registered_documents:
70 | del col._registered_documents[obj_name]
71 | # register
72 | for obj in obj_list:
73 | CallableDocument = type(
74 | "Callable%s" % obj.__name__,
75 | (obj, CallableMixin),
76 | {"_obj_class":obj, "__repr__":object.__repr__}
77 | )
78 | self._registered_documents[obj.__name__] = CallableDocument
79 | # if the class object is stored, it means the user used a decorator and
80 | # we must return the class object
81 | if decorator is not None:
82 | return decorator
83 |
84 | def __getattr__(self, key):
85 | if key not in self._databases:
86 | self._databases[key] = Database(self, key)
87 | return self._databases[key]
88 |
89 |
--------------------------------------------------------------------------------
/libs/mongokit/mongo_exceptions.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # Copyright (c) 2009-2011, Nicolas Clairon
4 | # All rights reserved.
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # * Redistributions of source code must retain the above copyright
9 | # notice, this list of conditions and the following disclaimer.
10 | # * Redistributions in binary form must reproduce the above copyright
11 | # notice, this list of conditions and the following disclaimer in the
12 | # documentation and/or other materials provided with the distribution.
13 | # * Neither the name of the University of California, Berkeley nor the
14 | # names of its contributors may be used to endorse or promote products
15 | # derived from this software without specific prior written permission.
16 | #
17 | # THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
18 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 | # DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
21 | # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 |
28 | from bson import InvalidDocument
29 | from pymongo.errors import OperationFailure
30 | class ConnectionError(Exception):pass
31 | class MongoAuthException(Exception):pass
32 | class MultipleResultsFound(Exception):pass
33 | class BadIndexError(Exception):pass
34 | class AutoReferenceError(Exception):pass
35 | class MaxDocumentSizeError(Exception):pass
36 | class OptionConflictError(Exception):pass
37 | class UpdateQueryError(Exception):pass
38 |
--------------------------------------------------------------------------------
/libs/mongokit/operators.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # Copyright (c) 2009-2010, Nicolas Clairon
4 | # All rights reserved.
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # * Redistributions of source code must retain the above copyright
9 | # notice, this list of conditions and the following disclaimer.
10 | # * Redistributions in binary form must reproduce the above copyright
11 | # notice, this list of conditions and the following disclaimer in the
12 | # documentation and/or other materials provided with the distribution.
13 | # * Neither the name of the University of California, Berkeley nor the
14 | # names of its contributors may be used to endorse or promote products
15 | # derived from this software without specific prior written permission.
16 | #
17 | # THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
18 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 | # DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
21 | # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 | # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 | # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 | # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 |
28 | class SchemaOperator(object):
29 | repr = None
30 |
31 | def __init__(self, *args):
32 | assert self.repr is not None
33 | self._operands = list(args)
34 |
35 | def __repr__(self):
36 | return str(self)
37 |
38 | def __iter__(self):
39 | for operand in self._operands:
40 | yield operand
41 |
42 | def __eq__(self, other):
43 | return type(self) == type(other) and self._operands == other._operands
44 |
45 | def validate(self, value):
46 | raise NotImplementedError
47 |
48 | class OR(SchemaOperator):
49 | repr = 'or'
50 |
51 | def __init__(self, *args):
52 | super(OR, self).__init__(*args)
53 |
54 | def __str__(self):
55 | repr = ' %s ' % self.repr
56 | return '<'+ repr.join([i.__name__ for i in self._operands]) + '>'
57 |
58 | def validate(self, value):
59 | if type(value) in self._operands:
60 | return True
61 | return False
62 |
63 | class NOT(SchemaOperator):
64 | repr = 'not'
65 |
66 | def __init__(self, *args):
67 | super(NOT, self).__init__(*args)
68 |
69 | def __str__(self):
70 |         repr = ', %s ' % self.repr
71 |         return '<not ' + repr.join([i.__name__ for i in self._operands]) + '>'
72 |
73 | def validate(self, value):
74 | if type(value) in self._operands:
75 | return False
76 | return True
77 |
78 | class IS(SchemaOperator):
79 | repr = 'is'
80 |
81 | def __init__(self, *args):
82 | super(IS, self).__init__(*args)
83 |
84 | def __str__(self):
85 |         representation = ' or %s ' % self.repr
86 |         return '<is ' + representation.join([repr(i) for i in self._operands]) + '>'
87 |
88 | def validate(self, value):
89 | if value in self._operands:
90 | for op in self._operands:
91 | if value == op and isinstance(value, type(op)):
92 | return True
93 | return False
94 |
95 |
--------------------------------------------------------------------------------
/libs/sqlparse/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
2 | #
3 | # This module is part of python-sqlparse and is released under
4 | # the BSD License: http://www.opensource.org/licenses/bsd-license.php.
5 |
6 | """Parse SQL statements."""
7 |
8 |
9 | __version__ = '0.1.11'
10 |
11 |
12 | # Setup namespace
13 | from sqlparse import engine
14 | from sqlparse import filters
15 | from sqlparse import formatter
16 |
17 | # Deprecated in 0.1.5. Will be removed in 0.2.0
18 | from sqlparse.exceptions import SQLParseError
19 |
20 |
21 | def parse(sql, encoding=None):
22 |     """Parse sql and return a tuple of statements.
23 |
24 |     :param sql: A string containing one or more SQL statements.
25 | :param encoding: The encoding of the statement (optional).
26 | :returns: A tuple of :class:`~sqlparse.sql.Statement` instances.
27 | """
28 | return tuple(parsestream(sql, encoding))
29 |
30 |
31 | def parsestream(stream, encoding=None):
32 | """Parses sql statements from file-like object.
33 |
34 | :param stream: A file-like object.
35 | :param encoding: The encoding of the stream contents (optional).
36 | :returns: A generator of :class:`~sqlparse.sql.Statement` instances.
37 | """
38 | stack = engine.FilterStack()
39 | stack.full_analyze()
40 | return stack.run(stream, encoding)
41 |
42 |
43 | def format(sql, **options):
44 | """Format *sql* according to *options*.
45 |
46 | Available options are documented in :ref:`formatting`.
47 |
48 | In addition to the formatting options this function accepts the
49 | keyword "encoding" which determines the encoding of the statement.
50 |
51 | :returns: The formatted SQL statement as string.
52 | """
53 | encoding = options.pop('encoding', None)
54 | stack = engine.FilterStack()
55 | options = formatter.validate_options(options)
56 | stack = formatter.build_filter_stack(stack, options)
57 | stack.postprocess.append(filters.SerializerUnicode())
58 | return ''.join(stack.run(sql, encoding))
59 |
60 |
61 | def split(sql, encoding=None):
62 | """Split *sql* into single statements.
63 |
64 |     :param sql: A string containing one or more SQL statements.
65 | :param encoding: The encoding of the statement (optional).
66 | :returns: A list of strings.
67 | """
68 | stack = engine.FilterStack()
69 | stack.split_statements = True
70 | return [unicode(stmt).strip() for stmt in stack.run(sql, encoding)]
71 |
72 |
73 | from sqlparse.engine.filter import StatementFilter
74 |
75 |
76 | def split2(stream):
77 | splitter = StatementFilter()
78 | return list(splitter.process(None, stream))
79 |
--------------------------------------------------------------------------------
/libs/sqlparse/engine/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
2 | #
3 | # This module is part of python-sqlparse and is released under
4 | # the BSD License: http://www.opensource.org/licenses/bsd-license.php.
5 |
6 | """filter"""
7 |
8 | from sqlparse import lexer
9 | from sqlparse.engine import grouping
10 | from sqlparse.engine.filter import StatementFilter
11 |
12 | # XXX remove this when cleanup is complete
13 | Filter = object
14 |
15 |
16 | class FilterStack(object):
17 |
18 | def __init__(self):
19 | self.preprocess = []
20 | self.stmtprocess = []
21 | self.postprocess = []
22 | self.split_statements = False
23 | self._grouping = False
24 |
25 | def _flatten(self, stream):
26 | for token in stream:
27 | if token.is_group():
28 | for t in self._flatten(token.tokens):
29 | yield t
30 | else:
31 | yield token
32 |
33 | def enable_grouping(self):
34 | self._grouping = True
35 |
36 | def full_analyze(self):
37 | self.enable_grouping()
38 |
39 | def run(self, sql, encoding=None):
40 | stream = lexer.tokenize(sql, encoding)
41 | # Process token stream
42 | if self.preprocess:
43 | for filter_ in self.preprocess:
44 | stream = filter_.process(self, stream)
45 |
46 | if (self.stmtprocess or self.postprocess or self.split_statements
47 | or self._grouping):
48 | splitter = StatementFilter()
49 | stream = splitter.process(self, stream)
50 |
51 | if self._grouping:
52 |
53 | def _group(stream):
54 | for stmt in stream:
55 | grouping.group(stmt)
56 | yield stmt
57 | stream = _group(stream)
58 |
59 | if self.stmtprocess:
60 |
61 | def _run1(stream):
62 | ret = []
63 | for stmt in stream:
64 | for filter_ in self.stmtprocess:
65 | filter_.process(self, stmt)
66 | ret.append(stmt)
67 | return ret
68 | stream = _run1(stream)
69 |
70 | if self.postprocess:
71 |
72 | def _run2(stream):
73 | for stmt in stream:
74 | stmt.tokens = list(self._flatten(stmt.tokens))
75 | for filter_ in self.postprocess:
76 | stmt = filter_.process(self, stmt)
77 | yield stmt
78 | stream = _run2(stream)
79 |
80 | return stream
81 |
--------------------------------------------------------------------------------
/libs/sqlparse/engine/filter.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from sqlparse.sql import Statement, Token
4 | from sqlparse import tokens as T
5 |
6 |
7 | class StatementFilter:
8 |     "Filter that splits a stream into individual statements"
9 |
10 | def __init__(self):
11 | self._in_declare = False
12 | self._in_dbldollar = False
13 | self._is_create = False
14 | self._begin_depth = 0
15 |
16 | def _reset(self):
17 |         "Reset the filter attributes to their default values"
18 | self._in_declare = False
19 | self._in_dbldollar = False
20 | self._is_create = False
21 | self._begin_depth = 0
22 |
23 | def _change_splitlevel(self, ttype, value):
24 | "Get the new split level (increase, decrease or remain equal)"
25 | # PostgreSQL
26 | if (ttype == T.Name.Builtin
27 | and value.startswith('$') and value.endswith('$')):
28 | if self._in_dbldollar:
29 | self._in_dbldollar = False
30 | return -1
31 | else:
32 | self._in_dbldollar = True
33 | return 1
34 | elif self._in_dbldollar:
35 | return 0
36 |
37 | # ANSI
38 | if ttype not in T.Keyword:
39 | return 0
40 |
41 | unified = value.upper()
42 |
43 | if unified == 'DECLARE' and self._is_create:
44 | self._in_declare = True
45 | return 1
46 |
47 | if unified == 'BEGIN':
48 | self._begin_depth += 1
49 | if self._in_declare or self._is_create:
50 | # FIXME(andi): This makes no sense.
51 | return 1
52 | return 0
53 |
54 | if unified == 'END':
55 |             # Should this respect a preceding BEGIN?
56 | # In CASE ... WHEN ... END this results in a split level -1.
57 | self._begin_depth = max(0, self._begin_depth - 1)
58 | return -1
59 |
60 | if ttype is T.Keyword.DDL and unified.startswith('CREATE'):
61 | self._is_create = True
62 | return 0
63 |
64 | if (unified in ('IF', 'FOR')
65 | and self._is_create and self._begin_depth > 0):
66 | return 1
67 |
68 | # Default
69 | return 0
70 |
71 | def process(self, stack, stream):
72 | "Process the stream"
73 | consume_ws = False
74 | splitlevel = 0
75 | stmt = None
76 | stmt_tokens = []
77 |
78 | # Run over all stream tokens
79 | for ttype, value in stream:
80 |             # Yield the finished statement once the next non-whitespace, non-comment token arrives
81 | if consume_ws and ttype not in (T.Whitespace, T.Comment.Single):
82 | stmt.tokens = stmt_tokens
83 | yield stmt
84 |
85 | # Reset filter and prepare to process next statement
86 | self._reset()
87 | consume_ws = False
88 | splitlevel = 0
89 | stmt = None
90 |
91 |             # Create a new statement if we are not currently in one
92 | if stmt is None:
93 | stmt = Statement()
94 | stmt_tokens = []
95 |
96 | # Change current split level (increase, decrease or remain equal)
97 | splitlevel += self._change_splitlevel(ttype, value)
98 |
99 | # Append the token to the current statement
100 | stmt_tokens.append(Token(ttype, value))
101 |
102 | # Check if we get the end of a statement
103 | if splitlevel <= 0 and ttype is T.Punctuation and value == ';':
104 | consume_ws = True
105 |
106 | # Yield pending statement (if any)
107 | if stmt is not None:
108 | stmt.tokens = stmt_tokens
109 | yield stmt
110 |
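# Illustration (not part of the original source) of why _change_splitlevel()
# matters. For the input
#
#   CREATE PROCEDURE p() BEGIN SELECT 1; END;
#
# 'CREATE' sets _is_create, 'BEGIN' raises the split level to 1, so the inner
# ';' does not terminate the statement; 'END' drops the level back to 0 and
# only the final ';' splits.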
--------------------------------------------------------------------------------
/libs/sqlparse/exceptions.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2012 Andi Albrecht, albrecht.andi@gmail.com
2 | #
3 | # This module is part of python-sqlparse and is released under
4 | # the BSD License: http://www.opensource.org/licenses/bsd-license.php.
5 |
6 | """Exceptions used in this package."""
7 |
8 |
9 | class SQLParseError(Exception):
10 | """Base class for exceptions in this module."""
11 |
--------------------------------------------------------------------------------
/libs/sqlparse/functions.py:
--------------------------------------------------------------------------------
1 | '''
2 | Created on 17/05/2012
3 |
4 | @author: piranna
5 |
6 | Several utility functions to extract info from SQL statements
7 | '''
8 |
9 | from sqlparse.filters import ColumnsSelect, Limit
10 | from sqlparse.pipeline import Pipeline
11 | from sqlparse.tokens import Keyword, Whitespace
12 |
13 |
14 | def getlimit(stream):
15 |     """Return the LIMIT clause value of an input SQL statement"""
16 | pipe = Pipeline()
17 |
18 | pipe.append(Limit())
19 |
20 | result = pipe(stream)
21 | try:
22 | return int(result)
23 | except ValueError:
24 | return result
25 |
26 |
27 | def getcolumns(stream):
28 |     """Return the columns of a SELECT query"""
29 | pipe = Pipeline()
30 |
31 | pipe.append(ColumnsSelect())
32 |
33 | return pipe(stream)
34 |
35 |
36 | class IsType(object):
37 |     """Functor that returns whether the statement is of a specific type"""
38 | def __init__(self, type):
39 | self.type = type
40 |
41 | def __call__(self, stream):
42 | for token_type, value in stream:
43 | if token_type not in Whitespace:
44 | return token_type in Keyword and value == self.type
45 |
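# Usage sketch (illustrative): both helpers consume a (ttype, value) token
# stream such as the one produced by sqlparse.lexer.tokenize():
#
#   from sqlparse import lexer
#   getlimit(lexer.tokenize("SELECT a FROM t LIMIT 10"))          # -> 10
#   IsType('INSERT')(lexer.tokenize("INSERT INTO t VALUES (1)"))  # -> True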
--------------------------------------------------------------------------------
/libs/sqlparse/pipeline.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2011 Jesus Leganes "piranna", piranna@gmail.com
2 | #
3 | # This module is part of python-sqlparse and is released under
4 | # the BSD License: http://www.opensource.org/licenses/bsd-license.php.
5 |
6 | from types import GeneratorType
7 |
8 |
9 | class Pipeline(list):
10 | """Pipeline to process filters sequentially"""
11 |
12 | def __call__(self, stream):
13 | """Run the pipeline
14 |
15 | Return a static (non generator) version of the result
16 | """
17 |
18 | # Run the stream over all the filters on the pipeline
19 | for filter in self:
20 | # Functions and callable objects (objects with '__call__' method)
21 | if callable(filter):
22 | stream = filter(stream)
23 |
24 | # Normal filters (objects with 'process' method)
25 | else:
26 | stream = filter.process(None, stream)
27 |
28 |         # If the last filter returned a generator, materialize it as a list
29 | if isinstance(stream, GeneratorType):
30 | return list(stream)
31 | return stream
32 |
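# Usage sketch (illustrative): a Pipeline is a plain list of filters and/or
# callables applied in order, as getcolumns() and getlimit() in functions.py
# demonstrate:
#
#   pipe = Pipeline()
#   pipe.append(SomeFilter())    # an object with a .process(stack, stream) method
#   pipe.append(lambda s: s)     # or any plain callable
#   result = pipe(token_stream)  # a trailing generator is materialized as a list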
--------------------------------------------------------------------------------
/libs/sqlparse/tokens.py:
--------------------------------------------------------------------------------
1 | # Copyright (C) 2008 Andi Albrecht, albrecht.andi@gmail.com
2 | #
3 | # This module is part of python-sqlparse and is released under
4 | # the BSD License: http://www.opensource.org/licenses/bsd-license.php.
5 |
6 | # The Token implementation is based on pygment's token system written
7 | # by Georg Brandl.
8 | # http://pygments.org/
9 |
10 | """Tokens"""
11 |
12 |
13 | class _TokenType(tuple):
14 | parent = None
15 |
16 | def split(self):
17 | buf = []
18 | node = self
19 | while node is not None:
20 | buf.append(node)
21 | node = node.parent
22 | buf.reverse()
23 | return buf
24 |
25 | def __contains__(self, val):
26 | return val is not None and (self is val or val[:len(self)] == self)
27 |
28 | def __getattr__(self, val):
29 | if not val or not val[0].isupper():
30 | return tuple.__getattribute__(self, val)
31 | new = _TokenType(self + (val,))
32 | setattr(self, val, new)
33 | new.parent = self
34 | return new
35 |
36 | def __hash__(self):
37 | return hash(tuple(self))
38 |
39 | def __repr__(self):
40 | return 'Token' + (self and '.' or '') + '.'.join(self)
41 |
42 |
43 | Token = _TokenType()
44 |
45 | # Special token types
46 | Text = Token.Text
47 | Whitespace = Text.Whitespace
48 | Newline = Whitespace.Newline
49 | Error = Token.Error
50 | # Text that doesn't belong to this lexer (e.g. HTML in PHP)
51 | Other = Token.Other
52 |
53 | # Common token types for source code
54 | Keyword = Token.Keyword
55 | Name = Token.Name
56 | Literal = Token.Literal
57 | String = Literal.String
58 | Number = Literal.Number
59 | Punctuation = Token.Punctuation
60 | Operator = Token.Operator
61 | Comparison = Operator.Comparison
62 | Wildcard = Token.Wildcard
63 | Comment = Token.Comment
64 | Assignment = Token.Assignement
65 |
66 | # Generic types for non-source code
67 | Generic = Token.Generic
68 |
69 | # String and some others are not direct children of Token.
70 | # Alias them:
71 | Token.Token = Token
72 | Token.String = String
73 | Token.Number = Number
74 |
75 | # SQL specific tokens
76 | DML = Keyword.DML
77 | DDL = Keyword.DDL
78 | Command = Keyword.Command
79 |
80 | Group = Token.Group
81 | Group.Parenthesis = Token.Group.Parenthesis
82 | Group.Comment = Token.Group.Comment
83 | Group.Where = Token.Group.Where
84 |
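# Example (illustrative) of the hierarchy defined above: attribute access on a
# _TokenType creates and caches child types, and __contains__ tests ancestry:
#
#   >>> Keyword.DML
#   Token.Keyword.DML
#   >>> Keyword.DML in Keyword
#   True
#   >>> Keyword in Keyword.DML
#   False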
--------------------------------------------------------------------------------
/src/README.md:
--------------------------------------------------------------------------------
1 | ## Setup
2 |
3 | 1. Create a default configuration file that you will use for your application:
4 |
5 | ./d4.py --print-config > application.config
6 |
7 | 2. Edit the settings in this configuration file according to your local environment.
8 |
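   For reference, the MongoDB connection settings in the generated file look roughly like the sketch below. The `host` and `dataset_db` keys are the ones read from the `configutil.SECT_MONGODB` section (shown here as `[mongodb]`) by the tools in this repository; your generated file will contain additional sections and keys.

        [mongodb]
        host = localhost
        dataset_db = dataset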
9 |
10 | ## MongoDB Example
11 |
12 | 1. Execute [mongosniff](http://www.mongodb.org/display/DOCS/mongosniff) on your application server to collect
13 | a workload trace of operations executed on the MongoDB server. You can pipe this into a file for later processing.
14 |
15 | mongosniff --source NET lo | gzip --best > sniff.out.gz
16 |
17 | 2. Load this mongosniff workload trace into **D4**'s internal catalog:
18 |
19 | gunzip -c sniff.out.gz | ./d4.py --config=application.config --reset --no-search
20 |
21 | The *--reset* flag will erase all of the metadata that may exist in the catalog database of the target MongoDB instance.
22 | This does not modify your application's database.
23 | The *--no-search* flag will cause **D4** to halt immediately after processing the workload trace.
24 |
25 | If you are just testing and do not want to process the entire workload trace file, you can use the *--sess-limit* and *--op-limit* options to limit the number of records processed. For example, the following command will halt loading after processing 1000 new Sessions from the trace:
26 |
27 | gunzip -c sniff.out.gz | ./d4.py --config=application.config --reset --no-search --sess-limit=1000
28 |
29 | 3. Now execute the search algorithm to find the optimal design. Note that we use the *--no-load* option and
30 | exclude the *--reset* option because we will use the workload that was loaded in the previous step:
31 |
32 | ./d4.py --config=application.config --no-load
33 |
34 | TODO: Need to discuss how to use an existing MongoDB design in **D4** to check whether there is better configuration.
35 |
36 | TODO: Need to discuss how to enable the debug log and where to report issues.
37 |
38 | ## MySQL Example
39 | *To be written*
40 |
41 |
--------------------------------------------------------------------------------
/src/catalog/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Third-Party Dependencies
4 | import os, sys
5 | basedir = os.path.realpath(os.path.dirname(__file__))
6 | sys.path.append(os.path.join(basedir, "../../libs"))
7 |
8 | from utilmethods import *
9 | del utilmethods
10 |
11 | from collection import Collection
12 | del collection
--------------------------------------------------------------------------------
/src/costmodel/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Third-Party Dependencies
4 | import os, sys
5 | basedir = os.path.realpath(os.path.dirname(__file__))
6 | sys.path.append(os.path.join(basedir, "../../libs"))
7 | sys.path.append(os.path.join(basedir, "../.."))
8 |
9 | from abstractcostcomponent import AbstractCostComponent
10 | from costmodel import CostModel
11 | from nodeestimator import NodeEstimator
--------------------------------------------------------------------------------
/src/costmodel/abstractcostcomponent.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # -----------------------------------------------------------------------
3 | # Copyright (C) 2012 by Brown University
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining
6 | # a copy of this software and associated documentation files (the
7 | # "Software"), to deal in the Software without restriction, including
8 | # without limitation the rights to use, copy, modify, merge, publish,
9 | # distribute, sublicense, and/or sell copies of the Software, and to
10 | # permit persons to whom the Software is furnished to do so, subject to
11 | # the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be
14 | # included in all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
19 | # IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 | # OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 | # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | # OTHER DEALINGS IN THE SOFTWARE.
23 | # -----------------------------------------------------------------------
24 |
25 | import logging
26 |
27 | LOG = logging.getLogger(__name__)
28 |
29 | ## ==============================================
30 | ## Abstract Cost Model Component
31 | ## ==============================================
32 | class AbstractCostComponent():
33 |
34 | def __init__(self, state):
35 | self.state = state
36 | self.debug = LOG.isEnabledFor(logging.DEBUG)
37 | self.lastDesign = None
38 | ## DEF
39 |
40 | def getCost(self, design, num_nodes=None):
41 | cost = self.getCostImpl(design, num_nodes)
42 | self.lastDesign = design
43 | return (cost)
44 | ## DEF
45 |
46 | def getCostImpl(self, design, num_nodes=None):
47 | raise NotImplementedError("Unimplemented %s.getCostImpl()" % self.__init__.im_class)
48 |
49 | def invalidateCache(self, newDesign, col_name):
50 | """Optional callback for when the cost model needs to invalidate a collection's cache"""
51 | pass
52 |
53 | def reset(self):
54 | """Optional callback for when the cost model needs to reset itself"""
55 | pass
56 |
57 | def finish(self):
58 |         """Optional callback for when the cost model has finished a round"""
59 | pass
60 |
61 | ## CLASS
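# Sketch (hypothetical subclass, based on the contract above): concrete
# components such as DiskCostComponent override getCostImpl(), while getCost()
# caches the evaluated design in self.lastDesign:
#
#   class DummyCostComponent(AbstractCostComponent):
#       def getCostImpl(self, design, num_nodes=None):
#           return 0.0  # a scalar cost, e.g. normalized to [0, 1]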
--------------------------------------------------------------------------------
/src/costmodel/disk/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from diskcostcomponent import DiskCostComponent
3 |
--------------------------------------------------------------------------------
/src/costmodel/network/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from networkcostcomponent import NetworkCostComponent
--------------------------------------------------------------------------------
/src/costmodel/skew/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from skewcostcomponent import SkewCostComponent
3 |
--------------------------------------------------------------------------------
/src/inputs/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'pavlo'
2 |
--------------------------------------------------------------------------------
/src/inputs/mongodb/README:
--------------------------------------------------------------------------------
1 | parse.py
2 | ----------------
3 | parses a mongosniff trace and stores the 'workload' (a list of sessions with their operations) in MongoDB
4 |
5 | recreate.py
6 | ----------------
7 | recreates the sample database from the 'workload' and stores it in MongoDB
8 |
9 | schema.py
10 | ----------------
11 | infers the schema catalog from the 'recreated' database and stores it in MongoDB
12 |
13 |
14 |
15 | ---------------------------------
16 |
17 | Collecting samples on OSX:
18 |
19 | sudo /Applications/mongodb/bin/mongosniff --source NET lo0 | ../sanitizer/anonymize.py 0 > sample1.txt
--------------------------------------------------------------------------------
/src/inputs/mongodb/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'pavlo'
2 |
3 | # Third-Party Dependencies
4 | import os, sys
5 | basedir = os.path.realpath(os.path.dirname(__file__))
6 | sys.path.append(os.path.join(basedir, "../../../libs"))
7 | sys.path.append(os.path.join(basedir, ".."))
8 |
9 | from abstractconverter import AbstractConverter
10 | from mongosniffconverter import MongoSniffConverter
--------------------------------------------------------------------------------
/src/inputs/mongodb/salt_crack.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import sys
3 | sys.path.append("../sanitizer")
4 | import anonymize # just for hash_string()
5 |
6 |
7 | expected_plain = "\"drivers\""
8 | expected_hash = "c9f685688b90e80b8055ef9f1d72b7ce/9"
9 | salt = 0
10 | while True:
11 | hash = anonymize.hash_string(expected_plain, salt)
12 | print "salt ", salt, ": ", hash
13 | if hash == expected_hash:
14 |         print "FOUND SALT: ", salt; break  # stop once the salt is recovered
15 | salt += 1
16 | print "Done."
--------------------------------------------------------------------------------
/src/inputs/mongodb/samplecreator.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import sys
3 | import fileinput
4 | import hashlib
5 | import time
6 | import re
7 | import argparse
8 | import yaml
9 | import json
10 | import logging
11 | from pymongo import Connection
12 | import random
13 | import string
14 |
15 | sys.path.append("../workload")
16 | from traces import *
17 |
18 | logging.basicConfig(level = logging.INFO,
19 | format="%(asctime)s [%(funcName)s:%(lineno)03d] %(levelname)-5s: %(message)s",
20 | datefmt="%m-%d-%Y %H:%M:%S",
21 | stream = sys.stdout)
22 | LOG = logging.getLogger(__name__)
23 |
24 | ### DEFAULT VALUES
25 | ### you can specify these with args
26 | TARGET_DB = "sample_db"
27 | DEFAULT_HOST = "localhost"
28 | DEFAULT_PORT = 27017
29 |
30 | #GLOBAL vars
31 | target_db = None
32 | connection = None
33 |
34 |
35 |
36 |
37 |
38 | def initDB(hostname, port, t_db):
39 | global connection
40 | global target_db
41 |
42 | # Initialize connection to db that stores raw transactions
43 | connection = Connection(hostname, port)
44 | target_db = connection[t_db]
45 |
46 | return
47 |
48 | def getRandomString(l):
49 | return "".join(random.sample(string.letters+string.digits, l))
50 |
51 |
52 | def getRandomUser():
53 | return {"first": getRandomString(8), "last": getRandomString(8), "address": {"street": getRandomString(8), "list": [getRandomString(2), getRandomString(2), getRandomString(2)]}}
54 |
55 | def getRandomArticle():
56 | return {"Title": getRandomString(20), "author": getRandomString(8), "text": getRandomString(30)}
57 |
58 | def populate():
59 | #sanity check
60 | users = []
61 | users.append({"first": "Emanuel", "last": "Buzek", "address": {"street": "Wix", "list": ["a", "b", "c"]}})
62 | users.append({"first": "Andy", "last": "Pavlo", "address": {"street": "Brown", "list": ["1", "2", "3"]}})
63 | users.append({"first": "Delete_me", "last": "XXX", "address": {"street": "homeless", "list": ["1", "2", "3"]}})
64 | #add a bunch of other users...
65 | for i in range(20):
66 | users.append(getRandomUser())
67 | target_db.users.insert(users)
68 |
69 |
70 | articles = []
71 |     articles.append({"Title": "A Sample Article About Databases", "author": "Buzek", "text": "Read online on www.example.org"})
72 |     articles.append({"Title": "Blah blah blah", "author": "Pavlo", "text": "Placeholder database text"})
73 | for i in range(5):
74 | articles.append(getRandomArticle())
75 | target_db.articles.insert(articles)
76 |
77 | print("Done.")
78 |
79 |
80 |
81 |
82 | def clear():
83 | target_db.users.remove()
84 | target_db.articles.remove()
85 |
86 |
87 | def test():
88 | populate()
89 |
90 | target_db.users.find_one()
91 |
92 | #get the count of all articles
93 | target_db.articles.find().count()
94 |
95 | #delete one article
96 | target_db.users.remove({'first': 'Delete_me'})
97 |
98 | #update
99 | target_db.users.update({'last': 'Buzek'}, {'first': 'Ema'}, True, True)
100 |
101 | #retrieve all articles
102 | target_db.articles.find()
103 |
104 | def main():
105 | aparser = argparse.ArgumentParser(description='Sample Creator')
106 | aparser.add_argument('--host',
107 | help='hostname of machine running mongo server', default=DEFAULT_HOST)
108 | aparser.add_argument('--port', type=int,
109 | help='port to connect to', default=DEFAULT_PORT)
110 | aparser.add_argument('--target_db', help='db for the sample data', default=TARGET_DB)
111 |
112 | args = vars(aparser.parse_args())
113 |
114 | LOG.info("..:: Sample Creator ::..")
115 |
116 |     settings = "host: %s port: %s target_db: %s" % (args['host'], args['port'], args['target_db'])
117 | LOG.info(settings)
118 |
119 | initDB(args['host'], args['port'], args['target_db'])
120 |
121 | clear()
122 |
123 | test()
124 |
125 |
126 | return
127 |
128 | if __name__ == '__main__':
129 | main()
130 |
131 |
132 |
133 |
134 |
--------------------------------------------------------------------------------
/src/inputs/mysql/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Third-Party Dependencies
4 | import os, sys
5 | basedir = os.path.realpath(os.path.dirname(__file__))
6 | sys.path.append(os.path.join(basedir, "../../../libs"))
7 | sys.path.append(os.path.join(basedir, ".."))
8 |
9 | from abstractconverter import AbstractConverter
10 | from mysqlconverter import MySQLConverter
11 | from sql2mongo import Sql2Mongo
12 |
--------------------------------------------------------------------------------
/src/inputs/mysql/utilmethods.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import socket
4 |
5 | '''
6 | Extract the IP address from the user host field of the MySQL query log
7 | trace, for addition to the session object
8 | '''
9 | def stripIPtoUnicode(sql_string) :
10 |     l = sql_string.rfind('[') + 1
11 |     r = sql_string.rfind(']')
12 | ip = sql_string[l:r]
13 | if (ip == '') :
14 | return u'127.0.0.1'
15 | else :
16 | return unicode(ip)
17 | ## ENDIF
18 | ## ENDDEF
19 |
20 | '''
21 | Detect the host IP address
22 | '''
23 | def detectHostIP() :
24 | return unicode(socket.gethostbyname(socket.gethostname()))
25 | ## ENDDEF
26 |
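# Example (illustrative) of the "user[user] @ host [ip]" field that the MySQL
# general query log writes and that stripIPtoUnicode() parses:
#
#   >>> stripIPtoUnicode("root[root] @ localhost [192.168.0.5]")
#   u'192.168.0.5'
#   >>> stripIPtoUnicode("root[root] @ localhost []")
#   u'127.0.0.1'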
--------------------------------------------------------------------------------
/src/multithreaded/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from multi_search import *
4 | from messageprocessor import *
--------------------------------------------------------------------------------
/src/multithreaded/messageprocessor.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # -----------------------------------------------------------------------
3 | # Copyright (C) 2011
4 | # Yang Lu
5 | # http://www.cs.brown.edu/~yanglu/
6 | #
7 | # Permission is hereby granted, free of charge, to any person obtaining
8 | # a copy of this software and associated documentation files (the
9 | # "Software"), to deal in the Software without restriction, including
10 | # without limitation the rights to use, copy, modify, merge, publish,
11 | # distribute, sublicense, and/or sell copies of the Software, and to
12 | # permit persons to whom the Software is furnished to do so, subject to
13 | # the following conditions:
14 | #
15 | # The above copyright notice and this permission notice shall be
16 | # included in all copies or substantial portions of the Software.
17 | #
18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
21 | # IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
22 | # OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
23 | # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24 | # OTHER DEALINGS IN THE SOFTWARE.
25 | # -----------------------------------------------------------------------
26 | import os
27 | import sys
28 | import logging
29 |
30 | basedir = os.path.realpath(os.path.dirname(__file__))
31 | sys.path.append(os.path.join(basedir, ".."))
32 |
33 | from message import *
34 | from pprint import pprint, pformat
35 | from multi_search_worker import Worker
36 | from ConfigParser import RawConfigParser
37 |
38 | LOG = logging.getLogger(__name__)
39 |
40 | class MessageProcessor:
41 | ''' Message Processor'''
42 | def __init__(self, channel):
43 | self.channel = channel
44 | self.worker = None
45 | self.config = None
46 | self.benchmark = None
47 |
48 | def processMessage(self):
49 | '''Main loop'''
50 | for item in self.channel:
51 | msg = getMessage(item)
52 | LOG.info("Incoming Message: %s" % getMessageName(msg.header))
53 |
54 | # MSG_CMD_INIT
55 | if msg.header == MSG_CMD_INIT:
56 | self.worker = Worker(msg.data[0], msg.data[1], self.channel, msg.data[2])
57 |
58 | elif msg.header == MSG_CMD_LOAD_DB:
59 | self.worker.load()
60 | # MSG_CMD_EXECUTE
61 | # Tells the worker thread to begin the search process
62 | # This will only occur once all of the threads complete the
63 | # EXECUTE_INIT phase.
64 | elif msg.header == MSG_CMD_EXECUTE:
65 | self.worker.execute(msg.data[0], msg.data[1])
66 |
67 | # MSG_CMD_UPDATE_BEST_COST
68 | # update the best cost of the current client
69 | elif msg.header == MSG_CMD_UPDATE_BEST_COST:
70 | self.worker.update(msg.data)
71 |
72 | # MSG_CMD_STOP
73 | # Tells the worker thread to halt the benchmark
74 | elif msg.header == MSG_CMD_STOP:
75 | # TODO
76 | pass
77 |
78 | # MSG_NOOP
79 |             # An empty command that does not require the worker thread to
80 |             # return a response.
81 | elif msg.header == MSG_NOOP:
82 | pass
83 | else:
84 | assert msg.header in MSG_NAME_MAPPING
85 | LOG.warn("Unexpected message type: %s", MSG_NAME_MAPPING[msg.header])
86 | return
87 | ## DEF
88 | ## CLASS
89 |
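
A sketch of the coordinator side of this loop, assuming the sendMessage()/getMessage() helpers and MSG_* constants from message.py that are used above; the config, args, and worker_id values are placeholders:

def driveWorker(channel, config, args, worker_id):
    # MSG_CMD_INIT creates the Worker, which replies MSG_INIT_COMPLETED
    sendMessage(MSG_CMD_INIT, (config, args, worker_id), channel)
    getMessage(channel.receive())

    # MSG_CMD_LOAD_DB makes the worker load its data; it answers with
    # MSG_INITIAL_DESIGN carrying (initialCost, initialDesign, worker_id)
    sendMessage(MSG_CMD_LOAD_DB, None, channel)
    reply = getMessage(channel.receive())

    # MSG_CMD_EXECUTE kicks off the search from that initial solution
    sendMessage(MSG_CMD_EXECUTE, (reply.data[0], reply.data[1]), channel)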
--------------------------------------------------------------------------------
/src/multithreaded/multi_search.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import os
3 | import sys
4 | import logging
5 | import execnet
6 |
7 | # Third-Party Dependencies
8 | # Remote execnet invocations won't have a __file__
9 | basedir = os.getcwd()
10 | sys.path.append(os.path.join(basedir, ".."))
11 | sys.path.append(os.path.join(basedir, "../search"))
12 |
13 | from search.designer import Designer
14 | from multi_search_coordinator import Coordinator
15 | from util import configutil
16 |
17 | LOG = logging.getLogger(__name__)
18 |
19 | class MultiClientDesigner:
20 | """
21 | This is the multithreaded version of LNS search
22 | """
23 | def __init__(self, config, args):
24 | self.config = config
25 |         self.args = args # Only used for Designer.setOptionsFromArguments()
26 | self.coordinator = Coordinator()
27 | self.channels = None
28 | ## DEF
29 |
30 | def runSearch(self):
31 | self.channels = self.createChannels()
32 |
33 | # Step 1: Initialize all of the Workers on the client nodes
34 | self.coordinator.init(self.config, self.channels, self.args)
35 |
36 | # Step 2: Execute search
37 | self.coordinator.execute()
38 | ## DEF
39 |
40 | def createChannels(self):
41 |         '''Create a list of channels used for communication between the coordinator and the workers'''
42 | num_clients = self.config.getint(configutil.SECT_MULTI_SEARCH, 'num_clients')
43 | LOG.info("Starting LNS search on %d clients" % num_clients)
44 |
45 | import d4
46 | remoteCall = d4
47 | channels=[]
48 |
49 | # create channels to client nodes
50 | for i in xrange(num_clients):
51 | gw = execnet.makegateway("popen//id=sub"+str(i))
52 | ch = gw.remote_exec(remoteCall)
53 | channels.append(ch)
54 | ## FOR (hosts)
55 |
56 | LOG.debug(channels)
57 | return channels
58 | ## DEF
59 |
60 | ## CLASS
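
For reference, the execnet pattern that createChannels() relies on, reduced to a self-contained round trip (the echo payload is illustrative only, not part of d4):

import execnet

gw = execnet.makegateway("popen//id=demo")   # spawn a local Python subprocess
ch = gw.remote_exec("""
    # this body runs in the child process; execnet injects 'channel'
    for item in channel:
        channel.send(("echo", item))
""")
ch.send("hello")
print ch.receive()   # -> ('echo', 'hello')
gw.exit()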
--------------------------------------------------------------------------------
/src/multithreaded/multi_search_worker.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 | basedir = os.path.realpath(os.path.dirname(__file__))
5 | sys.path.append(os.path.join(basedir, ".."))
6 | sys.path.append(os.path.join(basedir, "../search"))
7 |
8 | from search.designer import Designer
9 | from util import configutil
10 | from message import *
11 |
12 | import catalog
13 | import workload
14 | import mongokit
15 |
16 | import logging
17 | LOG = logging.getLogger(__name__)
18 |
19 | class Worker:
20 | def __init__(self, config, args, channel, worker_id):
21 | self.config = config
22 | self.channel = channel
23 | self.args = args
24 | self.designer = None
25 | self.bestLock = None
26 | self.worker_id = worker_id
27 |
28 | sendMessage(MSG_INIT_COMPLETED, self.worker_id, self.channel)
29 | ## DEF
30 |
31 | def load(self):
32 | """
33 | Load data from mongodb
34 | """
35 | self.designer = self.establishConnection(self.config, self.args, self.channel)
36 | initialCost, initialDesign = self.designer.load()
37 | sendMessage(MSG_INITIAL_DESIGN, (initialCost, initialDesign, self.worker_id), self.channel)
38 | ## DEF
39 |
40 | def execute(self, initialCost, initialDesign):
41 | """
42 |         Run LNS/BB search and inform the coordinator whenever a new best design is found
43 | """
44 | sendMessage(MSG_START_SEARCHING, self.worker_id, self.channel)
45 | self.designer.search(initialCost, initialDesign, self.worker_id)
46 | ## DEF
47 |
48 | def update(self, data):
49 | bestCost = data[0]
50 | bestDesign = data[1]
51 |
52 | self.designer.search_method.bbsearch_method.updateBest(bestCost, bestDesign)
53 | sendMessage(MSG_FINISHED_UPDATE, self.worker_id, self.channel)
54 | ## DEF
55 |
56 | def establishConnection(self, config, args, channel):
57 | ## ----------------------------------------------
58 | ## Connect to MongoDB
59 | ## ----------------------------------------------
60 | hostname = config.get(configutil.SECT_MONGODB, 'host')
61 | port = config.getint(configutil.SECT_MONGODB, 'port')
62 | assert hostname
63 | assert port
64 | try:
65 | conn = mongokit.Connection(host=hostname, port=port)
66 | except:
67 | LOG.error("Failed to connect to MongoDB at %s:%s" % (hostname, port))
68 | raise
69 | ## Register our objects with MongoKit
70 | conn.register([ catalog.Collection, workload.Session ])
71 |
72 | ## Make sure that the databases that we need are there
73 | db_names = conn.database_names()
74 | for key in [ 'dataset_db', ]: # FIXME 'workload_db' ]:
75 | if not config.has_option(configutil.SECT_MONGODB, key):
76 | raise Exception("Missing the configuration option '%s.%s'" % (configutil.SECT_MONGODB, key))
77 | elif not config.get(configutil.SECT_MONGODB, key):
78 | raise Exception("Empty configuration option '%s.%s'" % (configutil.SECT_MONGODB, key))
79 | ## FOR
80 |
81 | metadata_db = conn[config.get(configutil.SECT_MONGODB, 'metadata_db')]
82 | dataset_db = conn[config.get(configutil.SECT_MONGODB, 'dataset_db')]
83 |
84 | designer = Designer(config, metadata_db, dataset_db, channel)
85 | designer.setOptionsFromArguments(args)
86 |
87 | return designer
88 | ## DEF
89 |
90 | ## CLASS
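
establishConnection() only needs the [mongodb] options referenced above; a minimal sketch of building such a config programmatically, with placeholder values (the section name comes from configutil.SECT_MONGODB):

from ConfigParser import RawConfigParser
from util import configutil

config = RawConfigParser()
config.add_section(configutil.SECT_MONGODB)
config.set(configutil.SECT_MONGODB, 'host', 'localhost')       # placeholder
config.set(configutil.SECT_MONGODB, 'port', '27017')           # placeholder
config.set(configutil.SECT_MONGODB, 'metadata_db', 'metadata')
config.set(configutil.SECT_MONGODB, 'dataset_db', 'dataset')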
--------------------------------------------------------------------------------
/src/sanitizer/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Third-Party Dependencies
4 | import os, sys
5 | basedir = os.path.realpath(os.path.dirname(__file__))
6 | sys.path.append(os.path.join(basedir, "../../libs"))
7 |
--------------------------------------------------------------------------------
/src/sanitizer/anonymized-sample.txt:
--------------------------------------------------------------------------------
1 | sniffing... 27017
2 | 1335807341.9623771 - 127.0.0.1:53780 -->> 127.0.0.1:27017 fuck.col 83 bytes id:42ead4c2 1122686146
3 | insert: { _id: ObjectId('4f9ecd6dc4fa803676735bb7'), check: 662174a690c0493f30a33bc344d454c9/22 }
4 | 1335807341.9625001 - 127.0.0.1:53780 -->> 127.0.0.1:27017 fuck.$cmd 76 bytes id:42ead4c3 1122686147
5 | query: { getlasterror: 1.0, w: 1.0 } ntoreturn: -1 ntoskip: 0
6 | 1335807341.9625461 - 127.0.0.1:27017 <<-- 127.0.0.1:53780 94 bytes id:28b5a168 682991976 - 1122686147
7 | reply n:1 cursorId: 0
8 | { n: 0, connectionId: 5, wtime: 0, err: null, ok: 1.0 }
9 | 1335807341.9625919 - 127.0.0.1:53780 -->> 127.0.0.1:27017 admin.$cmd 80 bytes id:42ead4c4 1122686148
10 | query: { replSetGetStatus: 1, forShell: 1 } ntoreturn: 1 ntoskip: 0
11 | 1335807341.9626341 - 127.0.0.1:27017 <<-- 127.0.0.1:53780 92 bytes id:28b5a169 682991977 - 1122686148
12 | reply n:1 cursorId: 0
13 | { errmsg: 2e772a67d7c6cb78973d3eb496d282eb/28, ok: 0.0 }
14 | 1335807352.9636409 - 127.0.0.1:53780 -->> 127.0.0.1:27017 fuck.col 102 bytes id:42ead4c5 1122686149
15 | insert: { _id: ObjectId('4f9ecd78c4fa803676735bb8'), string-key: 610f7a015f985f0120ab21a8450fa162/36 }
16 | 1335807352.9637721 - 127.0.0.1:53780 -->> 127.0.0.1:27017 fuck.$cmd 76 bytes id:42ead4c6 1122686150
17 | query: { getlasterror: 1.0, w: 1.0 } ntoreturn: -1 ntoskip: 0
18 | 1335807352.9638169 - 127.0.0.1:27017 <<-- 127.0.0.1:53780 94 bytes id:28b5a16a 682991978 - 1122686150
19 | reply n:1 cursorId: 0
20 | { n: 0, connectionId: 5, wtime: 0, err: null, ok: 1.0 }
21 | 1335807352.9638691 - 127.0.0.1:53780 -->> 127.0.0.1:27017 admin.$cmd 80 bytes id:42ead4c7 1122686151
22 | query: { replSetGetStatus: 1, forShell: 1 } ntoreturn: 1 ntoskip: 0
23 | 1335807352.9639249 - 127.0.0.1:27017 <<-- 127.0.0.1:53780 92 bytes id:28b5a16b 682991979 - 1122686151
24 | reply n:1 cursorId: 0
25 | { errmsg: 2e772a67d7c6cb78973d3eb496d282eb/28, ok: 0.0 }
26 | 1335807361.9646549 - 127.0.0.1:53780 -->> 127.0.0.1:27017 fuck.col 95 bytes id:42ead4c8 1122686152
27 | insert: { _id: ObjectId('4f9ecd80c4fa803676735bb9'), 123: 610f7a015f985f0120ab21a8450fa162/36 }
28 | 1335807361.9647429 - 127.0.0.1:53780 -->> 127.0.0.1:27017 fuck.$cmd 76 bytes id:42ead4c9 1122686153
29 | query: { getlasterror: 1.0, w: 1.0 } ntoreturn: -1 ntoskip: 0
30 | 1335807361.964772 - 127.0.0.1:27017 <<-- 127.0.0.1:53780 94 bytes id:28b5a16c 682991980 - 1122686153
31 | reply n:1 cursorId: 0
32 | { n: 0, connectionId: 5, wtime: 0, err: null, ok: 1.0 }
33 | 1335807361.964807 - 127.0.0.1:53780 -->> 127.0.0.1:27017 admin.$cmd 80 bytes id:42ead4ca 1122686154
34 | query: { replSetGetStatus: 1, forShell: 1 } ntoreturn: 1 ntoskip: 0
35 | 1335807361.964834 - 127.0.0.1:27017 <<-- 127.0.0.1:53780 92 bytes id:28b5a16d 682991981 - 1122686154
36 | reply n:1 cursorId: 0
37 | { errmsg: 2e772a67d7c6cb78973d3eb496d282eb/28, ok: 0.0 }
38 |
--------------------------------------------------------------------------------
/src/sanitizer/out.txt:
--------------------------------------------------------------------------------
1 | sniffing... 27017
2 | 1316312038.809952 - 127.0.0.1:50923 -->> 127.0.0.1:27017 test.$cmd 79 bytes id:5514ea16 1427434006
3 | query: { create: 626df501270f676cbe6ca9967587ccdb/7, capped: undefined, size: undefined, max: undefined } ntoreturn: -1 ntoskip: 0
4 | 1316312038.810056 - 127.0.0.1:27017 <<-- 127.0.0.1:50923 91 bytes id:52acceff 1387056895 - 1427434006
5 | reply n:1 cursorId: 0
6 | { errmsg: "collection already exists", ok: 0.0 }
7 | 1316312038.8101029 - 127.0.0.1:50923 -->> 127.0.0.1:27017 admin.$cmd 80 bytes id:5514ea17 1427434007
8 | query: { replSetGetStatus: 1, forShell: 1 } ntoreturn: 1 ntoskip: 0
9 | 1316312038.8101411 - 127.0.0.1:27017 <<-- 127.0.0.1:50923 92 bytes id:52accf00 1387056896 - 1427434007
10 | reply n:1 cursorId: 0
11 | { errmsg: "not running with --replSet", ok: 0.0 }
12 |
--------------------------------------------------------------------------------
/src/sanitizer/sample-anonymize.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cmu-db/mongodb-d4/e33cd9a5d5d15d842895909cd0a9f804d4b7a975/src/sanitizer/sample-anonymize.txt
--------------------------------------------------------------------------------
/src/sanitizer/sample.dat:
--------------------------------------------------------------------------------
1 | sniffing... 27017
2 | 127.0.0.1:50923 -->> 127.0.0.1:27017 test.$cmd 79 bytes id:5514ea16 1427434006
3 | query: { create: "aaaa", capped: undefined, size: undefined, max: undefined } ntoreturn: -1 ntoskip: 0
4 | 127.0.0.1:27017 <<-- 127.0.0.1:50923 91 bytes id:52acceff 1387056895 - 1427434006
5 | reply n:1 cursorId: 0
6 | { errmsg: "collection already exists", ok: 0.0 }
7 | 127.0.0.1:50923 -->> 127.0.0.1:27017 admin.$cmd 80 bytes id:5514ea17 1427434007
8 | query: { replSetGetStatus: 1, forShell: 1 } ntoreturn: 1 ntoskip: 0
9 | 127.0.0.1:27017 <<-- 127.0.0.1:50923 92 bytes id:52accf00 1387056896 - 1427434007
10 | reply n:1 cursorId: 0
11 | { errmsg: "not running with --replSet", ok: 0.0 }
12 |
--------------------------------------------------------------------------------
/src/search/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Third-Party Dependencies
4 | import os, sys
5 | basedir = os.path.realpath(os.path.dirname(__file__))
6 | sys.path.append(os.path.join(basedir, "../../libs"))
7 |
8 | from designcandidates import DesignCandidates
9 | from design import Design
10 | #from designer import Designer
11 | from utilmethods import *
12 |
13 | # Designer Algorithms
14 | from initialdesigner import InitialDesigner
15 | from randomdesigner import RandomDesigner
16 | from lnsdesigner import LNSDesigner
17 |
--------------------------------------------------------------------------------
/src/search/abstractdesigner.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # -----------------------------------------------------------------------
3 | # Copyright (C) 2012 by Brown University
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining
6 | # a copy of this software and associated documentation files (the
7 | # "Software"), to deal in the Software without restriction, including
8 | # without limitation the rights to use, copy, modify, merge, publish,
9 | # distribute, sublicense, and/or sell copies of the Software, and to
10 | # permit persons to whom the Software is furnished to do so, subject to
11 | # the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be
14 | # included in all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
19 | # IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 | # OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 | # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | # OTHER DEALINGS IN THE SOFTWARE.
23 | # -----------------------------------------------------------------------
24 |
25 | import logging
26 | from threading import Thread
27 |
28 | LOG = logging.getLogger(__name__)
29 |
30 | ## ==============================================
31 | ## Abstract Designer
32 | ## ==============================================
33 | class AbstractDesigner(Thread):
34 |
35 | def __init__(self, collections, workload, config):
36 | Thread.__init__(self)
37 | assert isinstance(collections, dict)
38 | assert not workload is None
39 | #assert not config is None
40 |
41 | self.collections = collections
42 | self.workload = workload
43 | self.config = config
44 | self.debug = LOG.isEnabledFor(logging.DEBUG)
45 | ## DEF
46 |
47 | def generate(self):
48 | raise NotImplementedError("Unimplemented %s.generate()" % self.__init__.im_class)
49 |
50 | def run(self):
51 | pass
52 | ## CLASS
--------------------------------------------------------------------------------
/src/search/designcandidates.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from pprint import pformat
4 |
5 | ## ==============================================
6 | ## DesignCandidates
7 | ## ==============================================
8 | '''
9 | An instance of this class is given to the BBSearch.
10 | It basically defines the search space, i.e. BBSearch enumerates
11 | possible solutions using this object.
12 |
13 | = Basic structure of this class: =
14 | set of COLLECTIONS mapped to:
15 | a) list of possible shard keys
16 | b) list of collections it can be denormalized to
17 | c) list of possible index keys (this will be very likely the same as a))
18 | '''
19 | class DesignCandidates():
20 |
21 | '''
22 | class constructor
23 | '''
24 | def __init__(self):
25 | # collection names
26 | self.collections = set()
27 | # col names mapped to possible index keys
28 | self.indexKeys = {}
29 | # col names mapped to possible shard keys
30 | self.shardKeys = {}
31 | # col names mapped to possible col names the collection can be denormalized to
32 | self.denorm = {}
33 |
34 |
35 | def addCollection(self, collection, indexKeys, shardKeys, denorm) :
36 | if collection not in self.collections :
37 | self.collections.add(collection)
38 | self.indexKeys[collection] = indexKeys
39 | self.shardKeys[collection] = shardKeys
40 | self.denorm[collection] = denorm
41 |
42 | def getCandidates(self, collection_names):
43 | candidates = DesignCandidates()
44 | for coll_name in collection_names:
45 | candidates.addCollection(coll_name, self.indexKeys[coll_name], self.shardKeys[coll_name], self.denorm[coll_name])
46 |
47 | return candidates
48 |
49 | def __str__(self):
50 | return pformat(self.__dict__)
51 |
52 | ## CLASS
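
A small usage sketch, with made-up collection and key names:

candidates = DesignCandidates()
# 'orders' may be indexed/sharded on these keys and denormalized into 'customers'
candidates.addCollection('orders',
                         indexKeys=[['o_id'], ['o_c_id', 'o_d_id']],
                         shardKeys=[['o_id'], ['o_c_id']],
                         denorm=['customers'])
candidates.addCollection('customers', [['c_id']], [['c_id']], [])

subset = candidates.getCandidates(['orders'])   # restrict the search space
print subset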
--------------------------------------------------------------------------------
/src/search/randomdesigner.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # -----------------------------------------------------------------------
3 | # Copyright (C) 2012 by Brown University
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining
6 | # a copy of this software and associated documentation files (the
7 | # "Software"), to deal in the Software without restriction, including
8 | # without limitation the rights to use, copy, modify, merge, publish,
9 | # distribute, sublicense, and/or sell copies of the Software, and to
10 | # permit persons to whom the Software is furnished to do so, subject to
11 | # the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be
14 | # included in all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
19 | # IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 | # OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 | # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | # OTHER DEALINGS IN THE SOFTWARE.
23 | # -----------------------------------------------------------------------
24 |
25 | import logging
26 | import random
27 |
28 | # mongodb-d4
29 | from design import Design
30 | from abstractdesigner import AbstractDesigner
31 |
32 | LOG = logging.getLogger(__name__)
33 |
34 | ## ==============================================
35 | ## RandomDesigner
36 | ## ==============================================
37 | class RandomDesigner(AbstractDesigner):
38 |
39 | def __init__(self, collections, workload, config):
40 | AbstractDesigner.__init__(self, collections, workload, config)
41 | ## DEF
42 |
43 | def generate(self):
44 | LOG.info("Generating random design")
45 | design = Design()
46 | rng = random.Random()
47 | for col_info in self.collections.itervalues():
48 | design.addCollection(col_info['name'])
49 |
50 | col_fields = []
51 | for field, data in col_info['fields'].iteritems():
52 | col_fields.append(field)
53 |
54 |             # Pick a random field to use as the shard/index key, skipping
55 |             # special keys (names that start with '#' or '_')
56 |             attrs = [ ]
57 |             chosen_field = None
58 |             while chosen_field is None or str(chosen_field).startswith("#") or str(chosen_field).startswith("_"):
59 |                 chosen_field = rng.choice(col_fields)
60 |             attrs.append(chosen_field)
61 |             LOG.debug("Chosen field: %s", chosen_field)
62 |
63 | design.addShardKey(col_info['name'], attrs)
64 | design.addIndex(col_info['name'], attrs)
65 |
66 | return design
67 | ## DEF
68 |
69 | ## CLASS
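
A usage sketch with a hypothetical catalog entry (real col_info dicts come from the metadata database and carry more keys than shown here):

collections = {
    'orders': { 'name'  : 'orders',
                'fields': { '_id': { }, 'o_id': { }, 'o_c_id': { } } },
}
designer = RandomDesigner(collections, workload=[ ], config=None)
design = designer.generate()   # picks 'o_id' or 'o_c_id', never '_id'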
--------------------------------------------------------------------------------
/src/search/utilmethods.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import json
4 | import logging
5 | from pprint import pformat
6 | from design import Design
7 |
8 | import os
9 | import sys
10 |
11 | basedir = os.path.realpath(os.path.dirname(__file__))
12 | sys.path.append(os.path.join(basedir, "../"))
13 |
14 | from util import constants
15 |
16 | LOG = logging.getLogger(__name__)
17 |
18 | def fromJSON(data):
19 | '''
20 | Convert the result of designer.py into a tuple of Design instances (initial, final)
21 | '''
22 |     solutions = json.loads(data)
23 | initial = fromLIST(solutions['initial'])
24 | final = fromLIST(solutions['final'])
25 | return (initial, final)
26 |
27 | def fromLIST(cols):
28 |     d = Design()
29 |     for col in cols:
30 | d.addCollection(col['collection'])
31 | d.addShardKey(col['collection'], col['shardKey'])
32 | for i in col['indexes'] :
33 | d.addIndex(col['collection'], i)
34 | d.denorm[col['collection']] = col['denorm']
35 | return d
36 |
37 | def getIndexSize(col_info, indexKeys):
38 | """Estimate the amount of memory required by the indexes of a given design"""
39 | # TODO: This should be precomputed ahead of time. No need to do this
40 | # over and over again.
41 | if not indexKeys:
42 | return 0
43 | ## IF
44 | index_size = 0
45 | for f_name in indexKeys:
46 | f = col_info.getField(f_name)
47 | if f:
48 | index_size += f['avg_size']
49 | index_size += constants.DEFAULT_ADDRESS_SIZE
50 |
51 | #LOG.debug("%s Index %s Memory: %d bytes", col_info['name'], repr(indexKeys), index_size)
52 | return index_size
53 |
54 | def buildLoadingList(design):
55 | """Generate the ordered list of collections based on the order that we need to load them"""
56 | LOG.debug("Computing collection load order")
57 |
58 | # First split the list of collections between those that are normalized
59 |     # and those that are not
60 | loadOrder = [ ]
61 | denormalized = { }
62 | for collection in design.getCollections():
63 | # Examine the design and see whether this collection
64 | # is denormalized into another collection
65 | if not design.isDenormalized(collection):
66 | loadOrder.append(collection)
67 | else:
68 | # Now for the denormalized guys, get their hierarchy
69 | # so that we can figure out who should get loaded first
70 | denormalized[collection] = design.getDenormalizationHierarchy(collection)
71 | LOG.debug("'%s' Denormalization Hierarchy: %s" % (collection, denormalized[collection]))
72 | ## FOR
73 |
74 | while len(denormalized) > 0:
75 | # Loop through each denormalized collection and remove any collection
76 |         # from their hierarchy that is already in the load list
77 | for collection in denormalized.keys():
78 | denormalized[collection] = filter(lambda x: not x in loadOrder, denormalized[collection])
79 | ## FOR
80 |
81 | # Now any collection that is not waiting for any other collection
82 | # can be loaded!
83 | newLoads = [ ]
84 | for collection in denormalized.keys():
85 | if len(denormalized[collection]) == 0:
86 | newLoads.append(collection)
87 | ## FOR
88 | assert len(newLoads) > 0, "Loading deadlock due to denormalization!"
89 |
90 | for collection in newLoads:
91 | loadOrder.append(collection)
92 | del denormalized[collection]
93 | ## FOR
94 | ## WHILE
95 |
96 | return loadOrder
97 | ## DEF
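
For example, with a design where 'comments' is denormalized into 'articles' (setting the denorm mapping directly, the same way fromLIST() does above), the parent is ordered before the child:

d = Design()
for name in ('users', 'articles', 'comments'):
    d.addCollection(name)
d.denorm['comments'] = 'articles'   # child -> parent

print buildLoadingList(d)   # e.g. ['users', 'articles', 'comments']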
--------------------------------------------------------------------------------
/src/util/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from constants import *
4 | from utilmethods import *
5 | from histogram import Histogram
--------------------------------------------------------------------------------
/src/util/constants.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import sys
3 |
4 | PROJECT_NAME = "mongodb-d4"
5 | PROJECT_URL = "https://github.com/apavlo/mongodb-d4"
6 |
7 | ## ==============================================
8 | ## METADATA DB
9 | ## ==============================================
10 |
11 | # The default name of the metadata database
12 | METADATA_DB_NAME = "metadata"
13 |
14 | # The schema catalog information about the application
15 | COLLECTION_SCHEMA = "schema"
16 | COLLECTION_WORKLOAD = "sessions"
17 |
18 | CATALOG_COLL = "catalog"
19 | CATALOG_FIELDS = 'fields'
20 |
21 | ## ==============================================
22 | ## DATASET DB
23 | ## ==============================================
24 |
25 | # The default name of the reconstructed database
26 | DATASET_DB_NAME = "dataset"
27 |
28 | ## ==============================================
29 | ## WORKLOAD PROCESSING OPTIONS
30 | ## ==============================================
31 |
32 | SKIP_MONGODB_ID_FIELD = False
33 |
34 | # List of collection names prefixes that we should ignore
35 | # when performing various processing tasks
36 | IGNORED_COLLECTIONS = [ 'system', 'local', 'admin', 'config' ]
37 |
38 | # If a query's collection name is mangled when processing traces,
39 | # we'll use this value to indicate that it is invalid
40 | INVALID_COLLECTION_MARKER = "*INVALID*"
41 |
42 | # The default initial session id. New session ids will
43 | # start at this value
44 | INITIAL_SESSION_ID = 100
45 |
46 | # Special marker that represents a 'virtual' field for the
47 | # inner values of a list type
48 | LIST_INNER_FIELD = "__INNER__"
49 |
50 | # Replace any key that starts with a '$' with this string
51 | REPLACE_KEY_DOLLAR_PREFIX = '#'
52 |
53 | # Replace any '.' in a key with this string
54 | REPLACE_KEY_PERIOD = '__'
55 |
56 | # This identifies that an operation has to perform a full scan
57 | # on an entire collection rather than retrieving a single document
58 | FULL_SCAN_DOCUMENT_ID = sys.maxint
59 |
60 | ## ==============================================
61 | ## MONGO OPERATION TYPES
62 | ## ==============================================
63 | OP_TYPE_QUERY = '$query'
64 | OP_TYPE_INSERT = '$insert'
65 | OP_TYPE_ISERT = '$isert'
66 | OP_TYPE_DELETE = '$delete'
67 | OP_TYPE_UPDATE = '$update'
68 | OP_TYPE_REPLY = '$reply'
69 | OP_TYPE_GETMORE = '$getMore'
70 | OP_TYPE_KILLCURSORS = '$killCursors'
71 | OP_TYPE_UNKNOWN = 'unknown'
72 | OP_TYPE_ALL = [ ]
73 | for k in locals().keys():
74 |     if k.startswith("OP_TYPE_") and k != "OP_TYPE_ALL": OP_TYPE_ALL.append(locals()[k])
75 |
76 | ## ==============================================
77 | ## PREDICATE TYPES
78 | ## ==============================================
79 | PRED_TYPE_RANGE = 'range'
80 | PRED_TYPE_EQUALITY = 'eq'
81 | PRED_TYPE_REGEX = 'regex'
82 |
83 | ## ==============================================
84 | ## COSTMODEL DEFAULTS
85 | ## ==============================================
86 | DEFAULT_ADDRESS_SIZE = 8 # bytes
87 | DEFAULT_TIME_INTERVALS = 10
88 |
89 | # Whether to preload documents in the LRUBuffers
90 | DEFAULT_LRU_PRELOAD = True
91 |
92 | # The size of pages on disk for each MongoDB database node
93 | DEFAULT_PAGE_SIZE = 4096 # bytes
94 |
95 | # Window size of the LRU buffer: how many collections are preloaded into the buffer
96 | WINDOW_SIZE = 1024
97 |
98 | # Slot size upper bound: if the slot size is larger than this value, we
99 | # treat the operation as a full page scan
100 | SLOT_SIZE_LIMIT = 10
101 |
102 | ## ==============================================
103 | ## CANDIDATES GENERATOR CONSTRAINTS
104 | ## ==============================================
105 | MIN_SELECTIVITY = 0.01
106 |
107 | MAX_INDEX_SIZE = 10
108 |
109 | EXAUSTED_SEARCH_BAR = 4
110 |
111 | NUMBER_OF_BACKUP_KEYS = 2
112 |
113 | ## ==============================================
114 | ## MONGO DATASET RECONSTRUCTION CONSTRAINTS
115 | ## ==============================================
116 |
117 | # The minimum number of nested fields a sub-document must have before we extract it from its parent collection
118 | MIN_SIZE_OF_NESTED_FIELDS = 3
119 |
120 | # Split documents with more than K fields
121 | MIN_SPLIT_SIZE = 3
122 |
123 | # We want to SKIP this field since it is a functional field, not a data field
124 | FUNCTIONAL_FIELD = 'parent_col'
125 | ## ==============================================
126 | ## REPLAY BENCHMARK
127 | ## ==============================================
128 |
129 | # how many sessions to handle each time
130 | WORKLOAD_WINDOW_SIZE = 1000
131 |
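
The locals() loop above is just an introspective way of collecting every OP_TYPE_* value (other than the list itself); written out, it is equivalent to:

OP_TYPE_ALL = [ OP_TYPE_QUERY, OP_TYPE_INSERT, OP_TYPE_ISERT,
                OP_TYPE_DELETE, OP_TYPE_UPDATE, OP_TYPE_REPLY,
                OP_TYPE_GETMORE, OP_TYPE_KILLCURSORS, OP_TYPE_UNKNOWN ]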
--------------------------------------------------------------------------------
/src/util/mathutil.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # -----------------------------------------------------------------------
3 | # Copyright (C) 2012 by Brown University
4 | #
5 | # Permission is hereby granted, free of charge, to any person obtaining
6 | # a copy of this software and associated documentation files (the
7 | # "Software"), to deal in the Software without restriction, including
8 | # without limitation the rights to use, copy, modify, merge, publish,
9 | # distribute, sublicense, and/or sell copies of the Software, and to
10 | # permit persons to whom the Software is furnished to do so, subject to
11 | # the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be
14 | # included in all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
19 | # IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 | # OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 | # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | # OTHER DEALINGS IN THE SOFTWARE.
23 | # -----------------------------------------------------------------------
24 |
25 | import math
26 | import functools
27 | import logging
28 | from pprint import pformat
29 |
30 | LOG = logging.getLogger(__name__)
31 |
32 | def quartiles(N):
33 | debug = LOG.isEnabledFor(logging.DEBUG)
34 |
35 | # Calculate the median
36 | median = percentile(N, 0.50)
37 | if debug: LOG.debug("Median: %s" % median)
38 |
39 | # Split into two halves
40 | # Do not include the median into the halves, or the minimum and maximum
41 | lower = []
42 | upper = []
43 | isUpper = False
44 | for i in xrange(1, len(N)-1):
45 | if not isUpper and N[i] >= median:
46 | isUpper = True
47 | if isUpper:
48 | upper.append(N[i])
49 | else:
50 | lower.append(N[i])
51 | ## FOR
52 |
53 | if debug: LOG.debug("Lower Portion: %d [%s-%s]" % (len(lower), lower[0], lower[-1]))
54 | if debug: LOG.debug("Upper Portion: %d [%s-%s]" % (len(upper), upper[0], upper[-1]))
55 |
56 | # Return (lowerQuartile, upperQuartile)
57 | return (percentile(lower, 0.50), percentile(upper, 0.50))
58 | ## DEF
59 |
60 | ## Original: http://code.activestate.com/recipes/511478-finding-the-percentile-of-the-values/
61 | def percentile(N, percent, key=lambda x:x):
62 | """
63 | Find the percentile of a list of values.
64 |
65 | @parameter N - is a list of values. Note N MUST BE already sorted.
66 | @parameter percent - a float value from 0.0 to 1.0.
67 | @parameter key - optional key function to compute value from each element of N.
68 |
69 | @return - the percentile of the values
70 | """
71 | if not N:
72 | return None
73 | k = (len(N)-1) * percent
74 | f = math.floor(k)
75 | c = math.ceil(k)
76 | if f == c:
77 | return key(N[int(k)])
78 | d0 = key(N[int(f)]) * (c-k)
79 | d1 = key(N[int(c)]) * (k-f)
80 | return d0+d1
81 | ## DEF
82 |
83 | ## Original: FROM: http://www.physics.rutgers.edu/~masud/computing/WPark_recipes_in_python.html
84 | def stddev(x):
85 | n, mean, std = len(x), 0, 0
86 | for a in x:
87 | mean = mean + a
88 | mean /= float(n)
89 | for a in x:
90 | std = std + (a - mean)**2
91 | std = math.sqrt(std / float(n-1))
92 | return std
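
A quick worked example on an already-sorted list (percentile() linearly interpolates when the rank falls between two elements):

N = [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 ]
print percentile(N, 0.50)   # -> 5.5  (halfway between 5 and 6)
print percentile(N, 0.90)   # -> 9.1
print quartiles(N)          # -> (3.5, 7.5)
print stddev(N)             # -> ~3.028 (sample standard deviation)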
--------------------------------------------------------------------------------
/src/util/termcolor.py:
--------------------------------------------------------------------------------
1 | # Copyright: 2008 Nadia Alramli
2 | # http://nadiana.com/python-curses-terminal-controller
3 | # License: BSD
4 |
5 | """Terminal controller module
6 | Example of usage:
7 | print BG_BLUE + 'Text on blue background' + NORMAL
8 | print BLUE + UNDERLINE + 'Blue underlined text' + NORMAL
9 | print BLUE + BG_YELLOW + BOLD + 'text' + NORMAL
10 | """
11 |
12 | import sys
13 |
14 | # The current module
15 | MODULE = sys.modules[__name__]
16 |
17 | COLORS = "BLUE GREEN CYAN RED MAGENTA YELLOW WHITE BLACK".split()
18 | # List of terminal controls; you can add more to the list.
19 | CONTROLS = {
20 | 'BOL':'cr', 'UP':'cuu1', 'DOWN':'cud1', 'LEFT':'cub1', 'RIGHT':'cuf1',
21 | 'CLEAR_SCREEN':'clear', 'CLEAR_EOL':'el', 'CLEAR_BOL':'el1',
22 | 'CLEAR_EOS':'ed', 'BOLD':'bold', 'BLINK':'blink', 'DIM':'dim',
23 | 'REVERSE':'rev', 'UNDERLINE':'smul', 'NORMAL':'sgr0',
24 |     'HIDE_CURSOR':'civis', 'SHOW_CURSOR':'cnorm'
25 | }
26 |
27 | # List of numeric capabilities
28 | VALUES = {
29 | 'COLUMNS':'cols', # Width of the terminal (None for unknown)
30 | 'LINES':'lines', # Height of the terminal (None for unknown)
31 | 'MAX_COLORS': 'colors',
32 | }
33 |
34 | def default():
35 | """Set the default attribute values"""
36 | for color in COLORS:
37 | setattr(MODULE, color, '')
38 | setattr(MODULE, 'BG_%s' % color, '')
39 | for control in CONTROLS:
40 | setattr(MODULE, control, '')
41 | for value in VALUES:
42 | setattr(MODULE, value, None)
43 |
44 | def setup():
45 | """Set the terminal control strings"""
46 | # Initializing the terminal
47 | curses.setupterm()
48 | # Get the color escape sequence template or '' if not supported
49 | # setab and setaf are for ANSI escape sequences
50 | bgColorSeq = curses.tigetstr('setab') or curses.tigetstr('setb') or ''
51 | fgColorSeq = curses.tigetstr('setaf') or curses.tigetstr('setf') or ''
52 |
53 | for color in COLORS:
54 | # Get the color index from curses
55 | colorIndex = getattr(curses, 'COLOR_%s' % color)
56 | # Set the color escape sequence after filling the template with index
57 | setattr(MODULE, color, curses.tparm(fgColorSeq, colorIndex))
58 | # Set background escape sequence
59 | setattr(
60 | MODULE, 'BG_%s' % color, curses.tparm(bgColorSeq, colorIndex)
61 | )
62 | for control in CONTROLS:
63 | # Set the control escape sequence
64 | setattr(MODULE, control, curses.tigetstr(CONTROLS[control]) or '')
65 | for value in VALUES:
66 | # Set terminal related values
67 | setattr(MODULE, value, curses.tigetnum(VALUES[value]))
68 |
69 | def bold(text):
70 | return render('%(BOLD)s' + text + '%(NORMAL)s')
71 |
72 | def render(text):
73 | """Helper function to render text easily
74 | Example:
75 | render("%(GREEN)s%(BOLD)stext%(NORMAL)s") -> a bold green text
76 | """
77 | return text % MODULE.__dict__
78 |
79 | try:
80 | import curses
81 | setup()
82 | except Exception, e:
83 | # There is a failure; set all attributes to default
84 | print 'Warning: %s' % e
85 | default()
--------------------------------------------------------------------------------
/src/util/utilmethods.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import logging
4 |
5 | from util import constants
6 |
7 | LOG = logging.getLogger(__name__)
8 |
9 | def escapeFieldNames(content):
10 | """Fix key names so that they can be stored in MongoDB"""
11 | copy = dict(content.items())
12 | toFix = [ ]
13 | for k, v in copy.iteritems():
14 | # Keys can't start with '$' and they can't contain '.'
15 | if k.startswith('$') or k.find(".") != -1:
16 | toFix.append(k)
17 | if type(v) == dict:
18 | v = escapeFieldNames(v)
19 | elif type(v) == list:
20 | for i in xrange(0, len(v)):
21 | if type(v[i]) == dict:
22 | v[i] = escapeFieldNames(v[i])
23 | ## FOR
24 | copy[k] = v
25 | ## FOR
26 |
27 | for k in toFix:
28 | v = copy[k]
29 | del copy[k]
30 |
31 | if k.startswith('$'):
32 | k = constants.REPLACE_KEY_DOLLAR_PREFIX + k[1:]
33 | k = k.replace(".", constants.REPLACE_KEY_PERIOD)
34 | copy[k] = v
35 | ## FOR
36 |
37 | return copy
38 | ## DEF
39 |
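
For example, using the REPLACE_KEY_* constants defined in util/constants.py:

doc = { '$set': { 'user.name': u'alice' }, 'count': 1 }
print escapeFieldNames(doc)
# -> {'count': 1, '#set': {'user__name': u'alice'}}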
--------------------------------------------------------------------------------
/src/workload/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Third-Party Dependencies
4 | import os, sys
5 | basedir = os.path.realpath(os.path.dirname(__file__))
6 | sys.path.append(os.path.join(basedir, "../../libs"))
7 |
8 | # Mongokit Objects
9 | from session import Session
10 |
11 | # workload combiner
12 | from workloadcombiner import WorkloadCombiner
13 | # Regular Classes
14 | from ophasher import OpHasher
15 |
16 | from utilmethods import *
17 | del utilmethods
--------------------------------------------------------------------------------
/src/workload/utilmethods.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import logging
4 | from util import constants
5 | from pprint import pformat
6 |
7 | LOG = logging.getLogger(__name__)
8 |
9 | def isOpRegex(op, field=None):
10 | """Returns true if this operation contains a regex query"""
11 |
12 | # if "predicates" in op:
13 | # return constants.PRED_TYPE_REGEX in op["predicates"].itervalues()
14 |
15 | regex_flag = constants.REPLACE_KEY_DOLLAR_PREFIX + "regex"
16 | for contents in getOpContents(op):
17 | if field is None:
18 | for k, v in contents.iteritems():
19 | if isinstance(v, dict) and regex_flag in v:
20 | return True
21 | elif field in contents:
22 | if isinstance(contents[field], dict) and regex_flag in contents[field]:
23 | return True
24 | ## FOR
25 | return False
26 | ## DEF
27 |
28 | def getOpContents(op):
29 | """Return a list of all of the query contents for the given operation"""
30 | # QUERY
31 | if op['type'] == constants.OP_TYPE_QUERY:
32 | # TODO: Why are we not examining the resp_content here?
33 | contents = [ ]
34 | for opContent in op['query_content']:
35 | try:
36 | if '#query' in opContent and opContent['#query']:
37 | contents.append(opContent['#query'])
38 | except:
39 | LOG.error("Invalid query content:\n%s", pformat(opContent))
40 | raise
41 |
42 | # INSERT + UPDATE + DELETE
43 | elif op['type'] in [constants.OP_TYPE_INSERT, \
44 | constants.OP_TYPE_ISERT, \
45 | constants.OP_TYPE_UPDATE, \
46 | constants.OP_TYPE_DELETE]:
47 | contents = op['query_content']
48 | else:
49 | raise Exception("Unexpected type '%s' for %s" % (op['type'], op))
50 |
51 | return contents
52 | ## DEF
53 |
54 |
55 | def getReferencedFields(op):
56 | """
57 | Return a tuple of all the fields referenced in the fields dict
58 | The fields will be sorted lexiographically so that two documents with
59 | the same fields always come back with the same tuple
60 | """
61 | fields = set()
62 | for contents in getOpContents(op):
63 | for key in contents.iterkeys():
64 | if not key.startswith(constants.REPLACE_KEY_DOLLAR_PREFIX):
65 | fields.add(key)
66 | return tuple(sorted(list(fields)))
67 | ## DEF
68 |
69 | ## ==============================================
70 | ## OLD STUFF
71 | ## ==============================================
72 |
73 | # TODO: This is just for testing that our Sessions object
74 | # validates correctly. The parser/sanitizer should be fixed
75 | # to use the Sessions object directly
76 | @DeprecationWarning
77 | def convertWorkload(conn):
78 | old_workload = conn['designer']['mongo_comm']
79 |     new_workload = conn['designer']['workload']
80 |
81 | new_sess = conn['designer'].Session()
82 | new_sess['ip1'] = u'127.0.0.1:59829'
83 | new_sess['ip2'] = u'127.0.0.1:27017'
84 |
85 | for trace in old_workload.find({'IP1': new_sess['ip1'], 'IP2': new_sess['ip2']}):
86 | new_sess['uid'] = trace['uid']
87 | if not trace['content']: continue
88 |
89 | assert len(trace['content']) == 1, pformat(trace['content'])
90 | #print "CONTENT:", pformat(trace['content'])
91 | op = {
92 | 'collection': trace['collection'],
93 | 'content': trace['content'][0],
94 | 'timestamp': float(trace['timestamp']),
95 | 'type': trace['type'],
96 | 'size': int(trace['size'].replace("bytes", "")),
97 | }
98 | new_sess['operations'].append(op)
99 | ## FOR
100 |
101 | print new_sess
102 | new_sess.save()
103 | ## DEF
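
A sketch of the operation dicts these helpers expect, based on the keys referenced above ('type', 'query_content', and the escaped '#query' key); the field names are made up:

op = {
    'type': constants.OP_TYPE_QUERY,
    'query_content': [ { '#query': { 'name': u'alice',
                                     'age' : { '#gt': 21 } } } ],
}
print getOpContents(op)         # -> [{'name': u'alice', 'age': {'#gt': 21}}]
print getReferencedFields(op)   # -> ('age', 'name')
print isOpRegex(op)             # -> False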
--------------------------------------------------------------------------------
/tests/README:
--------------------------------------------------------------------------------
1 | Testing code for MongoDB-Designer
2 | The script "runTests.sh" will execute all python scripts
3 |
4 | Dependencies:
5 | python-nose
6 |
7 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Third-Party Dependencies
4 | import os, sys
5 | basedir = os.path.realpath(os.path.dirname(__file__))
6 | sys.path.append(os.path.join(basedir, "../libs"))
7 | sys.path.append(os.path.join(basedir, "../src"))
8 |
9 | from mongodbtestcase import MongoDBTestCase
--------------------------------------------------------------------------------
/tests/api/unittest_results.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | import os, sys
5 | import string
6 | import random
7 | import unittest
8 | from pprint import pprint, pformat
9 |
10 | basedir = os.path.realpath(os.path.dirname(__file__))
11 | sys.path.append(os.path.join(basedir, "../../src"))
12 | sys.path.append(os.path.join(basedir, "../../exps"))
13 | from api.results import Results
14 |
15 | class TestResults(unittest.TestCase):
16 |
17 | def setUp(self):
18 | self.txnNames = [ ]
19 | for i in xrange(0, 6):
20 | self.txnNames.append("txn-%02d" % i)
21 | pass
22 |
23 | def compareResults(self, r1, r2):
24 | self.assertEquals(r1.start, r2.start)
25 | self.assertEquals(r1.stop, r2.stop)
26 | for txn in self.txnNames:
27 | self.assertEquals(r1.txn_counters[txn], r2.txn_counters[txn])
28 | self.assertEquals(r1.txn_times[txn], r2.txn_times[txn])
29 | ## FOR
30 | self.assertEquals(len(r1.completed), len(r2.completed))
31 | ## DEF
32 |
33 | def testOpCount(self):
34 | totalOpCount = 0
35 | results = [ Results() for i in xrange(10) ]
36 | map(Results.startBenchmark, results)
37 | for r in results:
38 | for i in xrange(0, 5000):
39 | txn = random.choice(self.txnNames)
40 | id = r.startTransaction(txn)
41 | assert id != None
42 | ops = random.randint(1, 10)
43 | r.stopTransaction(id, ops)
44 | totalOpCount += ops
45 | ## FOR
46 | ## FOR
47 | map(Results.stopBenchmark, results)
48 |
49 | r = Results()
50 | map(r.append, results)
51 | self.assertEquals(totalOpCount, r.opCount)
52 | ## DEF
53 |
54 |
55 | def testAppend(self):
56 | r1 = Results()
57 | r1.startBenchmark()
58 | for i in xrange(0, 5000):
59 | txn = random.choice(self.txnNames)
60 | id = r1.startTransaction(txn)
61 | assert id != None
62 | r1.stopTransaction(id, 1)
63 | ## FOR
64 | r1.stopBenchmark()
65 | print r1.show()
66 |
67 | # Append the time and then make sure they're the same
68 | r2 = Results()
69 | r2.append(r1)
70 | self.compareResults(r1, r2)
71 |
72 | ## DEF
73 |
74 | def testPickle(self):
75 | r = Results()
76 | r.startBenchmark()
77 | for i in xrange(0, 1000):
78 | txn = random.choice(self.txnNames)
79 | id = r.startTransaction(txn)
80 | assert id != None
81 | r.stopTransaction(id, 1)
82 | ## FOR
83 |
84 | # Serialize
85 | import pickle
86 | p = pickle.dumps(r, -1)
87 | assert p
88 |
89 | # Deserialize
90 | clone = pickle.loads(p)
91 | assert clone
92 |
93 | # Make sure the txn counts are equal
94 | self.compareResults(r, clone)
95 | ## DEF
96 |
97 | ## CLASS
98 |
99 | if __name__ == '__main__':
100 | unittest.main()
101 | ## MAIN
--------------------------------------------------------------------------------
/tests/catalog/unittest_utilmethods.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import os, sys
4 | import itertools
5 |
6 | basedir = os.path.realpath(os.path.dirname(__file__))
7 | sys.path.append(os.path.join(basedir, "../../src"))
8 |
9 | import unittest
10 |
11 | import catalog
12 |
13 | class TestUtilMethods(unittest.TestCase):
14 | TEST_FIELDS = {
15 | "scalarKey": 1234,
16 | "listKey": range(10),
17 | "nestedKey": {
18 | "innerKey1": 5678,
19 | "innerKey2": 5678,
20 | }
21 | }
22 |
23 | def testGetAllValues(self):
24 | values = catalog.getAllValues(TestUtilMethods.TEST_FIELDS)
25 | self.assertIsNotNone(values)
26 | self.assertIsInstance(values, tuple)
27 |
28 | # Make sure we can hash it
29 | hash_v = hash(values)
30 | # print "hash_v:", hash_v
31 | self.assertIsNotNone(hash_v)
32 |
33 | for v in TestUtilMethods.TEST_FIELDS.itervalues():
34 | if isinstance(v, dict):
35 | expected = tuple(v.values())
36 | elif isinstance(v, list):
37 | expected = tuple(v)
38 | else:
39 | expected = v
40 | self.assertIn(expected, values)
41 | ## FOR
42 | ## DEF
43 |
44 | def testGetFieldValue(self):
45 | fields = TestUtilMethods.TEST_FIELDS
46 | for shardKey in fields.keys():
47 | expected = fields[shardKey]
48 | if shardKey == "nestedKey":
49 | expected = fields[shardKey]["innerKey2"]
50 | shardKey += ".innerKey2"
51 |
52 | actual = catalog.getFieldValue(shardKey, fields)
53 | # print shardKey, "->", actual
54 | self.assertIsNotNone(actual, shardKey)
55 | self.assertEqual(expected, actual, shardKey)
56 | ## FOR
57 |
58 |         ## Make sure that if we give it an invalid key we get back None
59 | actual = catalog.getFieldValue("LiptonSoup", fields)
60 | self.assertIsNone(actual)
61 | ## DEF
62 |
63 | def testFieldTypeSerialization(self):
64 | for t in [ int, str, unicode, float ]:
65 | t_bson = catalog.fieldTypeToString(t)
66 | self.assertFalse(t_bson == None)
67 | #print "BSON:", t_bson
68 | t_python = catalog.fieldTypeToPython(t_bson)
69 | self.assertFalse(t_python == None)
70 | #print "PYTHON:", t_python
71 | self.assertEquals(t, t_python)
72 | ## FOR
73 | ## DEF
74 |
75 | ## CLASS
76 |
77 | if __name__ == '__main__':
78 | unittest.main()
79 | ## MAIN
--------------------------------------------------------------------------------
/tests/costmodel/disk/unittest_diskcostcomponent_indexinsertionpenalty.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | import os, sys
5 | from pprint import pformat
6 | import unittest
7 |
8 | basedir = os.path.realpath(os.path.dirname(__file__))
9 | sys.path.append(os.path.join(basedir, ".."))
10 |
11 | # mongodb-d4
12 | from costmodeltestcase import CostModelTestCase
13 | from search import Design
14 | from workload import Session
15 | from util import constants
16 | from costmodel.disk import DiskCostComponent
17 |
18 | class TestDiskCost_IndexInsertionPenalty(CostModelTestCase):
19 |
20 | def setUp(self):
21 | CostModelTestCase.setUp(self)
22 | self.cm = DiskCostComponent(self.state)
23 |     ## DEF
24 |
25 | def testDiskCost_IndexInsertionPenalty(self):
26 | """
27 | IndexInsertionPenalty should be high if we build bad indexes
28 | """
29 | # 1
30 | d = Design()
31 | for col_name in CostModelTestCase.COLLECTION_NAMES:
32 | d.addCollection(col_name)
33 | d.addIndex(col_name, ["field00"])
34 | ## FOR
35 |
36 | self.cm.reset()
37 | self.cm.state.reset()
38 | self.cm.getCost(d)
39 | p0 = self.cm.total_index_insertion_penalty
40 |
41 | # 2
42 | d = Design()
43 | for col_name in CostModelTestCase.COLLECTION_NAMES:
44 | d.addCollection(col_name)
45 | d.addIndex(col_name, ["field01"])
46 | ## FOR
47 |
48 | self.cm.reset()
49 | self.cm.state.reset()
50 | self.cm.getCost(d)
51 | p1 = self.cm.total_index_insertion_penalty
52 |
53 | self.assertEqual(p0, p1)
54 |
55 | #3
56 | d = Design()
57 | for col_name in CostModelTestCase.COLLECTION_NAMES:
58 | d.addCollection(col_name)
59 | d.addIndex(col_name, ["field00", "field01"])
60 | ## FOR
61 |
62 | self.cm.reset()
63 | self.cm.state.reset()
64 | self.cm.getCost(d)
65 | p2 = self.cm.total_index_insertion_penalty
66 |
67 | self.assertEqual(p0, p2)
68 |
69 | #4
70 | d = Design()
71 | for col_name in CostModelTestCase.COLLECTION_NAMES:
72 | d.addCollection(col_name)
73 | d.addIndex(col_name, ["field00", "field02"])
74 | ## FOR
75 |
76 | self.cm.reset()
77 | self.cm.state.reset()
78 | self.cm.getCost(d)
79 | p3 = self.cm.total_index_insertion_penalty
80 |
81 | self.assertGreater(p3, p0)
82 |
83 | #5
84 | d = Design()
85 | for col_name in CostModelTestCase.COLLECTION_NAMES:
86 | d.addCollection(col_name)
87 | d.addIndex(col_name, ["field01", "field02"])
88 | ## FOR
89 |
90 | self.cm.reset()
91 | self.cm.state.reset()
92 | self.cm.getCost(d)
93 | p4 = self.cm.total_index_insertion_penalty
94 |
95 | self.assertGreater(p4, p0)
96 |
97 | #6
98 | d = Design()
99 | for col_name in CostModelTestCase.COLLECTION_NAMES:
100 | d.addCollection(col_name)
101 | d.addIndex(col_name, ["field00", "field01", "field02"])
102 | ## FOR
103 |
104 | self.cm.reset()
105 | self.cm.state.reset()
106 | self.cm.getCost(d)
107 | p5 = self.cm.total_index_insertion_penalty
108 |
109 | self.assertGreater(p5, p0)
110 | ## DEF
111 |
112 | def testDiskCost_IndexInsertionPenalty_integrated_to_cost_component(self):
113 | """
114 | Check if index insertion penalty contributes to the total diskcost
115 | """
116 |
117 | ## DEF
118 |
119 | ## CLASS
120 |
121 | if __name__ == '__main__':
122 | unittest.main()
123 | ## MAIN
124 |
--------------------------------------------------------------------------------
/tests/costmodel/disk/unittest_diskcostcomponentindexes.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | import os, sys
5 | from pprint import pformat
6 | import unittest
7 |
8 | basedir = os.path.realpath(os.path.dirname(__file__))
9 | sys.path.append(os.path.join(basedir, ".."))
10 |
11 | # mongodb-d4
12 | from costmodeltestcase_index import CostModelTestCase
13 | from search import Design
14 | from workload import Session
15 | from util import constants
16 | from costmodel.disk import DiskCostComponent
17 |
18 | class TestDiskCostIndexes(CostModelTestCase):
19 |
20 | def setUp(self):
21 | CostModelTestCase.setUp(self)
22 | self.cm = DiskCostComponent(self.state)
23 | self.cm.no_index_insertion_penalty = True
24 |     ## DEF
25 | def testDiskCostIndexes(self):
26 | """Check whether disk cost calculations work correctly"""
27 | # First get the disk cost when there are no indexes
28 | d = Design()
29 | col_info = self.collections[CostModelTestCase.COLLECTION_NAME]
30 | d.addCollection(col_info['name'])
31 |
32 | cost0 = self.cm.getCost(d)
33 | print "diskCost0:", cost0
34 | # The cost should be exactly equal to one, which means that every operation
35 | # has to perform a full sequential scan on the collection
36 | self.assertEqual(cost0, 1.0)
37 |
38 |         # Now add all of the indexes. The disk cost should be lower
39 | d = Design()
40 | col_info = self.collections[CostModelTestCase.COLLECTION_NAME]
41 | d.addCollection(col_info['name'])
42 | d.addIndex(col_info['name'], col_info['interesting'])
43 | self.state.invalidateCache(col_info['name'])
44 |
45 | self.cm.reset()
46 | self.cm.state.reset()
47 | cost1 = self.cm.getCost(d)
48 | print "diskCost1:", cost1
49 | self.assertGreater(cost0, cost1)
50 |
51 | def testDiskCostOnDifferentIndexes(self):
52 | """Check how indexes will affect the disk cost"""
53 |         # 1. Put an index on each of the two fields separately
54 | d = Design()
55 | d.addCollection(CostModelTestCase.COLLECTION_NAME)
56 | d.addIndex(CostModelTestCase.COLLECTION_NAME, ["field00"])
57 | d.addIndex(CostModelTestCase.COLLECTION_NAME, ["field01"])
58 |
59 | self.cm.reset()
60 | self.cm.state.reset()
61 | cost0 = self.cm.getCost(d)
62 | print "diskCost0:", cost0
63 |
64 |         # 2. Put a compound index on both fields together
65 | d = Design()
66 | col_info = self.collections[CostModelTestCase.COLLECTION_NAME]
67 | d.addCollection(CostModelTestCase.COLLECTION_NAME)
68 | d.addIndex(CostModelTestCase.COLLECTION_NAME, ["field01", "field00"])
69 | self.state.invalidateCache(col_info['name'])
70 |
71 | self.cm.reset()
72 | self.cm.state.reset()
73 | cost1 = self.cm.getCost(d)
74 | print "diskCost1:", cost1
75 |
76 | self.assertGreater(cost0, cost1)
77 |
78 | def testDiskCostCaching(self):
79 | """Check whether disk cost calculations work correctly with caching enabled"""
80 | self.cm.cache_enable = True
81 |
82 |         # Give the cost model a full Design with indexes
83 | d = Design()
84 | col_info = self.collections[CostModelTestCase.COLLECTION_NAME]
85 | d.addCollection(col_info['name'])
86 | d.addIndex(col_info['name'], col_info['interesting'])
87 |
88 | cost0 = self.cm.getCost(d)
89 | print "diskCost0:", cost0
90 | # FIXME self.assertGreater(cost0, 0.0)
91 |
92 | # We should get the same cost back after we execute it a second time
93 | cost1 = self.cm.getCost(d)
94 | print "diskCost1:", cost1
95 | # FIXME self.assertEqual(cost0, cost1)
96 | ## DEF
97 |
98 | ## CLASS
99 |
100 | if __name__ == '__main__':
101 | unittest.main()
102 | ## MAIN
103 |
--------------------------------------------------------------------------------
/tests/costmodel/disk/unittest_diskcostcomponentindexes_withprojection.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | import os, sys
5 | from pprint import pformat
6 | import unittest
7 |
8 | basedir = os.path.realpath(os.path.dirname(__file__))
9 | sys.path.append(os.path.join(basedir, ".."))
10 |
11 | # mongodb-d4
12 | from costmodeltestcase_index_withprojection import CostModelTestCase
13 | from search import Design
14 | from workload import Session
15 | from util import constants
16 | from costmodel.disk import DiskCostComponent
17 |
18 | class TestDiskCostIndexesWithProjection(CostModelTestCase):
19 |
20 | def setUp(self):
21 | CostModelTestCase.setUp(self)
22 | self.cm = DiskCostComponent(self.state)
23 | self.cm.no_index_insertion_penalty = True
24 |
25 | ## DEF
26 | def testDiskCostIndexes(self):
27 | """Check whether disk cost calculations work correctly"""
28 | # First get the disk cost when there are no indexes
29 | d = Design()
30 | col_info = self.collections[CostModelTestCase.COLLECTION_NAME]
31 | d.addCollection(col_info['name'])
32 |
33 | cost0 = self.cm.getCost(d)
34 | print "diskCost0:", cost0
35 | # The cost should be exactly equal to one, which means that every operation
36 | # has to perform a full sequential scan on the collection
37 | self.assertEqual(cost0, 1.0)
38 |
39 | # Now add one index. The disk cost should be lower
40 | d = Design()
41 | col_info = self.collections[CostModelTestCase.COLLECTION_NAME]
42 | d.addCollection(col_info['name'])
43 | d.addIndex(col_info['name'], ["field01"])
44 | self.state.invalidateCache(col_info['name'])
45 |
46 | self.cm.reset()
47 | self.cm.state.reset()
48 | cost1 = self.cm.getCost(d)
49 | print "diskCost1:", cost1
50 | self.assertGreater(cost0, cost1)
51 |
52 | # Now add one more index. The disk cost should be lower again
53 | d = Design()
54 | col_info = self.collections[CostModelTestCase.COLLECTION_NAME]
55 | d.addCollection(col_info['name'])
56 | d.addIndex(col_info['name'], ["field01", "field00"])
57 | self.state.invalidateCache(col_info['name'])
58 |
59 | self.cm.reset()
60 | self.cm.state.reset()
61 | cost2 = self.cm.getCost(d)
62 | print "diskCost2:", cost2
63 |
64 |         # Now add a third field to the index. The disk cost should be lower still
65 | d = Design()
66 | col_info = self.collections[CostModelTestCase.COLLECTION_NAME]
67 | d.addCollection(col_info['name'])
68 | d.addIndex(col_info['name'], ["field01", "field00", "field02"])
69 | self.state.invalidateCache(col_info['name'])
70 |
71 | self.cm.reset()
72 | self.cm.state.reset()
73 | cost3 = self.cm.getCost(d)
74 | print "diskCost3:", cost3
75 | self.assertGreater(cost2, cost3)
76 |
77 | ## CLASS
78 |
79 | if __name__ == '__main__':
80 | unittest.main()
81 | ## MAIN
82 |
--------------------------------------------------------------------------------
/tests/costmodel/disk/unittest_fastlrubuffer.py:
--------------------------------------------------------------------------------
1 | import os, sys
2 | import unittest
3 |
4 | basedir = os.path.realpath(os.path.dirname(__file__))
5 | sys.path.append(os.path.join(basedir, "../../../src"))
6 |
7 | from costmodel.disk.fastlrubufferusingwindow import FastLRUBufferWithWindow
8 |
9 | class TestFastLRUbufferWithWindow(unittest.TestCase):
10 |
11 | def setUp(self):
12 | pass
13 |
14 | def testAllBufferOperations_push(self):
15 | self.lru = FastLRUBufferWithWindow(1)
16 | slot_size = 1
17 | for i in xrange(100):
18 | tup = (i)
19 | self.lru.__push__(tup, slot_size)
20 |
21 | self.assertEqual(len(self.lru.buffer), self.lru.window_size)
22 |
23 | def testAllBufferOperations_push_slotsize_0(self):
24 | self.lru = FastLRUBufferWithWindow(10)
25 | slot_size = 1
26 | for i in xrange(9):
27 | tup = (i)
28 | self.lru.__push__(tup, slot_size)
29 |
30 | tup = (9)
31 | slot_size = 9
32 |         self.lru.__push__(tup, slot_size)
33 | self.assertEqual(len(self.lru.buffer), 2)
34 |
35 | def testAllBufferOperations_push_slotsize_1(self):
36 | self.lru = FastLRUBufferWithWindow(10)
37 | slot_size = 1
38 | for i in xrange(9):
39 | tup = (i)
40 | self.lru.__push__(tup, slot_size)
41 |
42 | tup = (9)
43 | slot_size = 10
44 |         self.lru.__push__(tup, slot_size)
45 | self.assertEqual(len(self.lru.buffer), 1)
46 | ## DEF
47 |
48 | def testAllBufferOperations_push_slotsize_2(self):
49 | self.lru = FastLRUBufferWithWindow(10)
50 | slot_size = 1
51 | for i in xrange(9):
52 | key = i
53 | self.lru.__push__(key, slot_size)
54 |
55 | key = 9
56 | slot_size = 10
57 | self.lru.__push__(key, slot_size)
58 | self.assertEqual(len(self.lru.buffer), 1)
59 |
60 | slot_size = 1
61 | for i in xrange(9):
62 | key = i
63 | self.lru.__push__(key, slot_size)
64 |
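64a | # the first size-1 push evicts the LRU size-10 slot, so only the nine size-1 slots remain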
65 | self.assertEqual(len(self.lru.buffer), 9)
66 | ## DEF
67 |
68 | def testAllBufferOperations_push_slotsize_3(self):
69 | self.lru = FastLRUBufferWithWindow(10)
70 | slot_size = 1
71 | for i in xrange(9):
72 | key = i
73 | self.lru.__push__(key, slot_size)
74 |
75 | key = 9
76 | slot_size = 10
77 | self.lru.__push__(key, slot_size)
78 | self.assertEqual(len(self.lru.buffer), 1)
79 |
80 | key = 11
81 | slot_size = 1
82 | self.lru.__push__(key, slot_size)
83 |
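83a | # any new push evicts the size-10 slot, leaving only key 11 in the buffer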
84 | self.assertEqual(len(self.lru.buffer), 1)
85 | ## DEF
86 |
87 | def testAllBufferOperations_update(self):
88 | self.lru = FastLRUBufferWithWindow(100)
89 | slot_size = 1
90 | for i in xrange(100):
91 | key = i
92 | self.lru.__push__(key, slot_size)
93 |
94 | for i in xrange(100):
95 | key = i
96 | self.lru.__update__(key)
97 | self.assertEqual(self.lru.tail[2], i)  # an updated entry moves to the tail (MRU) position
98 |
99 | def testAllBufferOperations_pop(self):
100 | self.lru = FastLRUBufferWithWindow(100)
101 | slot_size = 1
102 | for i in xrange(100):
103 | key = i
104 | self.lru.__push__(key, slot_size)
105 | for i in xrange(100):
106 | self.lru.__pop__()
107 | self.assertEqual(len(self.lru.buffer), self.lru.window_size - i - 1)
108 |
109 |
110 | if __name__ == '__main__':
111 | unittest.main()
112 |
113 |
114 |
--------------------------------------------------------------------------------
/tests/costmodel/network/unittest_networkcostcomponenttpcc.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | import os, sys
5 | from pprint import pformat
6 | import unittest
7 | import copy
8 |
9 | basedir = os.path.realpath(os.path.dirname(__file__))
10 | sys.path.append(os.path.join(basedir, "../.."))
11 |
12 | # mongodb-d4
13 | from tpcctestcase import TPCCTestCase as CostModelTestCase
14 | from search import Design
15 | from workload import Session
16 | from util import constants
17 | from costmodel.network import NetworkCostComponent
18 | from workload.workloadcombiner import WorkloadCombiner
19 | from tpcc import constants as tpccConstants
20 |
21 | class TestNetworkCostTPCC(CostModelTestCase):
22 |
23 | def setUp(self):
24 | CostModelTestCase.setUp(self)
25 | self.cm = NetworkCostComponent(self.state)
26 | ## DEF
27 |
28 | def testNetworkCostDenormalization(self):
29 | """Check network cost for queries that reference denormalized collections"""
30 | # Get the "base" design cost when all of the collections
31 | # are sharded on their "interesting" fields
32 | d = Design()
33 | i = 0
34 | for col_info in self.collections.itervalues():
35 | d.addCollection(col_info['name'])
36 | if i == 0:
37 | d.addShardKey(col_info['name'], col_info['interesting'])
38 | else:
39 | d.addShardKey(col_info['name'], ["_id"])
40 |
41 | self.cm.invalidateCache(d, col_info['name'])
42 | i += 1
43 | ## FOR
44 | self.cm.reset()
45 | self.state.reset()
46 | cost0 = self.cm.getCost(d)
47 |
48 | print "cost0:", cost0
49 |
50 | # Now get the network cost for when we denormalize the
51 | # second collection inside of the first one
52 | # We should have a lower cost because there should now be fewer queries
53 | d = Design()
54 | i = 0
55 | for col_info in self.collections.itervalues():
56 | self.assertTrue(col_info['interesting'])
57 | d.addCollection(col_info['name'])
58 | if i == 0:
59 | d.addShardKey(col_info['name'], col_info['interesting'])
60 | else:
61 | d.addShardKey(col_info['name'], ["_id"])
62 | self.cm.invalidateCache(d, col_info['name'])
63 | i += 1
64 |
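64a | # embed ORDER_LINE inside ORDERS so that ORDER_LINE queries fold into ORDERS queries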
65 | d.setDenormalizationParent(tpccConstants.TABLENAME_ORDER_LINE, tpccConstants.TABLENAME_ORDERS)
66 |
67 | combiner = WorkloadCombiner(self.collections, self.workload)
68 | combinedWorkload = combiner.process(d)
69 | self.state.updateWorkload(combinedWorkload)
70 |
71 | self.cm.reset()
72 | self.state.reset()
73 | cost1 = self.cm.getCost(d)
74 | print "cost1:", cost1
75 |
76 | self.assertLess(cost1, cost0)
77 | ## DEF
78 |
79 | ## CLASS
80 |
81 | if __name__ == '__main__':
82 | unittest.main()
83 | ## MAIN
--------------------------------------------------------------------------------
/tests/costmodel/skew/unittest_skewcostcomponent.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | import os, sys
5 | from pprint import pformat
6 | import unittest
7 |
8 | basedir = os.path.realpath(os.path.dirname(__file__))
9 | sys.path.append(os.path.join(basedir, "../"))
10 |
11 | # mongodb-d4
12 | from costmodeltestcase import CostModelTestCase
13 | from search import Design
14 | from workload import Session
15 | from util import constants
16 | from costmodel.skew import SkewCostComponent
17 |
18 | class TestSkewCost(CostModelTestCase):
19 |
20 | def setUp(self):
21 | CostModelTestCase.setUp(self)
22 | self.cm = SkewCostComponent(self.state)
23 | ## DEF
24 |
25 | def testSkewCost(self):
26 | """Check whether skew cost calculations work correctly"""
27 | col_info = self.collections[CostModelTestCase.COLLECTION_NAMES[0]]
28 | shard_key = col_info['interesting'][0]
29 |
30 | d = Design()
31 | d.addCollection(col_info['name'])
32 | d.addShardKey(col_info['name'], [shard_key])
33 |
34 | # First get the skew cost when the queries hit each node uniformly
35 | # This is the best-case scenario
36 | op_ctr = 0
37 | for sess in self.workload:
38 | for op in sess['operations']:
39 | query_content = [ {constants.REPLACE_KEY_DOLLAR_PREFIX + "query":\
40 | {shard_key: op_ctr % CostModelTestCase.NUM_NODES }\
41 | } ]
42 | op['collection'] = col_info['name']
43 | op['query_content'] = query_content
44 | op['predicates'] = { shard_key: constants.PRED_TYPE_EQUALITY }
45 | op_ctr += 1
46 | ## FOR (op)
47 | ## FOR (session)
48 |
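48a | # presumably one histogram range per node, matching the round-robin values assigned above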
49 | col_info["fields"][shard_key]["ranges"] = range(CostModelTestCase.NUM_NODES)
50 |
51 | cost0 = self.cm.getCost(d)
52 | self.assertLessEqual(cost0, 1.0)
53 | # print "skewCost0:", cost0
54 |
55 | # Then make all of the operations go to a single node
56 | # This is the worst-case scenario
57 | query_content = [ {constants.REPLACE_KEY_DOLLAR_PREFIX + "query":\
58 | {shard_key: 1000L }\
59 | } ]
60 | for sess in self.workload:
61 | for op in sess['operations']:
62 | op['collection'] = col_info['name']
63 | op['query_content'] = query_content
64 | op['predicates'] = { shard_key: constants.PRED_TYPE_EQUALITY }
65 | ## FOR
66 | self.state.reset()
67 | self.cm.reset()
68 | cost1 = self.cm.getCost(d)
69 | self.assertLessEqual(cost1, 1.0)
70 | # print "skewCost1:", cost1
71 |
72 | self.assertGreater(cost1, cost0)
73 |
74 | ## DEF
75 |
76 | def testGetSplitWorkload(self):
77 | """Check that the workload is split into intervals"""
78 |
79 | self.assertEqual(CostModelTestCase.NUM_SESSIONS, sum(map(len, self.cm.workload_segments)))
80 | for i in xrange(0, CostModelTestCase.NUM_INTERVALS):
81 | # print "[%02d]: %d" % (i, len(self.cm.workload_segments[i]))
82 | self.assertGreater(len(self.cm.workload_segments[i]), 0)
83 | ## FOR
84 | self.assertEqual(CostModelTestCase.NUM_INTERVALS, len(self.cm.workload_segments))
85 | ## DEF
86 |
87 |
88 | ## CLASS
89 |
90 | if __name__ == '__main__':
91 | unittest.main()
92 | ## MAIN
--------------------------------------------------------------------------------
/tests/costmodel/unittest_costmodel.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | import os, sys
5 | import unittest
6 |
7 | basedir = os.path.realpath(os.path.dirname(__file__))
8 | sys.path.append(os.path.join(basedir, "../../"))
9 |
10 | # mongodb-d4
11 | from costmodeltestcase import CostModelTestCase
12 | import costmodel
13 | from search import Design
14 |
15 | class TestCostModel(CostModelTestCase):
16 |
17 | def setUp(self):
18 | CostModelTestCase.setUp(self)
19 | self.cm = costmodel.CostModel(self.collections, self.workload, self.costModelConfig)
20 | ## DEF
21 |
22 | def testSameDesignExecutedTwice_withemptydesign(self):
23 | """
24 | Evaluating the same design twice should produce the same cost
25 | """
26 | d = Design()
27 | for col_name in CostModelTestCase.COLLECTION_NAMES:
28 | d.addCollection(col_name)
29 |
30 | ## FOR
31 | cost0 = self.cm.overallCost(d)
32 | cost1 = self.cm.overallCost(d)
33 |
34 | self.assertEqual(cost0, cost1)
35 |
36 | ## DEF
37 |
38 | def testSameDesignExecutedTwice_withfulldesign(self):
39 | """
40 | Evaluating the same design twice should produce the same cost
41 | """
42 | d = Design()
43 | for col_name in CostModelTestCase.COLLECTION_NAMES:
44 | d.addCollection(col_name)
45 | col_info = self.collections[col_name]
46 | d.addIndex(col_name, col_info['interesting'])
47 | ## FOR
48 |
49 | cost0 = self.cm.overallCost(d)
50 | cost1 = self.cm.overallCost(d)
51 |
52 | self.assertEqual(cost0, cost1)
53 | ## DEF
54 |
55 | ## CLASS
56 |
57 | if __name__ == '__main__':
58 | unittest.main()
59 | ## MAIN
--------------------------------------------------------------------------------
/tests/costmodel/unittest_costmodel_denormalization.py:
--------------------------------------------------------------------------------
1 |
2 | import unittest
3 | import os
4 | import sys
5 |
6 | basedir = os.path.realpath(os.path.dirname(__file__))
7 | sys.path.append(os.path.join(basedir, "../../src"))
8 | sys.path.append(os.path.join(basedir, "../../src/search"))
9 | sys.path.append(os.path.join(basedir, "../"))
10 |
11 | from util import constants
12 | from tpcctestcase import TPCCTestCase
13 | from search import Design
14 | from costmodel import CostModel
15 | from tpcc import constants as tpccConstants
16 |
17 | class FindExpectedDesign(TPCCTestCase):
18 | """
19 | Try to see whether the existing cost model favors the best design we
20 | expect
21 | """
22 | def setUp(self):
23 | TPCCTestCase.setUp(self)
24 | ## DEF
25 |
26 | def testfindExpectedDesign(self):
27 | """Perform the actual search for a design"""
28 | # Generate all the design candidates
29 | # Instantiate cost model
30 | cmConfig = {
31 | 'weight_network': 4,
32 | 'weight_disk': 1,
33 | 'weight_skew': 1,
34 | 'nodes': 10,
35 | 'max_memory': 1024,
36 | 'skew_intervals': 10,
37 | 'address_size': 64,
38 | 'window_size': 500
39 | }
40 | cm = CostModel(self.collections, self.workload, cmConfig)
41 | d0 = self.getManMadeDesign()
42 | cost0 = cm.overallCost(d0)
43 |
44 | d1 = d0.copy()
45 | d1.setDenormalizationParent(tpccConstants.TABLENAME_ORDER_LINE, tpccConstants.TABLENAME_ORDERS)
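45a | # embedding ORDER_LINE in ORDERS should cut network cost enough to lower the overall cost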
46 | cost1 = cm.overallCost(d1)
47 |
48 | self.assertLess(cost1, cost0)
49 | ## DEF
50 |
51 | def getManMadeDesign(self, denorm=True):
52 | # create the best design manually
53 |
54 | d = Design()
55 | d.addCollection(tpccConstants.TABLENAME_ITEM)
56 | d.addCollection(tpccConstants.TABLENAME_WAREHOUSE)
57 | d.addCollection(tpccConstants.TABLENAME_DISTRICT)
58 | d.addCollection(tpccConstants.TABLENAME_CUSTOMER)
59 | d.addCollection(tpccConstants.TABLENAME_STOCK)
60 | d.addCollection(tpccConstants.TABLENAME_ORDERS)
61 | d.addCollection(tpccConstants.TABLENAME_NEW_ORDER)
62 | d.addCollection(tpccConstants.TABLENAME_ORDER_LINE)
63 |
64 | d.addIndex(tpccConstants.TABLENAME_ITEM, ["I_ID"])
65 | d.addIndex(tpccConstants.TABLENAME_WAREHOUSE, ["W_ID"])
66 | d.addIndex(tpccConstants.TABLENAME_DISTRICT, ["D_W_ID", "D_ID"])
67 | d.addIndex(tpccConstants.TABLENAME_CUSTOMER, ["C_W_ID", "C_D_ID","C_ID"])
68 | d.addIndex(tpccConstants.TABLENAME_ORDERS, ["O_W_ID", "O_D_ID", "O_C_ID"])
69 | d.addIndex(tpccConstants.TABLENAME_ORDERS, ["O_W_ID", "O_D_ID", "O_ID"])
70 | d.addIndex(tpccConstants.TABLENAME_STOCK, ["S_W_ID", "S_I_ID"])
71 | d.addIndex(tpccConstants.TABLENAME_NEW_ORDER, ["NO_W_ID", "NO_D_ID", "NO_O_ID"])
72 | d.addIndex(tpccConstants.TABLENAME_ORDER_LINE, ["OL_W_ID", "OL_D_ID", "OL_O_ID"])
73 |
74 | d.addShardKey(tpccConstants.TABLENAME_ITEM, ["I_ID"])
75 | d.addShardKey(tpccConstants.TABLENAME_WAREHOUSE, ["W_ID"])
76 | d.addShardKey(tpccConstants.TABLENAME_DISTRICT, ["W_ID"])
77 | d.addShardKey(tpccConstants.TABLENAME_CUSTOMER, ["W_ID"])
78 | d.addShardKey(tpccConstants.TABLENAME_ORDERS, ["W_ID"])
79 | d.addShardKey(tpccConstants.TABLENAME_STOCK, ["W_ID"])
80 | d.addShardKey(tpccConstants.TABLENAME_NEW_ORDER, ["W_ID"])
81 | d.addShardKey(tpccConstants.TABLENAME_ORDER_LINE, ["W_ID"])
82 |
83 | return d
84 |
85 | if __name__ == '__main__':
86 | unittest.main()
87 | ## MAIN
88 |
--------------------------------------------------------------------------------
/tests/exps/replay/unittest_denormalizer.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | import os, sys
4 | basedir = os.path.realpath(os.path.dirname(__file__))
5 | sys.path.append(os.path.join(basedir, "../../../src"))
6 | sys.path.append(os.path.join(basedir, "../../../src/search"))
7 | sys.path.append(os.path.join(basedir, "../../../exps/benchmarks/replay"))
8 |
9 | import unittest
10 | from workloadgenerator import CostModelTestCase
11 | from search import Design
12 | from denormalizer import Denormalizer
13 |
14 | class TestDenormalizer(CostModelTestCase):
15 |
16 | def setUp(self):
17 | CostModelTestCase.setUp(self)
18 | self.col_names = [ x for x in self.collections.iterkeys()]
19 | ## DEF
20 |
21 | def testDenormalizer(self):
22 | d = Design()
23 | for col_name in self.col_names:
24 | d.addCollection(col_name)
25 | ## FOR
26 | op_list = self.getOperations()
27 | col_list = self.getAllCollections()
28 | d.setDenormalizationParent("koalas", "apples")
29 |
30 | dn = Denormalizer(self.metadata_db, self.dataset_db, d)
31 | dn.process()
32 |
33 | new_op_list = self.getOperations()
34 | new_col_list = self.getAllCollections()
35 |
36 | self.assertTrue("koalas" not in new_op_list)
37 | self.assertTrue("koalas" not in new_col_list)
38 | ## DEF
39 |
40 | def getOperations(self):
41 | op_list = []
42 | for sess in self.metadata_db.Session.fetch():
43 | for op in sess['operations']:
44 | op_list.append(op['collection'])
45 | ## FOR
46 | ## FOR
47 | return op_list
48 | ## DEF
49 |
50 | def getAllCollections(self):
51 | col_list = [ ]
52 | for col_name in self.dataset_db.collection_names():
53 | col_list.append(col_name)
54 | ## FOR
55 | return col_list
56 | ## DEF
57 |
58 | if __name__ == '__main__':
59 | unittest.main()
60 | ## MAIN
--------------------------------------------------------------------------------
/tests/exps/replay/workloadgenerator.py:
--------------------------------------------------------------------------------
1 |
2 | import os, sys
3 | import random
4 | import time
5 |
6 | basedir = os.path.realpath(os.path.dirname(__file__))
7 | sys.path.append(os.path.join(basedir, "../../../"))
8 | sys.path.append(os.path.join(basedir, "../"))
9 | # mongodb-d4
10 | try:
11 | from mongodbtestcase import MongoDBTestCase
12 | except ImportError:
13 | from tests import MongoDBTestCase
14 |
15 | from costmodel.state import State
16 | from search import Design
17 | from workload import Session
18 | from util import constants
19 | from inputs.mongodb import MongoSniffConverter
20 |
21 | class CostModelTestCase(MongoDBTestCase):
22 | """
23 | Base test case for cost model components
24 | """
25 |
26 | COLLECTION_NAMES = ["apples", "unexpected", "koalas"]
27 | NUM_DOCUMENTS = 10000
28 | NUM_SESSIONS = 2
29 | NUM_NODES = 8
30 | NUM_INTERVALS = 10
31 |
32 | def setUp(self):
33 | MongoDBTestCase.setUp(self)
34 |
35 | # WORKLOAD
36 | timestamp = time.time()
37 | for i in xrange(CostModelTestCase.NUM_SESSIONS):
38 | sess = self.metadata_db.Session()
39 | sess['session_id'] = i
40 | sess['ip_client'] = "client:%d" % (1234+i)
41 | sess['ip_server'] = "server:5678"
42 | sess['start_time'] = timestamp
43 |
44 | for j in xrange(0, len(CostModelTestCase.COLLECTION_NAMES)):
45 | _id = str(random.random())
46 | queryId = long((i<<16) + j)
47 | queryContent = { }
48 | queryPredicates = { }
49 |
50 | responseContent = {"_id": _id}
51 | responseId = (queryId<<8)
52 |
53 | f_name = "field" + str(random.randint(0, 10))
54 | responseContent[f_name] = random.randint(0, 100)
55 | queryContent[f_name] = responseContent[f_name]
56 | queryPredicates[f_name] = constants.PRED_TYPE_EQUALITY
57 |
58 | queryContent = { constants.REPLACE_KEY_DOLLAR_PREFIX + "query": queryContent }
59 | op = Session.operationFactory()
60 | op['collection'] = CostModelTestCase.COLLECTION_NAMES[j]
61 | op['type'] = constants.OP_TYPE_QUERY
62 | op['query_id'] = queryId
63 | op['query_content'] = [ queryContent ]
64 | op['resp_content'] = [ responseContent ]
65 | op['resp_id'] = responseId
66 | op['predicates'] = queryPredicates
67 | op['query_time'] = timestamp
68 | timestamp += 1
69 | op['resp_time'] = timestamp
70 | sess['operations'].append(op)
71 | ## FOR (ops)
72 | sess['end_time'] = timestamp
73 | timestamp += 2
74 | sess.save()
75 | ## FOR (sess)
76 |
77 | # Use the MongoSniffConverter to populate our metadata
78 | converter = MongoSniffConverter(self.metadata_db, self.dataset_db)
79 | converter.no_mongo_parse = True
80 | converter.no_mongo_sessionizer = True
81 | converter.process()
82 | self.assertEqual(CostModelTestCase.NUM_SESSIONS, self.metadata_db.Session.find().count())
83 |
84 | self.collections = dict([ (c['name'], c) for c in self.metadata_db.Collection.fetch()])
85 | self.assertEqual(len(CostModelTestCase.COLLECTION_NAMES), len(self.collections))
86 |
87 | populated_workload = [c for c in self.metadata_db.Session.fetch()]
88 | self.workload = populated_workload
89 |
90 | # Increase the database size beyond what the converter derived from the workload
91 | for col_name, col_info in self.collections.iteritems():
92 | col_info['doc_count'] = CostModelTestCase.NUM_DOCUMENTS
93 | col_info['avg_doc_size'] = 1024 # bytes
94 | col_info['max_pages'] = col_info['doc_count'] * col_info['avg_doc_size'] / (4 * 1024)
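94a | # with 4KB pages: 10000 docs * 1024 bytes / 4096 = 2500 pages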
95 | col_info.save()
96 | # print pformat(col_info)
97 |
98 | self.costModelConfig = {
99 | 'max_memory': 1024, # MB
100 | 'skew_intervals': CostModelTestCase.NUM_INTERVALS,
101 | 'address_size': 64,
102 | 'nodes': CostModelTestCase.NUM_NODES,
103 | 'window_size': 3
104 | }
105 |
106 | self.state = State(self.collections, populated_workload, self.costModelConfig)
107 | ## DEF
108 | ## CLASS
--------------------------------------------------------------------------------
/tests/exps/tools/unittest_design_deserializer.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import os
3 | import sys
4 |
5 | basedir = os.path.realpath(os.path.dirname(__file__))
6 | sys.path.append(os.path.join(basedir, "../../../src"))
7 | sys.path.append(os.path.join(basedir, "../../../src/search"))
8 | sys.path.append(os.path.join(basedir, "../.."))
9 | sys.path.append(os.path.join(basedir, "../../../exps/tools"))
10 |
11 | from util import constants
12 | from tpcctestcase import TPCCTestCase
13 | from search import Design
14 | from design_deserializer import Deserializer
15 | from costmodel import CostModel
16 | from tpcc import constants as tpccConstants
17 |
18 | class FindExpectedDesign(TPCCTestCase):
19 | """
20 | Try to see if the existing cost model could generate the best desgin we
21 | expected
22 | """
23 | def setUp(self):
24 | TPCCTestCase.setUp(self)
25 | ## DEF
26 |
27 | def testfindExpectedDesign(self):
28 | """Perform the actual search for a design"""
29 | # Generate all the design candidates
30 | # Instantiate cost model
31 | cmConfig = {
32 | 'weight_network': 4,
33 | 'weight_disk': 1,
34 | 'weight_skew': 1,
35 | 'nodes': 10,
36 | 'max_memory': 1024,
37 | 'skew_intervals': 10,
38 | 'address_size': 64,
39 | 'window_size': 500
40 | }
41 | cm = CostModel(self.collections, self.workload, cmConfig)
42 | d0 = self.getManMadeDesign()
43 | print d0
44 | output_design = d0.toJSON()
45 | cost0 = cm.overallCost(d0)
46 | ds = Deserializer(output_design)
47 | d1 = ds.Deserialize()
48 | print d1
49 | cost1 = cm.overallCost(d1)
50 |
51 | self.assertEqual(cost1, cost0)
52 | ## DEF
53 |
54 | def getManMadeDesign(self, denorm=True):
55 | # create the best design manually
56 |
57 | d = Design()
58 | d.addCollection(tpccConstants.TABLENAME_ITEM)
59 | d.addCollection(tpccConstants.TABLENAME_WAREHOUSE)
60 | d.addCollection(tpccConstants.TABLENAME_DISTRICT)
61 | d.addCollection(tpccConstants.TABLENAME_CUSTOMER)
62 | d.addCollection(tpccConstants.TABLENAME_STOCK)
63 | d.addCollection(tpccConstants.TABLENAME_ORDERS)
64 | d.addCollection(tpccConstants.TABLENAME_NEW_ORDER)
65 | d.addCollection(tpccConstants.TABLENAME_ORDER_LINE)
66 |
67 | d.addIndex(tpccConstants.TABLENAME_ITEM, ["I_ID"])
68 | d.addIndex(tpccConstants.TABLENAME_WAREHOUSE, ["W_ID"])
69 | d.addIndex(tpccConstants.TABLENAME_DISTRICT, ["D_W_ID", "D_ID"])
70 | d.addIndex(tpccConstants.TABLENAME_CUSTOMER, ["C_W_ID", "C_D_ID","C_ID"])
71 | d.addIndex(tpccConstants.TABLENAME_ORDERS, ["O_W_ID", "O_D_ID", "O_C_ID"])
72 | d.addIndex(tpccConstants.TABLENAME_ORDERS, ["O_W_ID", "O_D_ID", "O_ID"])
73 | d.addIndex(tpccConstants.TABLENAME_STOCK, ["S_W_ID", "S_I_ID"])
74 | d.addIndex(tpccConstants.TABLENAME_NEW_ORDER, ["NO_W_ID", "NO_D_ID", "NO_O_ID"])
75 | d.addIndex(tpccConstants.TABLENAME_ORDER_LINE, ["OL_W_ID", "OL_D_ID", "OL_O_ID"])
76 |
77 | d.addShardKey(tpccConstants.TABLENAME_ITEM, ["I_ID"])
78 | d.addShardKey(tpccConstants.TABLENAME_WAREHOUSE, ["W_ID"])
79 | d.addShardKey(tpccConstants.TABLENAME_DISTRICT, ["W_ID"])
80 | d.addShardKey(tpccConstants.TABLENAME_CUSTOMER, ["W_ID"])
81 | d.addShardKey(tpccConstants.TABLENAME_ORDERS, ["W_ID"])
82 | d.addShardKey(tpccConstants.TABLENAME_STOCK, ["W_ID"])
83 | d.addShardKey(tpccConstants.TABLENAME_NEW_ORDER, ["W_ID"])
84 | d.addShardKey(tpccConstants.TABLENAME_ORDER_LINE, ["W_ID"])
85 |
86 | return d
87 |
88 | if __name__ == '__main__':
89 | unittest.main()
90 | ## MAIN
91 |
--------------------------------------------------------------------------------
/tests/mongodbtestcase.py:
--------------------------------------------------------------------------------
1 |
2 | import os, sys
3 | import unittest
4 |
5 | import logging
6 | logging.basicConfig(level = logging.INFO,
7 | format="%(asctime)s [%(filename)s:%(lineno)03d] %(levelname)-5s: %(message)s",
8 | datefmt="%m-%d-%Y %H:%M:%S",
9 | stream = sys.stdout)
10 |
11 | basedir = os.path.realpath(os.path.dirname(__file__))
12 | sys.path.append(os.path.realpath(os.path.join(basedir, "../libs")))
13 | sys.path.append(os.path.realpath(os.path.join(basedir, "../src")))
14 |
15 | # Third-Party Dependencies
16 | import mongokit
17 |
18 | # mongodb-d4
19 | from catalog import Collection
20 | from workload import Session
21 | from util import constants
22 |
23 | class MongoDBTestCase(unittest.TestCase):
24 | """
25 | Special test case that will automatically setup our connections
26 | for the metadata and workload databases
27 | """
28 |
29 | def setUp(self):
30 | conn = mongokit.Connection()
31 | conn.register([ Collection, Session ])
32 |
33 | # Drop the databases first
34 | # Note that we prepend "test_" in front of the db names
35 | db_prefix = "test_"
36 | for dbName in [constants.METADATA_DB_NAME, constants.DATASET_DB_NAME]:
37 | conn.drop_database(db_prefix + dbName)
38 | self.metadata_db = conn[db_prefix + constants.METADATA_DB_NAME]
39 | self.dataset_db = conn[db_prefix + constants.DATASET_DB_NAME]
40 |
41 | ## DEF
--------------------------------------------------------------------------------
/tests/runTests.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh -x
2 |
3 | nosetests --verbose --nocapture $(find . -name "unittest*.py" -type f)
--------------------------------------------------------------------------------
/tests/sanitizer/trace-anon.out:
--------------------------------------------------------------------------------
1 | sniffing... 27017
2 | 000.000 - 127.0.0.1:33082 -->> 127.0.0.1:27017 admin.$cmd 60 bytes id:0 0
3 | query: { "whatsmyuri" : 1 } ntoreturn: 1 ntoskip: 0
4 | 000.000 - 127.0.0.1:27017 <<-- 127.0.0.1:33082 78 bytes id:10 16 - 0
5 | reply n:1 cursorId: 0
6 | { "you" : XXX_HASH_XXX/15, "ok" : 1 }
7 | 000.000 - 127.0.0.1:33082 -->> 127.0.0.1:27017 test_db.col 41 bytes id:1 1
8 | delete flags: 0 q: {}
9 | 000.000 - 127.0.0.1:33082 -->> 127.0.0.1:27017 test_db.col 109 bytes id:2 2
10 | insert: { "_id" : { "$oid" : XXX_HASH_XXX/24 }, "num" : 1, "key" : XXX_HASH_XXX/36 }
11 | 000.000 - 127.0.0.1:33082 -->> 127.0.0.1:27017 test_db.col 111 bytes id:3 3
12 | insert: { "_id" : { "$oid" : XXX_HASH_XXX/24 }, "num" : 2, "key" : XXX_HASH_XXX/38 }
13 | 000.000 - 127.0.0.1:33082 -->> 127.0.0.1:27017 test_db.col 114 bytes id:4 4
14 | insert: { "_id" : { "$oid" : XXX_HASH_XXX/24 }, "num" : 3, "key" : XXX_HASH_XXX/39 }
15 | 000.000 - 127.0.0.1:33082 -->> 127.0.0.1:27017 test_db.col 103 bytes id:5 5
16 | insert: { "_id" : { "$oid" : XXX_HASH_XXX/24 }, "num" : 4, "key" : XXX_HASH_XXX/36 }
17 | 000.000 - 127.0.0.1:33082 -->> 127.0.0.1:27017 test_db.col 108 bytes id:6 6
18 | update flags:0 q:{ "num" : 3 } o:{ "num" : 3, "key" : XXX_HASH_XXX/32 }
19 | 000.000 - 127.0.0.1:33082 -->> 127.0.0.1:27017 test_db.col 83 bytes id:7 7
20 | update flags:0 q:{ "num" : 1 } o:{ "num" : 1, "key" : XXX_HASH_XXX/2 }
21 | 000.000 - 127.0.0.1:33082 -->> 127.0.0.1:27017 test_db.col 88 bytes id:8 8
22 | delete flags: 0 q: { "key" : XXX_HASH_XXX/39 }
23 | 000.000 - 127.0.0.1:33082 -->> 127.0.0.1:27017 test_db.col 52 bytes id:9 9
24 | delete flags: 0 q: { "key" : XXX_HASH_XXX/2 }
25 | 000.000 - 127.0.0.1:33082 -->> 127.0.0.1:27017 test_db.col 65 bytes id:a 10
26 | delete flags: 0 q: { "key" : XXX_HASH_XXX/28 }
27 | 000.000 - 127.0.0.1:33082 -->> 127.0.0.1:27017 test_db.$cmd 86 bytes id:b 11
28 | query: { "count" : XXX_HASH_XXX/3, "query" : {}, "fields" : {} } ntoreturn: -1 ntoskip: 0
29 | 000.000 - 127.0.0.1:27017 <<-- 127.0.0.1:33082 64 bytes id:11 17 - 11
30 | reply n:1 cursorId: 0
31 | { "n" : 3, "ok" : 1 }
32 |
--------------------------------------------------------------------------------
/tests/sanitizer/trace-clean.out:
--------------------------------------------------------------------------------
1 | sniffing... 27017
2 | 127.0.0.1:33082 -->> 127.0.0.1:27017 admin.$cmd 60 bytes id:0 0
3 | query: { "whatsmyuri" : 1 } ntoreturn: 1 ntoskip: 0
4 | 127.0.0.1:27017 <<-- 127.0.0.1:33082 78 bytes id:10 16 - 0
5 | reply n:1 cursorId: 0
6 | { "you" : "127.0.0.1:33082", "ok" : 1 }
7 | 127.0.0.1:33082 -->> 127.0.0.1:27017 test_db.col 41 bytes id:1 1
8 | delete flags: 0 q: {}
9 | 127.0.0.1:33082 -->> 127.0.0.1:27017 test_db.col 109 bytes id:2 2
10 | insert: { "_id" : { "$oid" : "4fbe85545df2ef2def485677" }, "num" : 1, "key" : " \t \tsome string \twith spaces \t" }
11 | 127.0.0.1:33082 -->> 127.0.0.1:27017 test_db.col 111 bytes id:3 3
12 | insert: { "_id" : { "$oid" : "4fbe85545df2ef2def485678" }, "num" : 2, "key" : "These \"quotes\" should be \"escaped\"" }
13 | 127.0.0.1:33082 -->> 127.0.0.1:27017 test_db.col 114 bytes id:4 4
14 | insert: { "_id" : { "$oid" : "4fbe85545df2ef2def485679" }, "num" : 3, "key" : "These \"quotes\" should be escaped too." }
15 | 127.0.0.1:33082 -->> 127.0.0.1:27017 test_db.col 103 bytes id:5 5
16 | insert: { "_id" : { "$oid" : "4fbe85545df2ef2def48567a" }, "num" : 4, "key" : "\n\n newlines \\ \\ \" \" \t \n \"\"" }
17 | 127.0.0.1:33082 -->> 127.0.0.1:27017 test_db.col 108 bytes id:6 6
18 | update flags:0 q:{ "num" : 3 } o:{ "num" : 3, "key" : "This is \" \" \n \n \t \t a TEST" }
19 | 127.0.0.1:33082 -->> 127.0.0.1:27017 test_db.col 83 bytes id:7 7
20 | update flags:0 q:{ "num" : 1 } o:{ "num" : 1, "key" : "\"" }
21 | 127.0.0.1:33082 -->> 127.0.0.1:27017 test_db.col 88 bytes id:8 8
22 | delete flags: 0 q: { "key" : "These \"quotes\" should be escaped too." }
23 | 127.0.0.1:33082 -->> 127.0.0.1:27017 test_db.col 52 bytes id:9 9
24 | delete flags: 0 q: { "key" : "\"" }
25 | 127.0.0.1:33082 -->> 127.0.0.1:27017 test_db.col 65 bytes id:a 10
26 | delete flags: 0 q: { "key" : "\n\n\n\n\n\t\t\t\t\t\"\"\"\"" }
27 | 127.0.0.1:33082 -->> 127.0.0.1:27017 test_db.$cmd 86 bytes id:b 11
28 | query: { "count" : "col", "query" : {}, "fields" : {} } ntoreturn: -1 ntoskip: 0
29 | 127.0.0.1:27017 <<-- 127.0.0.1:33082 64 bytes id:11 17 - 11
30 | reply n:1 cursorId: 0
31 | { "n" : 3, "ok" : 1 }
32 |
--------------------------------------------------------------------------------
/tests/search/unittest_bbsearch_ShardKeyIterator.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import logging
4 | import unittest
5 |
6 | basedir = os.path.realpath(os.path.dirname(__file__))
7 | sys.path.append(os.path.join(basedir, "../../src"))
8 |
9 | from search import bbsearch
10 | LOG = logging.getLogger(__name__)
11 |
12 | class TestShardKeyIterator(unittest.TestCase):
13 | def setUp(self):
14 | pass
15 |
16 | def testIfGeneratedAllCombination(self):
17 | expected = [["3", "2", "1"], ["3", "2"], ["3", "1"], ["2", "1"], ["3"], ["2"], ["1"]]
18 | iterator = bbsearch.ShardKeyIterator(["3", "2", "1"], -1)
19 | for combinations in expected:
20 | result = iterator.next()
21 | self.assertEqual(tuple(combinations), tuple(result))
22 | if len(result) == 1 and result[0] == "1":
23 | break
24 |
25 | def testIfGeneratedLimitedCombination(self):
26 | expected = [["3", "2"], ["3"], ["2"]]
27 | iterator = bbsearch.ShardKeyIterator(["3", "2", "1"], 2)
28 | for combinations in expected:
29 | result = iterator.next()
30 | self.assertEqual(tuple(combinations), tuple(result))
31 | if len(result) == 1 and result[0] == "2":
32 | break
33 |
34 | if __name__ == '__main__':
35 | unittest.main()
36 |
--------------------------------------------------------------------------------
/tests/search/unittest_findExpectedDesign.py:
--------------------------------------------------------------------------------
1 |
2 | import unittest
3 | import os
4 | import sys
5 |
6 | basedir = os.path.realpath(os.path.dirname(__file__))
7 | sys.path.append(os.path.join(basedir, "../../src"))
8 | sys.path.append(os.path.join(basedir, "../../src/search"))
9 | sys.path.append(os.path.join(basedir, "../"))
10 |
11 | from util import configutil
12 | from util import constants
13 |
14 | from tpcctestcase import TPCCTestCase
15 | from ConfigParser import RawConfigParser
16 | from search.designer import Designer
17 | from search import Design
18 | from designcandidates import DesignCandidates
19 | from initialdesigner import InitialDesigner
20 | from lnsdesigner import LNSDesigner
21 | from costmodel import CostModel
22 | from tpcc import constants as tpccConstants
23 | from search import bbsearch
24 |
25 | LNS_RUN_TIME = 2 * 60 * 60 # seconds
26 |
27 | class FindExpectedDesign(TPCCTestCase):
28 | """
29 | Try to see if the existing cost model could generate the best design we
30 | expected
31 | """
32 | def setUp(self):
33 | TPCCTestCase.setUp(self)
34 |
35 | config = RawConfigParser()
36 | configutil.setDefaultValues(config)
37 |
38 | self.designer = Designer(config, self.metadata_db, self.dataset_db)
39 | self.dc = self.designer.generateDesignCandidates(self.collections, self.workload)
40 | self.assertIsNotNone(self.dc)
41 |
42 | # Make sure that we don't have any invalid candidate keys
43 | for col_name in self.collections.iterkeys():
44 | for index_keys in self.dc.indexKeys[col_name]:
45 | for key in index_keys:
46 | assert not key.startswith(constants.REPLACE_KEY_DOLLAR_PREFIX), \
47 | "Unexpected candidate key '%s.%s'" % (col_name, key)
48 | ## FOR
49 |
50 | ## DEF
51 |
52 | def outtestfindExpectedDesign(self):  # "out" prefix keeps unittest from auto-running this long LNS search
53 | """Perform the actual search for a design"""
54 | # Generate all the design candidates
55 | # Instantiate cost model
56 | cmConfig = {
57 | 'weight_network': 4,
58 | 'weight_disk': 1,
59 | 'weight_skew': 1,
60 | 'nodes': 10,
61 | 'max_memory': 1024,
62 | 'skew_intervals': 10,
63 | 'address_size': 64,
64 | 'window_size': 500
65 | }
66 | cm = CostModel(self.collections, self.workload, cmConfig)
67 |
68 | initialDesign = InitialDesigner(self.collections, self.workload, None).generate()
69 | upper_bound = cm.overallCost(initialDesign)
70 | print "init solution: ", initialDesign
71 | print "init solution cost: ", upper_bound
72 | collectionNames = [c for c in self.collections]
73 |
74 | dc = self.dc.getCandidates(collectionNames)
75 | print "candidates: ", dc
76 | ln = LNSDesigner(self.collections, \
77 | self.dc, \
78 | self.workload, \
79 | None, \
80 | cm, \
81 | initialDesign, \
82 | upper_bound, \
83 | LNS_RUN_TIME)
84 | solution = ln.solve()
85 | print "Best cost: ", ln.bestCost
86 | print "solution: ", solution
87 | ## DEF
88 |
89 | if __name__ == '__main__':
90 | unittest.main()
91 | ## MAIN
92 |
--------------------------------------------------------------------------------
/tests/search/unittest_initialdesigner.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | import os, sys
5 | import random
6 | import unittest
7 | import logging
8 | from pprint import pprint
9 |
10 | basedir = os.path.realpath(os.path.dirname(__file__))
11 | sys.path.append(os.path.join(basedir, "../"))
12 |
13 | # mongodb-d4
14 | from tpcctestcase import TPCCTestCase
15 | from search import Design
16 | from workload import Session
17 | import catalog
18 | from search import InitialDesigner
19 | from util import constants, configutil
20 |
21 | class TestInitialDesigner(TPCCTestCase):
22 |
23 | def setUp(self):
24 | TPCCTestCase.setUp(self)
25 | self.config = configutil.makeDefaultConfig()
26 | self.designer = InitialDesigner(self.collections, self.workload, self.config)
27 | self.col_keys = self.designer.generateCollectionHistograms()
28 | self.design = Design()
29 | map(self.design.addCollection, self.col_keys.iterkeys())
30 | ## DEF
31 |
32 | def testCheckForInvalidKeys(self):
33 | d = self.designer.generate()
34 | self.assertIsNotNone(d)
35 |
36 | # Make sure that we don't have any invalid keys
37 | for col_name in d.getCollections():
38 | for index_keys in d.getIndexes(col_name):
39 | for key in index_keys:
40 | assert not key.startswith(constants.REPLACE_KEY_DOLLAR_PREFIX), \
41 | "Invalid index key '%s.%s'" % (col_name, key)
42 | ## FOR
43 | for key in d.getShardKeys(col_name):
44 | assert not key.startswith(constants.REPLACE_KEY_DOLLAR_PREFIX), \
45 | "Invalid shard key '%s.%s'" % (col_name, key)
46 | ## FOR
47 | ## DEF
48 |
49 | def testSelectShardingKeys(self):
50 | # Select one set of keys at random and increase its occurrence
51 | # in the histogram so that we will pick it
52 | expected = { }
53 | for col_name, h in self.col_keys.iteritems():
54 | keys = random.choice(h.keys())
55 | h.put(keys, 999999)
56 | expected[col_name] = keys
57 |
58 | self.designer.__selectShardingKeys__(self.design, self.col_keys)
59 |
60 | # Then check to make sure it picked what we expected it to
61 | for col_name in self.col_keys.iterkeys():
62 | shard_keys = self.design.getShardKeys(col_name)
63 | self.assertIsNotNone(shard_keys)
64 | self.assertIsInstance(shard_keys, tuple)
65 | self.assertEquals(expected[col_name], shard_keys)
66 | #print self.design
67 | ## DEF
68 |
69 | def testSelectIndexKeys(self):
70 | # Select one set of keys at random and increase its occurrence
71 | # in the histogram so that we will pick it
72 | expected = { }
73 | for col_name, h in self.col_keys.iteritems():
74 | keys = random.choice(h.keys())
75 | h.put(keys, 999999)
76 | expected[col_name] = keys
77 |
78 | node_memory = self.config.get(configutil.SECT_CLUSTER, "node_memory")
79 | self.designer.__selectIndexKeys__(self.design, self.col_keys, node_memory)
80 | #print self.design
81 |
82 | # Then check to make sure it picked what we expected it to
83 | for col_name in self.col_keys.iterkeys():
84 | index_keys = self.design.getIndexKeys(col_name)
85 | self.assertIsNotNone(index_keys)
86 | self.assertIsInstance(index_keys, list)
87 | # FIXME self.assertEquals(expected[col_name], shard_keys)
88 | ## DEF
89 |
90 | ## CLASS
91 |
92 | if __name__ == '__main__':
93 | unittest.main()
94 | ## MAIN
--------------------------------------------------------------------------------
/tests/search/unittest_lnsdesigner.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | import os, sys
5 | import unittest
6 |
7 | basedir = os.path.realpath(os.path.dirname(__file__))
8 | sys.path.append(os.path.join(basedir, "../../src"))
9 |
10 | from search.lnsdesigner import LNSDesigner
11 |
12 | class TestSearchSpace(unittest.TestCase):
13 |
14 | def setUp(self):
15 | self.collections = { }
16 | for i in xrange(100):
17 | self.collections["key" + str(i)] = i
18 |
19 | ## DEF
20 |
21 | def testRandomCollectionGenerator(self):
22 | """
23 | Check whether RandomCollectionGenerator can generate random collections
24 | """
25 | rcg = LNSDesigner.RandomCollectionGenerator(self.collections)
26 | map_round_to_set = { }
27 | for j in xrange(3):
28 | map_round_to_set[j] = rcg.getRandomCollections(3)
29 | ## FOR
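29a | # three identical 3-of-100 random draws are vanishingly unlikely, so all draws should differ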
30 |
31 | value_list = [val for val in map_round_to_set.itervalues()]
32 |
33 | self.assertNotEqual(sorted(value_list[0]), sorted(value_list[1]))
34 | self.assertNotEqual(sorted(value_list[0]), sorted(value_list[2]))
35 | self.assertNotEqual(sorted(value_list[1]), sorted(value_list[2]))
36 | ## DEF
37 | ## CLASS
38 |
39 | if __name__ == '__main__':
40 | unittest.main()
41 | ## MAIN
--------------------------------------------------------------------------------
/tests/search/unittest_utilmethods.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | import unittest
5 | import itertools
6 | from search import *
7 |
8 | class TestUtilMethods(unittest.TestCase):
9 |
10 | def setUp(self):
11 | pass
12 |
13 | def testBuildLoadingList(self):
14 | # Denormalization Tree
15 | # A
16 | # / \
17 | # B C
18 | # |
19 | # D
20 | expected = [
21 | ['A'], ['B', 'C'], ['D']
22 | ]
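22a | # each sub-list is one loading round: parents must load before their embedded children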
23 |
24 | d = design.Design()
25 | d.addCollections(itertools.chain(*expected))
26 | d.setDenormalizationParent('B', 'A')
27 | d.setDenormalizationParent('C', 'A')
28 | d.setDenormalizationParent('D', 'B')
29 | print d
30 |
31 | loadOrder = utilmethods.buildLoadingList(d)
32 | print loadOrder
33 | self.assertNotEqual(loadOrder, None)
34 |
35 | # Go through each round and pop out collections
36 | # as we simulate them being loaded
37 | for loadRound in expected:
38 | while len(loadRound) > 0:
39 | collection = loadOrder.pop(0)
40 | self.assertNotEqual(collection, None)
41 | self.assertTrue(collection in loadRound)
42 | loadRound.remove(collection)
43 | ## WHILE
44 | ## FOR
45 |
46 | # Make sure that we processed all of our collections
47 | self.assertEqual(0, len(loadOrder))
48 |
49 | ## DEF
50 |
51 | ## CLASS
52 |
53 | if __name__ == '__main__':
54 | unittest.main()
55 | ## MAIN
--------------------------------------------------------------------------------
/tests/util/unittest_configutil.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | import os, sys
5 | basedir = os.path.realpath(os.path.dirname(__file__))
6 | sys.path.append(os.path.join(basedir, "../../src"))
7 |
8 | import unittest
9 | from pprint import pprint, pformat
10 |
11 | from util import configutil
12 |
13 | class TestConfigUtil(unittest.TestCase):
14 |
15 | def setUp(self):
16 | pass
17 |
18 | def testMakeDefaultConfig(self):
19 | c = configutil.makeDefaultConfig()
20 | self.assertIsNotNone(c)
21 | for sect in configutil.ALL_SECTIONS:
22 | self.assertIn(sect, c.sections())
23 | for key, desc, default in configutil.DEFAULT_CONFIG[sect]:
24 | self.assertIn(key, c.options(sect))
25 | self.assertEqual(default, c.get(sect, key))
26 | ## DEF
27 |
28 | ## CLASS
29 |
30 | if __name__ == '__main__':
31 | unittest.main()
32 | ## MAIN
--------------------------------------------------------------------------------
/tests/util/unittest_histogram.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | import os, sys
5 | import string
6 | import random
7 | import unittest
8 | from pprint import pprint, pformat
9 |
10 | basedir = os.path.realpath(os.path.dirname(__file__))
11 | sys.path.append(os.path.join(basedir, "../../src"))
12 | from util.histogram import Histogram
13 |
14 | class TestHistogram(unittest.TestCase):
15 |
16 | def setUp(self):
17 | pass
18 |
19 | def testPickle(self):
20 | h = Histogram()
21 | letters = [ x for x in string.letters ] + ["-"]
22 |
23 | for i in xrange(0, 100):
24 | key = ""
25 | for x in xrange(0, 10):
26 | key += random.choice(letters)
27 | assert len(key) > 0
28 |
29 | h.put(key, delta=random.randint(1, 10))
30 | assert h[key] > 0
31 | ## FOR
32 |
33 | # Serialize
34 | import pickle
35 | p = pickle.dumps(h, -1)
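35a | # protocol -1 selects the highest pickle protocol available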
36 | assert p
37 |
38 | # Deserialize
39 | clone = pickle.loads(p)
40 | assert clone
41 |
42 | for key in h.keys():
43 | self.assertEquals(h[key], clone[key])
44 | ## FOR
45 | self.assertEquals(h.getSampleCount(), clone.getSampleCount())
46 | self.assertEquals(sorted(h.getMinCountKeys()), sorted(clone.getMinCountKeys()))
47 | ## DEF
48 |
49 | ## CLASS
50 |
51 | if __name__ == '__main__':
52 | unittest.main()
53 | ## MAIN
--------------------------------------------------------------------------------
/tests/util/unittest_mathutil.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | import os, sys
5 | basedir = os.path.realpath(os.path.dirname(__file__))
6 | sys.path.append(os.path.join(basedir, "../../src"))
7 |
8 | import unittest
9 | from pprint import pprint, pformat
10 |
11 | from util import mathutil
12 |
13 | class TestMathUtil(unittest.TestCase):
14 |
15 | def setUp(self):
16 | pass
17 |
18 | def testPercentile(self):
19 | data = [
20 | (range(10), 0.25, 2.25),
21 | (range(10), 0.75, 6.75),
22 | (range(10), 0.50, 4.5),
23 | (range(11), 0.50, 5)
24 | ]
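24a | # expected values follow linear interpolation at rank p * (len(values) - 1)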
25 | for values, p, expected in data:
26 | actual = mathutil.percentile(values, p)
27 | self.assertEqual(expected, actual)
28 | ## DEF
29 |
30 | ## CLASS
31 |
32 | if __name__ == '__main__':
33 | unittest.main()
34 | ## MAIN
--------------------------------------------------------------------------------
/tests/util/unittest_utilmethods.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | import os, sys
5 | basedir = os.path.realpath(os.path.dirname(__file__))
6 | sys.path.append(os.path.join(basedir, "../../src"))
7 |
8 | import unittest
9 | from pprint import pprint, pformat
10 |
11 | import util
12 |
13 | class TestUtilMethods(unittest.TestCase):
14 |
15 | def setUp(self):
16 | pass
17 |
18 | def getAllKeys(self, d, keys=None):
19 | if keys is None: keys = []
20 | for k, v in d.iteritems():
21 | if k not in keys: keys.append(k)
22 | if type(v) == dict:
23 | self.getAllKeys(v, keys)
24 | return keys
25 | ## DEF
26 |
27 | def testEscapeFieldNames(self):
28 | content = [
29 | {'$query': {'_id': '1cba73b8a555ba442a3630ccf735dffd/14'}},
30 | {'$query': {'_id': {'$in': []}}},
31 | {'count': '107f3bf172abf9dae6458f1dbb0d4ad6/11',
32 | 'query': {'md5': {'$in': ['c3117f341b734d3ce6e71608480de82d/34']}}},
33 | {'$query': {'foo.bar': 1234}},
34 | ]
35 |
36 | for i in xrange(0, len(content)):
37 | orig = content[i]
38 |
39 | escaped = util.escapeFieldNames(content[i])
40 | self.assertNotEqual(escaped, None)
41 | keys = self.getAllKeys(escaped)
42 | for k in keys:
43 | self.assertFalse(k.startswith('$'), pformat(escaped))
44 | self.assertEqual(-1, k.find("."))
45 | print pformat(escaped)
46 | ## FOR
47 | ## DEF
48 |
49 | ## CLASS
50 |
51 | if __name__ == '__main__':
52 | unittest.main()
53 | ## MAIN
--------------------------------------------------------------------------------
/tests/workload/unittest_ophasher.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | import os, sys
5 | basedir = os.path.realpath(os.path.dirname(__file__))
6 | sys.path.append(os.path.join(basedir, "../../src"))
7 |
8 | import unittest
9 |
10 | from util import constants
11 | from workload.ophasher import OpHasher
12 |
13 | class TestOpHasher(unittest.TestCase):
14 |
15 | def setUp(self):
16 | self.hasher = OpHasher()
17 | pass
18 |
19 | def genQuery(self, query):
20 | return [ {constants.REPLACE_KEY_DOLLAR_PREFIX + "query": query} ]
21 |
22 | def genUpdate(self, query, update):
23 | return [ query, update ]
24 |
25 | def testHashQuery(self):
26 | op = {
27 | "collection": u'ABC',
28 | "query_content": self.genQuery({"a": 2}),
29 | "type": "$query",
30 | }
31 | h0 = self.hasher.hash(op)
32 | self.assertNotEqual(h0, None)
33 |
34 | op["query_content"] = self.genQuery({"a": 3})
35 | h1 = self.hasher.hash(op)
36 | self.assertEqual(h0, h1)
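36a | # constant values are abstracted away, so {"a": 2} and {"a": 3} hash identically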
37 |
38 | op["query_content"] = self.genQuery({"a": {"$all": [2, 3]}})
39 | h2 = self.hasher.hash(op)
40 | self.assertNotEqual(h0, h2)
41 | ## DEF
42 |
43 | def testComplexQuery(self):
44 | content = {u'_id': u'7df2cdb0268fe84ad602e228d75f4812/108',
45 | u'cid': {u'#oid': u'310794ef49b9b02c7f29b1ff64c6f7b3/26'},
46 | u'd': u'b34918b94d030d5b288053f08258f1c9/10',
47 | u'g': u'5e3f1e67d663a535fe0ceeab07dd0e12/12',
48 | u'hid': u'd259f04f68e37fdebff7c55b67a04fb7/34',
49 | u'hy': {u'0': {u'n': 0, u't': 0},
50 | u'1': {u'n': 0, u't': 0},
51 | u'10': {u'n': 0, u't': 0},
52 | u'11': {u'n': 0, u't': 0},
53 | u'12': {u'n': 0, u't': 0},
54 | u'13': {u'n': 0, u't': 0},
55 | u'14': {u'n': 0, u't': 0},
56 | u'15': {u'n': 0, u't': 0},
57 | u'16': {u'n': 0, u't': 0},
58 | u'17': {u'n': 0, u't': 0},
59 | u'18': {u'n': 0, u't': 0},
60 | u'19': {u'n': 0, u't': 0},
61 | u'2': {u'n': 0, u't': 0},
62 | u'20': {u'n': 0, u't': 0},
63 | u'21': {u'n': 0, u't': 0},
64 | u'22': {u'n': 0, u't': 0},
65 | u'23': {u'n': 0, u't': 0},
66 | u'3': {u'n': 0, u't': 0},
67 | u'4': {u'n': 0, u't': 0},
68 | u'5': {u'n': 0, u't': 0},
69 | u'6': {u'n': 0, u't': 0},
70 | u'7': {u'n': 0, u't': 0},
71 | u'8': {u'n': 0, u't': 0},
72 | u'9': {u'n': 0, u't': 0}},
73 | u'i': u'22922d9f495e1502e3af3dac1a8a4a8b/22'}
74 | op = {
75 | "collection": u'ABC',
76 | "query_content": self.genQuery(content),
77 | "type": "$query",
78 | }
79 | h0 = self.hasher.hash(op)
80 | self.assertNotEqual(h0, None)
81 | ## DEF
82 |
83 | def testHashUpdate(self):
84 | whereClause = {"u_id": 123, "i_id": 456}
85 | updateClause = {"rating": 999}
86 |
87 | op = {
88 | "collection": u'ABC',
89 | "query_content": self.genUpdate(whereClause, updateClause),
90 | "type": "$update",
91 | }
92 | h0 = self.hasher.hash(op)
93 | self.assertNotEqual(h0, None)
94 |
95 | newWhere = dict(whereClause.items() + [("XXX", 123)])
96 | op["query_content"] = self.genUpdate(newWhere, updateClause)
97 | h1 = self.hasher.hash(op)
98 | self.assertNotEqual(h0, h1)
99 |
100 | newUpdate = dict(updateClause.items() + [("XXX", 123)])
101 | op["query_content"] = self.genUpdate(whereClause, newUpdate)
102 | h2 = self.hasher.hash(op)
103 | self.assertNotEqual(h0, h2)
104 | ## DEF
105 |
106 | ## CLASS
107 |
108 | if __name__ == '__main__':
109 | unittest.main()
110 | ## MAIN
--------------------------------------------------------------------------------
/tests/workload/unittest_utilmethods.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import os, sys
4 |
5 | basedir = os.path.realpath(os.path.dirname(__file__))
6 | sys.path.append(os.path.join(basedir, "../../src"))
7 |
8 | import unittest
9 |
10 | import workload
11 | from util import constants
12 |
13 | class TestUtilMethods(unittest.TestCase):
14 |
15 | def testGetReferencedFields(self):
16 | op = {
17 | 'collection': 'blah',
18 | 'predicates': { },
19 | 'query_aggregate': True,
20 | 'query_content': [ ],
21 | 'resp_content': [{'n': 16, 'ok': 1}],
22 | 'type': constants.OP_TYPE_QUERY,
23 | }
24 | expected = set()
25 | for i in xrange(4):
26 | keyName = 'key%02d' % i
27 | for ii in xrange(10):
28 | op['query_content'].append({"#query": {keyName: {"#gt": i*ii}}})
29 | expected.add(keyName)
30 | op['predicates'][keyName] = constants.PRED_TYPE_RANGE
31 | expected = sorted(expected)
32 | #print "EXPECTED:", expected
33 |
34 | fields = workload.getReferencedFields(op)
35 | #print "FIELDS:", fields
36 | self.assertIsNotNone(fields)
37 | self.assertIsInstance(fields, tuple)
38 | self.assertEquals(len(expected), len(fields))
39 |
40 | for i in xrange(len(expected)):
41 | self.assertEquals(expected[i], fields[i])
42 | ## FOR
43 | ## DEF
44 |
45 | def testIsOpRegex(self):
46 | op = {
47 | 'collection': 'blah',
48 | 'predicates': {'_id': constants.PRED_TYPE_REGEX},
49 | 'query_aggregate': True,
50 | 'query_content': [
51 | {'#query': {'_id': {'#options': 'XXXXXXX',
52 | '#regex': 'YYYYY'}},
53 | 'count': 'site.songs',
54 | 'fields': None}],
55 | 'query_group': None,
56 | 'query_hash': 3563430808431869716L,
57 | 'query_id': 579750519L,
58 | 'query_limit': -1,
59 | 'query_offset': 0,
60 | 'query_size': 125,
61 | 'query_time': 1338410992.894204,
62 | 'resp_content': [{'n': 16, 'ok': 1}],
63 | 'resp_id': 108641633L,
64 | 'resp_size': 64,
65 | 'resp_time': 1338410992.911907,
66 | 'type': constants.OP_TYPE_QUERY,
67 | 'update_multi': None,
68 | 'update_upsert': None
69 | }
70 |
71 | ret = workload.isOpRegex(op)
72 | self.assertTrue(ret)
73 |
74 | ## DEF
75 |
76 |
77 | ## CLASS
78 |
79 | if __name__ == '__main__':
80 | unittest.main()
81 | ## MAIN
--------------------------------------------------------------------------------
/tests/workload/unittest_workloadcombinerwithtpcc.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | import os, sys
4 | basedir = os.path.realpath(os.path.dirname(__file__))
5 | sys.path.append(os.path.join(basedir, "../search"))
6 | sys.path.append(os.path.join(basedir, ".."))
7 | sys.path.append(os.path.join(basedir, "../../src"))
8 |
9 | import unittest
10 | from workload.workloadcombiner import WorkloadCombiner
11 | from tpcctestcase import TPCCTestCase as CostModelTestCase
12 | from costmodel.disk import DiskCostComponent
13 | from search import Design
14 | from tpcc import constants as tpccConstants
15 |
16 | class TestWorkloadCombiner(CostModelTestCase):
17 |
18 | def setUp(self):
19 | CostModelTestCase.setUp(self)
20 | self.cm = DiskCostComponent(self.state)
21 | self.col_names = [ x for x in self.collections.iterkeys()]
22 | ## DEF
23 |
24 | def testQueriesCombination(self):
25 | """Test if the total number of queries are reduced"""
26 | original_number_of_queries = 0
27 | for sess in self.workload:
28 | for op in sess["operations"]:
29 | original_number_of_queries += 1
30 |
31 | print "orignal number of queries: " + str(original_number_of_queries)
32 |
33 | # Initialize a combiner
34 | combiner = WorkloadCombiner(self.col_names, self.workload)
35 |
36 | # initialize a design with denormalization
37 | d = Design()
38 | for col_name in self.collections.iterkeys():
39 | d.addCollection(col_name)
40 |
41 | d.setDenormalizationParent(tpccConstants.TABLENAME_ORDER_LINE, tpccConstants.TABLENAME_ORDERS)
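41a | # with ORDER_LINE embedded in ORDERS, matching queries on the two collections can be combined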
42 |
43 | combinedWorkload = combiner.process(d)
44 |
45 | number_of_queries_from_combined_workload = 0
46 | for sess in combinedWorkload:
47 | for op in sess["operations"]:
48 | number_of_queries_from_combined_workload += 1
49 |
50 | print "number of queries after query combination: " + str(number_of_queries_from_combined_workload)
51 |
52 | self.assertGreater(original_number_of_queries, number_of_queries_from_combined_workload)
53 |
54 | def testDiskCostNotChangedAfterQueryCombination(self):
55 | """Disk cost should not be changed after query combination"""
56 | # design without denormalization
57 | d = Design()
58 | for col_name in self.collections.iterkeys():
59 | d.addCollection(col_name)
60 |
61 | cost0 = self.cm.getCost(d)
62 | print "cost0 " + str(cost0)
63 |
64 | # Initialize a combiner
65 | combiner = WorkloadCombiner(self.col_names, self.workload)
66 |
67 | # initialize a design with denormalization
68 | # design with denormalization
69 | d = Design()
70 | for col_name in self.collections.iterkeys():
71 | d.addCollection(col_name)
72 | self.state.invalidateCache(col_name)
73 |
74 | d.setDenormalizationParent(tpccConstants.TABLENAME_ORDER_LINE, tpccConstants.TABLENAME_ORDERS)
75 |
76 | combinedWorkload = combiner.process(d)
77 | self.state.updateWorkload(combinedWorkload)
78 |
79 | self.cm.reset()
80 | self.cm.state.reset()
81 | cost1 = self.cm.getCost(d)
82 |
83 | print "cost1 " + str(cost1)
84 |
85 | self.assertEqual(cost0, cost1)
86 | ## CLASS
87 |
88 | if __name__ == '__main__':
89 | unittest.main()
90 | ## MAIN
91 |
--------------------------------------------------------------------------------