├── README ├── adb.py ├── adisp.py ├── mysql_benchmark.py └── threadpool.py /README: -------------------------------------------------------------------------------- 1 | This is a simple benchmark for various asynchronous Python MySQL 2 | client libraries. 3 | 4 | The client libraries tested here are: 5 | 6 | - Twisted's adbapi which uses the MySQL client library written in C. 7 | - the pure Python txMySQL asynchronous client library 8 | - my own adb.py interface using tornado and adisp 9 | 10 | The adbapi and txMySQL libraries assume they're running on the Twisted 11 | framework. The benchmarks allow you to run different reactor 12 | implementations: 13 | 14 | - Twisted's default reactor 15 | - the Tornado-based reactor for Twisted 16 | 17 | The adb.py library is designed to run directly on top of Tornado and 18 | uses adisp.py to make asynchronous programming a lot easier. 19 | 20 | Here is how you write asynchronous database code with the adb and 21 | adisp modules: 22 | 23 | from adisp import process 24 | from adb import Database 25 | 26 | def __init__(self): 27 | self.adb = Database(driver="psycopg2", 28 | host='DATABASE_HOST', 29 | database='DATABASE_DB', 30 | user='DATABASE_USER', 31 | password='DATABASE_PASSWD', 32 | num_threads=3, 33 | tx_connection_pool_size=2, 34 | queue_timeout=0.001) 35 | 36 | 37 | @process 38 | def someFunctionInvokedFromIOLoop(self): 39 | # Drop the table if it exists 40 | yield self.adb.runOperation("drop table if exists benchmark") 41 | # Create a table to insert the data into 42 | yield self.adb.runOperation(""" 43 | create table benchmark ( 44 | userid int not null primary key, 45 | data VARCHAR(100) 46 | ); 47 | """) 48 | rows_to_insert = 100000 49 | # Insert some rows in parallel 50 | start_time = time.time() 51 | stmts = [] 52 | for i in xrange(rows_to_insert): 53 | stmts.append( 54 | ("insert into benchmark (userid, data) values (%s, %s)", 55 | (i, i))) 56 | numrows = yield map(self.adb.runOperation, stmts) 57 | end_time = 
time.time() 58 | 59 | rows = yield self.adb.runQuery("select count(*) from benchmark") 60 | print 'inserted %s records, time taken = %s seconds' % \ 61 | (rows, end_time - start_time) 62 | 63 | You can also use transactions: 64 | 65 | @process 66 | def transactions(self): 67 | txId = yield self.adb.beginTransaction() 68 | yield self.adb.runOperation( 69 | "insert into mytable (userid, data) values (%s, %s)", 70 | (1, "test"), 71 | txId) 72 | yield self.adb.commitTransaction(txId) 73 | 74 | To rollback a transaction, use rollbackTransaction(txId) instead of 75 | commitTransaction(). 76 | 77 | 78 | 79 | The command line options for the benchmark are the following: 80 | 81 | --db The database to use 82 | --dbhost Database host 83 | --dbpasswd Database user's password 84 | --dbuser Database user to use 85 | --pool_size Database connection pool size 86 | --use_adb Use adb.py database module 87 | --use_adbapi Use twisted's adbapi module 88 | --use_tornado Use tornado twisted reactor instead of twisted's reactor 89 | --use_txmysql Use txMySQL database module. Only works with Twisted and 90 | MySQL 91 | 92 | Below are some benchmark results. The benchmark program runs on my 93 | Hackintosh Core i7-2600K 3.4GHz (4 cores 8 threads) running MacOS X 94 | 10.6.7. The database servers were run on the same machine. The 95 | benchmark connects to the databases using TCP sockets instead of Unix 96 | domain sockets. 97 | 98 | The MySQL server version used was 5.1, while Postgres version was 99 | 9.0.4. 
100 | 101 | The pool size specified was --pool_size=10 102 | 103 | Here are the times in seconds reported by the program: 104 | 105 | MySQL: 106 | Tornado Twisted 107 | adb 11.79 N/A 108 | adbapi 18.99 19.74 109 | txMySQL 45.19 43.80 (numbers from a previous version of this benchmark) 110 | 111 | 112 | Postgres: 113 | Tornado Twisted 114 | adb 9.03 N/A 115 | adbapi 16.49 17.00 116 | 117 | 118 | 119 | Conclusion 120 | ========== 121 | 122 | The new adb.py module running on top of Tornado is 40% faster than 123 | Twisted's adbapi. As a bonus, in this benchmark the memory usage of 124 | adb was 140MB versus the 300MB used by Twisted's adbapi. 125 | 126 | When used with adisp, programming asynchronous database operations 127 | with adb and Tornado is a real breeze, since you no longer need to 128 | worry about creating callbacks to handle the results from the 129 | database. 130 | 131 | 132 | References 133 | ========== 134 | 135 | The txMySQL implementation tested against is available in the main 136 | trunk at: 137 | 138 | https://github.com/hybridlogic/txMySQL 139 | 140 | The Tornado-based reactor implementation used is available in the main 141 | trunk at: 142 | 143 | https://github.com/facebook/tornado 144 | -------------------------------------------------------------------------------- /adb.py: -------------------------------------------------------------------------------- 1 | # Asynchronous database interface for Tornado with transaction support. 2 | # 3 | # Author: Ovidiu Predescu 4 | # Date: August 2011 5 | 6 | from functools import partial 7 | import psycopg2 8 | from collections import deque 9 | 10 | import tornado.ioloop 11 | 12 | from threadpool import ThreadPool 13 | from adisp import process, async 14 | 15 | class Database: 16 | """Asynchronous database interface. 17 | 18 | The `driver' argument specifies which database to use. 
Possible 19 | values are: 20 | 21 | MySQLdb - for MySQL 22 | psycopg2 - for Postgres 23 | """ 24 | def __init__(self, 25 | driver=None, 26 | database=None, user=None, password=None, 27 | host='localhost', 28 | ioloop=tornado.ioloop.IOLoop.instance(), 29 | num_threads=10, 30 | tx_connection_pool_size=5, 31 | queue_timeout=1): 32 | if not(driver): 33 | raise ValueError("Missing 'driver' argument") 34 | self._driver = driver 35 | self._database = database 36 | self._user = user 37 | self._password = password 38 | self._host = host 39 | self._threadpool = ThreadPool( 40 | per_thread_init_func=self.create_connection, 41 | per_thread_close_func=self.close_connection, 42 | num_threads=num_threads, 43 | queue_timeout=queue_timeout) 44 | self._ioloop = ioloop 45 | 46 | # Connection pool for transactions 47 | self._connection_pool = [] 48 | for i in xrange(tx_connection_pool_size): 49 | conn = self.create_connection() 50 | self._connection_pool.append(conn) 51 | self._waiting_on_connection = deque() 52 | 53 | def create_connection(self): 54 | """This method is executed in a worker thread. 55 | 56 | Initializes the per-thread state. In this case we create one 57 | database connection per-thread. 
58 | """ 59 | if self._driver == "psycopg2": 60 | try: 61 | import psycopg2 62 | conn = psycopg2.connect(database=self._database, 63 | user=self._user, 64 | password=self._password, 65 | host=self._host) 66 | except Exception as ex: 67 | raise ex 68 | elif self._driver == "MySQLdb": 69 | try: 70 | import MySQLdb 71 | conn = MySQLdb.connect(db=self._database, 72 | user=self._user, 73 | passwd=self._password, 74 | host=self._host, 75 | port=3306) 76 | except Exception as ex: 77 | raise ex 78 | else: 79 | raise ValueError("Unknown driver %s" % self._driver) 80 | return conn 81 | 82 | def close_connection(self, conn): 83 | conn.close() 84 | 85 | def stop(self): 86 | self._threadpool.stop() 87 | for conn in self._connection_pool: 88 | conn.close() 89 | 90 | @async 91 | def beginTransaction(self, callback): 92 | """Begins a transaction. Picks up a transaction from the pool 93 | and passes it to the callback. If none is available, adds the 94 | callback to `_waiting_on_connection'. 95 | """ 96 | if self._connection_pool: 97 | conn = self._connection_pool.pop() 98 | callback(conn) 99 | else: 100 | self._waiting_on_connection.append(callback) 101 | 102 | @async 103 | def commitTransaction(self, connection, callback): 104 | self._threadpool.add_task( 105 | partial(self._commitTransaction, connection, callback)) 106 | 107 | def _commitTransaction(self, conn, callback, thread_state=None): 108 | """Invoked in a worker thread. 109 | """ 110 | conn.commit() 111 | self._ioloop.add_callback( 112 | partial(self._releaseConnectionInvokeCallback, conn, callback)) 113 | 114 | @async 115 | def rollbackTransaction(self, connection, callback): 116 | self._threadpool.add_task( 117 | partial(self._rollbackTransaction, connection, callback)) 118 | 119 | def _rollbackTransaction(self, conn, callback, thread_state=None): 120 | """Invoked in a worker thread. 
121 | """ 122 | conn.rollback() 123 | self._ioloop.add_callback( 124 | partial(self._releaseConnectionInvokeCallback, conn, callback)) 125 | 126 | def _releaseConnectionInvokeCallback(self, conn, callback): 127 | """Release the connection back in the connection pool and 128 | invoke the callback. Invokes any waiting callbacks before 129 | releasing the connection into the pool. 130 | """ 131 | # First invoke the callback to let the program know we're done 132 | # with the transaction. 133 | callback(conn) 134 | # Now check to see if we have any pending clients. If so pass 135 | # them the newly released connection. 136 | if self._waiting_on_connection: 137 | callback = self._waiting_on_connection.popleft() 138 | callback(conn) 139 | else: 140 | self._connection_pool.append(conn) 141 | 142 | @async 143 | def runQuery(self, query, args=None, conn=None, callback=None): 144 | """Send a SELECT query to the database. 145 | 146 | The callback is invoked with all the rows in the result. 147 | """ 148 | self._threadpool.add_task( 149 | partial(self._query, query, args, conn), callback) 150 | 151 | def _query(self, query, args, conn=None, thread_state=None): 152 | """This method is called in a worker thread. 153 | 154 | Execute the query and return the result so it can be passed as 155 | argument to the callback. 156 | """ 157 | if not conn: 158 | conn = thread_state 159 | cursor = conn.cursor() 160 | cursor.execute(query, args) 161 | rows = cursor.fetchall() 162 | cursor.close() 163 | return rows 164 | 165 | @async 166 | def runOperation(self, stmt, args=None, conn=None, callback=None): 167 | """Execute a SQL statement other than a SELECT. 168 | 169 | The statement is committed immediately. The number of rows 170 | affected by the statement is passed as argument to the 171 | callback. 
172 | """ 173 | self._threadpool.add_task( 174 | partial(self._execute, stmt, args, conn), callback) 175 | 176 | def _execute(self, stmt, args, conn=None, thread_state=None): 177 | """This method is called in a worker thread. 178 | 179 | Executes the statement. 180 | """ 181 | # Check if stmt is a tuple. This can happen when we use map() 182 | # with adisp to execute multiple statements in parallel. 183 | if isinstance(stmt, tuple): 184 | args = stmt[1] 185 | stmt = stmt[0] 186 | if not conn: 187 | conn = thread_state 188 | should_commit = True 189 | else: 190 | should_commit = False 191 | cursor = conn.cursor() 192 | cursor.execute(stmt, args) 193 | if should_commit: 194 | conn.commit() 195 | rowcount = cursor.rowcount 196 | cursor.close() 197 | return rowcount 198 | -------------------------------------------------------------------------------- /adisp.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | ''' 3 | Adisp is a library that allows structuring code with asynchronous calls and 4 | callbacks without defining callbacks as separate functions. The code then 5 | becomes sequential and easy to read. The library is not a framework by itself 6 | and can be used in other environments that provides asynchronous working model 7 | (see an example with Tornado server in proxy_example.py). 8 | 9 | Usage: 10 | 11 | ## Organizing calling code 12 | 13 | All the magic is done with Python 2.5 decorators that allow for control flow to 14 | leave a function, do sometihing else for some time and then return into the 15 | calling function with a result. 
So the function that makes asynchronous calls 16 | should look like this: 17 | 18 | @process 19 | def my_handler(): 20 | response = yield some_async_func() 21 | data = parse_response(response) 22 | result = yield some_other_async_func(data) 23 | store_result(result) 24 | 25 | Each `yield` is where the function returns and lets the framework around it to 26 | do its job. And the code after `yield` is what usually goes in a callback. 27 | 28 | The @process decorator is needed around such a function. It makes it callable 29 | as an ordinary function and takes care of dispatching callback calls back into 30 | it. 31 | 32 | ## Writing asynchronous function 33 | 34 | In the example above functions "some_async_func" and "some_other_async_func" 35 | are those that actually run an asynchronous process. They should follow two 36 | conditions: 37 | 38 | - accept a "callback" parameter with a callback function that they should call 39 | after an asynchronous process is finished 40 | - a callback should be called with one parameter -- the result 41 | - be wrapped in the @async decorator 42 | 43 | The @async decorator makes a function call lazy allowing the @process that 44 | calls it to provide a callback to call. 45 | 46 | Using async with @-syntax is most convenient when you write your own 47 | asynchronous function (and can make your callback parameter to be named 48 | "callback"). But when you want to call some library function you can wrap it in 49 | async in place. 
50 | 51 | # call http.fetch(url, callback=callback) 52 | result = yield async(http.fetch) 53 | 54 | # call http.fetch(url, cb=safewrap(callback)) 55 | result = yield async(http.fetch, cbname='cb', cbwrapper=safewrap)(url) 56 | 57 | Here you can use two optional parameters for async: 58 | 59 | - `cbname`: a name of a parameter in which the function expects callbacks 60 | - `cbwrapper`: a wrapper for the callback iself that will be applied before 61 | calling it 62 | 63 | ## Chain calls 64 | 65 | @async function can also be @process'es allowing to effectively chain 66 | asynchronous calls as it can be done with normal functions. In this case the 67 | @async decorator shuold be the outer one: 68 | 69 | @async 70 | @process 71 | def async_calling_other_asyncs(arg, callback): 72 | # .... 73 | 74 | ## Multiple asynchronous calls 75 | 76 | The library also allows to call multiple asynchronous functions in parallel and 77 | get all their result for processing at once: 78 | 79 | @async 80 | def async_http_get(url, callback): 81 | # get url asynchronously 82 | # call callback(response) at the end 83 | 84 | @process 85 | def get_stat(): 86 | urls = ['http://.../', 'http://.../', ... ] 87 | responses = yield map(async_http_get, urls) 88 | 89 | After *all* the asynchronous calls will complete `responses` will be a list of 90 | responses corresponding to given urls. 
91 | ''' 92 | from functools import partial 93 | 94 | class CallbackDispatcher(object): 95 | def __init__(self, generator): 96 | self.g = generator 97 | try: 98 | self.call(self.g.next()) 99 | except StopIteration: 100 | pass 101 | 102 | def _send_result(self, results, single): 103 | try: 104 | result = results[0] if single else results 105 | self.call(self.g.send(result)) 106 | except StopIteration: 107 | pass 108 | 109 | def call(self, callers): 110 | single = not hasattr(callers, '__iter__') 111 | if single: 112 | callers = [callers] 113 | self.call_count = len(list(callers)) 114 | results = [None] * self.call_count 115 | if self.call_count == 0: 116 | self._send_result(results, single) 117 | else: 118 | for count, caller in enumerate(callers): 119 | caller(callback=partial(self.callback, results, count, single)) 120 | 121 | def callback(self, results, index, single, arg): 122 | self.call_count -= 1 123 | results[index] = arg 124 | if self.call_count > 0: 125 | return 126 | self._send_result(results, single) 127 | 128 | def process(func): 129 | def wrapper(*args, **kwargs): 130 | CallbackDispatcher(func(*args, **kwargs)) 131 | return wrapper 132 | 133 | def async(func, cbname='callback', cbwrapper=lambda x: x): 134 | def wrapper(*args, **kwargs): 135 | def caller(callback): 136 | kwargs[cbname] = cbwrapper(callback) 137 | return func(*args, **kwargs) 138 | return caller 139 | return wrapper 140 | -------------------------------------------------------------------------------- /mysql_benchmark.py: -------------------------------------------------------------------------------- 1 | # Author: Ovidiu Predescu 2 | # Date: July 2011 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); you may 5 | # not use this file except in compliance with the License. 
You may obtain 6 | # a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | # License for the specific language governing permissions and limitations 14 | # under the License. 15 | """ 16 | A simple benchmark for various asynchronous Python MySQL client libraries. 17 | """ 18 | 19 | import sys 20 | import time 21 | 22 | from adisp import process 23 | import adb 24 | 25 | import tornado.options 26 | from tornado.options import define, options 27 | 28 | define("dbhost", default="127.0.0.1", help="Database host") 29 | define("dbuser", default="", help="Database user to use") 30 | define("dbpasswd", default="", help="User's password") 31 | define("db", default="test", help="Database to use") 32 | 33 | define("use_tornado", default=False, 34 | help="Use tornado twisted reactor instead of twisted's reactor") 35 | 36 | define("use_txmysql", default=False, help="Use txMySQL database module") 37 | define("use_adbapi", default=False, help="Use twisted's adbapi module") 38 | define("use_adb", default=False, help="Use our own adb module") 39 | define("use_momoko", default=False, help="Use the momoko module") 40 | 41 | define("use_postgres", default=False, help="Use Postgres with psycopg2") 42 | define("use_mysql", default=False, help="Use MySQL") 43 | 44 | define("pool_size", default=10, help="Database connection pool size") 45 | 46 | class DbBenchmark: 47 | def __init__(self): 48 | self._pool = None 49 | 50 | def run(self): 51 | d = self._createTable() 52 | d.addCallback(self._startBenchmark) 53 | d.addErrback(self._dbError) 54 | 55 | def _dbError(self, error): 56 | print "Error accessing the database: %s" % error 57 | 58 | def _createTable(self): 59 | print 'creating benchmark table' 60 | return 
self._pool.runOperation( 61 | """drop table if exists benchmark; 62 | create table benchmark( 63 | id INT NOT NULL PRIMARY KEY, 64 | data VARCHAR(100) 65 | ) 66 | """) 67 | 68 | def _startBenchmark(self, ignored): 69 | print "Starting benchmark %s" % self.__class__ 70 | self._start_time = time.time() 71 | self._totalNumInserts = 100000 72 | self._numDone = 0 73 | for i in xrange(self._totalNumInserts): 74 | d = self._doInsert(i) 75 | d.addCallback(self._insertDone) 76 | d.addErrback(self._dbError) 77 | 78 | def _doInsert(self, i): 79 | raise NotImplementedError 80 | 81 | def _insertDone(self, ignored): 82 | self._numDone += 1 83 | if self._numDone == self._totalNumInserts: 84 | d = self._pool.runQuery("select count(*) from benchmark") 85 | d.addCallback(self._allInsertsDone) 86 | d.addErrback(self._dbError) 87 | 88 | def _allInsertsDone(self, results): 89 | self._end_time = time.time() 90 | row = results[0] 91 | num = row[0] 92 | print "inserted %d records, time taken = %f seconds" %\ 93 | (num, (self._end_time - self._start_time)) 94 | reactor.stop() 95 | 96 | class TwistedDbAPI(DbBenchmark): 97 | def __init__(self): 98 | from twisted.enterprise import adbapi 99 | print "Creating twisted adbapi ConnectionPool" 100 | self._pool = adbapi.ConnectionPool(dbapiName="MySQLdb", 101 | host=options.dbhost, 102 | port=3306, 103 | unix_socket='', 104 | user=options.dbuser, 105 | passwd=options.dbpasswd, 106 | db=options.db, 107 | cp_min=options.pool_size, 108 | cp_max=options.pool_size) 109 | self._numRuns = 0 110 | 111 | def _doInsert(self, i): 112 | return self._pool.runOperation( 113 | "insert benchmark (id, data) values (%s, %s)" % (i, i)) 114 | 115 | 116 | class TxMySQL(DbBenchmark): 117 | def __init__(self): 118 | from txmysql import client 119 | print "Creating txMySQL ConnectionPool" 120 | self._pool = client.ConnectionPool(hostname=options.dbhost, 121 | username=options.dbuser, 122 | password=options.dbpasswd, 123 | database=options.db, 124 | 
num_connections=options.pool_size) 125 | 126 | def _doInsert(self, i): 127 | return self._pool.runOperation( 128 | "insert benchmark(data) values (%d)" % i) 129 | 130 | class TwistedDbPostgresAPI(DbBenchmark): 131 | def __init__(self): 132 | from twisted.enterprise import adbapi 133 | print "Creating twisted adbapi ConnectionPool" 134 | self._pool = adbapi.ConnectionPool(dbapiName="psycopg2", 135 | host=options.dbhost, 136 | user=options.dbuser, 137 | password=options.dbpasswd, 138 | database=options.db, 139 | cp_min=options.pool_size, 140 | cp_max=options.pool_size) 141 | self._numRuns = 0 142 | 143 | def _doInsert(self, i): 144 | return self._pool.runOperation( 145 | "insert into benchmark (id, data) values (%s, %s)" % (i, i)) 146 | 147 | 148 | class AsyncDatabaseBenchmark: 149 | def __init__(self, driver, 150 | database=None, user=None, password=None, host=None): 151 | self.adb = adb.Database(driver=driver, database=database, user=user, 152 | password=password, host=host, 153 | num_threads=options.pool_size, 154 | queue_timeout=0.1) 155 | 156 | def run(self): 157 | self.ioloop = tornado.ioloop.IOLoop.instance() 158 | self.ioloop.add_callback(self.whenRunningCallback) 159 | 160 | @process 161 | def whenRunningCallback(self): 162 | # Drop the table if it exists 163 | yield self.adb.runOperation("drop table if exists benchmark") 164 | yield self.adb.runOperation(""" 165 | create table benchmark ( 166 | userid int not null primary key, 167 | data VARCHAR(100) 168 | ); 169 | """) 170 | rows_to_insert = 100000 171 | # Insert some rows 172 | start_time = time.time() 173 | stmts = [] 174 | for i in xrange(rows_to_insert): 175 | stmts.append( 176 | ("insert into benchmark (userid, data) values (%s, %s)", 177 | (i, i))) 178 | numrows = yield map(self.adb.runOperation, stmts) 179 | end_time = time.time() 180 | 181 | rows = yield self.adb.runQuery("select count(*) from benchmark") 182 | print 'inserted %s records, time taken = %s seconds' % \ 183 | (rows, end_time - 
start_time) 184 | self.ioloop.stop() 185 | self.adb.stop() 186 | 187 | try: 188 | # Quick hack to test agains momoko: http://momoko.61924.nl/ 189 | import momoko 190 | has_momoko = True 191 | 192 | class MomokoDatabaseBenchmark: 193 | def __init__(self, database=None, user=None, password=None, host=None): 194 | self.adb = momoko.AdispClient({ 195 | 'host': 'localhost', 196 | 'database': database, 197 | 'user': user, 198 | 'password': password, 199 | 'min_conn': 10, 200 | 'max_conn': 10, 201 | 'cleanup_timeout': 10 202 | }) 203 | 204 | def run(self): 205 | self.ioloop = tornado.ioloop.IOLoop.instance() 206 | self.ioloop.add_callback(self.whenRunningCallback) 207 | 208 | @momoko.process 209 | def whenRunningCallback(self): 210 | # Drop the table if it exists 211 | yield self.adb.execute("drop table if exists benchmark") 212 | yield self.adb.execute(""" 213 | create table benchmark ( 214 | userid int not null primary key, 215 | data VARCHAR(100) 216 | ); 217 | """) 218 | rows_to_insert = 100000 219 | # Insert some rows 220 | start_time = time.time() 221 | stmts = [] 222 | for i in xrange(rows_to_insert): 223 | yield self.adb.execute( 224 | "insert into benchmark (userid, data) values (%s, %s)", 225 | (i, i)) 226 | end_time = time.time() 227 | 228 | cursor = yield self.adb.execute("select count(*) from benchmark") 229 | rows = cursor.fetchall() 230 | print 'inserted %s records, time taken = %s seconds' % \ 231 | (rows, end_time - start_time) 232 | self.ioloop.stop() 233 | self.adb.close() 234 | 235 | except ImportError: 236 | has_momoko = False 237 | 238 | if __name__ == "__main__": 239 | tornado.options.parse_command_line() 240 | if options.use_tornado: 241 | import tornado.platform.twisted 242 | tornado.platform.twisted.install() 243 | from twisted.internet import reactor 244 | 245 | benchmark = None 246 | if options.use_adbapi: 247 | if options.use_postgres: 248 | benchmark = TwistedDbPostgresAPI() 249 | elif options.use_mysql: 250 | benchmark = TwistedDbAPI() 251 | 
elif options.use_txmysql: 252 | # This only works with MySQL 253 | benchmark = TxMySQL() 254 | elif options.use_adb: 255 | if options.use_postgres: 256 | driver = "psycopg2" 257 | elif options.use_mysql: 258 | driver = "MySQLdb" 259 | else: 260 | print 'Use --use_postgres or --use_mysql to specify database.' 261 | sys.exit(1) 262 | benchmark = AsyncDatabaseBenchmark(driver, 263 | database=options.db, 264 | user=options.dbuser, 265 | password=options.dbpasswd, 266 | host=options.dbhost) 267 | elif options.use_momoko: 268 | if not has_momoko: 269 | print 'Could not find momoko in PYTHONPATH' 270 | sys.exit(1) 271 | benchmark = MomokoDatabaseBenchmark(database=options.db, 272 | user=options.dbuser, 273 | password=options.dbpasswd, 274 | host=options.dbhost) 275 | 276 | if benchmark: 277 | benchmark.run() 278 | else: 279 | print 'Could not find a useful combination of options' 280 | print 'At least one of --use_adbapi or --use_txmysql should be '\ 281 | 'specified' 282 | sys.exit(1) 283 | 284 | if options.use_adb or options.use_momoko: 285 | print "Using Tornado IOLoop directly" 286 | tornado.ioloop.IOLoop.instance().start() 287 | else: 288 | reactor.suggestThreadPoolSize(options.pool_size) 289 | print "Using reactor %s" % reactor 290 | reactor.run() 291 | 292 | -------------------------------------------------------------------------------- /threadpool.py: -------------------------------------------------------------------------------- 1 | # Thread pool to be used with Tornado. 2 | # 3 | # Author: Ovidiu Predescu 4 | # Date: August 2011 5 | 6 | import sys 7 | import thread 8 | from threading import Thread 9 | from Queue import Queue, Empty 10 | from functools import partial 11 | import tornado.ioloop 12 | import time 13 | 14 | class ThreadPool: 15 | """Creates a thread pool containing `num_threads' worker threads. 16 | 17 | The caller can execute a task in a worker thread by invoking 18 | add_task(). 
class ThreadPool:
    """Creates a thread pool containing `num_threads' worker threads.

    The caller can execute a task in a worker thread by invoking
    add_task().  The `func' argument will be executed by one of the
    worker threads as soon as one becomes available.  If `func' needs
    to take any arguments, wrap the function using functools.partial.

    The caller has the option of specifying a callback to be invoked
    on the main thread's IOLoop instance.  If a callback is specified
    it is passed as argument the return value of `func'.

    You can initialize per-thread state by setting the
    `per_thread_init_func'.  This function is called before the worker
    threads are started and its return value is stored internally by
    each thread.  This state is then passed as an optional argument to
    the `func' function using the `thread_state' named argument.

    Per-thread state is useful if you want to use the thread pool for
    database interaction: create a database connection for each thread
    and store it as the thread state.

    If you don't use per-thread state, you should define your worker
    function like this (add any other arguments when using
    functools.partial):

      def func(**kw):
          ...
          return some-result

    If you plan on using per-thread state, you could use the following
    prototype:

      def func(thread_state=None):
          ...
          return some-result

    To stop the worker threads in the thread pool use the stop()
    method.

    The queue_timeout parameter sets the time queue.get() waits for an
    object to appear in the queue.  The default is 1 second, which is
    low enough for interactive usage.  It should be lowered to maybe
    0.001 (1ms) to make unittests run fast, and increased when you
    expect the thread pool to be rarely stopped (like in a production
    environment).
    """

    def __init__(self,
                 per_thread_init_func=None,
                 per_thread_close_func=None,
                 num_threads=10,
                 queue_timeout=1,
                 ioloop=None):
        # Resolve the IOLoop lazily: using IOLoop.instance() as a
        # default argument would freeze whatever loop existed at the
        # time this module was imported.
        if ioloop is None:
            ioloop = tornado.ioloop.IOLoop.instance()
        self._ioloop = ioloop
        self._num_threads = num_threads
        self._queue = Queue()
        self._queue_timeout = queue_timeout
        self._threads = []
        self._running = True
        for _ in xrange(num_threads):
            t = WorkerThread(self, per_thread_init_func, per_thread_close_func)
            t.start()
            self._threads.append(t)

    def add_task(self, func, callback=None):
        """Add a function to be invoked in a worker thread."""
        self._queue.put((func, callback))

    def stop(self):
        """Ask the workers to finish and wait for all of them to exit."""
        self._running = False
        # An explicit loop instead of map(): map() is lazy in Python 3
        # and would silently skip the joins.
        for t in self._threads:
            t.join()

class WorkerThread(Thread):
    """One pool worker: pulls (func, callback) pairs off the queue."""
    def __init__(self, pool, per_thread_init_func, per_thread_close_func):
        Thread.__init__(self)
        self._pool = pool
        self._per_thread_init_func = per_thread_init_func
        self._per_thread_close_func = per_thread_close_func

    def run(self):
        # Build this thread's private state (e.g. a DB connection).
        if self._per_thread_init_func:
            thread_state = self._per_thread_init_func()
        else:
            thread_state = None
        queue = self._pool._queue
        queue_timeout = self._pool._queue_timeout
        while self._pool._running:
            try:
                (func, callback) = queue.get(True, queue_timeout)
            except Empty:
                # Timeout: loop around and re-check _running.
                continue
            try:
                result = func(thread_state=thread_state)
            except Exception:
                # A failing task must not kill the worker (which would
                # also skip per_thread_close_func below).  Its callback
                # is never invoked in this case.
                import traceback
                traceback.print_exc()
                continue
            if callback:
                self._pool._ioloop.add_callback(partial(callback, result))
        if self._per_thread_close_func:
            self._per_thread_close_func(thread_state)