├── src ├── __init__.py ├── error.py ├── server.py ├── http_common.py ├── client.py └── push_tcp.py ├── setup.py ├── scripts └── proxy.py └── README /src/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Non-blocking HTTP components. 5 | """ 6 | 7 | from client import Client 8 | from server import Server 9 | from push_tcp import run, stop, schedule, now, running 10 | from http_common import dummy, header_dict, get_hdr, \ 11 | safe_methods, idempotent_methods, hop_by_hop_hdrs 12 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from distutils.core import setup 4 | 5 | setup(name='nbhttp', 6 | version='0.7.3', 7 | description='Non-blocking HTTP components', 8 | author='Mark Nottingham', 9 | author_email='mnot@mnot.net', 10 | url='http://github.com/mnot/nbhttp/', 11 | download_url='http://github.com/mnot/nbhttp/tarball/nbhttp-0.7.3', 12 | packages=['nbhttp'], 13 | package_dir={'nbhttp': 'src'}, 14 | scripts=['scripts/proxy.py'], 15 | classifiers=[ 16 | 'Development Status :: 4 - Beta', 17 | 'Intended Audience :: Developers', 18 | 'License :: OSI Approved :: MIT License', 19 | 'Programming Language :: Python', 20 | 'Topic :: Internet :: WWW/HTTP', 21 | 'Topic :: Internet :: Proxy Servers', 22 | 'Topic :: Internet :: WWW/HTTP :: HTTP Servers', 23 | 'Topic :: Software Development :: Libraries :: Python Modules', 24 | ] 25 | ) -------------------------------------------------------------------------------- /scripts/proxy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | A simple HTTP proxy as a demonstration. 
5 | """ 6 | 7 | 8 | import sys 9 | try: # run from dist without installation 10 | sys.path.insert(0, "..") 11 | from src import Client, Server, header_dict, run, client, schedule 12 | except ImportError: 13 | from nbhttp import Client, Server, header_dict, run, client, schedule 14 | 15 | # TODO: CONNECT support 16 | # TODO: remove headers nominated by Connection 17 | # TODO: add Via 18 | 19 | class ProxyClient(Client): 20 | read_timeout = 10 21 | connect_timeout = 15 22 | 23 | def proxy_handler(method, uri, req_hdrs, s_res_start, req_pause): 24 | # can modify method, uri, req_hdrs here 25 | def c_res_start(version, status, phrase, res_hdrs, res_pause): 26 | # can modify status, phrase, res_hdrs here 27 | res_body, res_done = s_res_start(status, phrase, res_hdrs, res_pause) 28 | # can modify res_body here 29 | return res_body, res_done 30 | c = ProxyClient(c_res_start) 31 | req_body, req_done = c.req_start(method, uri, req_hdrs, req_pause) 32 | # can modify req_body here 33 | return req_body, req_done 34 | 35 | 36 | if __name__ == "__main__": 37 | import sys 38 | port = int(sys.argv[1]) 39 | server = Server('', port, proxy_handler) 40 | run() -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | 2 | nbhttp - Tools for building non-blocking HTTP components 3 | 4 | 5 | NOTE: This is no longer maintained! See mnot/thor. 6 | 7 | * About nbhttp 8 | 9 | nbhttp is a set of libraries for building non-blocking (a.k.a. asynchronous 10 | or event-driven) HTTP clients, servers and intermediaries. 11 | 12 | Its aims are to expose full HTTP functionality in a conformant manner, with 13 | the maximum potential for performance. 14 | 15 | It is NOT YET SUITABLE FOR PRODUCTION USE. 
In particular, the code has not 16 | been optimised or benchmarked, nor are there limits on resource usage, logging 17 | or any number of other things that make a practical web server or intermediary 18 | a useful thing. 19 | 20 | However, it's lots of fun to prototype and play with. 21 | 22 | 23 | * Requirements 24 | 25 | nbhttp needs Python 2.5 or greater; see http://python.org/ 26 | 27 | Optionally, it will take advantage of the pyevent extension, if installed. 28 | See the pyevent project page. 29 | 30 | 31 | * Installation 32 | 33 | If you have setuptools, you can install from the repository: 34 | 35 | > easy_install nbhttp 36 | 37 | Otherwise, download a tarball and install using: 38 | 39 | > python setup.py install 40 | 41 | 42 | * SUPPORT, REPORTING ISSUES AND CONTRIBUTING 43 | 44 | See http://github.com/mnot/nbhttp/ to give feedback, report issues, and 45 | contribute. 46 | 47 | 48 | * License 49 | 50 | Copyright (c) 2008-2009 Mark Nottingham 51 | 52 | Permission is hereby granted, free of charge, to any person obtaining a copy 53 | of this software and associated documentation files (the "Software"), to deal 54 | in the Software without restriction, including without limitation the rights 55 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 56 | copies of the Software, and to permit persons to whom the Software is 57 | furnished to do so, subject to the following conditions: 58 | 59 | The above copyright notice and this permission notice shall be included in 60 | all copies or substantial portions of the Software. 61 | 62 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 63 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 64 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 65 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 66 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 67 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 68 | THE SOFTWARE. 69 | -------------------------------------------------------------------------------- /src/error.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | errors 5 | """ 6 | 7 | __author__ = "Mark Nottingham " 8 | __copyright__ = """\ 9 | Copyright (c) 2008-2010 Mark Nottingham 10 | 11 | Permission is hereby granted, free of charge, to any person obtaining a copy 12 | of this software and associated documentation files (the "Software"), to deal 13 | in the Software without restriction, including without limitation the rights 14 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 15 | copies of the Software, and to permit persons to whom the Software is 16 | furnished to do so, subject to the following conditions: 17 | 18 | The above copyright notice and this permission notice shall be included in 19 | all copies or substantial portions of the Software. 20 | 21 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 22 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 23 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 24 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 25 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 26 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 27 | THE SOFTWARE. 
28 | """ 29 | 30 | # General parsing errors 31 | 32 | ERR_CHUNK = { 33 | 'desc': "Chunked encoding error", 34 | } 35 | ERR_EXTRA_DATA = { 36 | 'desc': "Extra data received", 37 | } 38 | 39 | ERR_BODY_FORBIDDEN = { 40 | 'desc': "This message does not allow a body", 41 | } 42 | 43 | ERR_HTTP_VERSION = { 44 | 'desc': "Unrecognised HTTP version", # FIXME: more specific status 45 | } 46 | 47 | ERR_READ_TIMEOUT = { 48 | 'desc': "Read timeout", 49 | } 50 | 51 | ERR_TRANSFER_CODE = { 52 | 'desc': "Unknown request transfer coding", 53 | 'status': ("501", "Not Implemented"), 54 | } 55 | 56 | ERR_WHITESPACE_HDR = { 57 | 'desc': "Whitespace between request-line and first header", 58 | 'status': ("400", "Bad Request"), 59 | } 60 | 61 | ERR_TOO_MANY_MSGS = { 62 | 'desc': "Too many messages to parse", 63 | 'status': ("400", "Bad Request"), 64 | } 65 | 66 | # client-specific errors 67 | 68 | ERR_URL = { 69 | 'desc': "Unsupported or invalid URI", 70 | 'status': ("400", "Bad Request"), 71 | } 72 | ERR_LEN_REQ = { 73 | 'desc': "Content-Length required", 74 | 'status': ("411", "Length Required"), 75 | } 76 | 77 | ERR_CONNECT = { 78 | 'desc': "Connection closed", 79 | 'status': ("504", "Gateway Timeout"), 80 | } 81 | 82 | # server-specific errors 83 | 84 | ERR_HOST_REQ = { 85 | 'desc': "Host header required", 86 | } 87 | -------------------------------------------------------------------------------- /src/server.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Non-Blocking HTTP Server 5 | 6 | This library allow implementation of an HTTP/1.1 server that is 7 | "non-blocking," "asynchronous" and "event-driven" -- i.e., it achieves very 8 | high performance and concurrency, so long as the application code does not 9 | block (e.g., upon network, disk or database access). Blocking on one request 10 | will block the entire server. 
11 | 12 | Instantiate a Server with the following parameters: 13 | - host (string) 14 | - port (int) 15 | - req_start (callable) 16 | 17 | req_start is called when a request starts. It must take the following 18 | arguments: 19 | - method (string) 20 | - uri (string) 21 | - req_hdrs (list of (name, value) tuples) 22 | - res_start (callable) 23 | - req_body_pause (callable) 24 | and return: 25 | - req_body (callable) 26 | - req_done (callable) 27 | 28 | req_body is called when part of the request body is available. It must take 29 | the following argument: 30 | - chunk (string) 31 | 32 | req_done is called when the request is complete, whether or not it contains a 33 | body. It must take the following argument: 34 | - err (error dictionary, or None for no error) 35 | 36 | Call req_body_pause when you want the server to temporarily stop sending the 37 | request body, or restart. You must provide the following argument: 38 | - paused (boolean; True means pause, False means unpause) 39 | 40 | Call res_start when you want to start the response, and provide the following 41 | arguments: 42 | - status_code (string) 43 | - status_phrase (string) 44 | - res_hdrs (list of (name, value) tuples) 45 | - res_body_pause 46 | It returns: 47 | - res_body (callable) 48 | - res_done (callable) 49 | 50 | Call res_body to send part of the response body to the client. Provide the 51 | following parameter: 52 | - chunk (string) 53 | 54 | Call res_done when the response is finished, and provide the 55 | following argument if appropriate: 56 | - err (error dictionary, or None for no error) 57 | 58 | See the error module for the complete list of valid error dictionaries. 59 | 60 | Where possible, errors in the request will be responded to with the 61 | appropriate 4xx HTTP status code. However, if a response has already been 62 | started, the connection will be dropped (for example, when the request 63 | chunking or indicated length are incorrect). 
64 | """ 65 | 66 | __author__ = "Mark Nottingham " 67 | __copyright__ = """\ 68 | Copyright (c) 2008-2010 Mark Nottingham 69 | 70 | Permission is hereby granted, free of charge, to any person obtaining a copy 71 | of this software and associated documentation files (the "Software"), to deal 72 | in the Software without restriction, including without limitation the rights 73 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 74 | copies of the Software, and to permit persons to whom the Software is 75 | furnished to do so, subject to the following conditions: 76 | 77 | The above copyright notice and this permission notice shall be included in 78 | all copies or substantial portions of the Software. 79 | 80 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 81 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 82 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 83 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 84 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 85 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 86 | THE SOFTWARE. 87 | """ 88 | 89 | import os 90 | import sys 91 | import logging 92 | 93 | import push_tcp 94 | from http_common import HttpMessageHandler, \ 95 | CLOSE, COUNTED, CHUNKED, \ 96 | WAITING, \ 97 | hop_by_hop_hdrs, \ 98 | dummy, get_hdr 99 | 100 | from error import ERR_HTTP_VERSION, ERR_HOST_REQ, \ 101 | ERR_WHITESPACE_HDR, ERR_TRANSFER_CODE 102 | 103 | logging.basicConfig() 104 | log = logging.getLogger('server') 105 | log.setLevel(logging.WARNING) 106 | 107 | # FIXME: assure that the connection isn't closed before reading the entire 108 | # req body 109 | # TODO: filter out 100 responses to HTTP/1.0 clients that didn't ask for it. 110 | 111 | class Server: 112 | "An asynchronous HTTP server." 
def res_start(self, status_code, status_phrase, res_hdrs, res_body_pause):
    """
    Start a response. Must only be called once per response.

    Arguments:
     - status_code (string)
     - status_phrase (string)
     - res_hdrs (list of (name, value) tuples)
     - res_body_pause (callable taking one boolean argument)

    Returns the (res_body, res_done) callables used to send the body and
    to finish the response.

    The body delimitation is chosen here: a parseable Content-Length gives
    a counted body, an HTTP/1.x request of version 1.1 or later gets
    chunked encoding, and anything else is delimited by closing the
    connection.
    """
    self._res_body_pause_cb = res_body_pause
    # Hop-by-hop headers supplied by the application are dropped; the
    # ones this connection needs are appended below.
    res_hdrs = [hdr for hdr in res_hdrs
                if hdr[0].lower() not in hop_by_hop_hdrs]

    try:
        body_len = int(get_hdr(res_hdrs, "content-length").pop(0))
    except (IndexError, ValueError):
        body_len = None
    if body_len is not None:
        delimit = COUNTED
        res_hdrs.append(("Connection", "keep-alive"))
    elif self.req_version is not None and 2.0 > self.req_version >= 1.1:
        # req_version is still None when an error response is generated
        # before the request line could be parsed; test for that
        # explicitly rather than relying on how None orders against a
        # float.
        delimit = CHUNKED
        res_hdrs.append(("Transfer-Encoding", "chunked"))
    else:
        delimit = CLOSE
        res_hdrs.append(("Connection", "close"))

    self._output_start(
        "HTTP/1.1 %s %s" % (status_code, status_phrase),
        res_hdrs, delimit
    )
    return self.res_body, self.res_done
def _input_start(self, top_line, hdr_tuples, conn_tokens,
                 transfer_codes, content_length):
    """
    Take the top set of headers from the input stream, parse them
    and queue the request to be processed by the application.

    Arguments:
     - top_line: the request line
     - hdr_tuples: list of (name, value) header tuples
     - conn_tokens: list of Connection header tokens
     - transfer_codes: list of transfer codings
     - content_length: the Content-Length value, or None

    Returns a true value if the request may carry a body.

    When the request is rejected, an error response is generated with
    _handle_error and ValueError is raised to abort parsing.
    """
    assert self._input_state == WAITING, "pipelining not supported"
    # FIXME: pipelining
    try:
        method, _req_line = top_line.split(None, 1)
        uri, req_version = _req_line.rsplit(None, 1)
        self.req_version = float(req_version.rsplit('/', 1)[1])
    except (ValueError, IndexError):
        self._handle_error(ERR_HTTP_VERSION, top_line)
        # FIXME: more fine-grained
        raise ValueError
    if self.req_version == 1.1 \
    and 'host' not in [t[0].lower() for t in hdr_tuples]:
        self._handle_error(ERR_HOST_REQ)
        raise ValueError
    # Reject a first header whose name starts with whitespace. The check
    # looks at the first character of the first header *name*, and aborts
    # after responding, like the other error branches.
    # NOTE(review): _parse_headers folds lines that start with whitespace
    # into the preceding line, so this may be unreachable in practice --
    # confirm.
    if hdr_tuples and hdr_tuples[0][0][:1] in [" ", "\t"]:
        self._handle_error(ERR_WHITESPACE_HDR)
        raise ValueError
    for code in transfer_codes:
        # we only support 'identity' and chunked' codes
        if code not in ['identity', 'chunked']:
            # FIXME: SHOULD also close connection
            self._handle_error(ERR_TRANSFER_CODE)
            raise ValueError
    # FIXME: MUST 400 request messages with whitespace between
    # name and colon
    self.method = method
    self.connection_hdr = conn_tokens

    log.info("%s server req_start %s %s %s" % (
        id(self), method, uri, self.req_version)
    )
    self.req_body_cb, self.req_done_cb = self.request_handler(
        method, uri, hdr_tuples, self.res_start, self.req_body_pause)
    allows_body = (content_length) or (transfer_codes != [])
    return allows_body
def _handle_error(self, err, detail=None):
    """
    Handle a problem with the request by generating an appropriate
    response.

    Arguments:
     - err: an error dictionary (see the error module); 'desc' becomes
       the response body and 'status', when present, the status line.
       Without a 'status' entry, 400 Bad Request is used.
     - detail: optional extra text appended to the body in parentheses.
    """
    assert self._output_state == WAITING
    # Work on a copy: err is normally one of the module-level dictionaries
    # from the error module, and writing the detail into it would leak
    # this request's detail into every later response for that error.
    err = dict(err)
    if detail:
        err['detail'] = detail
    status_code, status_phrase = err.get('status', ('400', 'Bad Request'))
    hdrs = [
        ('Content-Type', 'text/plain'),
    ]
    body = err['desc']
    if 'detail' in err:
        body += " (%s)" % err['detail']
    self.res_start(status_code, status_phrase, hdrs, dummy)
    self.res_body(body)
    self.res_done()
8 | """ 9 | 10 | __author__ = "Mark Nottingham " 11 | __copyright__ = """\ 12 | Copyright (c) 2008-2010 Mark Nottingham 13 | 14 | Permission is hereby granted, free of charge, to any person obtaining a copy 15 | of this software and associated documentation files (the "Software"), to deal 16 | in the Software without restriction, including without limitation the rights 17 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 18 | copies of the Software, and to permit persons to whom the Software is 19 | furnished to do so, subject to the following conditions: 20 | 21 | The above copyright notice and this permission notice shall be included in 22 | all copies or substantial portions of the Software. 23 | 24 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 25 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 26 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 27 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 28 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 29 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 30 | THE SOFTWARE. 
def header_dict(header_tuple, strip=None):
    """
    Given a header tuple, return a dictionary keyed upon the lower-cased
    header names.

    If strip is defined, each header listed (by lower-cased name) will not be
    returned in the dictionary.

    Names and values have surrounding whitespace removed. When a header
    name occurs more than once, the last value wins.
    """
    # TODO: return a list of values; currently destructive.
    if strip is None:
        strip = []
    return dict([
        (n.strip().lower(), v.strip())
        for (n, v) in header_tuple
        if n.strip().lower() not in strip
    ])
def _handle_input(self, instr):
    """
    Given a chunk of input, figure out what state we're in and handle it,
    making the appropriate calls.

    While waiting for a message, input is buffered until a complete header
    block is present; it is then parsed and whatever follows is fed back
    into this method. Once headers are done, input is passed to the
    _handle_<delimit> method matching the current input delimiter.
    """
    if self._input_buffer != "":
        # will need to move to a list if writev comes around
        instr = self._input_buffer + instr
        self._input_buffer = ""
    if self._input_state == WAITING:
        if hdr_end.search(instr): # found one
            rest = self._parse_headers(instr)
            try:
                self._handle_input(rest)
            except RuntimeError:
                self._input_error(ERR_TOO_MANY_MSGS)
        else: # partial headers; store it and wait for more
            self._input_buffer = instr
    elif self._input_state == HEADERS_DONE:
        try:
            input_parse = getattr(self, '_handle_%s' %
                                  self._input_delimit)
        except AttributeError:
            # Call form of raise: valid on every Python version, unlike
            # the "raise E, msg" statement form.
            raise Exception("Unknown input delimiter %s" %
                            self._input_delimit)
        input_parse(instr)
    else:
        raise Exception("Unknown state %s" % self._input_state)
def _handle_chunk_new(self, instr):
    """
    Parse the chunk-size line at the start of instr.

    Returns the input remaining after that line, or None when nothing
    more can be processed now (the line is incomplete and was buffered,
    or a chunking error was reported through _input_error).
    """
    try:
        # they really need to use CRLF
        chunk_size, rest = instr.split(linesep, 1)
    except ValueError:
        # no CRLF yet, so the size line is incomplete.. wait a bit
        if len(instr) > 256:
            # OK, this is absurd...
            self._input_error(ERR_CHUNK, instr)
        else:
            self._input_buffer += instr
        return
    if chunk_size.strip() == "": # ignore bare lines
        # Hand the remainder back so the loop in _handle_chunked carries
        # on with it; this replaces the recursive call, which allowed
        # unbounded recursion on a run of bare lines.
        return rest
    if ";" in chunk_size: # ignore chunk extensions
        chunk_size = chunk_size.split(";", 1)[0]
    try:
        size = int(chunk_size, 16)
    except ValueError:
        size = -1
    if size < 0:
        # Unparseable or negative size. A negative value must not be
        # stored: _handle_chunked reads it as "expecting a new chunk"
        # and parsing would silently desynchronise.
        self._input_error(ERR_CHUNK, chunk_size)
        return # blow up if we can't process a chunk.
    self._input_body_left = size
    self.input_transfer_length += len(instr) - len(rest)
    return rest
def _parse_headers(self, instr):
    """
    Given a string that we knows contains a header block (possibly more),
    parse the headers out and return the rest. Calls self._input_start
    to kick off processing.

    Returns "" when the header block is empty or when _input_start
    rejects the message by raising ValueError.
    """
    top, rest = hdr_end.split(instr, 1)
    self.input_header_length = len(top)
    hdr_lines = lws.sub(" ", top).splitlines()   # Fold LWS
    try:
        top_line = hdr_lines.pop(0)
    except IndexError: # empty
        return ""
    hdr_tuples = []
    conn_tokens = []
    transfer_codes = []
    content_length = None
    for line in hdr_lines:
        try:
            fn, fv = line.split(":", 1)
        except ValueError:
            continue # TODO: flesh out bad header handling
        hdr_tuples.append((fn, fv))
        f_name = fn.strip().lower()
        f_val = fv.strip()

        # parse connection-related headers
        if f_name == "connection":
            conn_tokens += [v.strip().lower() for v in f_val.split(',')]
        elif f_name == "transfer-encoding": # FIXME: parameters
            transfer_codes += [v.strip().lower() for \
                               v in f_val.split(',')]
        elif f_name == "content-length":
            if content_length is not None:
                continue # ignore any C-L past the first.
            try:
                parsed_length = int(f_val)
            except ValueError:
                continue
            if parsed_length < 0:
                # A negative length is as invalid as an unparseable one.
                # Accepting it would trip the assertion in
                # _handle_counted on the next input.
                continue
            content_length = parsed_length

    # FIXME: WSP between name and colon; request = 400, response = discard
    # TODO: remove *and* ignore conn tokens if the message was 1.0

    # ignore content-length if transfer-encoding is present
    if transfer_codes:
        content_length = None

    try:
        allows_body = self._input_start(top_line, hdr_tuples,
                    conn_tokens, transfer_codes, content_length)
    except ValueError: # parsing error of some kind; abort.
        return ""

    self._input_state = HEADERS_DONE
    if not allows_body:
        self._input_delimit = NOBODY
    elif len(transfer_codes) > 0:
        if 'chunked' in transfer_codes:
            self._input_delimit = CHUNKED
            self._input_body_left = -1 # flag that we don't know
        else:
            self._input_delimit = CLOSE
    elif content_length is not None:
        self._input_delimit = COUNTED
        self._input_body_left = content_length
    else:
        self._input_delimit = CLOSE
    return rest
361 | """ 362 | if not chunk: 363 | return 364 | if self._output_delimit == CHUNKED: 365 | chunk = "%s\r\n%s\r\n" % (hex(len(chunk))[2:], chunk) 366 | self._output(chunk) 367 | #FIXME: body counting 368 | # self._output_body_sent += len(chunk) 369 | # assert self._output_body_sent <= self._output_content_length, \ 370 | # "Too many body bytes sent" 371 | 372 | def _output_end(self, err): 373 | """ 374 | Finish outputting a HTTP message. 375 | """ 376 | if err: 377 | self.output_body_cb, self.output_done_cb = dummy, dummy 378 | self._tcp_conn.close() 379 | self._tcp_conn = None 380 | elif self._output_delimit == NOBODY: 381 | pass # didn't have a body at all. 382 | elif self._output_delimit == CHUNKED: 383 | self._output("0\r\n\r\n") 384 | elif self._output_delimit == COUNTED: 385 | pass # TODO: double-check the length 386 | elif self._output_delimit == CLOSE: 387 | self._tcp_conn.close() # FIXME: abstract out? 388 | else: 389 | raise AssertionError, "Unknown request delimiter %s" % \ 390 | self._output_delimit 391 | self._output_state = WAITING 392 | -------------------------------------------------------------------------------- /src/client.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Non-Blocking HTTP Client 5 | 6 | This library allow implementation of an HTTP/1.1 client that is 7 | "non-blocking," "asynchronous" and "event-driven" -- i.e., it achieves very 8 | high performance and concurrency, so long as the application code does not 9 | block (e.g., upon network, disk or database access). Blocking on one response 10 | will block the entire client. 11 | 12 | Instantiate a Client with the following parameter: 13 | - res_start (callable) 14 | 15 | Call req_start on the Client instance to begin a request. 
It takes the 16 | following arguments: 17 | - method (string) 18 | - uri (string) 19 | - req_hdrs (list of (name, value) tuples) 20 | - req_body_pause (callable) 21 | and returns: 22 | - req_body (callable) 23 | - req_done (callable) 24 | 25 | Call req_body to send part of the request body. It takes the following 26 | argument: 27 | - chunk (string) 28 | 29 | Call req_done when the request is complete, whether or not it contains a 30 | body. It takes the following argument: 31 | - err (error dictionary, or None for no error) 32 | 33 | req_body_pause is called when the client needs you to temporarily stop sending 34 | the request body, or restart. It must take the following argument: 35 | - paused (boolean; True means pause, False means unpause) 36 | 37 | res_start is called to start the response, and must take the response's 38 | HTTP version (e.g., 1.1) followed by these arguments: 39 | - status_code (string) 40 | - status_phrase (string) 41 | - res_hdrs (list of (name, value) tuples) 42 | - res_body_pause (callable) 43 | It must return: 44 | - res_body (callable) 45 | - res_done (callable) 46 | 47 | res_body is called when part of the response body is available. It must accept 48 | the following parameter: 49 | - chunk (string) 50 | 51 | res_done is called when the response is finished, and must accept the 52 | following argument: 53 | - err (error dictionary, or None if no error) 54 | 55 | See the error module for the complete list of valid error dictionaries. 56 | 57 | Where possible, errors in the response will be indicated with the appropriate 58 | 5xx HTTP status code (i.e., by calling res_start, res_body and res_done with 59 | an error dictionary). However, if a response has already been started, the 60 | connection will be dropped (for example, when the response chunking or 61 | indicated length is incorrect). In these cases, res_done will still be called 62 | with the appropriate error dictionary.
63 | """ 64 | 65 | __author__ = "Mark Nottingham " 66 | __copyright__ = """\ 67 | Copyright (c) 2008-2010 Mark Nottingham 68 | 69 | Permission is hereby granted, free of charge, to any person obtaining a copy 70 | of this software and associated documentation files (the "Software"), to deal 71 | in the Software without restriction, including without limitation the rights 72 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 73 | copies of the Software, and to permit persons to whom the Software is 74 | furnished to do so, subject to the following conditions: 75 | 76 | The above copyright notice and this permission notice shall be included in 77 | all copies or substantial portions of the Software. 78 | 79 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 80 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 81 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 82 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 83 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 84 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 85 | THE SOFTWARE. 86 | """ 87 | 88 | import errno 89 | import os 90 | from urlparse import urlsplit, urlunsplit 91 | 92 | import push_tcp 93 | from http_common import HttpMessageHandler, \ 94 | CLOSE, COUNTED, NOBODY, \ 95 | WAITING, \ 96 | idempotent_methods, no_body_status, hop_by_hop_hdrs, \ 97 | dummy, get_hdr 98 | from error import ERR_URL, ERR_CONNECT, \ 99 | ERR_READ_TIMEOUT, ERR_HTTP_VERSION 100 | 101 | req_remove_hdrs = hop_by_hop_hdrs + ['host'] 102 | 103 | # TODO: proxy support 104 | # TODO: next-hop version cache for Expect/Continue, etc. 105 | 106 | class Client(HttpMessageHandler): 107 | "An asynchronous HTTP client." 
108 | connect_timeout = None 109 | read_timeout = None 110 | retry_limit = 2 111 | 112 | def __init__(self, res_start_cb): 113 | HttpMessageHandler.__init__(self) 114 | self.res_start_cb = res_start_cb 115 | self.res_body_cb = None 116 | self.res_done_cb = None 117 | self.method = None 118 | self.uri = None 119 | self.req_hdrs = [] 120 | self._host = None 121 | self._port = None 122 | self._tcp_conn = None 123 | self._conn_reusable = False 124 | self._req_body_pause_cb = None 125 | self._retries = 0 126 | self._read_timeout_ev = None 127 | self._output_buffer = [] 128 | 129 | def __getstate__(self): 130 | props = ['method', 'uri', 'req_hdrs', 131 | 'input_header_length', 'input_transfer_length'] 132 | return dict([(k, v) for (k, v) in self.__dict__.items() 133 | if k in props]) 134 | 135 | def req_start(self, method, uri, req_hdrs, req_body_pause): 136 | """ 137 | Start a request to uri using method, where 138 | req_hdrs is a list of (field_name, field_value) for 139 | the request headers. 140 | 141 | Returns a (req_body, req_done) tuple. 
142 | """ 143 | self._req_body_pause_cb = req_body_pause 144 | req_hdrs = [i for i in req_hdrs \ 145 | if not i[0].lower() in req_remove_hdrs] 146 | (scheme, authority, path, query, fragment) = urlsplit(uri) 147 | if scheme.lower() != 'http': 148 | self._handle_error(ERR_URL, "Only HTTP URLs are supported") 149 | return dummy, dummy 150 | if "@" in authority: 151 | userinfo, authority = authority.split("@", 1) 152 | if ":" in authority: 153 | self._host, port = authority.rsplit(":", 1) 154 | try: 155 | self._port = int(port) 156 | except ValueError: 157 | self._handle_error(ERR_URL, "Non-integer port in URL") 158 | return dummy, dummy 159 | else: 160 | self._host, self._port = authority, 80 161 | if path == "": 162 | path = "/" 163 | uri = urlunsplit(('', '', path, query, '')) 164 | self.method, self.uri, self.req_hdrs = method, uri, req_hdrs 165 | self.req_hdrs.append(("Host", authority)) 166 | self.req_hdrs.append(("Connection", "keep-alive")) 167 | try: 168 | body_len = int(get_hdr(req_hdrs, "content-length").pop(0)) 169 | delimit = COUNTED 170 | except (IndexError, ValueError): 171 | body_len = None 172 | delimit = NOBODY 173 | self._output_start("%s %s HTTP/1.1" % (self.method, self.uri), 174 | self.req_hdrs, delimit 175 | ) 176 | _idle_pool.attach(self._host, self._port, self._handle_connect, 177 | self._handle_connect_error, self.connect_timeout 178 | ) 179 | return self.req_body, self.req_done 180 | # TODO: if we sent Expect: 100-continue, don't wait forever 181 | # (i.e., schedule something) 182 | 183 | def req_body(self, chunk): 184 | "Send part of the request body. May be called zero to many times." 185 | # FIXME: self._handle_error(ERR_LEN_REQ) 186 | self._output_body(chunk) 187 | 188 | def req_done(self, err=None): 189 | """ 190 | Signal the end of the request, whether or not there was a body. MUST 191 | be called exactly once for each request. 
192 | 193 | If err is not None, it is an error dictionary (see the error module) 194 | indicating that an HTTP-specific (i.e., non-application) error 195 | occurred while satisfying the request; this is useful for debugging. 196 | """ 197 | self._output_end(err) 198 | 199 | def res_body_pause(self, paused): 200 | "Temporarily stop / restart sending the response body." 201 | if self._tcp_conn and self._tcp_conn.tcp_connected: 202 | self._tcp_conn.pause(paused) 203 | 204 | # Methods called by push_tcp 205 | 206 | def _handle_connect(self, tcp_conn): 207 | "The connection has succeeded." 208 | self._tcp_conn = tcp_conn 209 | self._output("") # kick the output buffer 210 | if self.read_timeout: 211 | self._read_timeout_ev = push_tcp.schedule( 212 | self.read_timeout, self._handle_error, 213 | ERR_READ_TIMEOUT, 'connect' 214 | ) 215 | return self._handle_input, self._conn_closed, self._req_body_pause 216 | 217 | def _handle_connect_error(self, err): 218 | "The connection has failed." 219 | if err[0] == errno.EINVAL: # weirdness. 220 | err = (errno.ECONNREFUSED, os.strerror(errno.ECONNREFUSED)) 221 | self._handle_error(ERR_CONNECT, err[1]) 222 | 223 | def _conn_closed(self): 224 | "The server closed the connection." 225 | if self.read_timeout: 226 | self._read_timeout_ev.delete() 227 | if self._input_buffer: 228 | self._handle_input("") 229 | if self._input_delimit == CLOSE: 230 | self._input_end() 231 | elif self._input_state == WAITING: 232 | if self.method in idempotent_methods: 233 | if self._retries < self.retry_limit: 234 | self._retry() 235 | else: 236 | self._handle_error(ERR_CONNECT, 237 | "Tried to connect %s times." % (self._retries + 1) 238 | ) 239 | else: 240 | self._handle_error(ERR_CONNECT, 241 | "Can't retry %s method" % self.method 242 | ) 243 | else: 244 | self._input_error(ERR_CONNECT, 245 | "Server dropped connection before the response was received." 246 | ) 247 | 248 | def _retry(self): 249 | "Retry the request." 
250 | if self._read_timeout_ev: 251 | self._read_timeout_ev.delete() 252 | self._retries += 1 253 | _idle_pool.attach(self._host, self._port, self._handle_connect, 254 | self._handle_connect_error, self.connect_timeout 255 | ) 256 | 257 | def _req_body_pause(self, paused): 258 | "The client needs the application to pause/unpause the request body." 259 | if self._req_body_pause_cb: 260 | self._req_body_pause_cb(paused) 261 | 262 | # Methods called by common.HttpMessageHandler 263 | 264 | def _input_start(self, top_line, hdr_tuples, conn_tokens, 265 | transfer_codes, content_length): 266 | """ 267 | Take the top set of headers from the input stream, parse them 268 | and queue the request to be processed by the application. 269 | """ 270 | if self.read_timeout: 271 | self._read_timeout_ev.delete() 272 | try: 273 | res_version, status_txt = top_line.split(None, 1) 274 | res_version = float(res_version.rsplit('/', 1)[1]) 275 | # TODO: check that the protocol is HTTP 276 | except (ValueError, IndexError): 277 | self._handle_error(ERR_HTTP_VERSION, top_line) 278 | raise ValueError 279 | try: 280 | res_code, res_phrase = status_txt.split(None, 1) 281 | except ValueError: 282 | res_code = status_txt.rstrip() 283 | res_phrase = "" 284 | if 'close' not in conn_tokens: 285 | if (res_version == 1.0 and 'keep-alive' in conn_tokens) or \ 286 | res_version > 1.0: 287 | self._conn_reusable = True 288 | if self.read_timeout: 289 | self._read_timeout_ev = push_tcp.schedule( 290 | self.read_timeout, self._input_error, 291 | ERR_READ_TIMEOUT, 'start' 292 | ) 293 | self.res_body_cb, self.res_done_cb = self.res_start_cb( 294 | res_version, res_code, res_phrase, 295 | hdr_tuples, self.res_body_pause 296 | ) 297 | allows_body = (res_code not in no_body_status) \ 298 | or (self.method == "HEAD") 299 | return allows_body 300 | 301 | def _input_body(self, chunk): 302 | "Process a response body chunk from the wire." 
303 | if self.read_timeout: 304 | self._read_timeout_ev.delete() 305 | self.res_body_cb(chunk) 306 | if self.read_timeout: 307 | self._read_timeout_ev = push_tcp.schedule(self.read_timeout, 308 | self._input_error, ERR_READ_TIMEOUT, 'body' 309 | ) 310 | 311 | def _input_end(self): 312 | "Indicate that the response body is complete." 313 | if self.read_timeout: 314 | self._read_timeout_ev.delete() 315 | if self._tcp_conn: 316 | if self._tcp_conn.tcp_connected and self._conn_reusable: 317 | # Note that we don't reset read_cb; if more bytes come in 318 | # before the next request, we'll still get them. 319 | _idle_pool.release(self._tcp_conn) 320 | else: 321 | self._tcp_conn.close() 322 | self._tcp_conn = None 323 | self.res_done_cb(None) 324 | 325 | def _input_error(self, err, detail=None): 326 | "Indicate a parsing problem with the response body." 327 | if self.read_timeout: 328 | self._read_timeout_ev.delete() 329 | if self._tcp_conn: 330 | self._tcp_conn.close() 331 | self._tcp_conn = None 332 | err['detail'] = detail 333 | self.res_done_cb(err) 334 | 335 | def _output(self, chunk): 336 | self._output_buffer.append(chunk) 337 | if self._tcp_conn and self._tcp_conn.tcp_connected: 338 | self._tcp_conn.write("".join(self._output_buffer)) 339 | self._output_buffer = [] 340 | 341 | # misc 342 | 343 | def _handle_error(self, err, detail=None): 344 | """ 345 | Handle a problem with the request by generating an appropriate 346 | response. 
347 | """ 348 | assert self._input_state == WAITING 349 | if self._read_timeout_ev: 350 | self._read_timeout_ev.delete() 351 | if self._tcp_conn: 352 | self._tcp_conn.close() 353 | self._tcp_conn = None 354 | if detail: 355 | err['detail'] = detail 356 | status_code, status_phrase = err.get('status', 357 | ('504', 'Gateway Timeout') 358 | ) 359 | hdrs = [ 360 | ('Content-Type', 'text/plain'), 361 | ('Connection', 'close'), 362 | ] 363 | body = err['desc'] 364 | if err.has_key('detail'): 365 | body += " (%s)" % err['detail'] 366 | res_body_cb, res_done_cb = self.res_start_cb( 367 | "1.1", status_code, status_phrase, hdrs, dummy) 368 | res_body_cb(str(body)) 369 | push_tcp.schedule(0, res_done_cb, err) 370 | 371 | 372 | class _HttpConnectionPool: 373 | "A pool of idle TCP connections for use by the client." 374 | _conns = {} 375 | 376 | def attach(self, host, port, handle_connect, 377 | handle_connect_error, connect_timeout): 378 | "Find an idle connection for (host, port), or create a new one." 379 | while True: 380 | try: 381 | tcp_conn = self._conns[(host, port)].pop() 382 | except (IndexError, KeyError): 383 | push_tcp.create_client(host, port, 384 | handle_connect, handle_connect_error, connect_timeout 385 | ) 386 | break 387 | if tcp_conn.tcp_connected: 388 | tcp_conn.read_cb, tcp_conn.close_cb, tcp_conn.pause_cb = \ 389 | handle_connect(tcp_conn) 390 | break 391 | 392 | def release(self, tcp_conn): 393 | "Add an idle connection back to the pool." 394 | if tcp_conn.tcp_connected: 395 | def idle_close(): 396 | "Remove the connection from the pool when it closes." 
397 | try: 398 | self._conns[ 399 | (tcp_conn.host, tcp_conn.port) 400 | ].remove(tcp_conn) 401 | except ValueError: 402 | pass 403 | tcp_conn.close_cb = idle_close 404 | if not self._conns.has_key((tcp_conn.host, tcp_conn.port)): 405 | self._conns[(tcp_conn.host, tcp_conn.port)] = [tcp_conn] 406 | else: 407 | self._conns[(tcp_conn.host, tcp_conn.port)].append(tcp_conn) 408 | 409 | _idle_pool = _HttpConnectionPool() 410 | 411 | 412 | def test_client(request_uri, out, err): 413 | "A simple demonstration of a client." 414 | 415 | def printer(version, status, phrase, headers, res_pause): 416 | "Print the response headers." 417 | print "HTTP/%s" % version, status, phrase 418 | print "\n".join(["%s:%s" % header for header in headers]) 419 | print 420 | def body(chunk): 421 | out(chunk) 422 | def done(err_msg): 423 | if err_msg: 424 | err("\n*** ERROR: %s (%s)\n" % 425 | (err_msg['desc'], err_msg['detail']) 426 | ) 427 | push_tcp.stop() 428 | return body, done 429 | c = Client(printer) 430 | req_body_write, req_done = c.req_start("GET", request_uri, [], dummy) 431 | req_done(None) 432 | push_tcp.run() 433 | 434 | if __name__ == "__main__": 435 | import sys 436 | test_client(sys.argv[1], sys.stdout.write, sys.stderr.write) 437 | -------------------------------------------------------------------------------- /src/push_tcp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | push-based asynchronous TCP 5 | 6 | This is a generic library for building event-based / asynchronous 7 | TCP servers and clients. 8 | 9 | By default, it uses the asyncore library included with Python. 10 | However, if the pyevent library 11 | is available, it will 12 | use that, offering higher concurrency and, perhaps, performance. 13 | 14 | It uses a push model; i.e., the network connection pushes data to 15 | you (using a callback), and you push data to the network connection 16 | (using a direct method invocation). 
17 | 18 | *** Building Clients 19 | 20 | To connect to a server, use create_client; 21 | > host = 'www.example.com' 22 | > port = 80 23 | > push_tcp.create_client(host, port, conn_handler, error_handler) 24 | 25 | conn_handler will be called with the tcp_conn as the argument 26 | when the connection is made. See "Working with Connections" 27 | below for details. 28 | 29 | error_handler will be called with a single error value, indexable as (errno, message), if the connection can't be made for some reason. 30 | 31 | > def error_handler(err): 32 | > print "can't connect: %s" % err[1] 33 | 34 | *** Building Servers 35 | 36 | To start listening, use create_server; 37 | 38 | > server = push_tcp.create_server(host, port, conn_handler) 39 | 40 | conn_handler is called every time a new client connects; see 41 | "Working with Connections" below for details. 42 | 43 | The server object itself keeps track of all of the open connections, and 44 | can be used to do things like idle connection management, etc. 45 | 46 | *** Working with Connections 47 | 48 | Every time a new connection is established -- whether as a client 49 | or as a server -- the conn_handler given is called with tcp_conn 50 | as its argument; 51 | 52 | > def conn_handler(tcp_conn): 53 | > print "connected to %s:%s" % (tcp_conn.host, tcp_conn.port) 54 | > return read_cb, close_cb, pause_cb 55 | 56 | It must return a (read_cb, close_cb, pause_cb) tuple. 57 | 58 | read_cb will be called every time incoming data is available from 59 | the connection; 60 | 61 | > def read_cb(data): 62 | > print "got some data:", data 63 | 64 | When you want to write to the connection, just write to it: 65 | 66 | > tcp_conn.write(data) 67 | 68 | If you want to close the connection from your side, just call close: 69 | 70 | > tcp_conn.close() 71 | 72 | Note that this will flush any data already written.
73 | 74 | If the other side closes the connection, close_cb will be called; 75 | 76 | > def close_cb(): 77 | > print "oops, they don't like us any more..." 78 | 79 | If you write too much data to the connection and the buffers fill up, 80 | pause_cb will be called with True to tell you to stop sending data 81 | temporarily; 82 | 83 | > def pause_cb(paused): 84 | > if paused: 85 | > # stop sending data 86 | > else: 87 | > # it's OK to start again 88 | 89 | Note that this is advisory; if you ignore it, the data will still be 90 | buffered, but the buffer will grow. 91 | 92 | Likewise, if you want to pause the connection because your buffers 93 | are full, call pause; 94 | 95 | > tcp_conn.pause(True) 96 | 97 | but don't forget to tell it when it's OK to send data again; 98 | 99 | > tcp_conn.pause(False) 100 | 101 | *** Timed Events 102 | 103 | It's often useful to schedule an event to be run some time in the future; 104 | 105 | > push_tcp.schedule(10, cb, "foo") 106 | 107 | This example will schedule the function 'cb' to be called with the argument 108 | "foo" ten seconds in the future. 
109 | 110 | *** Running the loop 111 | 112 | In all cases (clients, servers, and timed events), you'll need to start 113 | the event loop before anything actually happens; 114 | 115 | > push_tcp.run() 116 | 117 | To stop it, just stop it; 118 | 119 | > push_tcp.stop() 120 | """ 121 | 122 | __author__ = "Mark Nottingham " 123 | __copyright__ = """\ 124 | Copyright (c) 2008-2010 Mark Nottingham 125 | 126 | Permission is hereby granted, free of charge, to any person obtaining a copy 127 | of this software and associated documentation files (the "Software"), to deal 128 | in the Software without restriction, including without limitation the rights 129 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 130 | copies of the Software, and to permit persons to whom the Software is 131 | furnished to do so, subject to the following conditions: 132 | 133 | The above copyright notice and this permission notice shall be included in 134 | all copies or substantial portions of the Software. 135 | 136 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 137 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 138 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 139 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 140 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 141 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 142 | THE SOFTWARE. 143 | """ 144 | 145 | import asyncore 146 | import bisect 147 | import errno 148 | import os 149 | import sys 150 | import socket 151 | import time 152 | 153 | try: 154 | import event # http://www.monkey.org/~dugsong/pyevent/ 155 | except ImportError: 156 | event = None 157 | 158 | class _TcpConnection(asyncore.dispatcher): 159 | "Base class for a TCP connection." 
160 | write_bufsize = 16 161 | read_bufsize = 1024 * 16 162 | def __init__(self, sock, host, port): 163 | self.socket = sock 164 | self.host = host 165 | self.port = port 166 | self.read_cb = None 167 | self.close_cb = None 168 | self._close_cb_called = False 169 | self.pause_cb = None 170 | self.tcp_connected = True # we assume a connected socket 171 | self._paused = False # TODO: should be paused by default 172 | self._closing = False 173 | self._write_buffer = [] 174 | if event: 175 | self._revent = event.read(sock, self.handle_read) 176 | self._wevent = event.write(sock, self.handle_write) 177 | else: # asyncore 178 | asyncore.dispatcher.__init__(self, sock) 179 | 180 | def __repr__(self): 181 | status = [self.__class__.__module__+"."+self.__class__.__name__] 182 | if self.tcp_connected: 183 | status.append('connected') 184 | status.append('%s:%s' % (self.host, self.port)) 185 | if event: 186 | status.append('event-based') 187 | if self._paused: 188 | status.append('paused') 189 | if self._closing: 190 | status.append('closing') 191 | if self._close_cb_called: 192 | status.append('close cb called') 193 | if self._write_buffer: 194 | status.append('%s write buffered' % len(self._write_buffer)) 195 | return "<%s at %#x>" % (", ".join(status), id(self)) 196 | 197 | def handle_connect(self): # asyncore 198 | pass 199 | 200 | def handle_read(self): 201 | """ 202 | The connection has data read for reading; call read_cb 203 | if appropriate. 
204 | """ 205 | try: 206 | data = self.socket.recv(self.read_bufsize) 207 | except socket.error, why: 208 | if why[0] in [errno.EBADF, errno.ECONNRESET, errno.ESHUTDOWN, 209 | errno.ECONNABORTED, errno.ECONNREFUSED, 210 | errno.ENOTCONN, errno.EPIPE]: 211 | self.conn_closed() 212 | return 213 | else: 214 | raise 215 | if data == "": 216 | self.conn_closed() 217 | else: 218 | self.read_cb(data) 219 | if event: 220 | if self.read_cb and self.tcp_connected and not self._paused: 221 | return self._revent 222 | 223 | def handle_write(self): 224 | "The connection is ready for writing; write any buffered data." 225 | if len(self._write_buffer) > 0: 226 | data = "".join(self._write_buffer) 227 | try: 228 | sent = self.socket.send(data) 229 | except socket.error, why: 230 | if why[0] == errno.EWOULDBLOCK: 231 | return 232 | elif why[0] in [errno.EBADF, errno.ECONNRESET, 233 | errno.ESHUTDOWN, errno.ECONNABORTED, 234 | errno.ECONNREFUSED, errno.ENOTCONN, 235 | errno.EPIPE]: 236 | self.conn_closed() 237 | return 238 | else: 239 | raise 240 | if sent < len(data): 241 | self._write_buffer = [data[sent:]] 242 | else: 243 | self._write_buffer = [] 244 | if self.pause_cb and len(self._write_buffer) < self.write_bufsize: 245 | self.pause_cb(False) 246 | if self._closing: 247 | self.close() 248 | if event: 249 | if self.tcp_connected \ 250 | and (len(self._write_buffer) > 0 or self._closing): 251 | return self._wevent 252 | 253 | def conn_closed(self): 254 | """ 255 | The connection has been closed by the other side. Do local cleanup 256 | and then call close_cb. 257 | """ 258 | self.tcp_connected = False 259 | if self._close_cb_called: 260 | return 261 | elif self.close_cb: 262 | self._close_cb_called = True 263 | self.close_cb() 264 | else: 265 | # uncomfortable race condition here, so we try again. 266 | # not great, but ok for now. 
267 | schedule(1, self.conn_closed) 268 | handle_close = conn_closed # for asyncore 269 | 270 | def write(self, data): 271 | "Write data to the connection." 272 | # assert not self._paused 273 | self._write_buffer.append(data) 274 | if self.pause_cb and len(self._write_buffer) > self.write_bufsize: 275 | self.pause_cb(True) 276 | if event: 277 | if not self._wevent.pending(): 278 | self._wevent.add() 279 | 280 | def pause(self, paused): 281 | """ 282 | Temporarily stop/start reading from the connection and pushing 283 | it to the app. 284 | """ 285 | if event: 286 | if paused: 287 | if self._revent.pending(): 288 | self._revent.delete() 289 | else: 290 | if not self._revent.pending(): 291 | self._revent.add() 292 | self._paused = paused 293 | 294 | def close(self): 295 | "Flush buffered data (if any) and close the connection." 296 | self.pause(True) 297 | if len(self._write_buffer) > 0: 298 | self._closing = True 299 | else: 300 | self.tcp_connected = False 301 | if event: 302 | if self._revent.pending(): 303 | self._revent.delete() 304 | if self._wevent.pending(): 305 | self._wevent.delete() 306 | self.socket.close() 307 | else: 308 | asyncore.dispatcher.close(self) 309 | 310 | def readable(self): 311 | "asyncore-specific readable method" 312 | return self.read_cb and self.tcp_connected and not self._paused 313 | 314 | def writable(self): 315 | "asyncore-specific writable method" 316 | return self.tcp_connected and \ 317 | (len(self._write_buffer) > 0 or self._closing) 318 | 319 | def handle_error(self): 320 | """ 321 | asyncore-specific misc error method. 322 | """ 323 | raise 324 | 325 | 326 | def create_server(host, port, conn_handler): 327 | """Listen to host:port and send connections to conn_handler.""" 328 | sock = server_listen(host, port) 329 | attach_server(host, port, sock, conn_handler) 330 | 331 | def server_listen(host, port): 332 | "Return a socket listening to host:port." 
333 | sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 334 | sock.setblocking(0) 335 | sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) 336 | sock.bind((host, port)) 337 | sock.listen(socket.SOMAXCONN) 338 | return sock 339 | 340 | class attach_server(asyncore.dispatcher): 341 | "Attach a server to a listening socket." 342 | def __init__(self, host, port, sock, conn_handler): 343 | self.host = host 344 | self.port = port 345 | self.conn_handler = conn_handler 346 | if event: 347 | event.event(self.handle_accept, handle=sock, 348 | evtype=event.EV_READ|event.EV_PERSIST).add() 349 | else: # asyncore 350 | asyncore.dispatcher.__init__(self, sock=sock) 351 | self.accepting = True 352 | 353 | def handle_accept(self, *args): 354 | try: 355 | if event: 356 | conn, addr = args[1].accept() 357 | else: # asyncore 358 | conn, addr = self.accept() 359 | except TypeError: 360 | # sometimes accept() returns None if we have 361 | # multiple processes listening 362 | return 363 | tcp_conn = _TcpConnection(conn, self.host, self.port) 364 | tcp_conn.read_cb, tcp_conn.close_cb, tcp_conn.pause_cb = \ 365 | self.conn_handler(tcp_conn) 366 | 367 | def handle_error(self): 368 | stop() # FIXME: handle unscheduled errors more gracefully 369 | raise 370 | 371 | class create_client(asyncore.dispatcher): 372 | "An asynchronous TCP client." 373 | def __init__(self, host, port, conn_handler, 374 | connect_error_handler, connect_timeout=None): 375 | self.host = host 376 | self.port = port 377 | self.conn_handler = conn_handler 378 | self.connect_error_handler = connect_error_handler 379 | self._timeout_ev = None 380 | self._error_sent = False 381 | # TODO: socket.getaddrinfo(); needs to be non-blocking. 382 | if event: 383 | sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 384 | sock.setblocking(0) 385 | event.write(sock, self.handle_connect, sock).add() 386 | try: 387 | # FIXME: check for DNS errors, etc. 
388 | err = sock.connect_ex((host, port)) 389 | except socket.error, why: 390 | self.handle_conn_error() 391 | return 392 | except socket.gaierror, why: 393 | self.handle_conn_error() 394 | return 395 | if err != errno.EINPROGRESS: # FIXME: others? 396 | self.handle_conn_error((err, os.strerror(err))) 397 | return 398 | else: # asyncore 399 | asyncore.dispatcher.__init__(self) 400 | self.create_socket(socket.AF_INET, socket.SOCK_STREAM) 401 | try: 402 | self.connect((host, port)) 403 | # exceptions should be caught by handle_error 404 | except socket.error, why: 405 | self.handle_conn_error() 406 | return 407 | except socket.gaierror, why: 408 | self.handle_conn_error() 409 | return 410 | if connect_timeout: 411 | self._timeout_ev = schedule(connect_timeout, 412 | self.connect_error_handler, 413 | (errno.ETIMEDOUT, os.strerror(errno.ETIMEDOUT)) 414 | ) 415 | 416 | def handle_connect(self, sock=None): 417 | if self._timeout_ev: 418 | self._timeout_ev.delete() 419 | if self._error_sent: 420 | return 421 | if sock is None: # asyncore 422 | sock = self.socket 423 | tcp_conn = _TcpConnection(sock, self.host, self.port) 424 | tcp_conn.read_cb, tcp_conn.close_cb, tcp_conn.pause_cb = \ 425 | self.conn_handler(tcp_conn) 426 | 427 | def handle_read(self): # asyncore 428 | pass 429 | 430 | def handle_write(self): # asyncore 431 | pass 432 | 433 | def handle_conn_error(self, ex_value=None): 434 | if ex_value is None: 435 | ex_type, ex_value = sys.exc_info()[:2] 436 | else: 437 | ex_type = socket.error 438 | if ex_type in [socket.error, socket.gaierror]: 439 | if ex_value[0] == errno.ECONNREFUSED: 440 | return # OS will retry 441 | if self._timeout_ev: 442 | self._timeout_ev.delete() 443 | if self._error_sent: 444 | return 445 | elif self.connect_error_handler: 446 | self._error_sent = True 447 | self.connect_error_handler(ex_value) 448 | else: 449 | if self._timeout_ev: 450 | self._timeout_ev.delete() 451 | raise 452 | 453 | def handle_error(self): 454 | stop() # FIXME: handle 
unscheduled errors more gracefully 455 | raise 456 | 457 | 458 | # adapted from Medusa 459 | class _AsyncoreLoop: 460 | "Asyncore main loop + event scheduling." 461 | def __init__(self): 462 | self.events = [] 463 | self.num_channels = 0 464 | self.max_channels = 0 465 | self.timeout = 1 466 | self.granularity = 1 467 | self.socket_map = asyncore.socket_map 468 | self._now = None 469 | self._running = False 470 | 471 | def run(self): 472 | "Start the loop." 473 | last_event_check = 0 474 | self._running = True 475 | while (self.socket_map or self.events) and self._running: 476 | self._now = time.time() 477 | if (self._now - last_event_check) >= self.granularity: 478 | last_event_check = self._now 479 | for event in self.events: 480 | when, what = event 481 | if self._now >= when: 482 | try: 483 | self.events.remove(event) 484 | except ValueError: 485 | # a previous event may have removed this one. 486 | continue 487 | what() 488 | else: 489 | break 490 | # sample the number of channels 491 | n = len(self.socket_map) 492 | self.num_channels = n 493 | if n > self.max_channels: 494 | self.max_channels = n 495 | asyncore.poll(self.timeout) # TODO: use poll2 when available 496 | 497 | def stop(self): 498 | "Stop the loop." 499 | self.socket_map.clear() 500 | self.events = [] 501 | self._now = None 502 | self._running = False 503 | 504 | def time(self): 505 | "Return the current time (to avoid a system call)." 506 | return self._now or time.time() 507 | 508 | def schedule(self, delta, callback, *args): 509 | "Schedule callable callback to be run in delta seconds with *args." 
510 | def cb(): 511 | if callback: 512 | callback(*args) 513 | new_event = (self.time() + delta, cb) 514 | events = self.events 515 | bisect.insort(events, new_event) 516 | class event_holder: 517 | def __init__(self): 518 | self._deleted = False 519 | def delete(self): 520 | if not self._deleted: 521 | try: 522 | events.remove(new_event) 523 | self._deleted = True 524 | except ValueError: # already gone 525 | pass 526 | return event_holder() 527 | 528 | _event_running = False 529 | def _event_run(*args): 530 | _event_running = True 531 | event.dispatch(*args) 532 | 533 | def _event_stop(*args): 534 | _event_running = False 535 | event.abort(*args) 536 | 537 | if event: 538 | schedule = event.timeout 539 | run = _event_run 540 | stop = _event_stop 541 | now = time.time 542 | running = _event_running 543 | else: 544 | _loop = _AsyncoreLoop() 545 | schedule = _loop.schedule 546 | run = _loop.run 547 | stop = _loop.stop 548 | now = _loop.time 549 | running = _loop._running 550 | --------------------------------------------------------------------------------