├── src
├── __init__.py
├── error.py
├── server.py
├── http_common.py
├── client.py
└── push_tcp.py
├── setup.py
├── scripts
└── proxy.py
└── README
/src/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | """
4 | Non-blocking HTTP components.
5 | """
6 |
7 | from client import Client
8 | from server import Server
9 | from push_tcp import run, stop, schedule, now, running
10 | from http_common import dummy, header_dict, get_hdr, \
11 | safe_methods, idempotent_methods, hop_by_hop_hdrs
12 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | from distutils.core import setup
4 |
# Trove classifiers advertised for the package.
_CLASSIFIERS = [
    'Development Status :: 4 - Beta',
    'Intended Audience :: Developers',
    'License :: OSI Approved :: MIT License',
    'Programming Language :: Python',
    'Topic :: Internet :: WWW/HTTP',
    'Topic :: Internet :: Proxy Servers',
    'Topic :: Internet :: WWW/HTTP :: HTTP Servers',
    'Topic :: Software Development :: Libraries :: Python Modules',
]

# The 'nbhttp' package is built from the 'src' directory; the demo proxy
# is installed as a script.
setup(
    name='nbhttp',
    version='0.7.3',
    description='Non-blocking HTTP components',
    author='Mark Nottingham',
    author_email='mnot@mnot.net',
    url='http://github.com/mnot/nbhttp/',
    download_url='http://github.com/mnot/nbhttp/tarball/nbhttp-0.7.3',
    packages=['nbhttp'],
    package_dir={'nbhttp': 'src'},
    scripts=['scripts/proxy.py'],
    classifiers=_CLASSIFIERS,
)
--------------------------------------------------------------------------------
/scripts/proxy.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | """
4 | A simple HTTP proxy as a demonstration.
5 | """
6 |
7 |
8 | import sys
9 | try: # run from dist without installation
10 | sys.path.insert(0, "..")
11 | from src import Client, Server, header_dict, run, client, schedule
12 | except ImportError:
13 | from nbhttp import Client, Server, header_dict, run, client, schedule
14 |
15 | # TODO: CONNECT support
16 | # TODO: remove headers nominated by Connection
17 | # TODO: add Via
18 |
class ProxyClient(Client):
    "Client subclass used for the proxy's outbound requests; overrides timeouts."
    connect_timeout = 15
    read_timeout = 10
22 |
def proxy_handler(method, uri, req_hdrs, s_res_start, req_pause):
    """
    Server request handler that relays the request through a ProxyClient.

    The server-side request is forwarded with c.req_start(); when the
    client-side response starts, it is relayed back by calling the server's
    s_res_start. Returns the (req_body, req_done) callables obtained from
    the client so the server can stream the request body into it.
    """
    # hook: method, uri and req_hdrs may be rewritten before forwarding

    def c_res_start(version, status, phrase, res_hdrs, res_pause):
        "Relay the start of the upstream response to the server side."
        # hook: status, phrase and res_hdrs may be rewritten here
        downstream_body, downstream_done = s_res_start(
            status, phrase, res_hdrs, res_pause
        )
        # hook: downstream_body may be wrapped here
        return downstream_body, downstream_done

    upstream = ProxyClient(c_res_start)
    upstream_body, upstream_done = upstream.req_start(
        method, uri, req_hdrs, req_pause
    )
    # hook: upstream_body may be wrapped here
    return upstream_body, upstream_done
34 |
35 |
if __name__ == "__main__":
    # Usage: proxy.py PORT
    # sys is already imported at the top of this script.
    try:
        port = int(sys.argv[1])
    except (IndexError, ValueError):
        # Missing or non-numeric port: explain instead of dumping a traceback.
        sys.stderr.write("Usage: %s port\n" % sys.argv[0])
        sys.exit(1)
    # Keep a reference to the Server for the lifetime of the event loop.
    server = Server('', port, proxy_handler)
    run()
--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
1 |
2 | nbhttp - Tools for building non-blocking HTTP components
3 |
4 |
5 | NOTE: This is no longer maintained! See mnot/thor.
6 |
7 | * About nbhttp
8 |
9 | nbhttp is a set of libraries for building non-blocking (a.k.a. asynchronous
10 | or event-driven) HTTP clients, servers and intermediaries.
11 |
12 | Its aims are to expose full HTTP functionality in a conformant manner, with
13 | the maximum potential for performance.
14 |
15 | It is NOT YET SUITABLE FOR PRODUCTION USE. In particular, the code has not
16 | been optimised or benchmarked, nor are there limits on resource usage, logging
17 | or any number of other things that make a practical web server or intermediary
18 | a useful thing.
19 |
20 | However, it's lots of fun to prototype and play with.
21 |
22 |
23 | * Requirements
24 |
25 | nbhttp needs Python 2.5 or greater; see <http://python.org/>.
26 |
27 | Optionally, it will take advantage of the pyevent extension, if installed.
28 | See <http://code.google.com/p/pyevent/>.
29 |
30 |
31 | * Installation
32 |
33 | If you have setuptools, you can install from the repository:
34 |
35 | > easy_install nbhttp
36 |
37 | Otherwise, download a tarball and install using:
38 |
39 | > python setup.py install
40 |
41 |
42 | * SUPPORT, REPORTING ISSUES AND CONTRIBUTING
43 |
44 | See <http://github.com/mnot/nbhttp/> to give feedback, report issues, and
45 | contribute.
46 |
47 |
48 | * License
49 |
50 | Copyright (c) 2008-2009 Mark Nottingham
51 |
52 | Permission is hereby granted, free of charge, to any person obtaining a copy
53 | of this software and associated documentation files (the "Software"), to deal
54 | in the Software without restriction, including without limitation the rights
55 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
56 | copies of the Software, and to permit persons to whom the Software is
57 | furnished to do so, subject to the following conditions:
58 |
59 | The above copyright notice and this permission notice shall be included in
60 | all copies or substantial portions of the Software.
61 |
62 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
63 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
64 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
65 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
66 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
67 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
68 | THE SOFTWARE.
69 |
--------------------------------------------------------------------------------
/src/error.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | """
4 | errors
5 | """
6 |
7 | __author__ = "Mark Nottingham "
8 | __copyright__ = """\
9 | Copyright (c) 2008-2010 Mark Nottingham
10 |
11 | Permission is hereby granted, free of charge, to any person obtaining a copy
12 | of this software and associated documentation files (the "Software"), to deal
13 | in the Software without restriction, including without limitation the rights
14 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15 | copies of the Software, and to permit persons to whom the Software is
16 | furnished to do so, subject to the following conditions:
17 |
18 | The above copyright notice and this permission notice shall be included in
19 | all copies or substantial portions of the Software.
20 |
21 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
27 | THE SOFTWARE.
28 | """
29 |
# Each error is a dictionary with a human-readable 'desc' and, optionally,
# a 'status' tuple of (status code, reason phrase).

# --- general parsing errors ---

ERR_CHUNK = dict(desc="Chunked encoding error")

ERR_EXTRA_DATA = dict(desc="Extra data received")

ERR_BODY_FORBIDDEN = dict(desc="This message does not allow a body")

# FIXME: more specific status
ERR_HTTP_VERSION = dict(desc="Unrecognised HTTP version")

ERR_READ_TIMEOUT = dict(desc="Read timeout")

ERR_TRANSFER_CODE = dict(
    desc="Unknown request transfer coding",
    status=("501", "Not Implemented"),
)

ERR_WHITESPACE_HDR = dict(
    desc="Whitespace between request-line and first header",
    status=("400", "Bad Request"),
)

ERR_TOO_MANY_MSGS = dict(
    desc="Too many messages to parse",
    status=("400", "Bad Request"),
)

# --- client-specific errors ---

ERR_URL = dict(
    desc="Unsupported or invalid URI",
    status=("400", "Bad Request"),
)

ERR_LEN_REQ = dict(
    desc="Content-Length required",
    status=("411", "Length Required"),
)

ERR_CONNECT = dict(
    desc="Connection closed",
    status=("504", "Gateway Timeout"),
)

# --- server-specific errors ---

ERR_HOST_REQ = dict(desc="Host header required")
87 |
--------------------------------------------------------------------------------
/src/server.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | """
4 | Non-Blocking HTTP Server
5 |
6 | This library allows implementation of an HTTP/1.1 server that is
7 | "non-blocking," "asynchronous" and "event-driven" -- i.e., it achieves very
8 | high performance and concurrency, so long as the application code does not
9 | block (e.g., upon network, disk or database access). Blocking on one request
10 | will block the entire server.
11 |
12 | Instantiate a Server with the following parameters:
13 | - host (string)
14 | - port (int)
15 | - req_start (callable)
16 |
17 | req_start is called when a request starts. It must take the following
18 | arguments:
19 | - method (string)
20 | - uri (string)
21 | - req_hdrs (list of (name, value) tuples)
22 | - res_start (callable)
23 | - req_body_pause (callable)
24 | and return:
25 | - req_body (callable)
26 | - req_done (callable)
27 |
28 | req_body is called when part of the request body is available. It must take
29 | the following argument:
30 | - chunk (string)
31 |
32 | req_done is called when the request is complete, whether or not it contains a
33 | body. It must take the following argument:
34 | - err (error dictionary, or None for no error)
35 |
36 | Call req_body_pause when you want the server to temporarily stop sending the
37 | request body, or restart. You must provide the following argument:
38 | - paused (boolean; True means pause, False means unpause)
39 |
40 | Call res_start when you want to start the response, and provide the following
41 | arguments:
42 | - status_code (string)
43 | - status_phrase (string)
44 | - res_hdrs (list of (name, value) tuples)
45 | - res_body_pause
46 | It returns:
47 | - res_body (callable)
48 | - res_done (callable)
49 |
50 | Call res_body to send part of the response body to the client. Provide the
51 | following parameter:
52 | - chunk (string)
53 |
54 | Call res_done when the response is finished, and provide the
55 | following argument if appropriate:
56 | - err (error dictionary, or None for no error)
57 |
58 | See the error module for the complete list of valid error dictionaries.
59 |
60 | Where possible, errors in the request will be responded to with the
61 | appropriate 4xx HTTP status code. However, if a response has already been
62 | started, the connection will be dropped (for example, when the request
63 | chunking or indicated length are incorrect).
64 | """
65 |
66 | __author__ = "Mark Nottingham "
67 | __copyright__ = """\
68 | Copyright (c) 2008-2010 Mark Nottingham
69 |
70 | Permission is hereby granted, free of charge, to any person obtaining a copy
71 | of this software and associated documentation files (the "Software"), to deal
72 | in the Software without restriction, including without limitation the rights
73 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
74 | copies of the Software, and to permit persons to whom the Software is
75 | furnished to do so, subject to the following conditions:
76 |
77 | The above copyright notice and this permission notice shall be included in
78 | all copies or substantial portions of the Software.
79 |
80 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
81 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
82 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
83 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
84 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
85 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
86 | THE SOFTWARE.
87 | """
88 |
89 | import os
90 | import sys
91 | import logging
92 |
93 | import push_tcp
94 | from http_common import HttpMessageHandler, \
95 | CLOSE, COUNTED, CHUNKED, \
96 | WAITING, \
97 | hop_by_hop_hdrs, \
98 | dummy, get_hdr
99 |
100 | from error import ERR_HTTP_VERSION, ERR_HOST_REQ, \
101 | ERR_WHITESPACE_HDR, ERR_TRANSFER_CODE
102 |
103 | logging.basicConfig()
104 | log = logging.getLogger('server')
105 | log.setLevel(logging.WARNING)
106 |
107 | # FIXME: assure that the connection isn't closed before reading the entire
108 | # req body
109 | # TODO: filter out 100 responses to HTTP/1.0 clients that didn't ask for it.
110 |
class Server:
    "An asynchronous HTTP server."

    def __init__(self, host, port, request_handler):
        """
        Register a push_tcp server on (host, port); request_handler is
        passed to every HttpServerConnection created for a new connection.
        """
        self.request_handler = request_handler
        push_tcp.create_server(host, port, self.handle_connection)

    def handle_connection(self, tcp_conn):
        """
        Process a new push_tcp connection, tcp_conn.

        Returns the connection's (input, closed, pause) callbacks.
        """
        http_conn = HttpServerConnection(self.request_handler, tcp_conn)
        callbacks = (
            http_conn._handle_input,
            http_conn._conn_closed,
            http_conn._res_body_pause,
        )
        return callbacks
121 |
122 |
class HttpServerConnection(HttpMessageHandler):
    """
    A handler for an HTTP server connection.

    Bytes received on tcp_conn are parsed by the HttpMessageHandler base
    class; each parsed request is handed to request_handler, and the
    response it produces is serialised back onto tcp_conn.
    """
    def __init__(self, request_handler, tcp_conn):
        HttpMessageHandler.__init__(self)
        self.request_handler = request_handler
        self._tcp_conn = tcp_conn
        self.req_body_cb = None
        self.req_done_cb = None
        self.method = None
        self.req_version = None     # float (e.g. 1.1); None until parsed
        self.connection_hdr = []
        self._res_body_pause_cb = None

    def res_start(self, status_code, status_phrase, res_hdrs, res_body_pause):
        """
        Start a response. Must only be called once per response.

        Hop-by-hop headers are removed from res_hdrs, and the body
        delimitation is chosen: counted when a usable Content-Length is
        present, chunked for HTTP/1.x requests at version 1.1 or above,
        otherwise delimited by closing the connection.

        Returns (res_body, res_done).
        """
        self._res_body_pause_cb = res_body_pause
        res_hdrs = [i for i in res_hdrs
                    if not i[0].lower() in hop_by_hop_hdrs]

        try:
            body_len = int(get_hdr(res_hdrs, "content-length").pop(0))
        except (IndexError, ValueError):
            body_len = None
        if body_len is not None:
            delimit = COUNTED
            res_hdrs.append(("Connection", "keep-alive"))
        elif self.req_version is not None \
          and 2.0 > self.req_version >= 1.1:
            # req_version is still None when the request line could not be
            # parsed; guard so the comparison is well-defined everywhere.
            delimit = CHUNKED
            res_hdrs.append(("Transfer-Encoding", "chunked"))
        else:
            delimit = CLOSE
            res_hdrs.append(("Connection", "close"))

        self._output_start("HTTP/1.1 %s %s" % (status_code, status_phrase),
            res_hdrs, delimit
        )
        return self.res_body, self.res_done

    def res_body(self, chunk):
        "Send part of the response body. May be called zero to many times."
        self._output_body(chunk)

    def res_done(self, err=None):
        """
        Signal the end of the response, whether or not there was a body. MUST
        be called exactly once for each response.

        If err is not None, it is an error dictionary (see the error module)
        indicating that an HTTP-specific (i.e., non-application) error
        occurred in the generation of the response; this is useful for
        debugging.
        """
        self._output_end(err)

    def req_body_pause(self, paused):
        """
        Indicate that the server should pause (True) or unpause (False) the
        request.
        """
        if self._tcp_conn and self._tcp_conn.tcp_connected:
            self._tcp_conn.pause(paused)

    # Methods called by push_tcp

    def _res_body_pause(self, paused):
        "Pause/unpause sending the response body."
        if self._res_body_pause_cb:
            self._res_body_pause_cb(paused)

    def _conn_closed(self):
        "The server connection has closed."
        if self._output_state != WAITING:
            pass # FIXME: any cleanup necessary?

    # Methods called by common.HttpRequestHandler

    def _output(self, chunk):
        "Write a serialised chunk of the response to the connection."
        self._tcp_conn.write(chunk)

    def _input_start(self, top_line, hdr_tuples, conn_tokens,
        transfer_codes, content_length):
        """
        Take the top set of headers from the input stream, parse them
        and queue the request to be processed by the application.

        Returns a true value if the request may carry a body. Raises
        ValueError (after an error response has been generated) when the
        request is rejected.
        """
        assert self._input_state == WAITING, "pipelining not supported"
        # FIXME: pipelining
        try:
            method, _req_line = top_line.split(None, 1)
            uri, req_version = _req_line.rsplit(None, 1)
            self.req_version = float(req_version.rsplit('/', 1)[1])
        except (ValueError, IndexError):
            self._handle_error(ERR_HTTP_VERSION, top_line)
            # FIXME: more fine-grained
            raise ValueError
        if self.req_version == 1.1 \
        and 'host' not in [t[0].lower() for t in hdr_tuples]:
            self._handle_error(ERR_HOST_REQ)
            raise ValueError
        # Reject a first header whose name starts with whitespace. The
        # error response has been sent, so abort like the other checks
        # rather than also dispatching the request to the application.
        if hdr_tuples and hdr_tuples[0][0][:1] in (" ", "\t"):
            self._handle_error(ERR_WHITESPACE_HDR)
            raise ValueError
        for code in transfer_codes:
            # we only support 'identity' and 'chunked' codes
            if code not in ['identity', 'chunked']:
                # FIXME: SHOULD also close connection
                self._handle_error(ERR_TRANSFER_CODE)
                raise ValueError
        # FIXME: MUST 400 request messages with whitespace between
        # name and colon
        self.method = method
        self.connection_hdr = conn_tokens

        log.info("%s server req_start %s %s %s",
            id(self), method, uri, self.req_version
        )
        self.req_body_cb, self.req_done_cb = self.request_handler(
            method, uri, hdr_tuples, self.res_start, self.req_body_pause)
        allows_body = (content_length) or (transfer_codes != [])
        return allows_body

    def _input_body(self, chunk):
        "Process a request body chunk from the wire."
        self.req_body_cb(chunk)

    def _input_end(self):
        "Indicate that the request body is complete."
        self.req_done_cb(None)

    def _input_error(self, err, detail=None):
        """
        Indicate a parsing problem with the request body: close the
        connection and report err to the application's req_done callback.
        """
        # NOTE(review): this annotates the error dictionary passed in (a
        # shared module-level object) in place; kept so the callback still
        # receives the same object as before.
        err['detail'] = detail
        if self._tcp_conn:
            self._tcp_conn.close()
        self._tcp_conn = None
        # No request handler has been attached if the failure happened
        # before a request was dispatched; nothing to notify in that case.
        if self.req_done_cb is not None:
            self.req_done_cb(err)

    def _handle_error(self, err, detail=None):
        """
        Handle a problem with the request by generating an appropriate
        response.

        err is an error dictionary; its 'status' (default 400 Bad Request)
        and 'desc' form the response, with detail appended when available.
        """
        assert self._output_state == WAITING
        # Work on a copy: the error dictionaries are module-level constants,
        # and storing one request's detail on them would leak it into
        # responses to later requests.
        err = dict(err)
        if detail:
            err['detail'] = detail
        status_code, status_phrase = err.get('status', ('400', 'Bad Request'))
        hdrs = [
            ('Content-Type', 'text/plain'),
        ]
        body = err['desc']
        if 'detail' in err:
            body += " (%s)" % err['detail']
        self.res_start(status_code, status_phrase, hdrs, dummy)
        self.res_body(body)
        self.res_done()
281 |
282 |
def test_handler(method, uri, hdrs, res_start, req_pause):
    """
    An extremely simple (and limited) server request_handler.

    Answers every request with "200 OK" and the text/plain body 'foo!',
    and ignores any request body by returning dummy callbacks.
    """
    send_chunk, finish = res_start(
        "200", "OK", [('Content-Type', 'text/plain')], dummy
    )
    send_chunk('foo!')
    finish(None)
    return dummy, dummy
294 |
if __name__ == "__main__":
    # Usage: server.py PORT -- serves test_handler on 127.0.0.1:PORT.
    sys.stderr.write("PID: %s\n" % os.getpid())
    try:
        h, p = '127.0.0.1', int(sys.argv[1])
    except (IndexError, ValueError):
        # Missing or non-numeric port: explain instead of dumping a traceback.
        sys.stderr.write("Usage: %s port\n" % sys.argv[0])
        sys.exit(1)
    server = Server(h, p, test_handler)
    push_tcp.run()
300 |
--------------------------------------------------------------------------------
/src/http_common.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | """
4 | shared HTTP infrastructure
5 |
6 | This module contains utility functions for nbhttp and a base class
7 | for the parsing portions of the client and server.
8 | """
9 |
10 | __author__ = "Mark Nottingham "
11 | __copyright__ = """\
12 | Copyright (c) 2008-2010 Mark Nottingham
13 |
14 | Permission is hereby granted, free of charge, to any person obtaining a copy
15 | of this software and associated documentation files (the "Software"), to deal
16 | in the Software without restriction, including without limitation the rights
17 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
18 | copies of the Software, and to permit persons to whom the Software is
19 | furnished to do so, subject to the following conditions:
20 |
21 | The above copyright notice and this permission notice shall be included in
22 | all copies or substantial portions of the Software.
23 |
24 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
29 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
30 | THE SOFTWARE.
31 | """
32 |
import re

# Patterns used when splitting a header block:
#   lws     - a line break followed by leading whitespace (folded header)
#   hdr_end - the blank line that terminates a header block
lws = re.compile(r"\r?\n[ \t]+", re.M)
hdr_end = re.compile(r"\r?\n\r?\n", re.M)
linesep = "\r\n"

# conn_modes: how the end of a message body is delimited
CLOSE = 'close'
COUNTED = 'counted'
CHUNKED = 'chunked'
NOBODY = 'nobody'

# states
WAITING = 1
HEADERS_DONE = 2

idempotent_methods = ['GET', 'HEAD', 'PUT', 'DELETE', 'OPTIONS', 'TRACE']
safe_methods = ['GET', 'HEAD', 'OPTIONS', 'TRACE']
no_body_status = ['100', '101', '204', '304']
hop_by_hop_hdrs = [
    'connection',
    'keep-alive',
    'proxy-authenticate',
    'proxy-authorization',
    'te',
    'trailers',
    'transfer-encoding',
    'upgrade',
    'proxy-connection',
]
50 |
51 |
52 | from error import ERR_EXTRA_DATA, ERR_CHUNK, ERR_BODY_FORBIDDEN, \
53 | ERR_TOO_MANY_MSGS
54 |
def dummy(*args, **kw):
    """
    Accept any arguments and do nothing; handy as a callback to be ignored.
    """
    return None
58 |
def header_dict(header_tuple, strip=None):
    """
    Given a header tuple, return a dictionary keyed upon the lower-cased
    header names.

    If strip is defined, each header listed (by lower-cased name) will not be
    returned in the dictionary.

    Names and values have surrounding whitespace removed. When a header
    name occurs more than once, only the last value is kept.
    """
    # TODO: return a list of values; currently destructive.
    if strip is None:
        strip = []
    hdrs = {}
    for (n, v) in header_tuple:
        name = n.strip().lower()
        if name in strip:
            # previously the strip argument was accepted but never applied
            continue
        hdrs[name] = v.strip()
    return hdrs
71 |
def get_hdr(hdr_tuples, name):
    """
    Collect every value of the header called name (given in lowercase)
    from a list of (name, value) header tuples, in order of appearance.

    A header line carrying several comma-separated values contributes each
    of them separately, with surrounding whitespace removed. Because of
    that splitting, this is NOT safe for headers whose values may
    themselves contain a comma (e.g., Set-Cookie, or any value with a
    quoted string).
    """
    # TODO: support quoted strings
    values = []
    for hdr in hdr_tuples:
        if hdr[0].lower() != name:
            continue
        for part in hdr[1].split(','):
            values.append(part.strip())
    return values
88 |
89 |
90 | class HttpMessageHandler:
91 | """
92 | This is a base class for something that has to parse and/or serialise
93 | HTTP messages, request or response.
94 |
95 | For parsing, it expects you to override _input_start, _input_body and
96 | _input_end, and call _handle_input when you get bytes from the network.
97 |
98 | For serialising, it expects you to override _output.
99 | """
100 |
101 | def __init__(self):
102 | self.input_header_length = 0
103 | self.input_transfer_length = 0
104 | self._input_buffer = ""
105 | self._input_state = WAITING
106 | self._input_delimit = None
107 | self._input_body_left = 0
108 | self._output_state = WAITING
109 | self._output_delimit = None
110 |
111 | # input-related methods
112 |
113 | def _input_start(self, top_line, hdr_tuples, conn_tokens,
114 | transfer_codes, content_length):
115 | """
116 | Take the top set of headers from the input stream, parse them
117 | and queue the request to be processed by the application.
118 |
119 | Returns boolean allows_body to indicate whether the message allows a
120 | body.
121 | """
122 | raise NotImplementedError
123 |
124 | def _input_body(self, chunk):
125 | "Process a body chunk from the wire."
126 | raise NotImplementedError
127 |
128 | def _input_end(self):
129 | "Indicate that the response body is complete."
130 | raise NotImplementedError
131 |
132 | def _input_error(self, err, detail=None):
133 | "Indicate a parsing problem with the body."
134 | raise NotImplementedError
135 |
136 | def _handle_input(self, instr):
137 | """
138 | Given a chunk of input, figure out what state we're in and handle it,
139 | making the appropriate calls.
140 | """
141 | if self._input_buffer != "":
142 | # will need to move to a list if writev comes around
143 | instr = self._input_buffer + instr
144 | self._input_buffer = ""
145 | if self._input_state == WAITING:
146 | if hdr_end.search(instr): # found one
147 | rest = self._parse_headers(instr)
148 | try:
149 | self._handle_input(rest)
150 | except RuntimeError:
151 | self._input_error(ERR_TOO_MANY_MSGS)
152 | else: # partial headers; store it and wait for more
153 | self._input_buffer = instr
154 | elif self._input_state == HEADERS_DONE:
155 | try:
156 | input_parse = getattr(self, '_handle_%s' %
157 | self._input_delimit)
158 | except AttributeError:
159 | raise Exception, "Unknown input delimiter %s" % \
160 | self._input_delimit
161 | input_parse(instr)
162 | else:
163 | raise Exception, "Unknown state %s" % self._input_state
164 |
165 | def _handle_nobody(self, instr):
166 | "Handle input that shouldn't have a body."
167 | if instr:
168 | # FIXME: will not work with pipelining
169 | self._input_error(ERR_BODY_FORBIDDEN, instr)
170 | else:
171 | self._input_end()
172 | self._input_state = WAITING
173 | # self._handle_input(instr)
174 |
175 | def _handle_close(self, instr):
176 | "Handle input where the body is delimited by the connection closing."
177 | self.input_transfer_length += len(instr)
178 | self._input_body(instr)
179 |
180 | def _handle_chunked(self, instr):
181 | "Handle input where the body is delimited by chunked encoding."
182 | while instr:
183 | if self._input_body_left < 0: # new chunk
184 | instr = self._handle_chunk_new(instr)
185 | elif self._input_body_left > 0:
186 | # we're in the middle of reading a chunk
187 | instr = self._handle_chunk_body(instr)
188 | elif self._input_body_left == 0: # body is done
189 | instr = self._handle_chunk_done(instr)
190 |
191 | def _handle_chunk_new(self, instr):
192 | try:
193 | # they really need to use CRLF
194 | chunk_size, rest = instr.split(linesep, 1)
195 | except ValueError:
196 | # got a CRLF without anything behind it.. wait a bit
197 | if len(instr) > 256:
198 | # OK, this is absurd...
199 | self._input_error(ERR_CHUNK, instr)
200 | else:
201 | self._input_buffer += instr
202 | return
203 | if chunk_size.strip() == "": # ignore bare lines
204 | self._handle_chunked(rest) # FIXME: recursion
205 | return
206 | if ";" in chunk_size: # ignore chunk extensions
207 | chunk_size = chunk_size.split(";", 1)[0]
208 | try:
209 | self._input_body_left = int(chunk_size, 16)
210 | except ValueError:
211 | self._input_error(ERR_CHUNK, chunk_size)
212 | return # blow up if we can't process a chunk.
213 | self.input_transfer_length += len(instr) - len(rest)
214 | return rest
215 |
216 | def _handle_chunk_body(self, instr):
217 | if self._input_body_left < len(instr): # got more than the chunk
218 | this_chunk = self._input_body_left
219 | self._input_body(instr[:this_chunk])
220 | self.input_transfer_length += this_chunk
221 | self._input_body_left = -1
222 | return instr[this_chunk+2:] # +2 consumes the CRLF
223 | elif self._input_body_left == len(instr):
224 | # got the whole chunk exactly
225 | self._input_body(instr)
226 | self.input_transfer_length += self._input_body_left
227 | self._input_body_left = -1
228 | else:
229 | # got partial chunk
230 | self._input_body(instr)
231 | self.input_transfer_length += len(instr)
232 | self._input_body_left -= len(instr)
233 |
234 | def _handle_chunk_done(self, instr):
235 | if len(instr) >= 2 and instr[:2] == linesep:
236 | self._input_state = WAITING
237 | self._input_end()
238 | # self._handle_input(instr[2:]) # pipelining
239 | elif hdr_end.search(instr): # trailers
240 | self._input_state = WAITING
241 | self._input_end()
242 | trailers, rest = hdr_end.split(instr, 1) # TODO: process trailers
243 | # self._handle_input(rest) # pipelining
244 | else: # don't have full headers yet
245 | self._input_buffer = instr
246 |
247 | def _handle_counted(self, instr):
248 | "Handle input where the body is delimited by the Content-Length."
249 | assert self._input_body_left >= 0, \
250 | "message counting problem (%s)" % self._input_body_left
251 | # process body
252 | if self._input_body_left <= len(instr): # got it all (and more?)
253 | self.input_transfer_length += self._input_body_left
254 | self._input_body(instr[:self._input_body_left])
255 | self._input_state = WAITING
256 | if instr[self._input_body_left:]:
257 | # This will catch extra input that isn't on packet boundaries.
258 | self._input_error(ERR_EXTRA_DATA,
259 | instr[self._input_body_left:])
260 | else:
261 | self._input_end()
262 | else: # got some of it
263 | self._input_body(instr)
264 | self.input_transfer_length += len(instr)
265 | self._input_body_left -= len(instr)
266 |
267 | def _parse_headers(self, instr):
268 | """
269 | Given a string that we knows contains a header block (possibly more),
270 | parse the headers out and return the rest. Calls self._input_start
271 | to kick off processing.
272 | """
273 | top, rest = hdr_end.split(instr, 1)
274 | self.input_header_length = len(top)
275 | hdr_lines = lws.sub(" ", top).splitlines() # Fold LWS
276 | try:
277 | top_line = hdr_lines.pop(0)
278 | except IndexError: # empty
279 | return ""
280 | hdr_tuples = []
281 | conn_tokens = []
282 | transfer_codes = []
283 | content_length = None
284 | for line in hdr_lines:
285 | try:
286 | fn, fv = line.split(":", 1)
287 | hdr_tuples.append((fn, fv))
288 | except ValueError:
289 | continue # TODO: flesh out bad header handling
290 | f_name = fn.strip().lower()
291 | f_val = fv.strip()
292 |
293 | # parse connection-related headers
294 | if f_name == "connection":
295 | conn_tokens += [v.strip().lower() for v in f_val.split(',')]
296 | elif f_name == "transfer-encoding": # FIXME: parameters
297 | transfer_codes += [v.strip().lower() for \
298 | v in f_val.split(',')]
299 | elif f_name == "content-length":
300 | if content_length != None:
301 | continue # ignore any C-L past the first.
302 | try:
303 | content_length = int(f_val)
304 | except ValueError:
305 | continue
306 |
307 | # FIXME: WSP between name and colon; request = 400, response = discard
308 | # TODO: remove *and* ignore conn tokens if the message was 1.0
309 |
310 | # ignore content-length if transfer-encoding is present
311 | if transfer_codes != [] and content_length != None:
312 | content_length = None
313 |
314 | try:
315 | allows_body = self._input_start(top_line, hdr_tuples,
316 | conn_tokens, transfer_codes, content_length)
317 | except ValueError: # parsing error of some kind; abort.
318 | return ""
319 |
320 | self._input_state = HEADERS_DONE
321 | if not allows_body:
322 | self._input_delimit = NOBODY
323 | elif len(transfer_codes) > 0:
324 | if 'chunked' in transfer_codes:
325 | self._input_delimit = CHUNKED
326 | self._input_body_left = -1 # flag that we don't know
327 | else:
328 | self._input_delimit = CLOSE
329 | elif content_length != None:
330 | self._input_delimit = COUNTED
331 | self._input_body_left = content_length
332 | else:
333 | self._input_delimit = CLOSE
334 | return rest
335 |
336 | ### output-related methods
337 |
338 | def _output(self, out):
339 | raise NotImplementedError
340 |
341 | def _handle_error(self, err):
342 | raise NotImplementedError
343 |
def _output_start(self, top_line, hdr_tuples, delimit):
    """
    Start outputting an HTTP message.

    top_line is the request or status line, hdr_tuples a list of
    (name, value) header pairs, and delimit the body delimitation mode,
    which is remembered for the later body / end calls.
    """
    self._output_delimit = delimit
    # TODO: strip whitespace?
    header_lines = ["%s: %s" % (name, value) for name, value in hdr_tuples]
    # The two trailing empty strings produce the blank line that ends
    # the header block.
    head = linesep.join([top_line] + header_lines + ["", ""])
    self._output(head)
    self._output_state = HEADERS_DONE
357 |
def _output_body(self, chunk):
    """
    Output a part of an HTTP message body.

    Empty chunks are ignored. When the message is delimited by chunked
    encoding, the chunk is framed with its size in hexadecimal.
    """
    if chunk:
        if self._output_delimit == CHUNKED:
            size = hex(len(chunk))[2:]  # drop the "0x" prefix
            chunk = "%s\r\n%s\r\n" % (size, chunk)
        self._output(chunk)
    #FIXME: body counting
    # self._output_body_sent += len(chunk)
    # assert self._output_body_sent <= self._output_content_length, \
    #     "Too many body bytes sent"
371 |
def _output_end(self, err):
    """
    Finish outputting an HTTP message.

    err is an error dictionary, or None when the message completed
    normally. On error the connection (if any) is closed and forgotten;
    otherwise the message is terminated according to the delimiter chosen
    in _output_start. Raises AssertionError for an unknown delimiter.
    """
    if err:
        self.output_body_cb, self.output_done_cb = dummy, dummy
        # The connection may not exist (e.g. the request is ended with an
        # error before the TCP connection was established), so guard the
        # close instead of failing on None.
        if self._tcp_conn:
            self._tcp_conn.close()
        self._tcp_conn = None
    elif self._output_delimit == NOBODY:
        pass # didn't have a body at all.
    elif self._output_delimit == CHUNKED:
        self._output("0\r\n\r\n") # last-chunk marker, no trailers
    elif self._output_delimit == COUNTED:
        pass # TODO: double-check the length
    elif self._output_delimit == CLOSE:
        # Closing the connection is what ends the message.
        if self._tcp_conn:
            self._tcp_conn.close() # FIXME: abstract out?
    else:
        raise AssertionError(
            "Unknown request delimiter %s" % self._output_delimit
        )
    self._output_state = WAITING
392 |
--------------------------------------------------------------------------------
/src/client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | """
4 | Non-Blocking HTTP Client
5 |
6 | This library allows implementation of an HTTP/1.1 client that is
7 | "non-blocking," "asynchronous" and "event-driven" -- i.e., it achieves very
8 | high performance and concurrency, so long as the application code does not
9 | block (e.g., upon network, disk or database access). Blocking on one response
10 | will block the entire client.
11 |
12 | Instantiate a Client with the following parameter:
13 | - res_start (callable)
14 |
15 | Call req_start on the Client instance to begin a request. It takes the
16 | following arguments:
17 | - method (string)
18 | - uri (string)
19 | - req_hdrs (list of (name, value) tuples)
20 | - req_body_pause (callable)
21 | and returns:
22 | - req_body (callable)
23 | - req_done (callable)
24 |
25 | Call req_body to send part of the request body. It takes the following
26 | argument:
27 | - chunk (string)
28 |
29 | Call req_done when the request is complete, whether or not it contains a
30 | body. It takes the following argument:
31 | - err (error dictionary, or None for no error)
32 |
33 | req_body_pause is called when the client needs you to temporarily stop sending
34 | the request body, or restart. It must take the following argument:
35 | - paused (boolean; True means pause, False means unpause)
36 |
37 | res_start is called to start the response, and must take the HTTP version
38 | of the response (e.g., 1.1) followed by these arguments:
39 | - status_code (string)
40 | - status_phrase (string)
41 | - res_hdrs (list of (name, value) tuples)
42 | - res_body_pause
43 | It must return:
44 | - res_body (callable)
45 | - res_done (callable)
46 |
47 | res_body is called when part of the response body is available. It must accept
48 | the following parameter:
49 | - chunk (string)
50 |
51 | res_done is called when the response is finished, and must accept the
52 | following argument:
53 | - err (error dictionary, or None if no error)
54 |
55 | See the error module for the complete list of valid error dictionaries.
56 |
57 | Where possible, errors in the response will be indicated with the appropriate
58 | 5xx HTTP status code (i.e., by calling res_start, res_body and res_done with
59 | an error dictionary). However, if a response has already been started, the
60 | connection will be dropped (for example, when the response chunking or
61 | indicated length are incorrect). In these cases, res_done will still be called
62 | with the appropriate error dictionary.
63 | """
64 |
65 | __author__ = "Mark Nottingham "
66 | __copyright__ = """\
67 | Copyright (c) 2008-2010 Mark Nottingham
68 |
69 | Permission is hereby granted, free of charge, to any person obtaining a copy
70 | of this software and associated documentation files (the "Software"), to deal
71 | in the Software without restriction, including without limitation the rights
72 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
73 | copies of the Software, and to permit persons to whom the Software is
74 | furnished to do so, subject to the following conditions:
75 |
76 | The above copyright notice and this permission notice shall be included in
77 | all copies or substantial portions of the Software.
78 |
79 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
80 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
81 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
82 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
83 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
84 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
85 | THE SOFTWARE.
86 | """
87 |
88 | import errno
89 | import os
90 | from urlparse import urlsplit, urlunsplit
91 |
92 | import push_tcp
93 | from http_common import HttpMessageHandler, \
94 | CLOSE, COUNTED, NOBODY, \
95 | WAITING, \
96 | idempotent_methods, no_body_status, hop_by_hop_hdrs, \
97 | dummy, get_hdr
98 | from error import ERR_URL, ERR_CONNECT, \
99 | ERR_READ_TIMEOUT, ERR_HTTP_VERSION
100 |
101 | req_remove_hdrs = hop_by_hop_hdrs + ['host']
102 |
103 | # TODO: proxy support
104 | # TODO: next-hop version cache for Expect/Continue, etc.
105 |
class Client(HttpMessageHandler):
    """
    An asynchronous HTTP client.

    Class attributes (may be overridden on an instance):
      connect_timeout - seconds to wait for the TCP connection, or None
      read_timeout    - seconds to wait for response data, or None
      retry_limit     - how many times an idempotent request is retried
                        when the server closes the connection before any
                        response has arrived
    """
    connect_timeout = None
    read_timeout = None
    retry_limit = 2

    def __init__(self, res_start_cb):
        """
        res_start_cb is called when a response (real, or generated for an
        error) starts; see the module docstring for its contract.
        """
        HttpMessageHandler.__init__(self)
        self.res_start_cb = res_start_cb
        self.res_body_cb = None
        self.res_done_cb = None
        self.method = None
        self.uri = None
        self.req_hdrs = []
        self._host = None
        self._port = None
        self._tcp_conn = None
        self._conn_reusable = False
        self._req_body_pause_cb = None
        self._retries = 0
        self._read_timeout_ev = None
        self._output_buffer = []

    def __getstate__(self):
        "Pickle only the request description and the input counters."
        props = ['method', 'uri', 'req_hdrs',
                 'input_header_length', 'input_transfer_length']
        return dict([(k, v) for (k, v) in self.__dict__.items()
                     if k in props])

    def req_start(self, method, uri, req_hdrs, req_body_pause):
        """
        Start a request to uri using method, where
        req_hdrs is a list of (field_name, field_value) for
        the request headers.

        Hop-by-hop headers and Host are removed from req_hdrs; the client
        then adds its own Host and "Connection: keep-alive" headers.
        req_body_pause is called with a boolean when the application
        should pause / resume sending the request body.

        Returns a (req_body, req_done) tuple. When the URI cannot be used
        (non-http scheme, non-integer port) an error response is generated
        through res_start_cb and (dummy, dummy) is returned instead.
        """
        self._req_body_pause_cb = req_body_pause
        req_hdrs = [i for i in req_hdrs
                    if i[0].lower() not in req_remove_hdrs]
        (scheme, authority, path, query, fragment) = urlsplit(uri)
        if scheme.lower() != 'http':
            self._handle_error(ERR_URL, "Only HTTP URLs are supported")
            return dummy, dummy
        if "@" in authority:
            # userinfo is not sent to the server
            userinfo, authority = authority.split("@", 1)
        if ":" in authority:
            self._host, port = authority.rsplit(":", 1)
            try:
                self._port = int(port)
            except ValueError:
                self._handle_error(ERR_URL, "Non-integer port in URL")
                return dummy, dummy
        else:
            self._host, self._port = authority, 80
        if path == "":
            path = "/"
        # Only the path and query go on the request line.
        uri = urlunsplit(('', '', path, query, ''))
        self.method, self.uri, self.req_hdrs = method, uri, req_hdrs
        self.req_hdrs.append(("Host", authority))
        self.req_hdrs.append(("Connection", "keep-alive"))
        # A request only carries a body when it declares a valid
        # Content-Length.
        try:
            body_len = int(get_hdr(req_hdrs, "content-length").pop(0))
            delimit = COUNTED
        except (IndexError, ValueError):
            body_len = None
            delimit = NOBODY
        # Output is buffered by _output until the connection is available.
        self._output_start("%s %s HTTP/1.1" % (self.method, self.uri),
            self.req_hdrs, delimit
        )
        _idle_pool.attach(self._host, self._port, self._handle_connect,
            self._handle_connect_error, self.connect_timeout
        )
        return self.req_body, self.req_done
        # TODO: if we sent Expect: 100-continue, don't wait forever
        # (i.e., schedule something)

    def req_body(self, chunk):
        "Send part of the request body. May be called zero to many times."
        # FIXME: self._handle_error(ERR_LEN_REQ)
        self._output_body(chunk)

    def req_done(self, err=None):
        """
        Signal the end of the request, whether or not there was a body. MUST
        be called exactly once for each request.

        If err is not None, it is an error dictionary (see the error module)
        indicating that an HTTP-specific (i.e., non-application) error
        occurred while satisfying the request; this is useful for debugging.
        """
        self._output_end(err)

    def res_body_pause(self, paused):
        "Temporarily stop / restart sending the response body."
        if self._tcp_conn and self._tcp_conn.tcp_connected:
            self._tcp_conn.pause(paused)

    # Methods called by push_tcp

    def _handle_connect(self, tcp_conn):
        """
        The connection has succeeded.

        Returns the (read_cb, close_cb, pause_cb) tuple for the connection.
        """
        self._tcp_conn = tcp_conn
        self._output("") # kick the output buffer
        if self.read_timeout:
            self._read_timeout_ev = push_tcp.schedule(
                self.read_timeout, self._handle_error,
                ERR_READ_TIMEOUT, 'connect'
            )
        return self._handle_input, self._conn_closed, self._req_body_pause

    def _handle_connect_error(self, err):
        "The connection has failed; err is an (errno, message) pair."
        if err[0] == errno.EINVAL: # weirdness.
            err = (errno.ECONNREFUSED, os.strerror(errno.ECONNREFUSED))
        self._handle_error(ERR_CONNECT, err[1])

    def _conn_closed(self):
        "The server closed the connection."
        self._clear_read_timeout()
        if self._input_buffer:
            self._handle_input("")
        if self._input_delimit == CLOSE:
            # The close is what marks the end of the response body.
            self._input_end()
        elif self._input_state == WAITING:
            # Nothing of the response has arrived yet; only idempotent
            # requests may be retried.
            if self.method in idempotent_methods:
                if self._retries < self.retry_limit:
                    self._retry()
                else:
                    self._handle_error(ERR_CONNECT,
                        "Tried to connect %s times." % (self._retries + 1)
                    )
            else:
                self._handle_error(ERR_CONNECT,
                    "Can't retry %s method" % self.method
                )
        else:
            self._input_error(ERR_CONNECT,
                "Server dropped connection before the response was received."
            )

    def _retry(self):
        "Retry the request."
        # NOTE(review): _output empties the output buffer once it has been
        # written to a connection, so it looks like nothing is re-sent on
        # the new connection -- confirm.
        self._clear_read_timeout()
        self._retries += 1
        _idle_pool.attach(self._host, self._port, self._handle_connect,
            self._handle_connect_error, self.connect_timeout
        )

    def _req_body_pause(self, paused):
        "The client needs the application to pause/unpause the request body."
        if self._req_body_pause_cb:
            self._req_body_pause_cb(paused)

    # Methods called by common.HttpMessageHandler

    def _input_start(self, top_line, hdr_tuples, conn_tokens,
        transfer_codes, content_length):
        """
        Take the top set of headers from the input stream, parse them
        and hand the start of the response to the application.

        Returns True if the response may carry a body. Raises ValueError
        (after generating an error response) when the status line cannot
        be parsed.
        """
        self._clear_read_timeout()
        try:
            res_version, status_txt = top_line.split(None, 1)
            res_version = float(res_version.rsplit('/', 1)[1])
            # TODO: check that the protocol is HTTP
        except (ValueError, IndexError):
            self._handle_error(ERR_HTTP_VERSION, top_line)
            raise ValueError
        try:
            res_code, res_phrase = status_txt.split(None, 1)
        except ValueError:
            # no reason phrase
            res_code = status_txt.rstrip()
            res_phrase = ""
        if 'close' not in conn_tokens:
            if (res_version == 1.0 and 'keep-alive' in conn_tokens) or \
                res_version > 1.0:
                self._conn_reusable = True
        if self.read_timeout:
            self._read_timeout_ev = push_tcp.schedule(
                 self.read_timeout, self._input_error,
                 ERR_READ_TIMEOUT, 'start'
            )
        self.res_body_cb, self.res_done_cb = self.res_start_cb(
            res_version, res_code, res_phrase,
            hdr_tuples, self.res_body_pause
        )
        # A response to HEAD never has a body, whatever its headers say;
        # neither do the status codes in no_body_status.
        allows_body = (res_code not in no_body_status) \
            and (self.method != "HEAD")
        return allows_body

    def _input_body(self, chunk):
        "Process a response body chunk from the wire."
        self._clear_read_timeout()
        self.res_body_cb(chunk)
        if self.read_timeout:
            self._read_timeout_ev = push_tcp.schedule(self.read_timeout,
                self._input_error, ERR_READ_TIMEOUT, 'body'
            )

    def _input_end(self):
        "Indicate that the response body is complete."
        self._clear_read_timeout()
        if self._tcp_conn:
            if self._tcp_conn.tcp_connected and self._conn_reusable:
                # Note that we don't reset read_cb; if more bytes come in
                # before the next request, we'll still get them.
                _idle_pool.release(self._tcp_conn)
            else:
                self._tcp_conn.close()
            self._tcp_conn = None
        self.res_done_cb(None)

    def _input_error(self, err, detail=None):
        "Indicate a parsing problem with the response body."
        self._clear_read_timeout()
        if self._tcp_conn:
            self._tcp_conn.close()
            self._tcp_conn = None
        # NOTE(review): err is modified in place; if it is a shared
        # dictionary from the error module the detail is visible to every
        # other user of it -- confirm this is intended.
        err['detail'] = detail
        self.res_done_cb(err)

    def _output(self, chunk):
        "Buffer chunk, and flush the buffer if the connection is usable."
        self._output_buffer.append(chunk)
        if self._tcp_conn and self._tcp_conn.tcp_connected:
            self._tcp_conn.write("".join(self._output_buffer))
            self._output_buffer = []

    # misc

    def _clear_read_timeout(self):
        "Cancel the pending read timeout event, if there is one."
        if self._read_timeout_ev:
            self._read_timeout_ev.delete()
            self._read_timeout_ev = None

    def _handle_error(self, err, detail=None):
        """
        Handle a problem with the request by generating an appropriate
        response.

        err is an error dictionary; its 'status' entry, when present,
        supplies the (code, phrase) of the generated response, which
        otherwise defaults to 504 Gateway Timeout. detail, when given, is
        stored in err and appended to the response body.
        """
        assert self._input_state == WAITING
        self._clear_read_timeout()
        if self._tcp_conn:
            self._tcp_conn.close()
            self._tcp_conn = None
        if detail:
            err['detail'] = detail
        status_code, status_phrase = err.get('status',
            ('504', 'Gateway Timeout')
        )
        hdrs = [
            ('Content-Type', 'text/plain'),
            ('Connection', 'close'),
        ]
        body = err['desc']
        if 'detail' in err:
            body += " (%s)" % err['detail']
        res_body_cb, res_done_cb = self.res_start_cb(
              "1.1", status_code, status_phrase, hdrs, dummy)
        res_body_cb(str(body))
        # Finish from the event loop rather than from inside this call.
        push_tcp.schedule(0, res_done_cb, err)
370 |
371 |
372 | class _HttpConnectionPool:
373 | "A pool of idle TCP connections for use by the client."
374 | _conns = {}
375 |
376 | def attach(self, host, port, handle_connect,
377 | handle_connect_error, connect_timeout):
378 | "Find an idle connection for (host, port), or create a new one."
379 | while True:
380 | try:
381 | tcp_conn = self._conns[(host, port)].pop()
382 | except (IndexError, KeyError):
383 | push_tcp.create_client(host, port,
384 | handle_connect, handle_connect_error, connect_timeout
385 | )
386 | break
387 | if tcp_conn.tcp_connected:
388 | tcp_conn.read_cb, tcp_conn.close_cb, tcp_conn.pause_cb = \
389 | handle_connect(tcp_conn)
390 | break
391 |
392 | def release(self, tcp_conn):
393 | "Add an idle connection back to the pool."
394 | if tcp_conn.tcp_connected:
395 | def idle_close():
396 | "Remove the connection from the pool when it closes."
397 | try:
398 | self._conns[
399 | (tcp_conn.host, tcp_conn.port)
400 | ].remove(tcp_conn)
401 | except ValueError:
402 | pass
403 | tcp_conn.close_cb = idle_close
404 | if not self._conns.has_key((tcp_conn.host, tcp_conn.port)):
405 | self._conns[(tcp_conn.host, tcp_conn.port)] = [tcp_conn]
406 | else:
407 | self._conns[(tcp_conn.host, tcp_conn.port)].append(tcp_conn)
408 |
409 | _idle_pool = _HttpConnectionPool()
410 |
411 |
def test_client(request_uri, out, err):
    """
    A simple demonstration of a client.

    Sends a GET for request_uri, prints the response status line and
    headers, passes body chunks to out, reports any error through err and
    stops the event loop when the response is done.
    """

    def printer(version, status, phrase, headers, res_pause):
        "Print the response headers."
        status_line = " ".join(
            ["HTTP/%s" % version, str(status), str(phrase)]
        )
        print(status_line)
        print("\n".join(["%s:%s" % header for header in headers]))
        print("")

        def body(chunk):
            out(chunk)

        def done(err_msg):
            if err_msg:
                report = "\n*** ERROR: %s (%s)\n" % (
                    err_msg['desc'], err_msg['detail']
                )
                err(report)
            push_tcp.stop()

        return body, done

    client = Client(printer)
    req_body_write, req_done = client.req_start(
        "GET", request_uri, [], dummy
    )
    # a GET has no body, so the request is complete straight away
    req_done(None)
    push_tcp.run()
433 |
if __name__ == "__main__":
    # Fetch the URI given as the first command-line argument, writing the
    # body to stdout and errors to stderr.
    import sys
    request_uri = sys.argv[1]
    test_client(request_uri, sys.stdout.write, sys.stderr.write)
437 |
--------------------------------------------------------------------------------
/src/push_tcp.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | """
4 | push-based asynchronous TCP
5 |
6 | This is a generic library for building event-based / asynchronous
7 | TCP servers and clients.
8 |
9 | By default, it uses the asyncore library included with Python.
10 | However, if the pyevent library
11 | <http://www.monkey.org/~dugsong/pyevent/> is available, it will
12 | use that, offering higher concurrency and, perhaps, performance.
13 |
14 | It uses a push model; i.e., the network connection pushes data to
15 | you (using a callback), and you push data to the network connection
16 | (using a direct method invocation).
17 |
18 | *** Building Clients
19 |
20 | To connect to a server, use create_client;
21 | > host = 'www.example.com'
22 | > port = 80
23 | > push_tcp.create_client(host, port, conn_handler, error_handler)
24 |
25 | conn_handler will be called with the tcp_conn as the argument
26 | when the connection is made. See "Working with Connections"
27 | below for details.
28 |
29 | error_handler will be called if the connection can't be made for some reason.
30 |
31 | > def error_handler(err):
32 | >     print "can't connect: %s" % err[1]
33 |
34 | *** Building Servers
35 |
36 | To start listening, use create_server;
37 |
38 | > server = push_tcp.create_server(host, port, conn_handler)
39 |
40 | conn_handler is called every time a new client connects; see
41 | "Working with Connections" below for details.
42 |
43 | The server object itself keeps track of all of the open connections, and
44 | can be used to do things like idle connection management, etc.
45 |
46 | *** Working with Connections
47 |
48 | Every time a new connection is established -- whether as a client
49 | or as a server -- the conn_handler given is called with tcp_conn
50 | as its argument;
51 |
52 | > def conn_handler(tcp_conn):
53 | >     print "connected to %s:%s" % (tcp_conn.host, tcp_conn.port)
54 | > return read_cb, close_cb, pause_cb
55 |
56 | It must return a (read_cb, close_cb, pause_cb) tuple.
57 |
58 | read_cb will be called every time incoming data is available from
59 | the connection;
60 |
61 | > def read_cb(data):
62 | > print "got some data:", data
63 |
64 | When you want to write to the connection, just write to it:
65 |
66 | > tcp_conn.write(data)
67 |
68 | If you want to close the connection from your side, just call close:
69 |
70 | > tcp_conn.close()
71 |
72 | Note that this will flush any data already written.
73 |
74 | If the other side closes the connection, close_cb will be called;
75 |
76 | > def close_cb():
77 | > print "oops, they don't like us any more..."
78 |
79 | If you write too much data to the connection and the buffers fill up,
80 | pause_cb will be called with True to tell you to stop sending data
81 | temporarily;
82 |
83 | > def pause_cb(paused):
84 | > if paused:
85 | > # stop sending data
86 | > else:
87 | > # it's OK to start again
88 |
89 | Note that this is advisory; if you ignore it, the data will still be
90 | buffered, but the buffer will grow.
91 |
92 | Likewise, if you want to pause the connection because your buffers
93 | are full, call pause;
94 |
95 | > tcp_conn.pause(True)
96 |
97 | but don't forget to tell it when it's OK to send data again;
98 |
99 | > tcp_conn.pause(False)
100 |
101 | *** Timed Events
102 |
103 | It's often useful to schedule an event to be run some time in the future;
104 |
105 | > push_tcp.schedule(10, cb, "foo")
106 |
107 | This example will schedule the function 'cb' to be called with the argument
108 | "foo" ten seconds in the future.
109 |
110 | *** Running the loop
111 |
112 | In all cases (clients, servers, and timed events), you'll need to start
113 | the event loop before anything actually happens;
114 |
115 | > push_tcp.run()
116 |
117 | To stop it, just stop it;
118 |
119 | > push_tcp.stop()
120 | """
121 |
122 | __author__ = "Mark Nottingham "
123 | __copyright__ = """\
124 | Copyright (c) 2008-2010 Mark Nottingham
125 |
126 | Permission is hereby granted, free of charge, to any person obtaining a copy
127 | of this software and associated documentation files (the "Software"), to deal
128 | in the Software without restriction, including without limitation the rights
129 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
130 | copies of the Software, and to permit persons to whom the Software is
131 | furnished to do so, subject to the following conditions:
132 |
133 | The above copyright notice and this permission notice shall be included in
134 | all copies or substantial portions of the Software.
135 |
136 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
137 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
138 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
139 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
140 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
141 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
142 | THE SOFTWARE.
143 | """
144 |
145 | import asyncore
146 | import bisect
147 | import errno
148 | import os
149 | import sys
150 | import socket
151 | import time
152 |
153 | try:
154 | import event # http://www.monkey.org/~dugsong/pyevent/
155 | except ImportError:
156 | event = None
157 |
class _TcpConnection(asyncore.dispatcher):
    """
    Base class for a TCP connection.

    Wraps an already-connected socket and drives it either through
    pyevent (when the `event` module is available) or through asyncore.
    Incoming data is pushed to read_cb, a close by the peer is reported
    through close_cb, and pause_cb is told when the write buffer grows
    past / shrinks below write_bufsize entries.
    """
    write_bufsize = 16
    read_bufsize = 1024 * 16
    # errno values that mean the connection is gone.
    _closed_errnos = (errno.EBADF, errno.ECONNRESET, errno.ESHUTDOWN,
                      errno.ECONNABORTED, errno.ECONNREFUSED,
                      errno.ENOTCONN, errno.EPIPE)
    # errno values that mean "not ready yet, try again later".
    _retry_errnos = (errno.EWOULDBLOCK, errno.EAGAIN)

    def __init__(self, sock, host, port):
        self.socket = sock
        self.host = host
        self.port = port
        self.read_cb = None
        self.close_cb = None
        self._close_cb_called = False
        self.pause_cb = None
        self.tcp_connected = True # we assume a connected socket
        self._paused = False # TODO: should be paused by default
        self._closing = False
        self._write_buffer = []
        if event:
            self._revent = event.read(sock, self.handle_read)
            self._wevent = event.write(sock, self.handle_write)
        else: # asyncore
            asyncore.dispatcher.__init__(self, sock)

    def __repr__(self):
        status = [self.__class__.__module__+"."+self.__class__.__name__]
        if self.tcp_connected:
            status.append('connected')
        status.append('%s:%s' % (self.host, self.port))
        if event:
            status.append('event-based')
        if self._paused:
            status.append('paused')
        if self._closing:
            status.append('closing')
        if self._close_cb_called:
            status.append('close cb called')
        if self._write_buffer:
            status.append('%s write buffered' % len(self._write_buffer))
        return "<%s at %#x>" % (", ".join(status), id(self))

    def handle_connect(self): # asyncore
        pass

    def handle_read(self):
        """
        The connection has data ready for reading; call read_cb
        if appropriate.

        In event mode the read event is returned while the connection
        should keep being watched for input.
        """
        try:
            data = self.socket.recv(self.read_bufsize)
        except socket.error:
            code = sys.exc_info()[1].args[0]
            if code in self._retry_errnos:
                # Nothing to read after all; keep waiting for input
                # rather than treating it as a failure.
                if event and not self._paused:
                    return self._revent
                return
            if code in self._closed_errnos:
                self.conn_closed()
                return
            raise
        if not data:
            # an empty read means the peer closed the connection
            self.conn_closed()
        else:
            self.read_cb(data)
        if event:
            if self.read_cb and self.tcp_connected and not self._paused:
                return self._revent

    def handle_write(self):
        """
        The connection is ready for writing; write any buffered data.

        In event mode the write event is returned while there is still
        something to flush.
        """
        if len(self._write_buffer) > 0:
            data = "".join(self._write_buffer)
            try:
                sent = self.socket.send(data)
            except socket.error:
                code = sys.exc_info()[1].args[0]
                if code in self._retry_errnos:
                    # Not writable after all. The data stays buffered; in
                    # event mode hand back the write event, as the end of
                    # this method does, so the flush is attempted again.
                    if event:
                        return self._wevent
                    return
                if code in self._closed_errnos:
                    self.conn_closed()
                    return
                raise
            if sent < len(data):
                # partial write: keep the unsent remainder
                self._write_buffer = [data[sent:]]
            else:
                self._write_buffer = []
        if self.pause_cb and len(self._write_buffer) < self.write_bufsize:
            self.pause_cb(False)
        if self._closing:
            self.close()
        if event:
            if self.tcp_connected \
            and (len(self._write_buffer) > 0 or self._closing):
                return self._wevent

    def conn_closed(self):
        """
        The connection has been closed by the other side. Do local cleanup
        and then call close_cb.
        """
        self.tcp_connected = False
        if self._close_cb_called:
            return
        elif self.close_cb:
            self._close_cb_called = True
            self.close_cb()
        else:
            # uncomfortable race condition here, so we try again.
            # not great, but ok for now.
            schedule(1, self.conn_closed)
    handle_close = conn_closed # for asyncore

    def write(self, data):
        "Write data to the connection."
        # assert not self._paused
        self._write_buffer.append(data)
        if self.pause_cb and len(self._write_buffer) > self.write_bufsize:
            self.pause_cb(True)
        if event:
            if not self._wevent.pending():
                self._wevent.add()

    def pause(self, paused):
        """
        Temporarily stop/start reading from the connection and pushing
        it to the app.
        """
        if event:
            if paused:
                if self._revent.pending():
                    self._revent.delete()
            else:
                if not self._revent.pending():
                    self._revent.add()
        self._paused = paused

    def close(self):
        "Flush buffered data (if any) and close the connection."
        self.pause(True)
        if len(self._write_buffer) > 0:
            # handle_write closes once the buffer has drained
            self._closing = True
        else:
            self.tcp_connected = False
            if event:
                if self._revent.pending():
                    self._revent.delete()
                if self._wevent.pending():
                    self._wevent.delete()
                self.socket.close()
            else:
                asyncore.dispatcher.close(self)

    def readable(self):
        "asyncore-specific readable method"
        return self.read_cb and self.tcp_connected and not self._paused

    def writable(self):
        "asyncore-specific writable method"
        return self.tcp_connected and \
            (len(self._write_buffer) > 0 or self._closing)

    def handle_error(self):
        """
        asyncore-specific misc error method.
        """
        raise
324 |
325 |
def create_server(host, port, conn_handler):
    """
    Listen to host:port and send connections to conn_handler.

    Returns the attach_server object bound to the listening socket.
    """
    sock = server_listen(host, port)
    return attach_server(host, port, sock, conn_handler)
330 |
def server_listen(host, port):
    """
    Return a non-blocking socket listening to host:port.

    SO_REUSEADDR is set before binding. If binding or listening fails the
    socket is closed and the original exception is re-raised.
    """
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sock.setblocking(0)
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        sock.bind((host, port))
        sock.listen(socket.SOMAXCONN)
    except Exception:
        # don't leak the descriptor when the address can't be used
        sock.close()
        raise
    return sock
339 |
class attach_server(asyncore.dispatcher):
    """
    Attach a server to a listening socket.

    Every accepted connection is wrapped in a _TcpConnection (labelled
    with this server's host and port) and handed to conn_handler, which
    returns the connection's (read_cb, close_cb, pause_cb).
    """
    def __init__(self, host, port, sock, conn_handler):
        self.host = host
        self.port = port
        self.conn_handler = conn_handler
        if not event: # asyncore
            asyncore.dispatcher.__init__(self, sock=sock)
            self.accepting = True
        else:
            accept_ev = event.event(self.handle_accept, handle=sock,
                evtype=event.EV_READ|event.EV_PERSIST)
            accept_ev.add()

    def handle_accept(self, *args):
        "Accept a pending connection and pass it to conn_handler."
        if event:
            accept = args[1].accept
        else: # asyncore
            accept = self.accept
        try:
            conn, addr = accept()
        except TypeError:
            # sometimes accept() returns None if we have
            # multiple processes listening
            return
        tcp_conn = _TcpConnection(conn, self.host, self.port)
        callbacks = self.conn_handler(tcp_conn)
        tcp_conn.read_cb, tcp_conn.close_cb, tcp_conn.pause_cb = callbacks

    def handle_error(self):
        "Stop the loop and re-raise the error being handled."
        stop() # FIXME: handle unscheduled errors more gracefully
        raise
370 |
class create_client(asyncore.dispatcher):
    """
    An asynchronous TCP client.

    Starts a non-blocking connection to host:port. On success,
    conn_handler is called with the new _TcpConnection and must return
    its (read_cb, close_cb, pause_cb). On failure, or when
    connect_timeout seconds pass first, connect_error_handler is called
    once with an (errno, message) style error.
    """
    def __init__(self, host, port, conn_handler,
        connect_error_handler, connect_timeout=None):
        self.host = host
        self.port = port
        self.conn_handler = conn_handler
        self.connect_error_handler = connect_error_handler
        self._timeout_ev = None
        self._error_sent = False
        # TODO: socket.getaddrinfo(); needs to be non-blocking.
        if event:
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            sock.setblocking(0)
            event.write(sock, self.handle_connect, sock).add()
            try:
                # FIXME: check for DNS errors, etc.
                err = sock.connect_ex((host, port))
            except (socket.error, socket.gaierror):
                self.handle_conn_error()
                return
            # 0 means the connection completed immediately; the other
            # values mean it is still in progress. Neither is a failure.
            if err not in (0, errno.EINPROGRESS, errno.EALREADY,
                           errno.EWOULDBLOCK):
                self.handle_conn_error((err, os.strerror(err)))
                return
        else: # asyncore
            asyncore.dispatcher.__init__(self)
            self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
            try:
                self.connect((host, port))
            # exceptions should be caught by handle_error
            except (socket.error, socket.gaierror):
                self.handle_conn_error()
                return
        if connect_timeout:
            # Go through handle_conn_error so that the timeout is recorded
            # in _error_sent; a connection completing afterwards is then
            # ignored by handle_connect instead of reaching conn_handler.
            self._timeout_ev = schedule(connect_timeout,
                self.handle_conn_error,
                (errno.ETIMEDOUT, os.strerror(errno.ETIMEDOUT))
            )

    def handle_connect(self, sock=None):
        "The connection is established; hand it to conn_handler."
        if self._timeout_ev:
            self._timeout_ev.delete()
        if self._error_sent:
            return
        if sock is None: # asyncore
            sock = self.socket
        tcp_conn = _TcpConnection(sock, self.host, self.port)
        tcp_conn.read_cb, tcp_conn.close_cb, tcp_conn.pause_cb = \
            self.conn_handler(tcp_conn)

    def handle_read(self): # asyncore
        pass

    def handle_write(self): # asyncore
        pass

    def handle_conn_error(self, ex_value=None):
        """
        Report a connection failure to connect_error_handler, at most once.

        ex_value is an (errno, message) pair; when omitted, the exception
        currently being handled is used. Exceptions other than
        socket.error / socket.gaierror are re-raised.
        """
        if ex_value is None:
            ex_type, ex_value = sys.exc_info()[:2]
        else:
            ex_type = socket.error
        if ex_type in [socket.error, socket.gaierror]:
            if ex_value[0] == errno.ECONNREFUSED:
                return # OS will retry
            if self._timeout_ev:
                self._timeout_ev.delete()
            if self._error_sent:
                return
            elif self.connect_error_handler:
                self._error_sent = True
                self.connect_error_handler(ex_value)
        else:
            if self._timeout_ev:
                self._timeout_ev.delete()
            raise

    def handle_error(self):
        "Stop the loop and re-raise the error being handled."
        stop() # FIXME: handle unscheduled errors more gracefully
        raise
456 |
457 |
458 | # adapted from Medusa
459 | class _AsyncoreLoop:
460 | "Asyncore main loop + event scheduling."
461 | def __init__(self):
462 | self.events = []
463 | self.num_channels = 0
464 | self.max_channels = 0
465 | self.timeout = 1
466 | self.granularity = 1
467 | self.socket_map = asyncore.socket_map
468 | self._now = None
469 | self._running = False
470 |
471 | def run(self):
472 | "Start the loop."
473 | last_event_check = 0
474 | self._running = True
475 | while (self.socket_map or self.events) and self._running:
476 | self._now = time.time()
477 | if (self._now - last_event_check) >= self.granularity:
478 | last_event_check = self._now
479 | for event in self.events:
480 | when, what = event
481 | if self._now >= when:
482 | try:
483 | self.events.remove(event)
484 | except ValueError:
485 | # a previous event may have removed this one.
486 | continue
487 | what()
488 | else:
489 | break
490 | # sample the number of channels
491 | n = len(self.socket_map)
492 | self.num_channels = n
493 | if n > self.max_channels:
494 | self.max_channels = n
495 | asyncore.poll(self.timeout) # TODO: use poll2 when available
496 |
497 | def stop(self):
498 | "Stop the loop."
499 | self.socket_map.clear()
500 | self.events = []
501 | self._now = None
502 | self._running = False
503 |
504 | def time(self):
505 | "Return the current time (to avoid a system call)."
506 | return self._now or time.time()
507 |
508 | def schedule(self, delta, callback, *args):
509 | "Schedule callable callback to be run in delta seconds with *args."
510 | def cb():
511 | if callback:
512 | callback(*args)
513 | new_event = (self.time() + delta, cb)
514 | events = self.events
515 | bisect.insort(events, new_event)
516 | class event_holder:
517 | def __init__(self):
518 | self._deleted = False
519 | def delete(self):
520 | if not self._deleted:
521 | try:
522 | events.remove(new_event)
523 | self._deleted = True
524 | except ValueError: # already gone
525 | pass
526 | return event_holder()
527 |
class _RunningFlag(object):
    """
    Truth-testable view of whether the selected loop is currently running.

    A plain bool copied at import time would never change afterwards; this
    object asks the supplied zero-argument callable each time it is tested.
    """
    def __init__(self, probe):
        self._probe = probe

    def __nonzero__(self):
        return bool(self._probe())
    __bool__ = __nonzero__ # Python 3 name for the same hook

    def __eq__(self, other):
        return bool(self) == other

    def __ne__(self, other):
        return bool(self) != other

    def __repr__(self):
        return repr(bool(self))


_event_running = False
def _event_run(*args):
    "Record that the loop is running, then enter event.dispatch()."
    # without the global statement the assignment only binds a local
    global _event_running
    _event_running = True
    event.dispatch(*args)

def _event_stop(*args):
    "Record that the loop has stopped, then call event.abort()."
    global _event_running
    _event_running = False
    event.abort(*args)

# Pick the loop implementation: the event module when it is available,
# otherwise the asyncore-based loop.
if event:
    schedule = event.timeout
    run = _event_run
    stop = _event_stop
    now = time.time
    running = _RunningFlag(lambda: _event_running)
else:
    _loop = _AsyncoreLoop()
    schedule = _loop.schedule
    run = _loop.run
    stop = _loop.stop
    now = _loop.time
    running = _RunningFlag(lambda: _loop._running)
550 |
--------------------------------------------------------------------------------