├── .gitignore ├── README.md ├── requirements.txt ├── setup.py ├── test.py └── tornado_proxy ├── __init__.py └── proxy.py /.gitignore: -------------------------------------------------------------------------------- 1 | /MANIFEST 2 | /build/ 3 | /dist/ 4 | /doc/_build/ 5 | *.pyc 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Asynchronous HTTP proxy with tunnelling support 2 | 3 | Built using Tornado (requirements.txt pins version 5.1.1; the `async`/`await` code needs Python 3.5+), supports HTTP GET, POST and 4 | CONNECT methods. 5 | 6 | Can be used as a standalone script, or integrated with your Tornado app. 7 | 8 | 9 | ### Setup 10 | 11 | # run self tests 12 | python setup.py test 13 | 14 | # install it 15 | python setup.py install 16 | 17 | ### Command-line usage 18 | 19 | python tornado_proxy/proxy.py 8888 20 | 21 | 22 | ### Module usage 23 | 24 | from tornado_proxy import run_proxy 25 | run_proxy(port, start_ioloop=False) 26 | ... 27 | tornado.ioloop.IOLoop.instance().start() 28 | 29 | 30 | ### Based on 31 | 32 | GET and POST proxying is heavily based on the code by Bill Janssen posted to: 33 | http://groups.google.com/group/python-tornado/msg/7bea08e7a049cf26 34 | 35 | 36 | ### License and copyright 37 | 38 | Copyright (C) 2012 Senko Rasic 39 | 40 | Permission is hereby granted, free of charge, to any person obtaining a copy 41 | of this software and associated documentation files (the "Software"), to deal 42 | in the Software without restriction, including without limitation the rights 43 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 44 | copies of the Software, and to permit persons to whom the Software is 45 | furnished to do so, subject to the following conditions: 46 | 47 | The above copyright notice and this permission notice shall be included in 48 | all copies or substantial portions of the Software. 
# (continuation of /README.md -- end of the license text)
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# --------------------------------------------------------------------------------
# /requirements.txt:
# --------------------------------------------------------------------------------
# tornado==5.1.1
#
# --------------------------------------------------------------------------------
# /setup.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python

# Prefer setuptools: plain distutils does not act on ``install_requires``.
# Fall back to distutils so the script still runs where setuptools is absent.
try:
    from setuptools import setup, Command
except ImportError:
    from distutils.core import setup, Command
from unittest import TextTestRunner, TestLoader
import os
import os.path
import subprocess
import sys


class TestCommand(Command):
    """``python setup.py test``: run the ``test`` module in a child process."""

    description = 'run the self tests'
    user_options = []

    def initialize_options(self):
        """No options to initialise (required by the Command interface)."""
        pass

    def finalize_options(self):
        """No options to finalise (required by the Command interface)."""
        pass

    def run(self):
        """Run ``-m test`` and raise ``Exception`` if it exits non-zero."""
        # Use the interpreter that is running setup.py rather than whatever
        # "python" happens to be first on PATH, and avoid going via a shell.
        retval = subprocess.call([sys.executable, '-m', 'test'])
        if retval != 0:
            raise Exception('tests failed')


setup(
    name='tornado-proxy',
    version='0.1',
    description='Simple asynchronous HTTP proxy',
    url='http://senko.net/en/',
    author='Senko Rasic',
    author_email='senko.rasic@dobarkod.hr',
    cmdclass={
        'test': TestCommand
    },
    install_requires=['tornado'],
    packages=['tornado_proxy'],
)

# --------------------------------------------------------------------------------
# /test.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python

import unittest
# urllib2 only exists on Python 2, but the proxy under test uses async/await
# and therefore needs Python 3; use the Python 3 home of the same API.
import urllib.request
import socket
import subprocess
import os
import time

import tornado.ioloop
import tornado.httpclient

import sys
sys.path.append('../')
from tornado_proxy import run_proxy


def _wait_for_port(port, host='localhost', timeout=5.0):
    """Poll until something accepts TCP connections on host:port.

    Returns True once a connection succeeds, False if ``timeout`` seconds
    pass without one.
    """
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            socket.create_connection((host, port), timeout=0.5).close()
            return True
        except OSError:
            time.sleep(0.1)
    return False


class TestStandaloneProxy(unittest.TestCase):
    """Run proxy.py as a separate process and send requests through it."""

    def setUp(self):
        self.proxy = subprocess.Popen([sys.executable,
                                       'tornado_proxy/proxy.py', '8888'])
        # Registered immediately so the child is stopped even when the rest
        # of setUp fails (tearDown is not run in that case).
        self.addCleanup(self._stop_proxy)
        proxy_support = urllib.request.ProxyHandler({
            "https": "http://localhost:8888",
            "http": "http://localhost:8888"
        })
        # Keep the opener on the test instance instead of installing it
        # process-wide, so other tests are not silently proxied.
        self.opener = urllib.request.build_opener(proxy_support)
        # make sure the subprocess started listening on the port
        if not _wait_for_port(8888):
            self.fail('proxy subprocess did not start listening on 8888')

    def _stop_proxy(self):
        """Terminate the proxy process, escalating to kill, and reap it."""
        self.proxy.terminate()
        try:
            self.proxy.wait(timeout=1)
        except subprocess.TimeoutExpired:
            self.proxy.kill()
            self.proxy.wait()

    def test(self):
        base_url = '//httpbin.org/'
        self.opener.open('https:' + base_url + 'get').read()
        self.opener.open('http:' + base_url + 'get').read()
        # POST bodies must be bytes on Python 3 (a str raises TypeError).
        self.opener.open('https:' + base_url + 'post', b'').read()
        self.opener.open('http:' + base_url + 'post', b'').read()


class TestTornadoProxy(unittest.TestCase):
    """Run the proxy inside this process on the shared Tornado IOLoop."""

    def setUp(self):
        self.ioloop = tornado.ioloop.IOLoop.instance()
        # run_proxy may return the listening server or None; keep whatever
        # it gives us so tearDown can stop it when possible.
        self.server = run_proxy(8889, start_ioloop=False)

    def tearDown(self):
        if self.server is not None:
            self.server.stop()

    def test(self):
        tornado.httpclient.AsyncHTTPClient.configure(
            "tornado.curl_httpclient.CurlAsyncHTTPClient")
        client = tornado.httpclient.AsyncHTTPClient()

        req = tornado.httpclient.HTTPRequest('http://httpbin.org/',
            proxy_host='127.0.0.1', proxy_port=8889)
        # run_sync stops the loop whether the fetch succeeds or fails; the
        # previous callback only stopped it after a passing assertion, so a
        # failing assertion left the test hanging in ioloop.start().
        resp = self.ioloop.run_sync(
            lambda: client.fetch(req, raise_error=False), timeout=30)
        self.assertIsNone(resp.error)


if __name__ == '__main__':
    unittest.main()

# --------------------------------------------------------------------------------
# /tornado_proxy/__init__.py:
# --------------------------------------------------------------------------------
# Explicit relative import: the implicit form ("from proxy import ...") does
# not resolve to the sibling module on Python 3.
from .proxy import run_proxy
-------------------------------------------------------------------------------- /tornado_proxy/proxy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Simple asynchronous HTTP proxy with tunnelling (CONNECT). 4 | # 5 | # GET/POST proxying based on 6 | # http://groups.google.com/group/python-tornado/msg/7bea08e7a049cf26 7 | # 8 | # Copyright (C) 2012 Senko Rasic 9 | # 10 | # Permission is hereby granted, free of charge, to any person obtaining a copy 11 | # of this software and associated documentation files (the "Software"), to deal 12 | # in the Software without restriction, including without limitation the rights 13 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 14 | # copies of the Software, and to permit persons to whom the Software is 15 | # furnished to do so, subject to the following conditions: 16 | # 17 | # The above copyright notice and this permission notice shall be included in 18 | # all copies or substantial portions of the Software. 19 | # 20 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 21 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 22 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 23 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 24 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 25 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 26 | # THE SOFTWARE. 
import asyncio
import logging
import os
import re
import sys
import socket
if sys.version_info[0] == 2:
    from urlparse import urlparse
else:
    from urllib.parse import urlparse

import tornado.httpserver
import tornado.ioloop
import tornado.iostream
import tornado.web
import tornado.httpclient
import tornado.httputil

logger = logging.getLogger('tornado_proxy')

__all__ = ['ProxyHandler', 'run_proxy']

# Matches a leading "scheme://" and captures the scheme.
_SCHEME_RE = re.compile(r'([A-Za-z][A-Za-z0-9+.-]*)://')

# Upstream response headers that are not copied to the client. The handler
# sets its own Content-Length from the buffered body, so the upstream framing
# headers no longer apply. NOTE(review): Content-Encoding is presumably
# dropped because the HTTP client hands back an already-decoded body --
# confirm against the Tornado client configuration in use.
_SKIPPED_RESPONSE_HEADERS = frozenset((
    'Content-Length', 'Transfer-Encoding', 'Content-Encoding', 'Connection',
))

# Maximum number of bytes read per chunk while relaying tunnel traffic.
_RELAY_CHUNK_SIZE = 64 * 1024


def get_proxy(url):
    """Return the upstream proxy configured for *url*, or None.

    The proxy is read from the ``<scheme>_proxy`` environment variable.
    URLs without a leading ``scheme://`` (for example the ``host:port``
    target of a CONNECT request) are treated as ``http``; relying on
    ``urlparse`` for that is not safe because it may interpret ``host`` in
    ``host:443`` as the scheme.
    """
    match = _SCHEME_RE.match(url)
    scheme = match.group(1).lower() if match else 'http'
    return os.environ.get('%s_proxy' % scheme)


def parse_proxy(proxy):
    """Split a proxy setting into ``(hostname, port)``.

    Accepts both ``http://host:port`` and the bare ``host:port`` form; the
    latter is given an ``http://`` prefix first because ``urlparse`` only
    fills in hostname/port when a network location is present.
    """
    if not _SCHEME_RE.match(proxy):
        proxy = 'http://' + proxy
    proxy_parsed = urlparse(proxy)
    return proxy_parsed.hostname, proxy_parsed.port


def fetch_request(url, **kwargs):
    """Start fetching *url* and return the awaitable from ``client.fetch``.

    Extra keyword arguments are passed to ``tornado.httpclient.HTTPRequest``.
    When an upstream proxy is configured for the URL (see ``get_proxy``),
    the curl-based client is selected and the proxy host/port are added to
    the request. The fetch is made with ``raise_error=False``.
    """
    proxy = get_proxy(url)
    if proxy:
        logger.debug('Forward request via upstream proxy %s', proxy)
        tornado.httpclient.AsyncHTTPClient.configure(
            'tornado.curl_httpclient.CurlAsyncHTTPClient')
        host, port = parse_proxy(proxy)
        kwargs['proxy_host'] = host
        kwargs['proxy_port'] = port

    req = tornado.httpclient.HTTPRequest(url, **kwargs)
    client = tornado.httpclient.AsyncHTTPClient()
    return client.fetch(req, raise_error=False)


async def _relay(reader, writer):
    """Copy bytes from *reader* to *writer* until either stream closes.

    The writer is always closed on exit so that the relay running in the
    opposite direction is woken up (its read fails with StreamClosedError)
    instead of waiting forever on a half-dead tunnel.
    """
    try:
        while True:
            data = await reader.read_bytes(_RELAY_CHUNK_SIZE, partial=True)
            if not data or writer.closed():
                break
            # Await the write so buffered data is flushed before more is
            # read (and before the stream is closed below).
            await writer.write(data)
    except tornado.iostream.StreamClosedError:
        pass
    finally:
        writer.close()


class ProxyHandler(tornado.web.RequestHandler):
    """Request handler implementing a forward proxy.

    GET and POST requests are re-issued with the Tornado HTTP client and
    the upstream response is copied back. CONNECT requests open a raw TCP
    tunnel between the client connection and the target (optionally through
    an upstream proxy taken from the environment).
    """

    SUPPORTED_METHODS = ['GET', 'POST', 'CONNECT']

    def compute_etag(self):
        """Return None so that Tornado does not add its own ETag."""
        return None  # disable tornado Etag

    def _proxy_fail(self, error):
        """Finish the request with a 500 response describing *error*."""
        self.set_status(500)
        self.write('Internal server error:\n' + str(error))
        self.finish()

    def _proxy_reply(self, response):
        """Copy an upstream HTTP response to the client and finish.

        A response whose ``error`` is something other than an HTTPError
        (i.e. not simply a non-2xx status) is reported as a 500 instead.
        """
        error = response.error
        if error and not isinstance(error, tornado.httpclient.HTTPError):
            self._proxy_fail(error)
            return

        self.set_status(response.code, response.reason)
        self._headers = tornado.httputil.HTTPHeaders()  # clear tornado default header

        for name, value in response.headers.get_all():
            if name not in _SKIPPED_RESPONSE_HEADERS:
                # add_header (not set_header): some headers appear
                # multiple times, eg 'Set-Cookie'
                self.add_header(name, value)

        if response.body:
            self.set_header('Content-Length', len(response.body))
            self.write(response.body)
        self.finish()

    async def get(self):
        """Forward the current request upstream and relay the response."""
        logger.debug('Handle %s request to %s', self.request.method,
                     self.request.uri)

        body = self.request.body or None
        headers = self.request.headers
        # Hop-by-hop header addressed to this proxy; not forwarded upstream.
        if 'Proxy-Connection' in headers:
            del headers['Proxy-Connection']

        try:
            response = await fetch_request(
                self.request.uri,
                method=self.request.method, body=body,
                headers=headers, follow_redirects=False,
                allow_nonstandard_methods=True)
        except tornado.httpclient.HTTPError as e:
            response = getattr(e, 'response', None)
            if not response:
                self._proxy_fail(e)
                return
        self._proxy_reply(response)

    async def post(self):
        """Handle POST exactly like GET (method and body are forwarded)."""
        return await self.get()

    @staticmethod
    async def _proxy_handshake(upstream, target):
        """Ask an upstream proxy to CONNECT to *target*.

        Sends the CONNECT request on *upstream*, reads the reply headers and
        returns True only when the status line carries code 200.
        """
        request = ('CONNECT %s HTTP/1.1\r\n'
                   'Host: %s\r\n'
                   'Proxy-Connection: Keep-Alive\r\n\r\n' % (target, target))
        # IOStream works on bytes: both the payload and the read delimiter
        # must be bytes objects.
        await upstream.write(request.encode('latin-1'))
        reply = await upstream.read_until(b'\r\n\r\n')

        lines = reply.splitlines()
        fields = lines[0].split(None, 2) if lines else []
        return (len(fields) >= 2 and fields[1].isdigit()
                and int(fields[1]) == 200)

    async def connect(self):
        """Handle CONNECT by tunnelling raw bytes to the requested target.

        The request URI is expected in ``host:port`` form. A target that
        cannot be parsed is answered with 400. If an upstream proxy is
        configured, the tunnel is requested from it and a refusal is
        answered with 500. The upstream socket is IPv4 (AF_INET).
        """
        target = self.request.uri
        logger.debug('Start CONNECT to %s', target)

        # Split on the last colon so extra colons in the host part do not
        # blow up tuple unpacking; validate instead of raising.
        host, _, port = target.rpartition(':')
        host = host.strip('[]')
        try:
            port = int(port)
        except ValueError:
            port = None
        if not host or port is None:
            logger.debug('Malformed CONNECT target %s', target)
            self.set_status(400)
            self.finish()
            return

        client = self.request.connection.stream
        upstream = tornado.iostream.IOStream(
            socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0))

        try:
            proxy = get_proxy(target)
            if proxy:
                proxy_host, proxy_port = parse_proxy(proxy)
                await upstream.connect((proxy_host, proxy_port))
                if not await self._proxy_handshake(upstream, target):
                    self.set_status(500)
                    self.finish()
                    return
                logger.debug('Connected to upstream proxy %s', proxy)
            else:
                await upstream.connect((host, port))

            logger.debug('CONNECT tunnel established to %s', target)
            client.write(b'HTTP/1.0 200 Connection established\r\n\r\n')
            await asyncio.gather(
                _relay(client, upstream),
                _relay(upstream, client)
            )
            client.close()
        finally:
            # Also reached when connecting or the handshake fails, so the
            # upstream socket is never leaked. Closing twice is harmless.
            upstream.close()


def run_proxy(port, start_ioloop=True):
    """
    Run proxy on the specified port. If start_ioloop is True (default),
    the tornado IOLoop will be started immediately.

    Returns the listening HTTP server object (the value of
    ``Application.listen``) so that callers can stop it later. With
    ``start_ioloop=True`` this only returns after the IOLoop has stopped.
    """
    app = tornado.web.Application([
        (r'.*', ProxyHandler),
    ])
    server = app.listen(port)
    ioloop = tornado.ioloop.IOLoop.instance()
    if start_ioloop:
        ioloop.start()
    return server


if __name__ == '__main__':
    port = 8888
    if len(sys.argv) > 1:
        port = int(sys.argv[1])

    print("Starting HTTP proxy on port %d" % port)
    run_proxy(port)