├── MANIFEST.in ├── requirements.txt ├── Dockerfile ├── setup.py ├── README.md └── pyproxy.py /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tornado>=2.1.1 2 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:2-alpine 2 | 3 | ADD . /opt/pyproxy 4 | WORKDIR /opt/pyproxy 5 | RUN pip install . 6 | 7 | ENTRYPOINT ["pyproxy"] 8 | 9 | EXPOSE 80 8888 10 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | # vim: set et sw=4 ts=4 sts=4 ff=unix fenc=utf8: 4 | # Author: Binux 5 | # http://binux.me 6 | # Created on 2015-01-28 21:31:13 7 | 8 | from setuptools import setup 9 | from codecs import open 10 | from os import path 11 | 12 | here = path.abspath(path.dirname(__file__)) 13 | with open(path.join(here, 'README.md'), encoding='utf-8') as f: 14 | long_description = f.read() 15 | 16 | 17 | setup( 18 | name='pyproxy', 19 | version='0.1.6', 20 | 21 | description='HTTP(s) proxy server base on tornado, in one file!', 22 | long_description=long_description, 23 | 24 | url='https://github.com/binux/pyproxy', 25 | 26 | author='Roy Binux', 27 | author_email='roy@binux.me', 28 | 29 | license='Apache License, Version 2.0', 30 | 31 | classifiers=[ 32 | 'Development Status :: 4 - Beta', 33 | 'Programming Language :: Python :: 2', 34 | 'Programming Language :: Python :: 2.6', 35 | 'Programming Language :: Python :: 2.7', 36 | 'Programming Language :: Python :: 3', 37 | 'Programming Language :: Python :: 3.3', 38 | 
'Programming Language :: Python :: 3.4', 39 | 40 | 'License :: OSI Approved :: Apache Software License', 41 | 42 | 'Intended Audience :: Developers', 43 | 'Operating System :: OS Independent', 44 | 45 | 'Topic :: Internet :: WWW/HTTP', 46 | ], 47 | 48 | keywords='proxy https http', 49 | 50 | py_modules=['pyproxy'], 51 | 52 | install_requires=[ 53 | 'tornado>=2.1.1', 54 | ], 55 | 56 | entry_points={ 57 | 'console_scripts': [ 58 | 'pyproxy=pyproxy:main' 59 | ] 60 | }, 61 | ) 62 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Usage 2 | ===== 3 | 4 | ``` 5 | Usage: pyproxy [OPTIONS] 6 | 7 | Options: 8 | 9 | --bind addrs that debugger bind to (default 10 | 127.0.0.1) 11 | --config config file 12 | --debug debug mode (default False) 13 | --help show this help information 14 | --username proxy username 15 | --password proxy password 16 | --port the port that debugger listen to (default 17 | 8888) 18 | ``` 19 | 20 | API 21 | === 22 | 23 | 1 Use as http/https proxy 24 | 25 | `curl -x http://localhost:8888/ http://httpbin.org/get` 26 | 27 | 2 with GET/POST parameters 28 | 29 | `curl http://localhost:8888/anypath?method=POST&url=http://httpbin.org/post` 30 | 31 | 3 pass params with JSON (work with GET parameters as well) 32 | 33 | `curl -d '{"url": "http://httpbin.org/get","method": "GET", "headers": {"User-Agent":"Baidu"}}' http://localhost:8888/?callback=callback` 34 | 35 | Auth 36 | ==== 37 | 38 | 1 http proxy auth 39 | 40 | `curl -x http://username:password@localhost:8888/ http://httpbin.org/get` 41 | 42 | 2 http basic auth 43 | 44 | `curl http://username:password@localhost:8888/anypath?method=POST&url=http://httpbin.org/post` 45 | 46 | 3 username & password in GET/POST parameters / JSON 47 | 48 | `curl http://localhost:8888/anypath?method=POST&url=http://httpbin.org/post&username=username&password=password` 49 | 50 | 4 host_sign / path_sign 
/ url_sign 51 | 52 | sign a host / path / url with current username/password: 53 | 54 | visit http://username:password@localhost:8888/sign?url=http://httpbin.org/get to get sign 55 | 56 | request with: `http://localhost:8888/?url=http://httpbin.org/get&path_sign=abc123` 57 | -------------------------------------------------------------------------------- /pyproxy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | # vim: set et sw=4 ts=4 sts=4 ff=unix fenc=utf8: 4 | # Author: Binux<17175297.hk@gmail.com> 5 | # http://binux.me 6 | # Created on 2012-12-15 16:11:13 7 | 8 | import logging 9 | import tornado.web 10 | from base64 import b64decode, b64encode 11 | from tornado.options import define, options 12 | 13 | define("bind", default="127.0.0.1", help="addrs that debugger bind to") 14 | define("port", default=8888, help="the port that debugger listen to") 15 | define("username", default="", help="proxy username") 16 | define("password", default="", help="proxy password") 17 | define("debug", default=False, help="debug mode") 18 | define("config", default="", help="config file") 19 | define('forward', default="", help="pass request to another proxy with format " 20 | "[https?://][username:password@]host:port (or a file wilth multiple proxies)") 21 | 22 | import os 23 | import re 24 | import json 25 | import urllib 26 | import random 27 | import socket 28 | import hashlib 29 | try: 30 | from urlparse import urlparse, urlsplit 31 | except ImportError: 32 | from urllib.parse import urlparse, urlsplit 33 | from tornado import gen 34 | from tornado.web import HTTPError 35 | from tornado.ioloop import IOLoop 36 | import tornado.iostream 37 | import tornado.httputil 38 | import tornado.tcpclient 39 | import tornado.httpclient 40 | try: 41 | import pycurl 42 | tornado.httpclient.AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient") 43 | except ImportError: 44 | 
pycurl = None  # fallback binding of the optional ``import pycurl`` guard above; the name is not referenced below


# --- Python 2 / Python 3 compatibility shims --------------------------------
try:
    _text_type = unicode  # Python 2
except NameError:  # Python 3 has no ``unicode`` builtin; ``str`` is the text type
    _text_type = str

try:
    from urllib import quote as _url_quote  # Python 2
except ImportError:
    from urllib.parse import quote as _url_quote  # Python 3


def utf8(string):
    """Return *string* encoded as UTF-8 bytes; non-text values pass through unchanged."""
    if isinstance(string, _text_type):
        return string.encode('utf8')
    return string


def _native_str(value):
    """Return *value* as the interpreter's native ``str``.

    On Python 3, ``bytes`` are decoded as UTF-8.  On Python 2 (where ``bytes``
    is ``str``) and for every other type the value is returned unchanged.
    """
    if str is not bytes and isinstance(value, bytes):
        return value.decode('utf8')
    return value


def _basic_token(userpass):
    """Return the base64 form of a ``user:password`` string as a native ``str``.

    ``b64encode`` needs bytes and returns bytes on Python 3; formatting those
    bytes with ``%s`` would embed ``b'...'`` in the header, hence this helper.
    """
    return _native_str(b64encode(utf8(userpass)))


def link(a, b):
    """
    link stream a to stream b

    Every chunk read from ``a`` is written to ``b`` until ``a`` is closed.
    When ``a`` closes, ``b`` gets an empty write whose callback closes it;
    when ``b`` closes first, ``a`` is closed the same way.

    NOTE: relies on the callback-style ``IOStream`` API
    (``streaming_callback=`` / ``callback=`` arguments).
    """
    a.read_until_close(lambda x: not b.closed() and b.write(b'', callback=b.close),
                       streaming_callback=lambda x: not b.closed() and b.write(x))
    b.set_close_callback(lambda: not a.closed() and a.write(b'', callback=a.close))


class ProxyHandler(tornado.web.RequestHandler):
    """Single handler implementing every proxy mode.

    * plain HTTP proxy: the request line carries an absolute ``http...`` URI;
    * ``CONNECT`` tunnelling (see :meth:`connect`);
    * "API" mode: the target is described by query/form arguments and/or a
      JSON object (``url``, ``method``, ``headers``, ``body``, ``callback``,
      ``del_headers``).

    When ``self.application.forward_proxies`` is non-empty, one of them is
    picked at random for each request and the traffic is relayed through it.
    """

    SUPPORTED_METHODS = ['GET', 'POST', 'HEAD', 'CONNECT', 'PUT', 'OPTIONS']
    # strips "domain=..." / "path=..." attributes from upstream Set-Cookie headers
    set_cookie_re = re.compile(r";?\s*(domain|path)\s*=\s*[^,;]+", re.I)

    def options(self):
        """Answer a CORS preflight when ``cors`` is set; otherwise proxy like ``get``."""
        cors = self.get_argument('cors', None)
        if not cors:
            return self.get()

        self.set_header('Access-Control-Allow-Credentials', 'true')
        self.set_header('Access-Control-Max-Age', 86400)
        # echo back whatever the browser asked permission for
        if 'Access-Control-Request-Headers' in self.request.headers:
            self.set_header('Access-Control-Allow-Headers',
                            self.request.headers.get('Access-Control-Request-Headers'))
        if 'Access-Control-Request-Method' in self.request.headers:
            self.set_header('Access-Control-Allow-Methods',
                            self.request.headers.get('Access-Control-Request-Method'))
        self.set_status(204)
        self.finish()

    def _json_request(self):
        """Return the JSON object from the ``request`` argument or the raw body.

        Anything that is missing, undecodable, not valid JSON, or not a JSON
        object yields ``{}`` so callers can always use ``.get``.
        """
        try:
            request = json.loads(_native_str(self.get_argument('request', self.request.body)))
        except (TypeError, ValueError, HTTPError):
            return {}
        return request if isinstance(request, dict) else {}

    def get(self):
        """Work out method/url/headers/body of the upstream request and proxy it.

        An absolute ``http...`` request URI is proxied as-is (HTTP proxy mode).
        Otherwise the target is assembled, in increasing priority, from the
        incoming request, the ``method``/``url``/``callback`` arguments and the
        JSON ``request`` object.  ``h-<name>`` arguments set a header and
        ``d-<name>`` arguments delete one.  A body starting with ``base64,``
        is base64-decoded.  ``/sign`` returns the signatures for ``url`` to an
        authenticated caller.
        """
        method = self.request.method
        url = self.request.uri
        headers = tornado.httputil.HTTPHeaders(self.request.headers)
        body = self.request.body

        if self.request.uri.startswith('http'):
            return self.proxy(method, url, headers, body, http_proxy=True)

        method = self.get_argument('method', method)
        # NOTE(review): 'rl' is accepted as an alternative spelling of 'url';
        # the reason is not visible in this file.
        url = self.get_argument('url', self.get_argument('rl', url))
        callback = self.get_argument('callback', None)

        request = self._json_request()
        url = request.get('url', url)
        method = request.get('method', method)
        if 'headers' in request:
            for key, value in request['headers'].items():
                headers[key] = value
        body = request.get('body', body)
        callback = request.get('callback', callback)

        for key, values in self.request.arguments.items():
            key = key.lower()
            if key.startswith('h-'):
                # argument values are bytes on Python 3; header values must be text
                headers[key[2:]] = _native_str(values[0]).strip()
            elif key.startswith('d-'):
                if key[2:] in headers:
                    del headers[key[2:]]

        if 'del_headers' in request:
            for key in request['del_headers']:
                if key in headers:
                    del headers[key]

        # these describe the request made *to the proxy*, not the upstream one
        for keyword in ('Host', 'Content-Type', 'Content-Length'):
            if keyword in headers:
                del headers[keyword]

        raw_body = utf8(body)
        if isinstance(raw_body, bytes) and raw_body.startswith(b'base64,'):
            try:
                body = b64decode(raw_body[7:])
            except (TypeError, ValueError):
                pass  # not valid base64: forward the body untouched

        if self.request.path == '/sign' and self.auth(url):
            return self.finish(self.sign(url))

        if not url.startswith('http'):
            return self.finish('hello world!')

        # make the access log show the upstream request
        self.request.method = method
        self.request.uri = url

        return self.proxy(method, url, headers, body, _callback=callback)

    put = get
    post = get
    head = get

    def sign(self, url):
        """Return the ``host_sign`` / ``path_sign`` / ``url_sign`` values for *url*.

        Each value is characters 5..10 of the hex MD5 of the configured
        username and password joined with, respectively, the netloc, the
        netloc and path, or the whole URL.
        """
        parsed = urlparse(url)
        return {
            'host_sign': hashlib.md5(
                ('%s:%s:%s' % (options.username, options.password, parsed.netloc)).encode('utf8')
            ).hexdigest()[5:11],
            'path_sign': hashlib.md5(
                ('%s:%s:%s:%s' % (options.username, options.password, parsed.netloc, parsed.path)).encode('utf8')
            ).hexdigest()[5:11],
            'url_sign': hashlib.md5(
                ('%s:%s:%s' % (options.username, options.password, url)).encode('utf8')
            ).hexdigest()[5:11],
        }

    @gen.coroutine
    def proxy(self, method, url, headers, body, **kwargs):
        """Perform the upstream request and relay the response to the client.

        Keyword arguments:
          ``http_proxy`` -- true when acting as a classic HTTP proxy; auth
                            failures answer 407 and the response is streamed.
          ``_callback``  -- JSONP callback name; the upstream body is returned
                            as ``callback(<json string>)``.

        Raises ``HTTPError(403)`` on failed auth in API mode and
        ``HTTPError(504)`` when the forward proxy cannot be reached in 10s.
        """
        if not self.auth(url):
            if kwargs.get('http_proxy'):
                self.set_header('Proxy-Authenticate', 'Basic realm="hello"')
                self.set_status(407)
                self.finish()
                raise gen.Return()
            else:
                raise HTTPError(403)

        req = tornado.httpclient.HTTPRequest(
            method=method,
            url=url,
            headers=headers,
            body=body or None,
            decompress_response=False,
            follow_redirects=False,
            allow_nonstandard_methods=True)

        if self.application.forward_proxies:
            self.via_proxy = proxy = random.choice(self.application.forward_proxies)
            try:
                remote = yield gen.with_timeout(IOLoop.current().time()+10, tornado.tcpclient.TCPClient().connect(
                    proxy.hostname, int(proxy.port), ssl_options={} if proxy.scheme == 'https' else None))
            except gen.TimeoutError:
                raise HTTPError(504)

            proxy_token = None
            if proxy.username:
                proxy_token = _basic_token('%s:%s' % (proxy.username, proxy.password))

            parsed = urlparse(req.url)
            userpass = None
            netloc = parsed.netloc
            if '@' in parsed.netloc:
                userpass, _, netloc = netloc.rpartition("@")
            headers = tornado.httputil.HTTPHeaders(headers)

            if parsed.scheme == 'https':
                # open a CONNECT tunnel through the forward proxy first
                remote.write(utf8('CONNECT %s:%s HTTP/1.1\r\n' % (parsed.hostname, parsed.port or 443)))
                remote.write(utf8('Host: %s\r\n' % netloc))
                if proxy_token:
                    remote.write(utf8('Proxy-Authorization: Basic %s\r\n' % proxy_token))
                remote.write(b'\r\n')
                yield remote.read_until(b'\r\n\r\n')

                # TLS is run over a local socket pair whose other end is
                # linked to the tunnel, so ``remote`` becomes the TLS stream.
                channel_a, channel_b = socket.socketpair()
                if not hasattr(channel_a, '_sock') and hasattr(socket, '_socketobject'):
                    # old Python 2 returns raw sockets; Python 3 has no _socketobject
                    channel_a, channel_b = socket._socketobject(_sock=channel_a), socket._socketobject(_sock=channel_b)
                channel_a, channel_b = tornado.iostream.IOStream(channel_a), tornado.iostream.IOStream(channel_b)

                link(channel_a, remote)
                link(remote, channel_a)

                # server_hostname must be the bare host, not "host:port"
                remote = yield channel_b.start_tls(False, {}, parsed.hostname)

                # Only the path is quoted: quoting "?a=b" as well would turn
                # the query string into part of the path.
                request_path = _url_quote(parsed.path) or '/'
                if parsed.query:
                    request_path += '?%s' % parsed.query
                remote.write(utf8('%s %s HTTP/1.1\r\n' % (req.method.upper(), request_path)))
            else:
                remote.write(utf8('%s %s HTTP/1.1\r\n' % (req.method.upper(), req.url)))
                if proxy_token:
                    headers['Proxy-Authorization'] = 'Basic %s' % proxy_token

            if 'Host' not in headers:
                headers['Host'] = netloc
            # force disable connection
            if not kwargs.get('http_proxy'):
                headers['Connection'] = 'close'
            if userpass:
                headers['Authorization'] = 'basic %s' % _basic_token(userpass)
            if req.body:
                headers['Content-Length'] = str(len(utf8(req.body)))
            for key, value in headers.get_all():
                remote.write(utf8('%s: %s\r\n' % (key, value)))
            remote.write(b'\r\n')
            if req.body:
                remote.write(utf8(req.body))
            yield remote.write(b'')

            self._auto_finish = False
            client = self.request.connection.detach()
            # In API mode nothing further is forwarded to remote, as the
            # current request has finished; only proxy mode links both ways.
            if kwargs.get('http_proxy'):
                link(client, remote)
            link(remote, client)

            self._log()
            return

        if kwargs.get('http_proxy'):
            # streaming in http proxy mode
            self._auto_finish = False

            stream = self.request.connection.detach()

            def forward_header(line, stream=stream):
                # Transfer-Encoding is dropped when relaying the header lines
                if line.startswith('Transfer-Encoding'):
                    return None
                return not stream.closed() and stream.write(utf8(line))

            def forward_chunk(chunk, stream=stream):
                return not stream.closed() and stream.write(chunk)

            req.header_callback = forward_header
            req.streaming_callback = forward_chunk

            client = tornado.httpclient.AsyncHTTPClient()
            try:
                yield client.fetch(req)
            except tornado.httpclient.HTTPError:
                pass  # whatever the upstream sent has already been streamed
            finally:
                stream.close()
            self._log()
            return

        client = tornado.httpclient.AsyncHTTPClient()
        try:
            result = yield client.fetch(req)
        except tornado.httpclient.HTTPError as e:
            if e.response:
                result = e.response
            else:
                self.set_status(502)
                self.write('Bad Gateway error:\n' + str(e))
                self.finish()
                raise gen.Return()

        self.set_status(result.code, result.reason)
        if result.headers.get('Transfer-Encoding') == 'chunked':
            del result.headers['Transfer-Encoding']
        if 'set-cookie' in result.headers:
            set_cookie = result.headers.get_list('set-cookie')
            del result.headers['set-cookie']
            for each in set_cookie:
                result.headers.add('set-cookie', self.set_cookie_re.sub('', each))

        if kwargs.get('_callback'):
            self.set_header('Content-Type', 'application/javascript')
            # json.dumps cannot serialise bytes on Python 3
            self.finish('%s(%s)' % (kwargs['_callback'], json.dumps(_native_str(result.body))))
        else:
            cors = self.get_argument('cors', None)
            if cors:
                result.headers["Access-Control-Allow-Origin"] = "*"
            self._headers = result.headers
            if result.code == 304:
                self.finish()
            else:
                self.finish(result.body)

    def auth(self, url):
        """Return True when the request may use the proxy for *url*.

        Always True when no ``--username`` is configured.  Credentials are
        looked up, first match wins, in: the ``Proxy-Authorization`` header
        (which is then removed so it is not forwarded), the ``Authorization``
        header, the ``username``/``password`` arguments, and the JSON
        ``request`` object.  Failing that, any of the ``host_sign`` /
        ``path_sign`` / ``url_sign`` values from :meth:`sign` grants access.
        """
        if not options.username:
            return True

        username, password = None, None

        if 'Proxy-Authorization' in self.request.headers:
            try:
                method, b64 = self.request.headers['Proxy-Authorization'].strip().split(' ', 1)
                username, password = b64decode(b64.split(',', 1)[0]).decode('utf8').split(':', 1)
            except (TypeError, ValueError):
                pass  # malformed header counts as "no credentials", not a server error
            del self.request.headers['Proxy-Authorization']

        if not (username and password) and 'Authorization' in self.request.headers:
            try:
                method, b64 = self.request.headers['Authorization'].split(' ', 1)
                username, password = b64decode(b64).decode('utf8').split(':', 1)
            except (TypeError, ValueError):
                pass

        if not (username and password):
            username = self.get_argument('username', username)
            password = self.get_argument('password', password)

        request = {}
        if not (username and password):
            request = self._json_request()
            username = request.get('username', username)
            password = request.get('password', password)

        if options.username == username and options.password == password:
            return True

        # auth by sign
        sign = self.sign(url)
        for key, value in sign.items():
            if request.get(key, self.get_argument(key, None)) == value:
                return True

        return False

    @gen.coroutine
    def connect(self):
        """Handle ``CONNECT host:port`` by tunnelling bytes in both directions.

        Through a forward proxy the CONNECT is re-issued upstream and that
        proxy's reply is relayed to the client; otherwise a direct TCP
        connection is opened and ``200 Connection established`` is sent.
        Answers 407 on failed auth, 400 on a malformed target and 504 when
        the connection is not established within 10 seconds.
        """
        url = self.request.uri
        if not self.auth(url):
            self.set_header('Proxy-Authenticate', 'Basic realm="hello"')
            self.set_status(407)
            self.finish()
            raise gen.Return()

        proxy = None
        host = port = None
        if self.application.forward_proxies:
            self.via_proxy = proxy = random.choice(self.application.forward_proxies)
        else:
            # Parse before detaching so a bad target can still get an HTTP
            # error.  rpartition keeps IPv6 literals such as "[::1]:443" intact.
            host, _, port = self.request.uri.rpartition(':')
            host = host.strip('[]')
            if not host or not port.isdigit():
                raise HTTPError(400)

        self._auto_finish = False
        client = self.request.connection.detach()

        try:
            if proxy is not None:
                remote = yield gen.with_timeout(IOLoop.current().time()+10, tornado.tcpclient.TCPClient().connect(
                    proxy.hostname, int(proxy.port), ssl_options={} if proxy.scheme == 'https' else None))
            else:
                remote = yield gen.with_timeout(IOLoop.current().time()+10,
                                                tornado.tcpclient.TCPClient().connect(host, int(port)))
        except gen.TimeoutError:
            # The connection is detached, so HTTPError can no longer produce
            # a response: write the status line on the raw stream instead.
            self.set_status(504)
            if not client.closed():
                client.write(b'HTTP/1.0 504 Gateway Timeout\r\n\r\n', callback=client.close)
            self._log()
            raise gen.Return()

        if proxy is not None:
            remote.write(utf8('CONNECT %s HTTP/1.1\r\n' % self.request.uri))
            remote.write(utf8('Host: %s\r\n' % self.request.uri))
            if proxy.username:
                remote.write(utf8('Proxy-Authorization: Basic %s\r\n' %
                                  _basic_token('%s:%s' % (proxy.username, proxy.password))))
            remote.write(b'\r\n')
        else:
            yield client.write(b'HTTP/1.0 200 Connection established\r\n\r\n')

        link(client, remote)
        link(remote, client)

        # wait until both ends have closed before logging the request
        yield [
            gen.Task(client.set_close_callback),
            gen.Task(remote.set_close_callback),
        ]
        self._log()

    def _request_summary(self):
        """Access-log line; names the forward proxy when one was used."""
        if getattr(self, 'via_proxy', None):
            return "%s %s via %s (%s)" % (self.request.method, self.request.uri,
                                          self.via_proxy.hostname, self.request.remote_ip)
        else:
            return "%s %s (%s)" % (self.request.method, self.request.uri,
                                   self.request.remote_ip)


class Application(tornado.web.Application):
    """Tornado application routing every path to :class:`ProxyHandler`.

    ``forward_proxies`` holds the parsed (``urlsplit``) upstream proxies taken
    from ``--forward``, which is either one proxy URL or the path of a file
    with one proxy per line.
    """

    @staticmethod
    def _parse_forward_proxy(spec):
        """Parse ``[https?://][user:pass@]host:port``; return None when unusable.

        A proxy without host or port is rejected here because the handlers
        call ``int(proxy.port)`` on every request.
        """
        spec = spec.strip()
        if not spec:
            return None
        if not spec.startswith('http'):
            spec = 'http://' + spec
        parsed = urlsplit(spec)
        try:
            usable = bool(parsed.hostname) and parsed.port is not None
        except ValueError:  # non-numeric / out-of-range port
            usable = False
        return parsed if usable else None

    def __init__(self):
        """Build ``forward_proxies`` from ``options.forward`` and set up routing.

        Raises ``Exception('unknown proxy ...')`` when ``--forward`` is a
        single proxy that cannot be parsed; unusable lines of a proxy file
        are skipped.
        """
        forward_proxies = []
        if options.forward:
            if os.path.exists(options.forward):
                with open(options.forward) as fp:
                    for line in fp:
                        url = self._parse_forward_proxy(line)
                        if url is not None:
                            forward_proxies.append(url)
            else:
                url = self._parse_forward_proxy(options.forward)
                if url is None:
                    raise Exception('unknown proxy %s' % options.forward)
                if not options.forward.startswith('http'):
                    options.forward = 'http://' + options.forward
                forward_proxies.append(url)
        self.forward_proxies = forward_proxies

        settings = dict(
            debug=options.debug,
        )
        super(Application, self).__init__([(".*", ProxyHandler), ], **settings)


def main(**kwargs):
    """Parse options, start the HTTP server and run the IOLoop forever.

    Keyword arguments override the parsed option of the same name.
    """
    import tornado.options
    from tornado.ioloop import IOLoop
    from tornado.httpserver import HTTPServer

    tornado.options.parse_command_line()
    if options.config:
        tornado.options.parse_config_file(options.config)
        # parsed again after the config file, presumably so that command-line
        # flags take precedence over values from the file
        tornado.options.parse_command_line()

    for key in kwargs:
        setattr(options, key, kwargs[key])

    http_server = HTTPServer(Application(), xheaders=True)
    http_server.bind(options.port, options.bind)
    http_server.start()

    logging.info("http server started on %s:%s" % (options.bind, options.port))
    IOLoop.instance().start()


if __name__ == "__main__":
    main()