#!/usr/bin/env python3
# coding=utf-8
#
# Author: Jianfei Wang
# Author: Fengyuan Chen
# License: MIT

r"""Proxy server based on tornado.

A simple proxy server with configurable upstream proxy support.

Usages
------

    ./server.py
    ./server.py -u socks://localhost:1080
    ./server.py -u rules:rules.lst

The rules file looks like the following:

    .*twitter\.com      socks://localhost:2091
    .*twimg\.com        socks://localhost:2091

    .*tsinghua\.edu\.cn direct:

    .*                  socks://localhost:2090

To Do
-----

+ HttpProxyConnector
+ Keep-alive connections (both proxy connection and outgoing connection)
+ Authentication

NOTE(review): this module passes callbacks directly to ``IOStream.connect``,
``read_until``, ``read_bytes`` and ``read_until_close``.  Those callback
arguments are believed to have been removed in Tornado 6.0, so an older
Tornado is presumably required -- confirm before upgrading.
"""

import argparse
import base64
import logging
import os
import re
import socket
import struct
from collections import OrderedDict
from urllib.parse import urlparse, urlunparse

import tornado.ioloop
import tornado.iostream
import tornado.options
import tornado.tcpserver

logging.getLogger().setLevel(logging.INFO)


def header_parser(headers):
    """Yield ``(name, value)`` pairs from a raw CRLF-separated header block.

    Lines without a colon (including the empty lines that terminate the
    block) are skipped.  The value has surrounding whitespace removed, so
    both ``Host: example.com`` and ``Host:example.com`` yield
    ``(b'Host', b'example.com')``.

    :param headers: header block as ``bytes``.
    """
    for line in headers.split(b'\r\n'):
        name, colon, value = line.partition(b':')
        if colon:
            yield name, value.strip()


def hostport_parser(hostport, default_port):
    """Split ``host:port`` at the first colon.

    Works on both ``str`` and ``bytes``; the host keeps the input type.

    :param hostport: ``host`` or ``host:port``.
    :param default_port: port returned when *hostport* contains no colon.
    :returns: ``(host, port)`` where port is an ``int`` when it was parsed
        from *hostport*, otherwise *default_port* unchanged.
    :raises ValueError: if the text after the colon is not an integer.
    """
    colon = b':' if isinstance(hostport, bytes) else ':'
    host, found, port = hostport.partition(colon)
    if found:
        return host, int(port)
    return hostport, default_port


def netloc_parser(netloc, default_port=-1):
    """Split ``auth@host`` at the last ``@``.

    :param netloc: network location, ``str`` or ``bytes``.
    :param default_port: unused; accepted only for backward compatibility.
    :returns: ``(auth, host)``; ``auth`` is ``None`` when there is no ``@``.
    """
    at = b'@' if isinstance(netloc, bytes) else '@'
    auth, found, host = netloc.rpartition(at)
    if found:
        return auth, host
    return None, netloc


def _header_key(headers, name):
    """Return the key of *headers* equal to *name* ignoring case, or None."""
    wanted = name.lower()
    for key in headers:
        if key.lower() == wanted:
            return key
    return None


def write_to(stream):
    """Return a data callback that forwards chunks to *stream*.

    An empty chunk closes *stream*; any other chunk is written unless the
    stream is already closed.
    """
    def on_data(data):
        if data == b'':
            stream.close()
        elif not stream.closed():
            stream.write(data)

    return on_data


def pipe(stream_a, stream_b):
    """Relay data in both directions between two streams until one closes."""
    writer_a = write_to(stream_a)
    writer_b = write_to(stream_b)
    # The same writer serves as final callback and streaming callback.
    stream_a.read_until_close(writer_b, writer_b)
    stream_b.read_until_close(writer_a, writer_a)


def subclasses(cls, _seen=None):
    """Yield every direct and indirect subclass of *cls* exactly once."""
    if _seen is None:
        _seen = set()
    for sub in cls.__subclasses__():
        if sub in _seen:
            continue
        _seen.add(sub)
        yield sub
        yield from subclasses(sub, _seen)


class Connector:
    """Base class for strategies that open an outgoing connection.

    Subclasses implement :meth:`accept` (does this class handle a URL
    scheme?) and :meth:`connect` (open the connection and report the result
    through a callback).
    """

    def __init__(self, netloc=None, path=None):
        self.netloc = netloc
        self.path = path

    @classmethod
    def accept(cls, scheme):
        """Return True if this connector handles URL scheme *scheme*."""
        raise NotImplementedError()

    def connect(self, host, port, callback):
        """Connect to *host*:*port*; call *callback* with a stream or None."""
        raise NotImplementedError()

    @classmethod
    def get(cls, url):
        """Build the connector whose class accepts the scheme of *url*.

        :raises NotImplementedError: if no subclass accepts the scheme.
        """
        parts = urlparse(url)
        for sub_cls in subclasses(cls):
            if sub_cls.accept(parts.scheme):
                return sub_cls(parts.netloc, parts.path)
        raise NotImplementedError('Unsupported scheme', parts.scheme)

    def __str__(self):
        return '%s(netloc=%s, path=%s)' % (
            self.__class__.__name__, repr(self.netloc), repr(self.path))


class RejectConnector(Connector):
    """Connector that answers every request with ``410 Gone``.

    The class itself is handed to the callback as a pseudo-stream, so it
    provides the stream methods the rest of this module calls on an
    outgoing stream: ``write``, ``read_until_close``, ``close``, ``closed``.
    """

    @classmethod
    def accept(cls, scheme):
        return scheme == 'reject'

    def connect(self, host, port, callback):
        callback(RejectConnector)

    @classmethod
    def write(cls, _):
        """Discard data written by the client."""

    @classmethod
    def read_until_close(cls, req_callback, _):
        """Deliver the canned response followed by the end-of-stream marker."""
        req_callback(b'HTTP/1.1 410 Gone\r\n\r\n')
        req_callback(b'')

    @classmethod
    def close(cls):
        """No-op; required because ``write_to`` closes its target stream."""

    @classmethod
    def closed(cls):
        """Always False; required because ``write_to`` checks it."""
        return False


class DirectConnector(Connector):
    """Connector that opens a plain TCP connection to the target."""

    @classmethod
    def accept(cls, scheme):
        return scheme == 'direct'

    def connect(self, host, port, callback):
        def on_close():
            callback(None)

        def on_connected():
            # From here on the caller owns the stream and its close events.
            stream.set_close_callback(None)
            callback(stream)

        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)
        stream = tornado.iostream.IOStream(s)
        stream.set_close_callback(on_close)
        stream.connect((host, port), on_connected)


class SocksConnector(Connector):
    """Connector that tunnels through a SOCKS4a proxy (default port 1080)."""

    def __init__(self, netloc, path=None):
        super().__init__(netloc, path)
        self.socks_server, self.socks_port = hostport_parser(netloc, 1080)

    @classmethod
    def accept(cls, scheme):
        return scheme == 'socks'

    def connect(self, host, port, callback):
        """Ask the SOCKS proxy to connect to *host* (bytes) on *port*."""

        def socks_close():
            callback(None)

        def socks_response(data):
            stream.set_close_callback(None)
            if data[1] == 0x5a:  # request granted
                callback(stream)
            else:
                # Do not leak the proxy connection on a refused request.
                stream.close()
                callback(None)

        def socks_connected():
            try:
                # Version 4, CONNECT, port, address 0.0.0.9 followed by the
                # user id and the host name, each NUL-terminated.
                stream.write(b'\x04\x01' + struct.pack('>H', port)
                             + b'\x00\x00\x00\x09userid\x00' + host + b'\x00')
                stream.read_bytes(8, socks_response)
            except tornado.iostream.StreamClosedError:
                socks_close()

        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)
        stream = tornado.iostream.IOStream(s)
        stream.set_close_callback(socks_close)
        stream.connect((self.socks_server, self.socks_port), socks_connected)


class HttpConnector(Connector):
    """Connector that tunnels through an HTTP proxy using ``CONNECT``.

    The netloc may carry ``user:password@`` credentials, which are sent as
    a Basic ``Proxy-Authorization`` header.  The default port is 3128.
    """

    def __init__(self, netloc, path=None):
        super().__init__(netloc, path)
        auth, host = netloc_parser(netloc)
        # b64encode never inserts line breaks, unlike encodebytes, so long
        # credentials cannot split the header.
        self.auth = base64.b64encode(auth.encode()) if auth else None
        self.http_server, self.http_port = hostport_parser(host, 3128)

    @classmethod
    def accept(cls, scheme):
        return scheme == 'http'

    def connect(self, host, port, callback):
        """Ask the HTTP proxy to connect to *host* (bytes) on *port*."""

        def http_close():
            callback(None)

        def http_response(data):
            stream.set_close_callback(None)
            try:
                code = int(data.split()[1])
            except (IndexError, ValueError):
                code = None  # malformed status line counts as failure
            if code == 200:
                callback(stream)
            else:
                # Do not leak the proxy connection on a refused request.
                stream.close()
                callback(None)

        def http_connected():
            try:
                stream.write(b'CONNECT ' + host + b':' +
                             str(port).encode() + b' HTTP/1.1\r\n')
                if self.auth:
                    stream.write(
                        b'Proxy-Authorization: Basic ' + self.auth + b'\r\n')
                # NOTE(review): the usual token is "close", not "closed";
                # left unchanged pending confirmation.
                stream.write(b'Proxy-Connection: closed\r\n')
                stream.write(b'\r\n')
                stream.read_until(b'\r\n\r\n', http_response)
            except tornado.iostream.StreamClosedError:
                http_close()

        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM, 0)
        stream = tornado.iostream.IOStream(s)
        stream.set_close_callback(http_close)
        stream.connect((self.http_server, self.http_port), http_connected)


class RulesConnector(Connector):
    """Connector that picks an upstream per target from a rules file.

    Each non-blank, non-``#`` line of the file at ``path`` holds a regular
    expression and an upstream URL separated by whitespace.  The first rule
    whose pattern matches ``host:port`` wins; a catch-all ``direct://`` rule
    is always appended.  The file's modification time is polled every
    second and the rules are reloaded when it changes.
    """

    def __init__(self, netloc=None, path=None):
        super().__init__(netloc, path)
        self.rules = None
        self._connectors = {}
        self._modify_time = None
        self.check_update()
        tornado.ioloop.PeriodicCallback(self.check_update, 1000).start()

    def load_rules(self):
        """Parse the rules file and replace ``self.rules``.

        Invalid lines are logged and skipped.  The new list is only
        installed after the whole file was read, so an I/O error keeps the
        previous rules.
        """
        rules = []
        with open(self.path) as f:
            for line in f:
                line = line.strip()
                if not line or line.startswith('#'):
                    continue
                try:
                    rule_pattern, upstream = line.split()
                    Connector.get(upstream)  # validates the upstream URL
                    rule_pattern = re.compile(rule_pattern, re.I)
                except Exception:
                    logging.error('Invalid rule: %s', line)
                    continue
                rules.append([rule_pattern, upstream])
        rules.append(['.*', 'direct://'])
        self.rules = rules

    def check_update(self):
        """Reload the rules if the file's modification time changed."""
        modified = os.stat(self.path).st_mtime
        if modified != self._modify_time:
            logging.info('loading %s', self.path)
            self._modify_time = modified
            self.load_rules()

    @classmethod
    def accept(cls, scheme):
        return scheme == 'rules'

    def connect(self, host, port, callback):
        """Connect through the upstream of the first matching rule.

        :param host: target host as ``bytes``.
        :raises RuntimeError: if no rule matches.
        """
        target = host.decode() + ':' + str(port)
        for rule, upstream in self.rules:
            if re.match(rule, target):
                # Connectors are created lazily and reused per upstream URL.
                if upstream not in self._connectors:
                    self._connectors[upstream] = Connector.get(upstream)
                self._connectors[upstream].connect(host, port, callback)
                break
        else:
            raise RuntimeError('no available rule for %s' % target)


class ProxyHandler(object):
    """Handle one client connection: parse the request head and relay it."""

    def __init__(self, stream, connector):
        self.connector = connector
        self.incoming = stream

        # Initialise state before the first read is started so a callback
        # can never have its results overwritten.
        self.method = None
        self.url = None
        self.ver = None
        self.headers = None
        self.outgoing = None

        self.incoming.read_until(b'\r\n', self.on_method)

    def _abort(self, outgoing=None):
        """Close the client stream and, if given, the outgoing stream."""
        self.incoming.close()
        if outgoing is not None:
            outgoing.close()

    def on_method(self, method):
        """Parse the request line, then read the header block."""
        try:
            self.method, self.url, self.ver = method.strip().split(b' ')
        except ValueError:
            logging.warning('malformed request line: %r', method)
            self._abort()
            return
        # XXX would fail if the request doesn't have any more headers
        self.incoming.read_until(b'\r\n\r\n', self.on_headers)
        logging.debug(method.strip().decode())

    def on_connected(self, outgoing):
        """Forward a plain HTTP request once the upstream is connected."""
        if not outgoing:
            self._abort()
            return
        self.outgoing = outgoing
        try:
            # Send only the path part of the absolute proxy URL upstream.
            path = urlunparse((b'', b'') + urlparse(self.url)[2:])
            head = [b' '.join((self.method, path, self.ver)) + b'\r\n']
            for k, v in self.headers.items():
                head.append(k + b': ' + v + b'\r\n')
            head.append(b'\r\n')
            outgoing.write(b''.join(head))
            writer_in = write_to(self.incoming)
            length_key = _header_key(self.headers, b'Content-Length')
            if length_key is not None:
                self.incoming.read_bytes(
                    int(self.headers[length_key]),
                    outgoing.write, outgoing.write)
            outgoing.read_until_close(writer_in, writer_in)
        except ValueError:
            logging.warning('bad request for %r', self.url)
            self._abort(outgoing)
        except tornado.iostream.StreamClosedError:
            self._abort(outgoing)

    def on_connect_connected(self, outgoing):
        """Confirm a ``CONNECT`` tunnel and start relaying raw bytes."""
        if not outgoing:
            self._abort()
            return
        self.outgoing = outgoing
        try:
            self.incoming.write(
                b'HTTP/1.1 200 Connection Established\r\n\r\n')
        except tornado.iostream.StreamClosedError:
            self._abort(outgoing)
            return  # never pipe between streams that were just closed
        pipe(self.incoming, outgoing)

    def on_headers(self, headers_buffer):
        """Parse the headers and open the outgoing connection.

        Header names are matched ignoring case.  Repeated header names
        collapse to the last value because they are stored in a dict.
        """
        self.headers = OrderedDict(header_parser(headers_buffer))
        try:
            if self.method == b'CONNECT':
                host, port = hostport_parser(self.url, 443)
                self.connector.connect(host, port, self.on_connect_connected)
                return

            proxy_conn = _header_key(self.headers, b'Proxy-Connection')
            if proxy_conn is not None:
                del self.headers[proxy_conn]
            conn = _header_key(self.headers, b'Connection')
            self.headers[conn if conn is not None else b'Connection'] = \
                b'close'

            host_key = _header_key(self.headers, b'Host')
            if host_key is None:
                self._abort()
                return
            host, port = hostport_parser(self.headers[host_key], 80)
        except ValueError:
            # Non-numeric port in the CONNECT target or Host header.
            logging.warning('bad target in request for %r', self.url)
            self._abort()
            return
        self.connector.connect(host, port, self.on_connected)


class ProxyServer(tornado.tcpserver.TCPServer):
    """TCP server that creates a :class:`ProxyHandler` per connection."""

    def __init__(self, connector=None):
        super().__init__()
        self.connector = connector or DirectConnector()

    def handle_stream(self, stream, address):
        ProxyHandler(stream, self.connector)


def main():
    """Parse the command line and run the proxy until interrupted."""
    # NOTE(review): presumably parses an empty config only to trigger
    # tornado's option callbacks (logging setup) -- confirm.
    tornado.options.options.parse_config_file('/dev/null')

    parser = argparse.ArgumentParser(
        description='Simple proxy server based on tornado')
    parser.add_argument('-u', '--upstream', type=str,
                        help='upstream proxy like socks://localhost:1080')
    parser.add_argument('-b', '--bind', type=str, default=':8000',
                        help='bind address and port, default is :8000')
    args = parser.parse_args()

    if args.upstream:
        connector = Connector.get(args.upstream)
    else:
        connector = DirectConnector()
    logging.info('using connector: %s', connector)
    host, port = hostport_parser(args.bind, 8000)
    server = ProxyServer(connector)
    logging.info('listening on %s:%s', host, port)
    server.listen(port, host)

    tornado.ioloop.IOLoop.instance().start()


if __name__ == '__main__':
    main()