├── LICENSE ├── README ├── __init__.py ├── server.py ├── smart_relay.py └── socks_relay.py /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, yinghuocho@gmail.com 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | a gevent based HTTP/HTTPS proxy with SOCKS5 forwarding. 2 | 3 | dependencies 4 | ============ 5 | * gevent 6 | * gsocks (for SOCKS5 forwarding) 7 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yinghuocho/ghttproxy/19ad665bb32b67a5bfa6a107fb98cc016b3c20c6/__init__.py -------------------------------------------------------------------------------- /server.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import urlparse 3 | import time 4 | from httplib import HTTPConnection 5 | 6 | import gevent 7 | from gevent import socket 8 | from gevent.pywsgi import WSGIHandler, WSGIServer 9 | from gevent.pool import Pool 10 | from gevent.event import Event 11 | 12 | log = logging.getLogger(__name__) 13 | 14 | CHUNKSIZE = 65536 15 | 16 | def pipe_socket(client, remote): 17 | def copy(a, b, finish): 18 | while not finish.is_set(): 19 | try: 20 | data = a.recv(CHUNKSIZE) 21 | if not data: 22 | break 23 | b.sendall(data) 24 | except: 25 | break 26 | finish.set() 27 | 28 | finish = Event() 29 | finish.clear() 30 | threads = [ 31 | gevent.spawn(copy, client, remote, finish), 32 | gevent.spawn(copy, remote, client, finish), 33 | ] 34 | [t.join() for t in threads] 35 | client.close() 36 | remote.close() 37 | 38 | class ProxyHandler(WSGIHandler): 39 | """ override WSGIHandler.handle() to process https proxy 40 | """ 41 | def handle(self): 42 | try: 43 | while self.socket is not None: 44 | self.time_start = time.time() 45 | self.time_finish = 0 46 | result = self.handle_one_request() 47 | if result is None: 48 | break 49 | if result is True: 50 | if self.command == "CONNECT": 51 | break 52 | else: 53 | continue 54 | self.status, response_body = result 55 | self.socket.sendall(response_body) 56 | if self.time_finish == 0: 57 | self.time_finish = time.time() 58 | self.log_request() 59 | break 60 | 61 | if self.socket and hasattr(self, 'command') and \ 62 | self.command == "CONNECT" and self.environ.get('__ghttproxy.tunnelconn', None): 63 | pipe_socket(self.socket, self.environ.get('__ghttproxy.tunnelconn')) 64 | finally: 65 | if self.socket is not None: 66 | try: 67 | try: 68 | self.socket._sock.recv(16384) 69 | finally: 70 | self.socket._sock.close() 71 | self.socket.close() 72 | except socket.error: # @UndefinedVariable 73 | pass 74 | self.__dict__.pop('socket', None) 75 | self.__dict__.pop('rfile', None) 76 | 77 | """ override WSGIHandler.get_environ() to pass raw headers and raw path to environ 78 | """ 79 | def get_environ(self): 80 | env = super(ProxyHandler, self).get_environ() 81 | env['__ghttproxy.rawheaders'] = self.headers.headers 82 | env['PATH_INFO'] = self.path.split('?', 1)[0] 83 | return env 84 | 85 | # some of below code are copied and modifed from "meek/wsgi/reflect.py" 86 | # at https://git.torproject.org/pluggable-transports/meek.git 87 | 88 | # Limits a file-like object to reading only n bytes. Used to keep limit 89 | # wsgi.input to the Content-Length, otherwise it blocks. 90 | class LimitedReader(object): 91 | def __init__(self, f, n): 92 | self.f = f 93 | self.n = n 94 | 95 | def __getattr__(self, name): 96 | return getattr(self.f, name) 97 | 98 | def read(self, size=None): 99 | if self.n <= 0: 100 | return "" 101 | if size is not None and size > self.n: 102 | size = self.n 103 | data = self.f.read(size) 104 | self.n -= len(data) 105 | return data 106 | 107 | def set_forwarded_for(environ, headers): 108 | if environ['REMOTE_ADDR'] in ('127.0.0.1', '::1') and \ 109 | 'X-Forwarded-For' not in headers: 110 | # don't add header if we are forwarding localhost, 111 | return 112 | 113 | s = headers.get('X-Forwarded-For', '') 114 | if s: 115 | forwarders = s.split(", ") 116 | else: 117 | forwarders = [] 118 | addr = environ['REMOTE_ADDR'] 119 | if addr: 120 | forwarders.append(addr) 121 | if forwarders: 122 | headers['X-Forwarded-For'] = ", ".join(forwarders) 123 | 124 | def reconstruct_url(environ): 125 | path = environ.get('PATH_INFO') 126 | if path.startswith("http://"): 127 | url = path 128 | else: 129 | host = environ.get('HTTP_HOST') 130 | url = 'http://' + host + path 131 | 132 | query = environ.get('QUERY_STRING', '') 133 | if query: 134 | url += '?' + query 135 | return url 136 | 137 | def get_destination(environ): 138 | if environ["REQUEST_METHOD"] == "CONNECT": 139 | port = 443 140 | else: 141 | port = 80 142 | 143 | host = environ.get('HTTP_HOST', '').lower().split(":") 144 | path = environ.get('PATH_INFO', '').lower() 145 | req = urlparse.urlparse(path) 146 | # first process requeset line 147 | if req.scheme: 148 | if req.scheme != "http": 149 | raise Exception('invalid scheme in request line') 150 | netloc = req.netloc.split(":") 151 | if len(netloc) == 2: 152 | return netloc[0], int(netloc[1]) 153 | else: 154 | return req.netloc, port 155 | elif req.netloc: 156 | raise Exception('invalid scheme in request line') 157 | 158 | # then process host 159 | if len(host) == 2: 160 | return host[0], int(host[1]) 161 | else: 162 | return host[0], port 163 | 164 | NON_FORWARD_HEADERS = ( 165 | 'proxy-connection', 166 | 'host', 167 | ) 168 | 169 | def copy_request(environ): 170 | method = environ["REQUEST_METHOD"] 171 | url = reconstruct_url(environ) 172 | 173 | headers = [] 174 | content_length = environ.get("CONTENT_LENGTH") 175 | if content_length: 176 | body = LimitedReader(environ["wsgi.input"], int(content_length)) 177 | else: 178 | body = "" 179 | 180 | raw = environ['__ghttproxy.rawheaders'] 181 | for header in raw: 182 | key, value = header.split(':', 1) 183 | if not key: 184 | continue 185 | if key.strip().lower() in NON_FORWARD_HEADERS: 186 | continue 187 | headers.append((key.strip(), value.strip())) 188 | headers.append(("Connection", "Keep-Alive")) 189 | headers = dict(headers) 190 | return method, url, body, headers 191 | 192 | class ProxyApplication(object): 193 | def __init__(self, timeout=60): 194 | self.timeout = timeout 195 | 196 | def http(self, environ, start_response): 197 | try: 198 | host, port = get_destination(environ) 199 | log.info("HTTP request to (%s:%d)" % (host, port)) 200 | method, url, body, headers = copy_request(environ) 201 | except Exception, e: 202 | log.error("[Exception][http]: %s" % str(e)) 203 | start_response("400 Bad Request", [("Content-Type", "text/plain; charset=utf-8")]) 204 | yield "Bad Request" 205 | return 206 | 207 | try: 208 | set_forwarded_for(environ, headers) 209 | http_conn = socket.create_connection((host, port), timeout=self.timeout) 210 | conn = HTTPConnection(host, port=port) 211 | conn.sock = http_conn 212 | u = urlparse.urlsplit(url) 213 | path = urlparse.urlunsplit(("", "", u.path, u.query, "")) 214 | # Host header put by conn.request 215 | conn.request(method, path, body, headers) 216 | resp = conn.getresponse() 217 | start_response("%d %s" % (resp.status, resp.reason), resp.getheaders()) 218 | while True: 219 | data = resp.read(CHUNKSIZE) 220 | if not data: 221 | break 222 | yield data 223 | conn.close() 224 | except Exception, e: 225 | log.error("[Exception][http]: %s" % str(e)) 226 | start_response("500 Internal Server Error", [("Content-Type", "text/plain; charset=utf-8")]) 227 | yield "Internal Server Error" 228 | return 229 | 230 | def tunnel(self, environ, start_response): 231 | try: 232 | host, port = get_destination(environ) 233 | log.info("CONNECT request to (%s:%d)" % (host, port)) 234 | except Exception, e: 235 | log.error("[Exception][tunnel]: %s" % str(e)) 236 | start_response("400 Bad Request", [("Content-Type", "text/plain; charset=utf-8")]) 237 | return ["Bad Request"] 238 | 239 | try: 240 | tunnel_conn = socket.create_connection((host, port), timeout=self.timeout) 241 | environ['__ghttproxy.tunnelconn'] = tunnel_conn 242 | start_response("200 Connection established", []) 243 | return [] 244 | except socket.timeout: # @UndefinedVariable 245 | log.error("Connection Timeout") 246 | start_response("504 Gateway Timeout", [("Content-Type", "text/plain; charset=utf-8")]) 247 | return ["Gateway Timeout"] 248 | except Exception, e: 249 | log.error("[Exception][https]: %s" % str(e)) 250 | start_response("500 Internal Server Error", [("Content-Type", "text/plain; charset=utf-8")]) 251 | return ["Internal Server Error"] 252 | 253 | def application(self, environ, start_response): 254 | if environ["REQUEST_METHOD"] == "CONNECT": 255 | return self.tunnel(environ, start_response) 256 | else: 257 | return self.http(environ, start_response) 258 | 259 | class HTTPProxyServer(object): 260 | def __init__(self, ip, port, app, log='default'): 261 | self.ip = ip 262 | self.port = port 263 | self.app = app 264 | self.server = WSGIServer((self.ip, self.port), log=log, 265 | application=self.app.application, spawn=Pool(500), handler_class=ProxyHandler) 266 | 267 | def start(self): 268 | self.server.start() 269 | 270 | def run(self): 271 | self.server.serve_forever() 272 | 273 | def stop(self): 274 | self.server.stop() 275 | 276 | @property 277 | def closed(self): 278 | return self.server.closed 279 | 280 | if __name__ == '__main__': 281 | logging.basicConfig( 282 | format='[%(asctime)s][%(name)s][%(levelname)s] - %(message)s', 283 | datefmt='%Y-%d-%m %H:%M:%S', 284 | level=logging.DEBUG, 285 | ) 286 | HTTPProxyServer("127.0.0.1", 8000, ProxyApplication()).run() 287 | 288 | -------------------------------------------------------------------------------- /smart_relay.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from urlparse import urlparse 3 | import re 4 | 5 | from server import HTTPProxyServer, ProxyApplication, get_destination 6 | from socks_relay import HTTP2SocksProxyApplication 7 | from gsocks.smart_relay import RESocksMatcher, ForwardDestination 8 | 9 | log = logging.getLogger(__name__) 10 | 11 | class HTTP2SocksSmartApplication(ProxyApplication): 12 | def __init__(self, matcher, timeout=60): 13 | self.matcher = matcher 14 | self.timeout = timeout 15 | self.forwarders = {} 16 | self.register_forwarder("socks5", self.forward_socks5) 17 | 18 | def set_matcher(self, matcher): 19 | self.matcher = matcher 20 | 21 | def register_forwarder(self, scheme_name, forwarder): 22 | self.forwarders[scheme_name] = forwarder 23 | 24 | def find_forwarder(self, scheme_name): 25 | return self.forwarders.get(scheme_name, None) 26 | 27 | def forward_socks5(self, url, host, port, environ, start_response): 28 | app = HTTP2SocksProxyApplication(url.hostname, int(url.port)) 29 | return app.application(environ, start_response) 30 | 31 | def forward(self, scheme, host, port, environ, start_response): 32 | forwarder = self.find_forwarder(scheme.scheme) 33 | if forwarder: 34 | return forwarder(scheme.data, host, port, environ, start_response) 35 | else: 36 | log.error("Unsupported forwarding scheme %s" % scheme.scheme) 37 | start_response("500 Internal Server Error", [("Content-Type", "text/plain; charset=utf-8")]) 38 | return ["Internal Server Error"] 39 | 40 | def application(self, environ, start_response): 41 | try: 42 | host, port = get_destination(environ) 43 | except Exception, e: 44 | log.error("[Exception][http]: %s" % str(e)) 45 | start_response("400 Bad Request", [("Content-Type", "text/plain; charset=utf-8")]) 46 | return ["Bad Request"] 47 | 48 | try: 49 | scheme = self.matcher.find(host, port) 50 | if not scheme: 51 | return super(HTTP2SocksSmartApplication, self).application(environ, start_response) 52 | else: 53 | return self.forward(scheme, host, port, environ, start_response) 54 | except Exception, e: 55 | log.error("[Exception][application]: %s" % str(e)) 56 | start_response("500 Internal Server Error", [("Content-Type", "text/plain; charset=utf-8")]) 57 | return ["Internal Server Error"] 58 | 59 | if __name__ == '__main__': 60 | logging.basicConfig( 61 | format='[%(asctime)s][%(name)s][%(levelname)s] - %(message)s', 62 | datefmt='%Y-%d-%m %H:%M:%S', 63 | level=logging.DEBUG, 64 | ) 65 | scheme = ForwardDestination("socks5", urlparse('socks5://127.0.0.1:1080/')) 66 | rules = { 67 | (re.compile(r'.*\.whereisip\.net$'), re.compile(r'.*'), re.compile(r'.*')): scheme, 68 | (re.compile(r'.*google\.com$'), re.compile(r'.*'), re.compile(r'.*')): scheme, 69 | } 70 | matcher = RESocksMatcher(rules) 71 | HTTPProxyServer("127.0.0.1", 8000, HTTP2SocksSmartApplication(matcher)).run() 72 | -------------------------------------------------------------------------------- /socks_relay.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import urlparse 3 | from httplib import HTTPConnection 4 | 5 | from gevent import socket 6 | 7 | from server import HTTPProxyServer, ProxyApplication, \ 8 | copy_request, CHUNKSIZE, get_destination, set_forwarded_for 9 | from gsocks import utils as socksutils 10 | from gsocks import msg as socksmsg 11 | 12 | log = logging.getLogger(__name__) 13 | 14 | class HTTP2SocksProxyApplication(ProxyApplication): 15 | def __init__(self, socksip, socksport, timeout=60): 16 | super(HTTP2SocksProxyApplication, self).__init__(timeout) 17 | 18 | self.socksip = socksip 19 | self.socksport = socksport 20 | 21 | def connect_socks(self, host, port): 22 | socksconn = None 23 | try: 24 | socksconn = socket.create_connection((self.socksip, self.socksport), timeout=self.timeout) 25 | if not socksutils.basic_handshake_client(socksconn): 26 | socksconn.close() 27 | addrtype = socksutils.addr_type(host) 28 | socksutils.send_request(socksconn, cmd=socksmsg.CONNECT, 29 | addrtype=addrtype, dstaddr=host, dstport=port) 30 | reply = socksutils.read_reply(socksconn) 31 | if reply.rep != socksmsg.SUCCEEDED: 32 | log.info("error response %d returned from socks server" % reply.rep) 33 | socksconn.close() 34 | return None 35 | return socksconn 36 | except Exception, e: 37 | log.error("[Exception][connect_socks]: %s" % str(e)) 38 | if socksconn: 39 | socksconn.close() 40 | return None 41 | 42 | def tunnel(self, environ, start_response): 43 | try: 44 | host, port = get_destination(environ) 45 | except Exception, e: 46 | log.error("[Exception][tunnel]: %s" % str(e)) 47 | start_response("400 Bad Request", [("Content-Type", "text/plain; charset=utf-8")]) 48 | return ["Bad Request"] 49 | 50 | socksconn = self.connect_socks(host, port) 51 | if not socksconn: 52 | start_response("500 Internal Server Error", [("Content-Type", "text/plain; charset=utf-8")]) 53 | return ["Internal Server Error"] 54 | else: 55 | environ['TUNNEL_CONN'] = socksconn 56 | start_response("200 Connection Established", []) 57 | return [] 58 | 59 | def http(self, environ, start_response): 60 | try: 61 | method, url, body, headers = copy_request(environ) 62 | host, port = get_destination(environ) 63 | except Exception, e: 64 | log.error("[Exception][http]: %s" % str(e)) 65 | start_response("400 Bad Request", [("Content-Type", "text/plain; charset=utf-8")]) 66 | yield "Bad Request" 67 | return 68 | 69 | socksconn = self.connect_socks(host, port) 70 | if not socksconn: 71 | start_response("500 Internal Server Error", [("Content-Type", "text/plain; charset=utf-8")]) 72 | yield "Internal Server Error" 73 | return 74 | 75 | try: 76 | conn = HTTPConnection(host, port=port) 77 | conn.sock = socksconn 78 | set_forwarded_for(environ, headers) 79 | u = urlparse.urlsplit(url) 80 | path = urlparse.urlunsplit(("", "", u.path, u.query, "")) 81 | conn.request(method, path, body, headers) 82 | resp = conn.getresponse() 83 | start_response("%d %s" % (resp.status, resp.reason), resp.getheaders()) 84 | while True: 85 | data = resp.read(CHUNKSIZE) 86 | if not data: 87 | break 88 | yield data 89 | conn.close() 90 | except Exception, e: 91 | log.error("[Exception][http]: %s" % str(e)) 92 | start_response("500 Internal Server Error", [("Content-Type", "text/plain; charset=utf-8")]) 93 | yield "Internal Server Error" 94 | return 95 | 96 | if __name__ == '__main__': 97 | logging.basicConfig( 98 | format='[%(asctime)s][%(name)s][%(levelname)s] - %(message)s', 99 | datefmt='%Y-%d-%m %H:%M:%S', 100 | level=logging.DEBUG, 101 | ) 102 | HTTPProxyServer("127.0.0.1", 8000, 103 | HTTP2SocksProxyApplication("127.0.0.1", 1080)).run() 104 | 105 | --------------------------------------------------------------------------------