├── .gitignore ├── LICENSE └── proxy.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Michael Mollard 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
#!/usr/bin/env python3
"""A small threaded HTTP reverse proxy.

Forwards incoming GET/HEAD/POST requests to https://<hostname><path>
(hostname is set via --hostname, default en.wikipedia.org) and relays the
upstream response — status, headers, and body — back to the client.
"""
from http.server import BaseHTTPRequestHandler, HTTPServer
from socketserver import ThreadingMixIn
import argparse
import os
import random
import sys
import threading

import requests

# Upstream host to proxy to; overwritten from --hostname in main().
hostname = 'en.wikipedia.org'


def merge_two_dicts(x, y):
    """Return a new dict with the keys of *x* and *y*; *y* wins on conflict."""
    return x | y


def set_header():
    """Headers forced onto every upstream request (override the client's)."""
    return {'Host': hostname}


class ProxyHTTPRequestHandler(BaseHTTPRequestHandler):
    """Relays each incoming request to the configured upstream host."""

    protocol_version = 'HTTP/1.0'

    def do_HEAD(self):
        # A HEAD is a GET without a response body.
        self.do_GET(body=False)

    def do_GET(self, body=True):
        """Proxy a GET (or HEAD, when body=False) to the upstream host."""
        sent = False
        try:
            url = 'https://{}{}'.format(hostname, self.path)
            req_header = self.parse_headers()

            print(req_header)
            print(url)
            # NOTE(security): verify=False disables TLS certificate
            # verification for the upstream connection — acceptable only
            # for local debugging.
            resp = requests.get(url, headers=merge_two_dicts(req_header, set_header()), verify=False)
            sent = True

            self.send_response(resp.status_code)
            self.send_resp_headers(resp)
            if body:
                # BUG FIX: write the raw upstream bytes.  The old code
                # re-encoded resp.text, which could produce a byte count
                # different from the Content-Length we advertise
                # (len(resp.content)) in send_resp_headers().
                self.wfile.write(resp.content)
        finally:
            # If nothing upstream succeeded, report the failure to the client.
            if not sent:
                self.send_error(404, 'error trying to proxy')

    def do_POST(self, body=True):
        """Proxy a POST, forwarding the client's request body upstream."""
        sent = False
        try:
            url = 'https://{}{}'.format(hostname, self.path)
            # BUG FIX: Python 3's http.client.HTTPMessage has no
            # getheader() (that was Python 2) — use .get() instead.
            content_len = int(self.headers.get('content-length', 0))
            post_body = self.rfile.read(content_len)
            req_header = self.parse_headers()

            # NOTE(security): verify=False disables TLS certificate checks.
            resp = requests.post(url, data=post_body, headers=merge_two_dicts(req_header, set_header()), verify=False)
            sent = True

            self.send_response(resp.status_code)
            self.send_resp_headers(resp)
            if body:
                self.wfile.write(resp.content)
        finally:
            if not sent:
                self.send_error(404, 'error trying to proxy')

    def parse_headers(self):
        """Return the client's request headers as a plain dict.

        BUG FIX: iterating an http.client.HTTPMessage yields header *names*,
        not "Name: value" lines, so the old split-on-':' loop produced a
        (nearly always) empty dict and no client headers were forwarded.
        .items() yields the (name, value) pairs directly.
        """
        return dict(self.headers.items())

    def send_resp_headers(self, resp):
        """Relay upstream response headers to the client.

        Encoding and length headers are dropped because requests has
        already decoded the body; Content-Length is recomputed from the
        decoded bytes we actually send.
        """
        respheaders = resp.headers
        print('Response Header')
        # Case-insensitive skip set covers both spellings the old code
        # listed explicitly.
        skip = {'content-encoding', 'transfer-encoding', 'content-length'}
        for key in respheaders:
            if key.lower() not in skip:
                print(key, respheaders[key])
                self.send_header(key, respheaders[key])
        self.send_header('Content-Length', len(resp.content))
        self.end_headers()


def parse_args(argv=sys.argv[1:]):
    """Parse command-line options (--port, --hostname)."""
    parser = argparse.ArgumentParser(description='Proxy HTTP requests')
    # BUG FIX: the help text claimed "(default: random)" while the actual
    # default is 9999.
    parser.add_argument('--port', dest='port', type=int, default=9999,
                        help='serve HTTP requests on specified port (default: 9999)')
    parser.add_argument('--hostname', dest='hostname', type=str, default='en.wikipedia.org',
                        help='hostname to be processed (default: en.wikipedia.org)')
    return parser.parse_args(argv)


class ThreadedHTTPServer(ThreadingMixIn, HTTPServer):
    """Handle requests in a separate thread."""


def main(argv=sys.argv[1:]):
    """Start the threaded reverse proxy and serve until interrupted."""
    global hostname
    args = parse_args(argv)
    hostname = args.hostname
    print('http server is starting on {} port {}...'.format(args.hostname, args.port))
    # Bind to loopback only — this proxy is not hardened for public exposure.
    server_address = ('127.0.0.1', args.port)
    httpd = ThreadedHTTPServer(server_address, ProxyHTTPRequestHandler)
    print('http server is running as reverse proxy')
    httpd.serve_forever()


if __name__ == '__main__':
    main()