├── MANIFEST ├── requirements.txt ├── demo ├── client_https.py ├── client_http.py ├── delete_example.py ├── get_example.py ├── head_example.py ├── auth_example.py ├── post_example.py ├── put_example.py ├── multiple_requests.py └── create_gist.py ├── setup.py ├── LICENSE ├── .gitignore ├── escs.sh ├── tests.py ├── README.md └── websnake.py /MANIFEST: -------------------------------------------------------------------------------- 1 | # file GENERATED by distutils, do NOT edit 2 | setup.py 3 | websnake.py 4 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | untwisted==3.2.2 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /demo/client_https.py: -------------------------------------------------------------------------------- 1 | from websnake import Get, ResponseHandle, core, die 2 | 3 | def handle_done(request, response): 4 | print('Headers:', response.headers) 5 | print('Code:', response.code) 6 | print('Version:', response.version) 7 | print('Reason:', response.reason) 8 | print('Data:', response.content()) 9 | die('Request done.') 10 | 11 | if __name__ == '__main__': 12 | request = Get('https://www.google.com.br/') 13 | 14 | request.add_map('200', handle_done) 15 | core.gear.mainloop() 16 | 17 | -------------------------------------------------------------------------------- /demo/client_http.py: -------------------------------------------------------------------------------- 1 | from websnake import Get, ResponseHandle, core, die 2 | 3 | def on_response(request, response): 4 | print('Headers:', response.headers) 5 | print('Code:', response.code) 6 | print('Version:', response.version) 7 | print('Reason:', response.reason) 8 | print('Data:', response.fd.read()) 9 | die('Request done.') 10 | 11 | if __name__ == '__main__': 12 | request = Get('http://codepad.org/') 
13 | request.add_map(ResponseHandle.RESPONSE, on_response) 14 | core.gear.mainloop() 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /demo/delete_example.py: -------------------------------------------------------------------------------- 1 | from websnake import Delete, ResponseHandle, core, die 2 | 3 | def on_done(con, response): 4 | print('Headers:', response.headers) 5 | print('Code:', response.code) 6 | print('Version:', response.version) 7 | print('Reason:', response.reason) 8 | print('Data:', response.fd.read()) 9 | die() 10 | 11 | if __name__ == '__main__': 12 | url = 'http://httpbin.org/delete' 13 | request = Delete(url) 14 | 15 | request.add_map(ResponseHandle.DONE, on_done) 16 | core.gear.mainloop() 17 | 18 | 19 | -------------------------------------------------------------------------------- /demo/get_example.py: -------------------------------------------------------------------------------- 1 | from websnake import Get, ResponseHandle, core, die 2 | 3 | def handle_response(request, response): 4 | print('Headers:', response.headers) 5 | print('Code:', response.code) 6 | print('Version:', response.version) 7 | print('Reason:', response.reason) 8 | 9 | if __name__ == '__main__': 10 | request = Get('https://facebook.com/') 11 | 12 | request.add_map(ResponseHandle.RESPONSE, handle_response) 13 | request.add_map(ResponseHandle.DONE, lambda req, resp: die('Bye!')) 14 | 15 | core.gear.mainloop() 16 | 17 | 18 | -------------------------------------------------------------------------------- /demo/head_example.py: -------------------------------------------------------------------------------- 1 | from websnake import Head, ResponseHandle, core, die, FormData, TokenAuth 2 | 3 | def handle_done(con, response): 4 | print('Headers:', response.headers) 5 | print('Code:', response.code) 6 | print('Version:', response.version) 7 | print('Reason:', response.reason) 8 | print('Data:', response.fd.read()) 9 | 
die() 10 | 11 | if __name__ == '__main__': 12 | url = 'http://httpbin.org/head' 13 | request = Head(url) 14 | 15 | request.add_map(ResponseHandle.DONE, handle_done) 16 | core.gear.mainloop() 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /demo/auth_example.py: -------------------------------------------------------------------------------- 1 | from websnake import Get, BasicAuth, ResponseHandle, core, die 2 | 3 | def handle_response(request, response): 4 | print('Headers:', response.headers) 5 | print('Code:', response.code) 6 | print('Version:', response.version) 7 | print('Reason:', response.reason) 8 | 9 | if __name__ == '__main__': 10 | request = Get('http://httpbin.org/get', auth=BasicAuth('foo', 'bar')) 11 | 12 | request.add_map(ResponseHandle.RESPONSE, handle_response) 13 | request.add_map(ResponseHandle.DONE, lambda req, resp: die('Bye!')) 14 | 15 | core.gear.mainloop() 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /demo/post_example.py: -------------------------------------------------------------------------------- 1 | from websnake import Post, ResponseHandle, core, die, FormData, TokenAuth 2 | 3 | def on_done(con, response): 4 | print('Headers:', response.headers) 5 | print('Code:', response.code) 6 | print('Version:', response.version) 7 | print('Reason:', response.reason) 8 | print('Data:', response.fd.read()) 9 | die() 10 | 11 | if __name__ == '__main__': 12 | url = 'http://httpbin.org/post' 13 | data = {'somekey': 'somevalue'} 14 | 15 | request = Post(url, payload=FormData(data)) 16 | 17 | request.add_map(ResponseHandle.DONE, on_done) 18 | core.gear.mainloop() 19 | 20 | -------------------------------------------------------------------------------- /demo/put_example.py: -------------------------------------------------------------------------------- 1 | from websnake import Put, ResponseHandle, core, die, FormData, TokenAuth 2 | 3 | def 
on_done(con, response): 4 | print('Headers:', response.headers) 5 | print('Code:', response.code) 6 | print('Version:', response.version) 7 | print('Reason:', response.reason) 8 | print('Data:', response.fd.read()) 9 | die() 10 | 11 | if __name__ == '__main__': 12 | url = 'http://httpbin.org/put' 13 | data = {'somekey': 'somevalue'} 14 | 15 | request = Put(url, payload=FormData(data)) 16 | 17 | request.add_map(ResponseHandle.DONE, on_done) 18 | core.gear.mainloop() 19 | 20 | 21 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | 3 | from distutils.core import setup 4 | setup(name="websnake", 5 | version="3.1.1", 6 | py_modules=['websnake'], 7 | author="Iury O. G. Figueiredo", 8 | author_email="ioliveira@id.uff.br", 9 | url='https://github.com/iogf/websnake', 10 | download_url='https://github.com/iogf/websnake/releases', 11 | keywords=['web server', 'requests', 'http', 'https', 'web framework', 'untwisted'], 12 | classifiers=[], 13 | description="Asynchronous web requests in python.",) 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /demo/multiple_requests.py: -------------------------------------------------------------------------------- 1 | """ 2 | """ 3 | 4 | from websnake import Get, ResponseHandle, core, RequestPool, die 5 | 6 | def handle_done(request, response): 7 | print('Headers:', response.headers) 8 | print('Code:', response.code) 9 | print('Version:', response.version) 10 | print('Reason:', response.reason) 11 | 12 | def handle_empty(pool): 13 | print('All requests done!') 14 | die('Stopping...') 15 | 16 | if __name__ == '__main__': 17 | urls = ('https://en.wikipedia.org/wiki/Leonhard_Euler', 18 | 
'https://www.google.com.br','https://facebook.com/') 19 | 20 | pool = RequestPool() 21 | pool.add_map(RequestPool.EMPTY, handle_empty) 22 | 23 | for ind in urls: 24 | Get(ind, pool=pool).add_map(ResponseHandle.DONE, handle_done) 25 | core.gear.mainloop() 26 | 27 | -------------------------------------------------------------------------------- /demo/create_gist.py: -------------------------------------------------------------------------------- 1 | """ 2 | Overview 3 | ======== 4 | 5 | Create an anonymous gist on github. 6 | 7 | """ 8 | 9 | from websnake import Post, ResponseHandle, core, die, JSon, TokenAuth 10 | 11 | def handle_done(con, response): 12 | print('Headers:', response.headers.headers) 13 | print('Code:', response.code) 14 | print('Version:', response.version) 15 | print('Reason:', response.reason) 16 | print('Data:', response.content()) 17 | die() 18 | 19 | if __name__ == '__main__': 20 | data = { 21 | "description": "the description for this gist1", 22 | "public": True, "files": { 23 | "file1.txt": {"content": "String file contents"}}} 24 | 25 | request = Post('https://api.github.com/gists', args = {'scope': 'gist'}, 26 | payload=JSon(data), auth = TokenAuth('API_TOKEN')) 27 | 28 | request.add_map(ResponseHandle.DONE, handle_done) 29 | core.gear.mainloop() 30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Iury de oliveira gomes figueiredo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject 
to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | -------------------------------------------------------------------------------- /escs.sh: -------------------------------------------------------------------------------- 1 | ############################################################################## 2 | # push websnake. 3 | cd ~/projects/websnake-code 4 | git status 5 | git add * 6 | git commit -a 7 | git push 8 | ############################################################################## 9 | # create the develop branch, websnake. 10 | git branch -a 11 | git checkout -b development 12 | git push --set-upstream origin development 13 | ############################################################################## 14 | # merge master into development, websnake. 
15 | cd ~/projects/websnake-code 16 | git checkout development 17 | git merge master 18 | git push 19 | ############################################################################## 20 | # merge development into master, websnake. 21 | cd ~/projects/websnake-code 22 | git checkout master 23 | git merge development 24 | git push 25 | git checkout development 26 | ############################################################################## 27 | # check diffs, websnake. 28 | cd ~/projects/websnake-code 29 | git diff 30 | ############################################################################## 31 | # delete the development branch, websnake. 32 | git branch -d development 33 | git push origin :development 34 | git fetch -p 35 | ############################################################################## 36 | # undo, changes, websnake, github. 37 | cd ~/projects/websnake-code 38 | git checkout * 39 | ############################################################################## 40 | # install, websnake. 41 | sudo bash -i 42 | cd /home/tau/projects/websnake-code 43 | python2 setup.py install 44 | rm -fr build 45 | exit 46 | ############################################################################## 47 | # build, websnake, package, disutils. 48 | cd /home/tau/projects/websnake-code 49 | python2.6 setup.py sdist 50 | rm -fr dist 51 | rm MANIFEST 52 | ############################################################################## 53 | # Update on pypi. 54 | cd ~/projects/websnake-code 55 | python setup.py sdist 56 | twine upload dist/* 57 | rm -fr dist 58 | ############################################################################## 59 | # futurize code. 60 | 61 | cd ~/projects/untwisted-code 62 | futurize --stage1 -w **/*.py 63 | 64 | # Check changes. 65 | futurize --stage2 **/*.py 66 | 67 | # Apply the changes. 68 | futurize --stage2 -w **/*.py 69 | 70 | # Clear stuff. 71 | find . 
class TestGet0(unittest.TestCase):
    """GET with query arguments: httpbin has to echo the arguments back."""

    def setUp(self):
        url = 'http://httpbin.org/get'
        self.args = {'key0': 'value0', 'key1': 'value2'}

        self.request = Get(url, args=self.args)
        self.request.add_map(ResponseHandle.DONE, self.handle_done)

    def handle_done(self, request, response):
        """Compare the echoed query arguments with the ones sent."""
        response_data = json.loads(response.content())

        # assertTrue(a, b) only checks that `a` is truthy and uses `b` as the
        # failure message, so it never compared the two mappings.
        self.assertEqual(response_data['args'], self.args)
        die()

    def test_get(self):
        # The request is fired in setUp; the reactor runs until die() is called.
        core.gear.mainloop()
class TestPost0(unittest.TestCase):
    """POST with query arguments: httpbin has to echo the arguments back."""

    def setUp(self):
        self.url = 'https://httpbin.org/post'
        self.args = {'key0': 'value0', 'key1': 'value2'}

        self.request = Post(self.url, args=self.args)
        self.request.add_map(ResponseHandle.DONE, self.handle_done)

    def handle_done(self, request, response):
        """Compare the echoed query arguments with the ones sent."""
        response_data = json.loads(response.content())

        # assertTrue(a, b) only checks that `a` is truthy and uses `b` as the
        # failure message, so it never compared the two mappings.
        self.assertEqual(response_data['args'], self.args)
        die()

    def test_get(self):
        # The request is fired in setUp; the reactor runs until die() is called.
        core.gear.mainloop()
This makes Websnake an ideal tool for handling a variety of simple or complex scenarios,
such as implementing Web crawlers.
The following example just makes a request and waits for the response to be printed.
97 | 98 | ~~~python 99 | from websnake import Post, ResponseHandle, core, die, JSon, TokenAuth 100 | 101 | def handle_done(con, response): 102 | print('Headers:', response.headers.headers) 103 | print('Code:', response.code) 104 | print('Version:', response.version) 105 | print('Reason:', response.reason) 106 | print('Data:', response.content()) 107 | die() 108 | 109 | if __name__ == '__main__': 110 | data = { 111 | "description": "the description for this gist1", 112 | "public": True, "files": { 113 | "file1.txt": {"content": "String file contents"}}} 114 | 115 | request = Post('https://api.github.com/gists', args = {'scope': 'gist'}, 116 | payload=JSon(data), auth = TokenAuth('API_TOKEN')) 117 | 118 | request.add_map(ResponseHandle.DONE, handle_done) 119 | core.gear.mainloop() 120 | ~~~ 121 | 122 | # install 123 | 124 | **Note:** Websnake should work with python3 only. 125 | 126 | ~~~ 127 | pip install -r requirements.txt 128 | pip install websnake 129 | ~~~ 130 | 131 | Documentation 132 | ============= 133 | 134 | [Websnake Documentation](https://github.com/untwisted/websnake/wiki) 135 | -------------------------------------------------------------------------------- /websnake.py: -------------------------------------------------------------------------------- 1 | from untwisted.client import lose, create_client, create_client_ssl 2 | from urllib3.filepost import encode_multipart_formdata 3 | from urllib.parse import urlencode, urlparse 4 | from untwisted.task import Task, DONE 5 | from untwisted.splits import AccUntil, TmpFile 6 | from untwisted.dispatcher import Dispatcher 7 | from untwisted.event import Event, SSL_CONNECT, SSL_CONNECT_ERR, CLOSE, CONNECT, CONNECT_ERR 8 | from cgi import FieldStorage, parse_header 9 | from base64 import encodebytes 10 | from tempfile import TemporaryFile 11 | from socket import getservbyname 12 | from untwisted.core import die 13 | from re import split 14 | from untwisted import core 15 | import json 16 | 17 | default_headers 
class Headers:
    """Case-insensitive store for HTTP header fields.

    Field names are lower-cased on the way in and on lookup, so access does
    not depend on the capitalisation used by the peer.
    """

    def __init__(self, data):
        """Parse a raw header block.

        data -- str holding one 'Field: value' entry per line, with lines
                separated by CRLF.

        Blank lines are ignored. A non-blank line without a colon raises
        ValueError.
        """
        self.headers = dict()

        for line in data.split('\r\n'):
            # An empty block or a trailing terminator yields blank lines that
            # carry no header; unpacking them would fail.
            if not line.strip():
                continue
            field, value = split(' *: *', line, 1)
            self.headers[field.lower()] = value

    def get(self, field, default=None):
        """Return the value stored for field, or default when it is absent."""
        return self.headers.get(field.lower(), default)

    def set(self, field, value):
        """Store value under field, replacing any previous value."""
        # This used to be `==`, a comparison, so nothing was ever stored.
        self.headers[field.lower()] = value

    def update(self, other):
        """Copy every field of other into this instance.

        other -- another Headers instance, or a plain mapping of field names
                 to values.
        """
        fields = getattr(other, 'headers', other)
        for field, value in fields.items():
            self.headers[field.lower()] = value

    def __str__(self):
        return self.headers.__str__()

    def __repr__(self):
        return self.headers.__repr__()
def content(self):
    """Return the response body, decoded when its charset is usable.

    The whole body is read from self.fd, which is then rewound so the body
    can be read again. When the headers declare a charset, the body is
    decoded with it and returned as str. When no charset is declared, the
    charset is unknown to Python, or the bytes do not decode with it, the
    raw bytes are returned.
    """
    data = self.fd.read()
    self.fd.seek(0)

    encoding = self.header_encoding()
    if encoding is None:
        return data

    try:
        return data.decode(encoding)
    except (UnicodeDecodeError, LookupError):
        # LookupError: the peer advertised a charset label Python does not
        # know. Fall back to raw bytes, as for undecodable data.
        return data
class RequestAuth:
    """Base class for objects that add credentials to a request."""


class BasicAuth(RequestAuth):
    """HTTP Basic authentication using a user name and a password."""

    def __init__(self, key, value):
        """
        key   -- user name, as str.
        value -- password, as str.
        """
        self.key = key
        self.value = value

    def dumps(self, request):
        """Set the 'authorization' header of request.

        The credentials are left untouched on the instance, so the same
        BasicAuth object can be attached to any number of requests.
        """
        credentials = ('%s:%s' % (self.key, self.value)).encode('utf8')

        # encodebytes() inserts a newline every 76 characters; a header value
        # must stay on a single line.
        token = encodebytes(credentials).replace(b'\n', b'').decode('ascii')

        # The scheme name has to precede the encoded credentials.
        request.headers['authorization'] = 'Basic %s' % token
def handle_connect_err(self, con, err):
    """Emit ResponseHandle.ERROR when the connection cannot be established.

    con -- the connection object that failed to connect.
    err -- the error reported with the connect-error event.

    The event is driven with None as the response (none exists yet) and
    CON_ERR as the error code.
    """
    # The request itself is the event emitter; it has neither a `request`
    # attribute nor an `ERROR` event of its own.
    self.drive(ResponseHandle.ERROR, None, CON_ERR)
class Get(Request):
    """HTTP GET request, sent as soon as the connection is established."""

    def __init__(self, addr, args=None,
    headers=None, version='HTTP/1.1', auth=None, attempts=1, pool=None):
        """
        addr     -- URL to request.
        args     -- mapping of query parameters added to the URL (default: none).
        headers  -- mapping of extra request headers (default: none).
        version  -- protocol version placed on the request line.
        auth     -- optional auth object; forwarded to Request.
        attempts -- forwarded to Request.
        pool     -- optional pool; forwarded to Request.
        """
        # Fresh dicts per instance: a `{}` default would be one object shared
        # by every request created without these arguments.
        headers = {} if headers is None else headers
        super(Get, self).__init__(addr, headers, version, auth, attempts, pool)

        self.args = {} if args is None else args
        self.method = 'GET'

    def handle_connect(self, con):
        """Install the response handler and write the request to con."""
        ResponseHandle(self)

        request_text = make_method(self.method, self.addr, self.args, self.version)
        headers_text = build_headers(self.headers)
        request_text = request_text + headers_text
        con.dump(request_text)
def make_method(method, addr, args, version):
    """Build the HTTP request line for a request.

    method  -- HTTP verb, e.g. 'GET'.
    addr    -- full URL; its path, query string and fragment are used.
    args    -- extra query parameters in any form urlencode() accepts;
               may be empty.
    version -- protocol version string, e.g. 'HTTP/1.1'.

    Returns the ASCII-encoded bytes '<method> <resource> <version>' followed
    by CRLF. An empty path is sent as '/'.
    """
    urlparser = urlparse(addr)
    resource = urlparser.path if urlparser.path else '/'

    # The URL may already carry a query string; the extra args are appended
    # to it with '&' so the last existing parameter and the first new one do
    # not run together.
    extra = urlencode(args, doseq=True) if args else ''
    query = '&'.join(part for part in (urlparser.query, extra) if part)
    if query:
        resource = ''.join((resource, '?', query))
    if urlparser.fragment:
        resource = ''.join((resource, '#', urlparser.fragment))
    httpcmd = '%s %s %s\r\n' % (method, resource, version)
    return httpcmd.encode('ascii')