├── .gitignore
├── CHANGELOG.md
├── LICENSE
├── MANIFEST.in
├── Makefile
├── README.md
├── mugen
│   ├── __init__.py
│   ├── adapters.py
│   ├── api.py
│   ├── connect.py
│   ├── connection_pool.py
│   ├── cookies.py
│   ├── exceptions.py
│   ├── models.py
│   ├── proxy.py
│   ├── session.py
│   ├── structures.py
│   └── utils.py
├── pyproject.toml
├── setup.py
└── tests
    ├── __init__.py
    └── tests.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.pyc
 2 | *.swp
 3 | *.egg
 4 | env/
 5 | cover/
 6 | build/
 7 | dist/
 8 | docs/_build
 9 | mugen.egg-info/
10 | .ropeproject/
11 | __pycache__/
12 | 
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | # Changelog
 2 | 
 3 | ## v0.6.1 - 2023-12-11
 4 | 
 5 | ### Updated
 6 | 
 7 | - Update dependencies
 8 | 
 9 | ## v0.6.0 - 2021-07-22
10 | 
11 | ### Added
12 | 
13 | - Support Proxy-Authorization
14 | 
15 | ```python
16 | await mugen.get("http://example.com", proxy='http://user:pwd@127.0.0.1:8888')
17 | ```
18 | 
19 | ## v0.5.1 - 2021-07-16
20 | 
21 | ### Fixed
22 | 
23 | - Fix "File descriptor n is used by transport" error
24 | 
25 | ## v0.5.0 - 2021-07-14
26 | 
27 | ### Changed
28 | 
29 | - Use async/await instead of the asyncio.coroutine decorator.
30 | - Merge all test cases into one file.
31 | - Let poetry manage building and publishing.
32 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright 2016 Peter Ding
 2 | 
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | You may obtain a copy of the License at
 6 | 
 7 |     http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | 
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.md
2 | 
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | typecheck:
 2 | 	mypy -p mugen --ignore-missing-imports --warn-unreachable
 3 | 
 4 | format-check:
 5 | 	black --check .
 6 | 
 7 | format:
 8 | 	black .
 9 | 
10 | 
11 | build: all
12 | 	rm -fr dist
13 | 	poetry build -f sdist
14 | 
15 | publish: all
16 | 	poetry publish
17 | 
18 | build-publish: build publish
19 | 
20 | all: format-check typecheck
21 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ## Mugen - HTTP for Asynchronous Requests
 2 | 
 3 | Mugen is a library for asynchronous HTTP requests.
 4 | 
 5 | It runs on Python 3.7+.
 6 | 
 7 | A quick demo:
 8 | 
 9 | ```python
10 | import asyncio
11 | import mugen
12 | 
13 | async def task():
14 |     url = 'https://www.google.com'
15 |     resp = await mugen.get(url)
16 |     print(resp.text)
17 | 
18 | loop = asyncio.get_event_loop()
19 | loop.run_until_complete(task())
20 | ```
21 | 
22 | See the [Documentation](https://peterding.github.io/mugen-docs/).
23 | 
24 | > Mugen is a name from _Samurai Champloo_ (サムライチャンプル, 混沌武士)
25 | 
26 | ### Feature Support
27 | 
28 | - Keep-Alive & Connection Pooling
29 | - DNS cache
30 | - Sessions with Cookie Persistence
31 | - Automatic Decompression
32 | - Automatic Content Decoding
33 | - HTTP(S)/SOCKS5 Proxy Support
34 | - Connection Timeouts
35 | 
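36 | Sessions keep cookies across requests. A minimal sketch mirroring `tests/tests.py` (the httpbin.org endpoints are only for illustration):
37 | 
38 | ```python
39 | import asyncio
40 | import mugen
41 | 
42 | async def main():
43 |     ss = mugen.session()
44 |     await ss.get('http://httpbin.org/cookies/set?k1=v1')
45 |     resp = await ss.get('http://httpbin.org/cookies')
46 |     print(resp.cookies.get_dict())  # {'k1': 'v1'}
47 | 
48 | asyncio.get_event_loop().run_until_complete(main())
49 | ```
50 | 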
--------------------------------------------------------------------------------
/mugen/__init__.py:
--------------------------------------------------------------------------------
 1 | from mugen.api import (
 2 |     head,
 3 |     get,
 4 |     post,
 5 |     request,
 6 |     session,
 7 | )
 8 | 
 9 | __version__ = "0.6.1"
10 | 
--------------------------------------------------------------------------------
/mugen/adapters.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import asyncio
 3 | 
 4 | from mugen.utils import is_ip, parse_proxy
 5 | from mugen.exceptions import UnknownProxyScheme
 6 | from mugen.proxy import _make_https_proxy_connection, Socks5Proxy
 7 | from mugen.models import Singleton, Response, DEFAULT_ENCODING
 8 | 
 9 | logger = logging.getLogger(__name__)
10 | 
11 | 
12 | class HTTPAdapter(Singleton):
13 |     def __init__(self, connection_pool, recycle=True, loop=None):
14 |         if hasattr(self, "_initiated"):
15 |             return
16 | 
17 |         logger.debug("instantiate HTTPAdapter: recycle: {}".format(recycle))
18 | 
19 |         self._initiated = True
20 |         self.recycle = recycle
21 |         self.loop = loop or asyncio.get_event_loop()
22 |         self.connection_pool = connection_pool
23 | 
24 |     async def generate_direct_connect(self, host, port, ssl, dns_cache, recycle=True):
25 |         key = None
26 |         if is_ip(host):
27 |             ip = host.split(":")[0]
28 |             key = (ip, port, ssl)
29 | 
30 |         if not key and not ssl:
31 |             ip, port = await dns_cache.get(host, port)
32 |             key = (ip, port, ssl)
33 | 
34 |         if not key and ssl:
35 |             key = (host, port, ssl)
36 | 
37 |         conn = await self.get_connection(key, recycle=recycle)
38 |         return conn
39 | 
40 |     async def generate_proxy_connect(
41 |         self, host, port, ssl, proxy, proxy_auth, dns_cache, recycle=True
42 |     ):
43 |         proxy_scheme, proxy_host, proxy_port, username, password = parse_proxy(proxy)
44 |         if not proxy_auth and username and password:
45 |             proxy_auth = f"{username}:{password}"
46 | 
47 |         proxy_ip, proxy_port = await dns_cache.get(proxy_host, proxy_port)
48 |         key = (proxy_ip, proxy_port, False, host)
49 | 
50 |         if proxy_scheme.lower() == "http":
51 |             if not ssl:
52 |                 key = (
53 |                     proxy_ip,
54 |                     proxy_port,
55 |                     False,
56 |                 )  # a plain http proxy does not need a CONNECT request
57 |             conn = await self.generate_http_proxy_connect(
58 |                 key, host, port, ssl, proxy_auth, recycle=recycle
59 |             )
60 |         elif proxy_scheme.lower() == "socks5":
61 |             conn = await self.generate_socks5_proxy_connect(
62 |                 key, host, port, ssl, username, password, recycle=recycle
63 |             )
64 |         else:
65 |             raise UnknownProxyScheme(proxy_scheme)
66 | 
67 |         return conn
68 | 
69 |     async def generate_http_proxy_connect(
70 |         self, key, host, port, ssl, proxy_auth, recycle=True
71 |     ):
72 |         conn = await self.get_connection(key, recycle=recycle)
73 | 
74 |         if ssl and not conn.ssl_on:
 75 |             logger.debug("[ssl_handshake]: {}".format(key))
 76 |             await _make_https_proxy_connection(
 77 |                 conn, host, port, proxy_auth, recycle=recycle
 78 |             )
 79 |             conn.ssl_on = True
 80 |         return conn
 81 | 
 82 |     async def generate_socks5_proxy_connect(
 83 |         self, key, host, port, ssl, username, password, recycle=True
 84 |     ):
 85 |         conn = await self.get_connection(key, recycle=recycle)
 86 |         if conn.socks_on:
 87 |             return conn
 88 | 
 89 |         socks5_proxy = Socks5Proxy(conn, host, port, ssl, username, password)
 90 |         await socks5_proxy.init()
 91 |         return conn
 92 | 
 93 |     async def get_connection(self, key, recycle=True):
 94 |         conn = await self.connection_pool.get_connection(key, recycle=recycle)
 95 |         if not conn.reader:
 96 |             try:
 97 |                 await conn.connect()
 98 |             except Exception as err:
 99 |                 logger.debug("Failed to connect to %s, error: %s", key, err)
100 |                 conn.close()
101 |                 raise err
102 |         return conn
103 | 
104 |     async def send_request(self, conn, request):
105 |         request_line, headers, data = request.make_request()
106 |         request_line = request_line.encode("utf-8")
107 |         headers = headers.encode("utf-8")
108 |         if isinstance(data, str):
109 |             data = data.encode("utf-8")
110 | 
111 |         conn.send(request_line + b"\r\n")
112 |         conn.send(headers + b"\r\n")
113 |         conn.send(b"\r\n")
114 |         if data:
115 |             conn.send(data)
116 | 
117 |     async def get_response(self, method, conn, encoding=DEFAULT_ENCODING):
118 |         response = Response(method, conn, encoding=encoding)
119 |         await response.receive()
120 | 
121 |         if response.headers.get("connection") == "close":
122 |             conn.recycle = False
123 |             conn.close()
124 |         return response
125 | 
126 |     def closed(self):
127 |         return self._initiated is None and self.connection_pool is None
128 | 
129 |     def close(self):
130 |         self._initiated = self.connection_pool = self.loop = None
131 | 
--------------------------------------------------------------------------------
/mugen/api.py:
--------------------------------------------------------------------------------
 1 | from mugen.session import Session
 2 | from mugen.models import MAX_CONNECTION_POOL, MAX_POOL_TASKS
 3 | 
 4 | 
 5 | async def head(
 6 |     url,
 7 |     params=None,
 8 |     headers=None,
 9 |     cookies=None,
10 |     proxy=None,
11 |     proxy_auth=None,
12 |     allow_redirects=False,
13 |     recycle=True,
14 |     encoding=None,
15 |     timeout=None,
16 |     connection=None,
17 |     loop=None,
18 | ):
19 |     response = await request(
20 |         "HEAD",
21 |         url,
22 |         params=params,
23 |         headers=headers,
24 |         cookies=cookies,
25 |         proxy=proxy,
26 |         proxy_auth=proxy_auth,
27 |         allow_redirects=allow_redirects,
28 |         recycle=recycle,
29 |         encoding=encoding,
30 |         timeout=timeout,
31 |         connection=connection,
32 |         loop=loop,
33 |     )
34 |     return response
35 | 
36 | 
37 | async def get(
38 |     url,
39 |     params=None,
40 |     headers=None,
41 |     cookies=None,
42 |     proxy=None,
43 |     proxy_auth=None,
44 |     allow_redirects=True,
45 |     recycle=True,
46 |     encoding=None,
47 |     timeout=None,
48 |     connection=None,
49 |     loop=None,
50 | ):
51 |     response = await request(
52 |         "GET",
53 |         url,
54 |         params=params,
55 |         headers=headers,
56 |         cookies=cookies,
57 |         proxy=proxy,
58 |         proxy_auth=proxy_auth,
59 |         allow_redirects=allow_redirects,
60 |         recycle=recycle,
61 |         encoding=encoding,
62 |         timeout=timeout,
63 |         connection=connection,
64 |         loop=loop,
65 |     )
66 |     return response
67 | 
68 | 
69 | async def post(
70 |     url,
71 |     params=None,
72 |     headers=None,
73 |     data=None,
74 |     cookies=None,
75 |     proxy=None,
76 |     proxy_auth=None,
77 |     allow_redirects=True,
78 |     recycle=True,
79 |     encoding=None,
80 |     timeout=None,
81 |     connection=None,
 82 |     loop=None,
 83 | ):
 84 |     response = await request(
 85 |         "POST",
 86 |         url,
 87 |         params=params,
 88 |         headers=headers,
 89 |         data=data,
 90 |         cookies=cookies,
 91 |         proxy=proxy,
 92 |         proxy_auth=proxy_auth,
 93 |         allow_redirects=allow_redirects,
 94 |         recycle=recycle,
 95 |         encoding=encoding,
 96 |         timeout=timeout,
 97 |         connection=connection,
 98 |         loop=loop,
 99 |     )
100 |     return response
101 | 
102 | 
103 | async def request(
104 |     method,
105 |     url,
106 |     params=None,
107 |     headers=None,
108 |     data=None,
109 |     cookies=None,
110 |     proxy=None,
111 |     proxy_auth=None,
112 |     allow_redirects=True,
113 |     recycle=True,
114 |     encoding=None,
115 |     timeout=None,
116 |     connection=None,
117 |     loop=None,
118 | ):
119 |     session = Session(recycle=recycle, encoding=encoding, loop=loop)
120 |     response = await session.request(
121 |         method,
122 |         url,
123 |         params=params,
124 |         headers=headers,
125 |         data=data,
126 |         cookies=cookies,
127 |         proxy=proxy,
128 |         proxy_auth=proxy_auth,
129 |         allow_redirects=allow_redirects,
130 |         recycle=recycle,
131 |         encoding=encoding,
132 |         timeout=timeout,
133 |         connection=connection,
134 |     )
135 | 
136 |     return response
137 | 
138 | 
139 | def session(
140 |     headers=None,
141 |     cookies=None,
142 |     recycle=True,
143 |     encoding=None,
144 |     max_pool=MAX_CONNECTION_POOL,
145 |     max_tasks=MAX_POOL_TASKS,
146 |     loop=None,
147 | ):
148 |     return Session(
149 |         headers=headers,
150 |         cookies=cookies,
151 |         recycle=recycle,
152 |         encoding=encoding,
153 |         max_pool=max_pool,
154 |         max_tasks=max_tasks,
155 |         loop=loop,
156 |     )
157 | 
--------------------------------------------------------------------------------
/mugen/connect.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | import time
 3 | import logging
 4 | import asyncio
 5 | from asyncio import streams
 6 | 
 7 | from functools import wraps
 8 | 
 9 | from mugen.exceptions import ConnectionIsStale
10 | from mugen.models import MAX_CONNECTION_TIMEOUT, MAX_KEEP_ALIVE_TIME
11 | 
12 | logger = logging.getLogger(__name__)
13 | 
14 | 
15 | def async_error_proof(gen):
16 |     @wraps(gen)
17 |     async def wrap(self, *args, **kwargs):
18 |         try:
19 |             rs = await gen(self, *args, **kwargs)
20 |             return rs
21 |         except Exception as err:
22 |             logger.error("[{}]: {}".format(gen, repr(err)))
23 |             self.close()
24 |             raise err
25 | 
26 |     return wrap
27 | 
28 | 
29 | def error_proof(func):
30 |     @wraps(func)
31 |     def wrap(self, *args, **kwargs):
32 |         try:
33 |             rs = func(self, *args, **kwargs)
34 |             return rs
35 |         except Exception as err:
36 |             logger.error("[{}]: {}".format(func, repr(err)))
37 |             self.close()
38 |             raise err
39 | 
40 |     return wrap
41 | 
42 | 
43 | FD_USED_ERROR = re.compile(r"File descriptor (\d+) is used by transport")
44 | 
45 | 
46 | class Connection(object):
47 |     def __init__(
48 |         self, ip, port, ssl=False, key=None, recycle=True, timeout=None, loop=None
49 |     ):
50 |         self.ip = ip
51 |         self.port = port
52 |         self.ssl = ssl
53 |         self.key = key or (ip, port, ssl)
54 |         self.recycle = recycle
55 |         self.loop = loop or asyncio.get_event_loop()
56 |         self.reader = None
57 |         self.writer = None
58 |         self.ssl_on = False  # For http/socks proxy which need ssl connection
59 |         self.socks_on = False  # socks proxy which needs to be initiated
60 |         self.timeout = timeout or MAX_KEEP_ALIVE_TIME
61 |         self.__last_action = time.time()
62 | 
63 |     def __repr__(self):
64 |         return "<Connection: {}>".format(self.key)
65 | 
66 |     def _watch(self):
67 |         self.__last_action = time.time()
68 |         return self.__last_action
69 | 
70 |     def is_timeout(self):
71 |         return time.time() - self.__last_action > self.timeout
 72 | 
 73 |     @async_error_proof
 74 |     async def connect(self):
 75 |         logger.debug(f"[Connection.connect]: {self.key}")
 76 | 
 77 |         try:
 78 |             reader, writer = await streams.open_connection(
 79 |                 self.ip, self.port, ssl=self.ssl
 80 |             )
 81 |         except RuntimeError as err:
 82 |             logger.error("[Connection.connect]: %s:%s, %s", self.ip, self.port, err)
 83 |             info = str(err)
 84 | 
 85 |             # If the fd is used, we remove it
 86 |             m = FD_USED_ERROR.search(info)
 87 |             if m:
 88 |                 fd = int(m.group(1))
 89 |                 transp = self.loop._transports[fd]
 90 |                 if transp:
 91 |                     transp.close()
 92 |                 del self.loop._transports[fd]
 93 | 
 94 |             raise err
 95 |         except Exception as err:
 96 |             logger.error("[Connection.connect]: %s:%s, %s", self.ip, self.port, err)
 97 |             raise err
 98 | 
 99 |         self.reader = reader
100 |         self.writer = writer
101 | 
102 |     @async_error_proof
103 |     async def ssl_handshake(self, host):
104 |         logger.debug("[Connection.ssl_handshake]: {}, {}".format(self.key, host))
105 |         transport = self.reader._transport
106 |         raw_socket = transport.get_extra_info("socket", default=None)
107 |         self.reader, self.writer = await streams.open_connection(
108 |             ssl=True, sock=raw_socket, server_hostname=host
109 |         )
110 | 
111 |     @error_proof
112 |     def send(self, data):
113 |         logger.debug("[Connection.send]: {!r}".format(data))
114 |         self._watch()
115 | 
116 |         self.writer.write(data)
117 | 
118 |     @async_error_proof
119 |     async def read(self, size=-1):
120 |         logger.debug("[Connection.read]: {}: size = {}".format(self.key, size))
121 |         self._watch()
122 |         # assert self.closed() is not True, 'connection is closed'
123 |         # assert self.stale() is not True, 'connection is stale'
124 | 
125 |         if self.stale():
126 |             logger.debug(
127 |                 "[Connection.read] [Error] [ConnectionIsStale]: {}".format(self.key)
128 |             )
129 |             raise ConnectionIsStale("{}".format(self.key))
130 | 
131 |         if size < 0:
132 |             chunk = await asyncio.wait_for(
133 |                 self.reader.read(size), timeout=MAX_CONNECTION_TIMEOUT
134 |             )
135 |             return chunk
136 |         else:
137 |             chunks = b""
138 |             while size:
139 |                 chunk = await asyncio.wait_for(
140 |                     self.reader.read(size), timeout=MAX_CONNECTION_TIMEOUT
141 |                 )
142 |                 if not chunk:  # EOF: the peer closed before `size` bytes arrived
143 |                     break
144 |                 size -= len(chunk)
145 |                 chunks += chunk
146 |             return chunks
147 | 
148 |     @async_error_proof
149 |     async def readline(self):
150 |         # assert self.closed() is False, 'connection is closed'
151 |         # assert self.stale() is not True, 'connection is stale'
152 | 
153 |         if self.stale():
154 |             logger.debug(
155 |                 "[Connection.readline] [Error] [ConnectionIsStale]: {}".format(self.key)
156 |             )
157 |             raise ConnectionIsStale("{}".format(self.key))
158 | 
159 |         chunk = await asyncio.wait_for(
160 |             self.reader.readline(), timeout=MAX_CONNECTION_TIMEOUT
161 |         )
162 | 
163 |         logger.debug(
164 |             "[Connection.readline]: " "{}: size = {}".format(self.key, len(chunk))
165 |         )
166 | 
167 |         return chunk
168 | 
169 |     def close(self):
170 |         logger.debug(
171 |             "[Connection.close]: {}, " "recycle: {}".format(self.key, self.recycle)
172 |         )
173 | 
174 |         if not self.closed():
175 |             self.reader.feed_eof()
176 |             self.writer.close()
177 |             self.reader = self.writer = None
178 |             logger.debug(
179 |                 "[Connection.close]: DONE. {}, recycle: {}".format(
180 |                     self.key, self.recycle
181 |                 )
182 |             )
183 | 
184 |     def closed(self):
185 |         return self.reader is None or self.writer is None
186 | 
187 |     def stale(self):
188 |         is_stale = self.reader is None or self.reader.at_eof()
189 |         if is_stale:
190 |             logger.debug("[Connection.stale]: {} is stale".format(self.key))
191 |         return is_stale
192 | 
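The ConnectionPool below stores keep-alive connections for reuse; per-session reuse can be turned off with `recycle=False`, as `test_recycle` in `tests/tests.py` does. A minimal sketch (example.com is illustrative):

```python
import asyncio
import mugen

async def main():
    session = mugen.session(recycle=False)  # close connections instead of pooling them
    await session.get("http://example.com")
    assert len(session.connection_pool) == 0  # nothing was recycled

asyncio.get_event_loop().run_until_complete(main())
```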
--------------------------------------------------------------------------------
/mugen/connection_pool.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import asyncio
  3 | 
  4 | from collections import defaultdict, deque
  5 | 
  6 | from mugen.connect import Connection
  7 | from mugen.models import (
  8 |     Singleton,
  9 |     MAX_CONNECTION_POOL,
 10 |     MAX_KEEP_ALIVE_TIME,
 11 |     MAX_POOL_TASKS,
 12 |     DEFAULT_RECHECK_INTERNAL,
 13 | )
 14 | 
 15 | logger = logging.getLogger(__name__)
 16 | 
 17 | 
 18 | class ConnectionPool(Singleton):
 19 |     """
 20 |     If recycle is True, idle connections are stored for reuse
 21 |     """
 22 | 
 23 |     def __init__(
 24 |         self,
 25 |         recycle=True,
 26 |         max_pool=MAX_CONNECTION_POOL,
 27 |         max_tasks=MAX_POOL_TASKS,
 28 |         recheck_internal=DEFAULT_RECHECK_INTERNAL,
 29 |         loop=None,
 30 |     ):
 31 |         if hasattr(self, "_initiated"):
 32 |             return None
 33 | 
 34 |         logger.debug("instantiate ConnectionPool")
 35 | 
 36 |         self._initiated = True
 37 |         self.recycle = recycle
 38 |         self.max_pool = max_pool  # overall pool
 39 |         self.max_tasks = max_tasks  # per-key limit
 40 |         self.loop = loop or asyncio.get_event_loop()
 41 |         self.__connections = defaultdict(deque)
 42 |         self.__connection_sizes = defaultdict(int)
 43 |         self.__recheck_internal = recheck_internal
 44 |         self.__call_count = 0
 45 | 
 46 |         asyncio.ensure_future(self._keep_alive_watcher(), loop=loop)
 47 | 
 48 |     def __repr__(self):
 49 |         conns = ", ".join(
 50 |             [f"{key}: {len(conns)}" for key, conns in self.__connections.items()]
 51 |         )
 52 |         size = len(self)
 53 |         return f"<ConnectionPool: size: {size}, {conns}>"
 54 | 
 55 |     def __len__(self) -> int:
 56 |         return len(self.__connections or [])
 57 | 
 58 |     async def _keep_alive_watcher(self):
 59 |         # recheck connections every MAX_KEEP_ALIVE_TIME seconds
 60 |         while True:
 61 |             await asyncio.sleep(MAX_KEEP_ALIVE_TIME)
 62 |             try:
 63 |                 self.recheck_connections()
 64 |             except Exception as err:
 65 |                 logger.error("[ConnectionPool._keep_alive_watcher]: {}".format(err))
 66 | 
 67 |     def get_connections(self, key):
 68 |         return self.__connections[key]
 69 | 
 70 |     async def get_connection(self, key, recycle=None, timeout=None):
 71 |         logger.debug(
 72 |             "[ConnectionPool.get_connection]: " "{}, recycle: {}".format(key, recycle)
 73 |         )
 74 | 
 75 |         if recycle is None:
 76 |             recycle = self.recycle
 77 | 
 78 |         if recycle is False:
 79 |             return self.make_connection(key, recycle=recycle, timeout=timeout)
 80 | 
 81 |         conns = self.__connections[key]
 82 |         while len(conns):
 83 |             conn = conns.popleft()
 84 |             self.count_connections(key, -1)
 85 |             if not conn.stale():
 86 |                 return conn
 87 |             else:
 88 |                 conn.close()
 89 | 
 90 |         if not conns:
 91 |             del self.__connections[key]
 92 | 
 93 |         conn = self.make_connection(key, recycle=recycle, timeout=timeout)
 94 |         return conn
 95 | 
 96 |     def make_connection(self, key, recycle=None, timeout=None):
 97 |         logger.debug(
 98 |             "[ConnectionPool.make_connection]" ": {}, recycle: {}".format(key, recycle)
 99 |         )
100 | 
101 |         if recycle is None:
102 |             recycle = self.recycle
103 | 
104 |         ip, port, ssl, *_ = key
105 |         conn = Connection(
106 |             ip, port, ssl=ssl, key=key, recycle=recycle, timeout=timeout, loop=self.loop
107 |         )
108 |         return conn
109 | 
110 |     def recycle_connection(self, conn):
111 |         logger.debug("[ConnectionPool.recycle_connection]: {}".format(conn))
112 | 
113 |         if conn.recycle and not conn.stale() and not conn.is_timeout():
114 |             key = conn.key
115 |             conns = self.__connections[key]
116 |             if len(conns) < self.max_tasks or len(self.__connections) < self.max_pool:
117 |                 conns.append(conn)
118 |                 self.count_connections(key, 1)
119 |                 return None
120 |         conn.close()
121 | 
122 |     def recheck_connections(self):
123 |         logger.debug("[ConnectionPool.recheck_connections]: {!r}".format(self))
124 | 
125 |         empty_conns = []
126 |         for key in self.__connections:
127 |             # to avoid "RuntimeError: dictionary changed size during iteration"
128 |             # when iterating a dictionary
129 |             conns = self.__connections[key]
130 |             conn_num = len(conns)
131 |             for _ in range(conn_num):
132 |                 conn = conns.popleft()
133 |                 self.count_connections(key, -1)
134 |                 self.recycle_connection(conn)
135 |             if not conns:
136 |                 empty_conns.append(key)
137 | 
138 |         for key in empty_conns:
139 |             del self.__connections[key]
140 | 
141 |     def count_connections(self, key, incr):
142 |         self.__connection_sizes[key] += incr
143 |         # drop the key once its count reaches zero to keep the dict small
144 |         if self.__connection_sizes[key] <= 0:
145 |             del self.__connection_sizes[key]
146 | 
147 |     def clear(self):
148 |         """
149 |         Close all connections
150 |         """
151 | 
152 |         logger.debug("[ConnectionPool.clear]")
153 | 
154 |         for key in self.__connections:
155 |             conns = self.__connections[key]
156 |             while len(conns):
157 |                 conn = conns.popleft()
158 |                 self.count_connections(key, -1)
159 |                 conn.recycle = False
160 |                 conn.close()
161 | 
162 |         self.__connections.clear()
163 | 
164 |     def closed(self):
165 |         return self._initiated is None and self.__connections is None
166 | 
167 |     def close(self):
168 |         """
169 |         clear connection_pool and reset the instance to uninitiated
170 |         """
171 | 
172 |         self.clear()
173 |         self._initiated = self.__connections = None
174 |         self.__connection_sizes = self.loop = None
175 | 
--------------------------------------------------------------------------------
/mugen/cookies.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | 
 3 | from http.cookies import BaseCookie, Morsel
 4 | 
 5 | 
 6 | class DictCookie(BaseCookie):
 7 |     def __init__(self, *args, **kwargs):
 8 |         super(DictCookie, self).__init__(*args, **kwargs)
 9 | 
10 |     def __repr__(self):
11 |         return "<DictCookie: {}>".format(
12 |             json.dumps(self.get_dict(), ensure_ascii=False)
13 |         )
14 | 
15 |     def get_dict(self):
16 |         dictionary = {}
17 |         for key, value in self.items():
18 |             if isinstance(value, Morsel):
19 |                 value = value.value
20 | 
21 |             dictionary[key] = value
22 |         return dictionary
23 | 
24 |     def format_cookie(self):
25 |         return " ".join(
26 |             ["{}={};".format(key, value) for key, value in self.get_dict().items()]
27 |         )
28 | 
--------------------------------------------------------------------------------
/mugen/exceptions.py:
--------------------------------------------------------------------------------
 1 | class NotFindIP(Exception):
 2 |     pass
 3 | 
 4 | 
 5 | class RedirectLoop(Exception):
 6 |     pass
 7 | 
 8 | 
 9 | class TooManyRedirections(Exception):
10 |     pass
11 | 
12 | 
13 | class ConnectionIsStale(Exception):
14 |     pass
15 | 
16 | 
17 | class UnknownProxyScheme(Exception):
18 |     pass
19 | 
20 | 
21 | class CanNotCreateConnect(Exception):
22 |     pass
23 | 
--------------------------------------------------------------------------------
/mugen/models.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import logging
  3 | import asyncio
  4 | import socket
  5 | import base64
  6 | from urllib.parse import urlparse, ParseResult
  7 | 
  8 | from http.cookies import SimpleCookie, Morsel
  9 | from collections import OrderedDict
 10 | 
 11 | from mugen.cookies import DictCookie
 12 | from mugen.exceptions import NotFindIP
 13 | from mugen.structures import CaseInsensitiveDict
 14 | from mugen.utils import (
 15 |     default_headers,
 16 |     url_params_encode,
 17 |     form_encode,
 18 |     decode_gzip,
 19 |     decode_deflate,
 20 |     find_encoding,
 21 |     is_ip,
 22 |     parse_proxy,
 23 |     base64encode,
 24 | )
 25 | 
 26 | from httptools import HttpResponseParser
 27 | 
 28 | 
 29 | MAX_CONNECTION_POOL = 100
 30 | MAX_POOL_TASKS = 100
 31 | MAX_REDIRECTIONS = 1000
 32 | MAX_CONNECTION_TIMEOUT = 1 * 60
 33 | MAX_KEEP_ALIVE_TIME = 10 * 60
 34 | DEFAULT_DNS_CACHE_SIZE = 5000
 35 | DEFAULT_REDIRECT_LIMIT = 100
 36 | DEFAULT_RECHECK_INTERNAL = 100
 37 | HTTP_VERSION = "HTTP/1.1"
 38 | DEFAULT_ENCODING = "utf-8"
 39 | 
 40 | 
 41 | # https://magic.io/blog/uvloop-blazing-fast-python-networking/
 42 | DEFAULT_READ_SIZE = 1024
 43 | 
 44 | logger = logging.getLogger(__name__)
 45 | 
 46 | 
 47 | class Singleton(object):
 48 |     def __new__(cls, *args, **kwargs):
 49 |         if not hasattr(cls, "_instance"):
 50 |             instance = object.__new__(cls)
 51 |             cls._instance = instance
 52 |         return cls._instance
 53 | 
 54 | 
 55 | class Request(object):
 56 |     def __init__(
 57 |         self,
 58 |         method,
 59 |         url,
 60 |         params=None,
 61 |         headers=None,
 62 |         data=None,
 63 |         cookies=None,
 64 |         proxy=None,
 65 |         proxy_auth=None,
 66 |         encoding=None,
 67 |     ):
 68 |         self.method = method.upper()
 69 |         self.url = url
 70 |         self.params = params or {}
 71 |         if headers is None:
 72 |             headers = {}
 73 |         self.headers = CaseInsensitiveDict(headers or default_headers())
 74 |         self.data = data
 75 |         self.encoding = encoding
 76 |         if cookies is None:
 77 |             self.cookies = DictCookie()
 78 |         else:
 79 |             self.cookies = cookies
 80 | 
 81 |         self.proxy = proxy
 82 | 
 83 |         self.proxy_auth = proxy_auth
 84 |         if not proxy_auth and proxy:
 85 |             _, _, _, username, password = parse_proxy(proxy)
 86 |             if username and password:
 87 |                 basic = f"{username}:{password}"
 88 |                 self.proxy_auth = basic
 89 | 
 90 |         self.prepare()
 91 | 
 92 |     def prepare(self):
 93 |         parser = urlparse(self.url)
 94 | 
 95 |         scheme = parser.scheme
 96 |         host = parser.netloc
 97 |         path = parser.path
 98 |         _params = parser.params
 99 |         query = parser.query
100 |         fragment = parser.fragment
101 | 
102 |         if self.params:
103 |             enc_params = url_params_encode(self.params)
104 |             query = "{}&{}".format(query, enc_params)
105 | 
106 |         self.url_parse_result = ParseResult(
107 |             scheme=scheme or "http",
108 |             netloc=host,
109 |             path=path,
110 |             params=_params,
111 |             query=query,
112 |             fragment=fragment,
113 |         )
114 | 
115 |         self.ssl = scheme.lower() == "https"
116 | 
117 |     def make_request(self):
118 |         host = self.url_parse_result.netloc
119 |         request_line = self.make_request_line()
120 |         headers = self.make_request_headers(
121 |             self.method, host, self.headers, self.cookies
122 |         )
123 |         data = self.make_request_data(self.data)
124 | 
125 |         # TODO: encoding file
126 | 
127 |         return request_line, headers, data
128 | 
129 |     def make_request_line(self):
130 |         method = self.method
131 |         scheme = self.url_parse_result.scheme
132 |         host = self.url_parse_result.netloc
133 |         port = self.url_parse_result.port
134 |         if not port:
135 |             if self.ssl:
136 |                 port = 443
137 |             else:
138 |                 port = 80
139 | 
140 |         path = self.url_parse_result.path or "/"
141 |         query = self.url_parse_result.query
142 | 
143 |         if method.lower() == "connect":
144 |             request_line = "{} {} {}".format(method, host, HTTP_VERSION)
145 |         else:
146 |             if self.proxy:
147 |                 uri = f"{scheme}://{host}{path}"
148 |             else:
149 |                 uri = path
150 | 
151 |             if query:
152 |                 uri += "?" + query
153 |             request_line = "{} {} {}".format(method, uri, HTTP_VERSION)
154 |         return request_line
155 | 
156 |     def make_request_headers(self, method, host, headers, cookies):
157 |         _headers = []
158 | 
159 |         if not headers.get("host"):
160 |             _headers.append("Host: " + host)
161 | 
162 |         if method.lower() == "post" and not self.data:
163 |             _headers.append("Content-Length: 0")
164 | 
165 |         if self.data:
166 |             data = self.make_request_data(self.data)
167 |             _headers.append("Content-Length: {}".format(len(data)))
168 |             if isinstance(self.data, dict) and not headers.get("Content-Type"):
169 |                 _headers.append("Content-Type: application/x-www-form-urlencoded")
170 | 
171 |         # add cookies
172 |         if cookies:
173 |             if isinstance(cookies, (DictCookie, SimpleCookie)):
174 |                 _cookies = []
175 |                 for k in cookies:
176 |                     # TODO, path ?
177 |                     if isinstance(cookies[k], Morsel):
178 |                         v = cookies[k].value
179 |                     else:
180 |                         v = cookies[k]
181 |                     _cookies.append("{}={};".format(k, v))
182 | 
183 |                 cookie = "Cookie: " + " ".join(_cookies)
184 |                 _headers.append(cookie)
185 |             elif isinstance(cookies, dict):
186 |                 _cookies = []
187 |                 for k, v in cookies.items():
188 |                     _cookies.append("{}={};".format(k, v))
189 | 
190 |                 cookie = "Cookie: " + " ".join(_cookies)
191 |                 _headers.append(cookie)
192 | 
193 |         # Add Proxy-Authorization header
194 |         if self.proxy_auth:
195 |             basic = base64encode(self.proxy_auth)
196 |             proxy_auth = f"Proxy-Authorization: Basic {basic}"
197 |             _headers.append(proxy_auth)
198 |             _headers.append("Proxy-Connection: Keep-Alive")
199 | 
200 |         # make headers
201 |         for k, v in headers.items():
202 |             _headers.append(k + ": " + v)
203 |         return "\r\n".join(_headers)
204 | 
205 |     def make_request_data(self, data):
206 |         if data is None:
207 |             return data
208 | 
209 |         enc_data = None
210 |         if isinstance(data, dict):
211 |             enc_data = form_encode(data)
212 |         elif isinstance(data, str):
213 |             enc_data = bytes(data, "utf-8")
214 |         elif isinstance(data, bytes):
215 |             enc_data = data
216 |         else:
217 |             raise TypeError("request data must be str or dict, NOT {!r}".format(data))
218 | 
219 |         return enc_data
220 | 
221 | 
222 | class HttpResponse(object):
223 |     def __init__(self, cookies=None, encoding=None):
224 |         self.headers = CaseInsensitiveDict()
225 |         self.content = b""
226 |         self.encoding = encoding or DEFAULT_ENCODING
227 |         if cookies is None:
228 |             self.cookies = DictCookie()
229 |         else:
230 |             self.cookies = cookies
231 | 
232 |     def on_header(self, name, value):
233 |         name = name.decode(self.encoding)
234 |         value = value.decode(self.encoding)
235 |         if name.lower() == "set-cookie":
236 |             self.cookies.load(value)
237 |         if self.headers.get(name):
238 |             self.headers[name] += ", " + value
239 |             return None
240 |         self.headers[name] = value
241 | 
242 |     def on_body(self, value):
243 |         self.content += value
244 | 
245 | 
246 | class Response(object):
247 |     def __init__(self, method, connection, encoding=None):
248 |         self.method = method
249 |         self.connection = connection
250 |         self.headers = None
251 |         self.content = None
252 |         self.cookies = DictCookie()
253 |         self.encoding = encoding
254 |         self.status_code = None
255 |         self.history = []
256 |         self.request = None
257 | 
258 |     def __repr__(self):
259 |         return "<Response [{}]>".format(self.status_code)
260 | 
261 |     async def receive(self):
262 |         http_response = HttpResponse(cookies=self.cookies, encoding=self.encoding)
263 |         http_response_parser = HttpResponseParser(http_response)
264 | 
265 |         conn = self.connection
266 | 
267 |         # TODO: limit the maximum amount of incoming data to buffer
268 |         chunks = b""
269 |         while True:
270 |             chunk = await conn.readline()
271 |             chunks += chunk
272 |             if chunk == b"\r\n":
273 |                 break
274 | 
275 |         http_response_parser.feed_data(chunks)
276 | 
277 |         self.status_code = http_response_parser.get_status_code()
278 |         headers = http_response.headers
279 |         self.headers = headers
280 | 
281 |         # (protocol, status_code, ok), headers, cookies = parse_headers(chunks)
282 |         # self.headers = headers
283 |         # self.status_code = status_code
284 |         # self.cookies = cookies
285 | 
286 |         # TODO, handle redirect
287 | 
288 |         body = b""
289 |         if self.method.lower() == "head":  # HEAD
290 |             self.content = body
291 |             return None
292 | 
293 |         nbytes = headers.get("Content-Length")
294 |         if nbytes:
295 |             nbytes = int(nbytes)
296 |         if nbytes:
297 |             body += await conn.read(nbytes)
298 |         else:
299 |             if headers.get("Transfer-Encoding") == "chunked":
300 |                 blocks = []
301 |                 while True:
302 |                     size_header = await conn.readline()
303 |                     if not size_header:
304 |                         # logging
305 |                         break
306 | 
307 |                     parts = size_header.split(b";")
308 |                     size = int(parts[0], 16)
309 |                     if size:
310 |                         block = await conn.read(size)
311 |                         assert len(block) == size, (
312 |                             "[Response.receive] [Transfer-Encoding]",
313 |                             len(block),
314 |                             size,
315 |                         )
316 |                         blocks.append(block)
317 | 
318 |                     crlf = await conn.readline()
319 |                     assert crlf == b"\r\n", repr(crlf)
320 |                     if not size:
321 |                         break
322 | 
323 |                 body += b"".join(blocks)
324 |             else:
325 |                 # reading until EOF
326 |                 pass
327 |                 # body += await conn.read(-1)
328 | 
329 |         if body and self.headers.get("Content-Encoding", "").lower() == "gzip":
330 |             self.content = decode_gzip(body)
331 |         elif body and self.headers.get("Content-Encoding", "").lower() == "deflate":
332 |             self.content = decode_deflate(body)
333 |         else:
334 |             self.content = body
335 | 
336 |         if not self.encoding:
337 |             # find charset from content-type
338 |             encoding = find_encoding(self.headers.get("Content-Type", ""))
339 |             if encoding:
340 |                 self.encoding = encoding
341 | 
342 |     @property
343 |     def text(self):
344 |         # TODO, use chardet to detect charset
345 |         encoding = self.encoding or DEFAULT_ENCODING
346 | 
347 |         return str(self.content, encoding, errors="replace")
348 | 
349 |     def json(self):
350 |         return json.loads(self.text)
351 | 
352 | 
353 | class DNSCache(Singleton):
354 |     """
355 |     DNS Cache
356 |     """
357 | 
358 |     def __init__(self, size=DEFAULT_DNS_CACHE_SIZE, loop=None):
359 |         if hasattr(self, "_initiated"):
360 |             return None
361 | 
362 |         logger.debug("instantiate DNSCache: size: {}".format(size))
363 | 
364 |         self._initiated = True
365 |         self.__size = size
366 |         self.__hosts = OrderedDict()
367 |         self.loop = loop or asyncio.get_event_loop()
368 | 
369 |     def __repr__(self):
370 |         return repr(dict(self.__hosts))
371 | 
372 |     async def get(self, host, port, uncache=False):
373 |         if is_ip(host):
374 |             return host, port
375 | 
376 |         key = (host, port)
377 |         if uncache:
378 |             ipaddrs = await self.get_ipaddrs(host, port)
379 |             ipaddr = self.add_host(key, ipaddrs)
380 |         else:
381 |             ipaddr = self.__hosts.get(key)
382 |             if not ipaddr:
383 |                 ipaddrs = await self.get_ipaddrs(host, port)
384 |                 ipaddr = self.add_host(key, ipaddrs)
385 | 
386 |         self.limit_cache()
387 | 
388 |         assert ipaddr, NotFindIP(str(key))
389 | 
390 |         family, type, proto, canonname, (ip, port, *_) = ipaddr
391 |         return ip, port
392 | 
393 |     async def get_ipaddrs(self, host, port):
394 |         ipaddrs = await self.loop.getaddrinfo(host, port, proto=socket.IPPROTO_TCP)
395 |         return ipaddrs
396 | 
397 |     def add_host(self, key, ipaddrs):
398 |         for ipaddr in ipaddrs:
399 |             family, type, proto, canonname, (ip, port, *_) = ipaddr
400 |             if (
401 |                 family == socket.AF_INET
402 |                 and type == socket.SOCK_STREAM
403 |                 and proto == socket.IPPROTO_TCP
404 |             ):
405 |                 self.__hosts[key] = ipaddr
406 |                 self.__hosts.move_to_end(key, last=False)  # FIFO
407 |                 return ipaddr
408 | 
409 |     def limit_cache(self):
410 |         while len(self.__hosts) > self.__size:
411 |             self.__hosts.popitem()
412 | 
413 |     def clear(self):
414 |         self.__hosts.clear()
415 | 
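`DNSCache` above is a process-wide `Singleton` keyed by `(host, port)`, so every component shares one cache. A usage sketch (the hostname is illustrative):

```python
import asyncio
from mugen.models import DNSCache

async def main():
    dns = DNSCache()
    ip, port = await dns.get("example.com", 80)  # resolved once, then served from the cache
    print(ip, port)
    assert dns is DNSCache()  # Singleton: the same instance everywhere

asyncio.get_event_loop().run_until_complete(main())
```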
--------------------------------------------------------------------------------
/mugen/proxy.py:
--------------------------------------------------------------------------------
  1 | from typing import Optional
  2 | import logging
  3 | import socket
  4 | import struct
  5 | 
  6 | from urllib.parse import urlparse
  7 | 
  8 | import mugen
  9 | from mugen.utils import is_ip
 10 | 
 11 | logger = logging.getLogger(__name__)
 12 | 
 13 | 
 14 | class GeneralProxyError(Exception):
 15 |     pass
 16 | 
 17 | 
 18 | class SOCKS5AuthError(Exception):
 19 |     pass
 20 | 
 21 | 
 22 | class SOCKS5Error(Exception):
 23 |     pass
 24 | 
 25 | 
 26 | class ProxyNotPort(Exception):
 27 |     pass
 28 | 
 29 | 
 30 | SOCKS4_ERRORS = {
 31 |     0x5B: "Request rejected or failed",
 32 |     0x5C: "Request rejected because SOCKS server cannot connect to identd on the client",
 33 |     0x5D: "Request rejected because the client program and identd report different user-ids",
 34 | }
 35 | 
 36 | SOCKS5_ERRORS = {
 37 |     0x01: "General SOCKS server failure",
 38 |     0x02: "Connection not allowed by ruleset",
 39 |     0x03: "Network unreachable",
 40 |     0x04: "Host unreachable",
 41 |     0x05: "Connection refused",
 42 |     0x06: "TTL expired",
 43 |     0x07: "Command not supported, or protocol error",
 44 |     0x08: "Address type not supported",
 45 | }
 46 | 
 47 | 
 48 | async def get_http_proxy_key(proxy_url, dns_cache):
 49 |     urlparser = urlparse(proxy_url)
 50 |     # ssl = urlparser.scheme == 'https'
 51 |     ssl = False
 52 |     host = urlparser.netloc.split(":")[0]
 53 |     port = urlparser.port
 54 | 
 55 |     if is_ip(host):
 56 |         if not port:
 57 |             raise ProxyNotPort("proxy: {} has no port".format(proxy_url))
 58 |         key = (host, port, ssl)
 59 |     else:
 60 |         if not port:
 61 |             port = 80
 62 |         ip, port = await dns_cache.get(host, port)
 63 |         key = (ip, port, ssl)
 64 |     return key
 65 | 
 66 | 
 67 | async def _make_https_proxy_connection(
 68 |     conn,
 69 |     host,
 70 |     port,
 71 |     proxy_auth: Optional[str] = None,
 72 |     recycle=None,
 73 | ):
 74 |     url = "https://" + host
 75 |     if port:
 76 |         url = url + f":{port}"
 77 | 
 78 |     await mugen.request(
 79 |         "CONNECT", url, recycle=recycle, proxy_auth=proxy_auth, connection=conn
 80 |     )
 81 |     await conn.ssl_handshake(host)
 82 |     return conn
 83 | 
 84 | 
 85 | class Socks5Proxy:
 86 |     def __init__(self, conn, dest_host, dest_port, ssl, username, password):
 87 |         self.conn = conn
 88 |         self.dest_host = dest_host
 89 |         self.dest_port = dest_port
 90 |         self.ssl = ssl
 91 |         self.username = username
 92 |         self.password = password
 93 | 
 94 |     async def init(self):
 95 |         # 1. connect to the socks server and authenticate
 96 |         await self.auth()
 97 | 
 98 |         # 2. ask the socks server to connect to dest_host
 99 |         await self.connect()
100 | 
101 |         # 3. SSL/TLS handshake
102 |         if self.ssl:
103 |             await self.connect_ssl()
104 | 
105 |         self.conn.socks_on = True
106 | 
107 |     async def auth(self):
108 |         logger.debug("[Socks5Proxy.init.auth]: {}".format(self.conn))
109 | 
110 |         # send the authentication methods we support
111 |         if self.username and self.password:
112 |             self.conn.send(b"\x05\x02\x00\x02")
113 |         else:
114 |             # VER, NMETHODS, and at least 1 METHODS
115 |             self.conn.send(b"\x05\x01\x00")
116 | 
117 |         chosen_auth = await self.conn.read(2)
118 | 
119 |         if chosen_auth[0:1] != b"\x05":
120 |             raise GeneralProxyError("SOCKS5 proxy server sent invalid data")
121 | 
122 |         if chosen_auth[1:2] == b"\x02":
123 |             # Okay, we need to perform a basic username/password
124 |             # authentication.
125 |             self.conn.send(
126 |                 b"\x01"
127 |                 + chr(len(self.username)).encode()
128 |                 + self.username.encode()
129 |                 + chr(len(self.password)).encode()
130 |                 + self.password.encode()
131 |             )
132 | 
133 |             auth_status = await self.conn.read(2)  # VER, STATUS
134 |             if auth_status[0:1] != b"\x01":
135 |                 # Bad response
136 |                 raise GeneralProxyError("SOCKS5 proxy server sent invalid data")
137 |             if auth_status[1:2] != b"\x00":
138 |                 # Authentication failed
139 |                 raise SOCKS5AuthError("SOCKS5 authentication failed")
140 | 
141 |         # No authentication is required if 0x00
142 |         elif chosen_auth[1:2] != b"\x00":
143 |             # Reaching here is always bad
144 |             if chosen_auth[1:2] == b"\xFF":
145 |                 raise SOCKS5AuthError(
146 |                     "All offered SOCKS5 authentication methods were rejected"
147 |                 )
148 |             else:
149 |                 raise GeneralProxyError("SOCKS5 proxy server sent invalid data")
150 |         # Otherwise, authentication succeeded
151 | 
152 |     async def connect(self):
153 |         logger.debug("[Socks5Proxy.init.connect]: {}".format(self.conn))
154 | 
155 |         cmd = b"\x01"  # CONNECT
156 |         # Now we can request the actual connection
157 |         header = b"\x05" + cmd + b"\x00"
158 | 
159 |         family_to_byte = {socket.AF_INET: b"\x01", socket.AF_INET6: b"\x04"}
160 |         for family in (socket.AF_INET, socket.AF_INET6, None):
161 |             if not family:
162 |                 self.conn.send(
163 |                     header
164 |                     + b"\x03"
165 |                     + bytes([len(self.dest_host)])
166 |                     + self.dest_host.encode("utf-8")
167 |                     + struct.pack(">H", self.dest_port)
168 |                 )
169 |                 break
170 | 
171 |             try:
172 |                 addr_bytes = socket.inet_pton(family, self.dest_host)
173 |                 self.conn.send(
174 |                     header
175 |                     + family_to_byte[family]
176 |                     + addr_bytes
177 |                     + struct.pack(">H", self.dest_port)
178 |                 )
179 |                 break
180 |             except socket.error:
181 |                 continue
182 | 
183 |         # Get the response
184 |         resp = await self.conn.read(3)
185 |         if resp[0:1] != b"\x05":
186 |             raise GeneralProxyError("SOCKS5 proxy server sent invalid data")
187 | 
188 |         status = ord(resp[1:2])
189 |         if status != 0x00:
190 |             # Connection failed: server returned an error
191 |             error = SOCKS5_ERRORS.get(status, "Unknown error")
192 |             raise SOCKS5Error("{0:#04x}: {1}".format(status, error))
193 | 
194 |         # Get the bound address/port
195 |         tp = await self.conn.read(1)
196 |         if tp == b"\x01":
197 |             chk = await self.conn.read(4)
198 |             addr = socket.inet_ntoa(chk)
199 |         elif tp == b"\x03":
200 |             length = await self.conn.read(1)
201 |             addr = await self.conn.read(ord(length))
202 |         elif tp == b"\x04":
203 |             chk = await self.conn.read(16)
204 |             addr = socket.inet_ntop(socket.AF_INET6, chk)
205 |         else:
206 |             raise GeneralProxyError("SOCKS5 proxy server sent invalid data")
207 | 
208 |         pt = await self.conn.read(2)
209 |         port = struct.unpack(">H", pt)[0]
210 |         return addr, port
211 | 
212 |     async def connect_ssl(self):
213 |         logger.debug("[Socks5Proxy.connect_ssl]: {}".format(self.conn))
214 |         await self.conn.ssl_handshake(self.dest_host)
215 |         self.conn.ssl_on = True
216 | 
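The proxy flavor is chosen by the scheme of the `proxy` URL (see `HTTPAdapter.generate_proxy_connect` in `mugen/adapters.py`). A sketch of both, with illustrative proxy addresses:

```python
import asyncio
import mugen

async def main():
    # HTTP proxy with Basic auth, as in the CHANGELOG example
    await mugen.get("http://example.com", proxy="http://user:pwd@127.0.0.1:8888")
    # SOCKS5 proxy
    await mugen.get("http://example.com", proxy="socks5://127.0.0.1:1080")

asyncio.get_event_loop().run_until_complete(main())
```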
--------------------------------------------------------------------------------
/mugen/session.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import asyncio
  3 | from urllib.parse import urljoin
  4 | 
  5 | from mugen.cookies import DictCookie
  6 | from mugen.connection_pool import ConnectionPool
  7 | from mugen.connect import Connection
  8 | from mugen.adapters import HTTPAdapter
  9 | from mugen.structures import CaseInsensitiveDict
 10 | from mugen.models import (
 11 |     Request,
 12 |     DNSCache,
 13 |     DEFAULT_REDIRECT_LIMIT,
 14 |     MAX_CONNECTION_POOL,
 15 |     MAX_POOL_TASKS,
 16 |     MAX_REDIRECTIONS,
 17 |     DEFAULT_ENCODING,
 18 | )
 19 | from mugen.exceptions import RedirectLoop, TooManyRedirections
 20 | 
 21 | logger = logging.getLogger(__name__)
 22 | 
 23 | 
 24 | class Session(object):
 25 |     def __init__(
 26 |         self,
 27 |         headers=None,
 28 |         cookies=None,
 29 |         recycle=True,
 30 |         encoding=None,
 31 |         max_pool=MAX_CONNECTION_POOL,
 32 |         max_tasks=MAX_POOL_TASKS,
 33 |         loop=None,
 34 |     ):
 35 |         logger.debug(
 36 |             "instantiate Session: "
 37 |             "max_pool: {}, max_tasks: {}, "
 38 |             "recycle: {}, encoding: {}".format(max_pool, max_tasks, recycle, encoding)
 39 |         )
 40 | 
 41 |         self.headers = CaseInsensitiveDict()
 42 |         if headers:
 43 |             self.headers.update(headers)
 44 | 
 45 |         self.cookies = DictCookie()
 46 |         if cookies:
 47 |             self.cookies.update(cookies)
 48 | 
 49 |         self.recycle = recycle
 50 |         self.encoding = encoding
 51 | 
 52 |         self.max_redirects = DEFAULT_REDIRECT_LIMIT
 53 |         self.loop = loop or asyncio.get_event_loop()
 54 | 
 55 |         self.connection_pool = ConnectionPool(
 56 |             recycle=recycle, max_pool=max_pool, max_tasks=max_tasks, loop=self.loop
 57 |         )
 58 |         self.adapter = HTTPAdapter(
 59 |             self.connection_pool, recycle=recycle, loop=self.loop
 60 |         )
 61 |         self.dns_cache = DNSCache(loop=self.loop)
 62 | 
 63 |     async def request(
 64 |         self,
 65 |         method,
 66 |         url,
 67 |         params=None,
 68 |         headers=None,
 69 |         data=None,
 70 |         cookies=None,
 71 |         proxy=None,
 72 |         proxy_auth=None,
 73 |         allow_redirects=True,
 74 |         recycle=None,
 75 |         encoding=None,
 76 |         timeout=None,
 77 |         connection=None,
 78 |     ):
 79 |         if recycle is None:
 80 |             recycle = self.recycle
 81 | 
 82 |         if allow_redirects:
 83 |             response = await asyncio.wait_for(
 84 |                 self._redirect(
 85 |                     method,
 86 |                     url,
 87 |                     params=params,
 88 |                     headers=headers,
 89 |                     data=data,
 90 |                     cookies=cookies,
 91 |                     proxy=proxy,
 92 |                     proxy_auth=proxy_auth,
 93 |                     allow_redirects=allow_redirects,
 94 |                     recycle=recycle,
 95 |                     encoding=encoding,
 96 |                     connection=connection,
 97 |                 ),
 98 |                 timeout=timeout,
 99 |             )
100 |         else:
101 |             response = await asyncio.wait_for(
102 |                 self._request(
103 |                     method,
104 |                     url,
105 |                     params=params,
106 |                     headers=headers,
107 |                     data=data,
108 |                     cookies=cookies,
109 |                     proxy=proxy,
110 |                     proxy_auth=proxy_auth,
111 |                     allow_redirects=allow_redirects,
112 |                     recycle=recycle,
113 |                     encoding=encoding,
114 |                     connection=connection,
115 |                 ),
116 |                 timeout=timeout,
117 |             )
118 | 
119 |         return response
120 | 
121 |     async def _request(
122 |         self,
123 |         method,
124 |         url,
125 |         params=None,
126 |         headers=None,
127 |         data=None,
128 |         cookies=None,
129 |         proxy=None,
130 |         proxy_auth=None,
131 |         allow_redirects=True,
132 |         recycle=None,
133 |         encoding=None,
134 |         connection=None,
135 |     ):
136 |         logger.debug(
137 |             "[Session._request]: "
138 |             "method: {}, "
139 |             "url: {}, "
140 |             "params: {}, "
141 |             "headers: {}, "
142 |             "data: {}, "
143 |             "cookies: {}, "
144 |             "proxy: {}".format(method, url, params, headers, data, cookies, proxy)
145 |         )
146 | 
147 |         encoding = encoding or self.encoding
148 | 
149 |         if recycle is None:
150 |             recycle = self.recycle
151 | 
152 |         if cookies:
153 |             self.cookies.update(cookies)
154 | 
155 |         if headers is None or not dict(headers):
156 |             headers = self.headers
157 | 
158 |         request = Request(
159 |             method,
160 |             url,
161 |             params=params,
162 |             headers=headers,
163 |             data=data,
164 |             proxy=proxy,
165 |             proxy_auth=proxy_auth,
166 |             cookies=self.cookies,
167 |             encoding=encoding,
168 |         )
169 | 
170 |         # Make connection
171 |         if not connection:
172 |             host, *_ = request.url_parse_result.netloc.split(":", 1)
173 |             ssl = request.url_parse_result.scheme.lower() == "https"
174 |             port = request.url_parse_result.port
175 |             if not port:
176 |                 port = 443 if ssl else 80
177 | 
178 |             if proxy:
179 |                 conn = await self.adapter.generate_proxy_connect(
180 |                     host, port, ssl, proxy, proxy_auth, self.dns_cache, recycle=recycle
181 |                 )
182 |             else:
183 |                 conn = await self.adapter.generate_direct_connect(
184 |                     host, port, ssl, self.dns_cache, recycle=recycle
185 |                 )
186 |         else:
187 |             if not isinstance(connection, Connection):
188 |                 raise TypeError(
189 |                     "connection is NOT an instance of Mugen.connect.Connection"
190 |                 )
191 | 
192 |             conn = connection
193 | 
194 |         try:
195 |             # send request
196 |             await self.adapter.send_request(conn, request)
197 |         except Exception as err:
198 |             logger.debug("[Session._request]: send_request error, {}".format(err))
199 |             logger.warning("Closing connection after send_request error: %s", conn)
200 |             conn.close()
201 |             raise err
202 | 
203 |         try:
204 |             # receive response
205 |             response = await self.adapter.get_response(method, conn, encoding=encoding)
206 |         except Exception as err:
207 |             logger.debug("[Session._request]: get_response error, {}".format(err))
208 |             logger.warning("Closing connection after get_response error: %s", conn)
209 |             conn.close()
210 |             raise err
211 | 
212 |         # update cookies
213 |         self.cookies.update(response.cookies)
214 |         response.cookies = self.cookies
215 | 
216 |         if method.lower() != "connect":
217 |             self.connection_pool.recycle_connection(conn)
218 | 
219 |         return response
220 | 
221 |     async def _redirect(
222 |         self,
223 |         method,
224 |         url,
225 |         params=None,
226 |         headers=None,
227 |         data=None,
228 |         cookies=None,
229 |         proxy=None,
230 |         proxy_auth=None,
231 |         allow_redirects=True,
232 |         recycle=None,
233 |         encoding=None,
234 |         connection=None,
235 |     ):
236 |         if recycle is None:
237 |             recycle = self.recycle
238 | 
239 |         history = []
240 |         _URL = url
241 |         base_url = url
242 |         redirect_urls = set()
243 | 
244 |         while True:
245 |             if len(redirect_urls) > MAX_REDIRECTIONS:
246 |                 raise TooManyRedirections(_URL)
247 | 
248 |             redirect_urls.add(url)
249 |             response = await self._request(
250 |                 method,
251 |                 url,
252 |                 params=params,
253 |                 headers=headers,
254 |                 data=data,
255 |                 cookies=cookies,
256 |                 proxy=proxy,
257 |                 proxy_auth=proxy_auth,
258 |                 allow_redirects=allow_redirects,
259 |                 recycle=recycle,
260 |                 encoding=encoding,
261 |                 connection=connection,
262 |             )
263 | 
264 |             response.request = Request(
265 |                 method,
266 |                 url,
267 |                 params=params,
268 |                 headers=headers,
269 |                 data=data,
270 |                 proxy=proxy,
271 |                 cookies=cookies,
272 |                 encoding=encoding,
273 |             )
274 | 
275 |             if not response.headers.get("Location"):
276 |                 response.history = history
277 |                 return response
278 | 
279 |             # XXX: do not store responses in self.history, which could be shared by
280 |             # other coroutines
281 | 
282 |             location = response.headers["Location"]
283 |             url = urljoin(base_url, location)
284 |             base_url = url
285 | 
286 |             if url in redirect_urls:
287 |                 raise RedirectLoop(url)
288 | 
289 |             history.append(response)
290 | 
291 |     async def head(
292 |         self,
293 |         url,
294 |         params=None,
295 |         headers=None,
296 |         cookies=None,
297 |         proxy=None,
298 |         allow_redirects=False,
299 |         recycle=None,
300 |         encoding=None,
301 |         timeout=None,
302 |         connection=None,
303 |     ):
304 |         if recycle is None:
305 |             recycle = self.recycle
306 | 
307 |         response = await self.request(
308 |             "HEAD",
309 |             url,
310 |             params=params,
311 |             headers=headers,
312 |             cookies=cookies,
313 |             proxy=proxy,
314 |             allow_redirects=allow_redirects,
315 |             recycle=recycle,
316 |             encoding=encoding,
317 |             timeout=timeout,
318 |             connection=connection,
319 |         )
320 |         return response
321 | 
322 |     async def get(
323 |         self,
324 |         url,
325 |         params=None,
326 |         headers=None,
327 |         cookies=None,
328 |         proxy=None,
329 |         allow_redirects=True,
330 |         recycle=None,
331 |         encoding=None,
332 |         timeout=None,
333 |         connection=None,
334 |     ):
335 |         if recycle is None:
336 |             recycle = self.recycle
337 | 
338 |         response = await self.request(
339 |             "GET",
340 |             url,
341 |             params=params,
342 |             headers=headers,
343 |             cookies=cookies,
344 |             proxy=proxy,
345 |             allow_redirects=allow_redirects,
346 |             recycle=recycle,
347 |             encoding=encoding,
348 |             timeout=timeout,
349 |             connection=connection,
350 |         )
351 |         return response
352 | 
353 |     async def post(
354 |         self,
355 |         url,
356 |         params=None,
357 |         headers=None,
358 |         data=None,
359 |         cookies=None,
360 |         proxy=None,
361 |         allow_redirects=True,
362 |         recycle=None,
363 |         encoding=None,
364 |         timeout=None,
365 |         connection=None,
366 |     ):
367 |         if recycle is None:
368 |             recycle = self.recycle
369 | 
370 |         response = await self.request(
371 |             "POST",
372 |             url,
373 |             params=params,
374 |             headers=headers,
375 |             data=data,
376 |             cookies=cookies,
377 |             proxy=proxy,
378 |             allow_redirects=allow_redirects,
379 |             recycle=recycle,
380 |             encoding=encoding,
381 |             timeout=timeout,
382 |             connection=connection,
383 |         )
384 |         return response
385 | 
386 |     def clear(self):
387 |         """
388 |         Reset cookies and headers to empty
389 |         """
390 | 
391 |         self.cookies.clear()
392 |         self.headers = CaseInsensitiveDict()
393 | 
394 |     def close(self):
395 |         """
396 |         Close this session; all connections and the DNS cache will be cleaned.
397 |         Cookies will be set to None.
398 |         """
399 | 
400 |         # self.adapter.close()  # not needed
401 |         self.connection_pool.clear()
402 |         self.dns_cache.clear()
403 |         self.headers = self.cookies = self.dns_cache = None
404 | 
--------------------------------------------------------------------------------
/mugen/structures.py:
--------------------------------------------------------------------------------
 1 | from collections import OrderedDict
 2 | from collections.abc import MutableMapping
 3 | 
 4 | 
 5 | class CaseInsensitiveDict(MutableMapping):
 6 |     def __init__(self, *args, **kwargs):
 7 |         self._store = OrderedDict()
 8 |         self._map = {}
 9 | 
10 |         self.update(*args, **kwargs)
11 | 
12 |     def __getitem__(self, key):
13 |         if isinstance(key, str):
14 |             key = key.lower()
15 | 
16 |         return self._store[key]
17 | 
18 |     def __setitem__(self, key, value):
19 |         if isinstance(key, str):
20 |             lower_key = key.lower()
21 |         else:
22 |             lower_key = key
23 | 
24 |         self._store[lower_key] = value
25 |         self._map[lower_key] = key
26 | 
27 |     def __delitem__(self, key):
28 |         if isinstance(key, str):
29 |             key = key.lower()
30 | 
31 |         self._store.pop(key)
32 |         self._map.pop(key)
33 | 
34 |     def __iter__(self):
35 |         for k in self._map.values():
36 |             yield k
37 | 
38 |     def __len__(self):
39 |         return len(self._store)
40 | 
--------------------------------------------------------------------------------
/mugen/utils.py:
--------------------------------------------------------------------------------
 1 | from typing import Union
 2 | import json
 3 | import re
 4 | import gzip
 5 | import zlib
 6 | import base64
 7 | from urllib.parse import quote as url_quote
 8 | from urllib.parse import urlparse
 9 | 
10 | from mugen.cookies import DictCookie
11 | from mugen.structures import CaseInsensitiveDict
12 | 
13 | 
14 | def default_headers():
15 |     return {
16 |         "User-Agent": "mugen",
17 |         "Accept": "*/*",
18 |         "Accept-Encoding": "deflate, gzip",
19 |         "Connection": "Keep-Alive",
20 |     }
21 | 
22 | 
23 | def str_encode(dt, encoding="utf-8"):
24 |     """
25 |     Check dt's type, then encode it
26 |     """
27 | 
28 |     if isinstance(dt, str):
29 |         return dt.encode(encoding) if encoding else dt
30 |     elif isinstance(dt, bytes):
31 |         return dt
32 |     else:
33 |         raise TypeError("argument must be str or bytes, NOT {!r}".format(dt))
34 | 
35 | 
36 | def form_encode(data):
37 |     """
38 |     form-encode data
39 |     """
40 | 
41 |     assert isinstance(data, dict), "data must be dict like"
42 | 
43 |     enc_data = "&".join(
44 |         [
45 |             "{}={}".format(
46 |                 k,
47 |                 url_quote(
48 |                     v if isinstance(v, str) else json.dumps(v, ensure_ascii=False)
49 |                 ),
50 |             )
51 |             for k, v in data.items()
52 |         ]
53 |     )
54 |     return enc_data
55 | 
56 | 
57 | def url_params_encode(params):
58 |     if isinstance(params, str):
59 |         return params
60 |     elif isinstance(params, bytes):
61 |         return params
62 |     elif isinstance(params, dict):
63 |         _params = []
64 |         for k, v in params.items():
65 |             _params.append(k + "=" + v)
66 |         return "&".join(_params)
67 |     else:
68 |         raise TypeError(
69 |             "argument must be str or bytes or dict, NOT {!r}".format(params)
70 |         )
71 | 
72 | 
73 | _re_ip = re.compile(r"^(http://|https://|)\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}")
74 | 
75 | 
76 | def is_ip(netloc):
77 |     return _re_ip.search(netloc) is not None
78 | 
79 | 
80 | def parse_headers(lines):
81 |     headers = CaseInsensitiveDict()
82 |     cookies = DictCookie()
83 | 
84 |     protocol, status_code, ok = lines[0].decode("utf-8").split(" ", 2)
85 | 
86 |     for line in lines[1:]:
87 |         line = line.decode("utf-8").strip()
88 |         if not line:
89 |             continue
90 | 
91 |         index = line.find(": ")
 92 |         key = line[:index]
 93 |         value = line[index + 2 :]
 94 | 
 95 |         if key.lower() == "set-cookie":
 96 |             cookies.load(value)
 97 |             if headers.get(key):
 98 |                 headers[key] += ", " + value
 99 |         else:
100 |             headers[key] = value
101 | 
102 |     return (protocol, status_code, ok), headers, cookies
103 | 
104 | 
105 | def parse_proxy(proxy_url):
106 |     parser = urlparse(proxy_url)
107 | 
108 |     proxy_scheme = parser.scheme
109 |     if "@" in parser.netloc:
110 |         user_pwd, host_port = parser.netloc.split("@", 1)
111 |         proxy_host, pt = host_port.split(":", 1)
112 |         proxy_port = int(pt)
113 |         username, password = user_pwd.split(":", 1)
114 |     else:
115 |         proxy_host = parser.netloc.split(":")[0]
116 |         proxy_port = parser.port
117 |         username = None
118 |         password = None
119 |     return proxy_scheme, proxy_host, proxy_port, username, password
120 | 
121 | 
122 | def decode_gzip(content):
123 |     assert isinstance(content, bytes)
124 |     return gzip.decompress(content)
125 | 
126 | 
127 | def decode_deflate(content):
128 |     assert isinstance(content, bytes)
129 |     try:
130 |         return zlib.decompress(content)
131 |     except Exception:
132 |         return zlib.decompress(content, -zlib.MAX_WBITS)
133 | 
134 | 
135 | def find_encoding(content_type):
136 |     if "charset" in content_type.lower():
137 |         chunks = content_type.split(";")
138 |         for chunk in chunks:
139 |             if "charset" in chunk.lower():
140 |                 parts = chunk.split("=")
141 |                 if len(parts) == 1:
142 |                     return None
143 |                 else:
144 |                     return parts[-1].strip()
145 | 
146 | 
147 | def base64encode(buf: Union[str, bytes]) -> str:
148 |     if isinstance(buf, str):
149 |         buf = buf.encode("utf-8")
150 | 
151 |     return base64.b64encode(buf).decode("utf-8")
152 | 
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [tool.poetry]
 2 | name = "mugen"
 3 | homepage = "https://github.com/PeterDing/mugen"
 4 | version = "0.6.1"
 5 | description = "Mugen - HTTP for Asynchronous Requests"
 6 | authors = ["PeterDing "]
 7 | license = "Apache-2.0"
 8 | readme = "README.md"
 9 | classifiers = [
10 |     "Intended Audience :: Developers",
11 |     "Operating System :: MacOS",
12 |     "Operating System :: POSIX :: Linux",
13 |     "Programming Language :: Python :: 3",
14 |     "Programming Language :: Python :: 3.7",
15 |     "Programming Language :: Python :: 3.8",
16 |     "Programming Language :: Python :: 3.9",
17 |     "Programming Language :: Python :: 3.10",
18 | ]
19 | 
20 | [tool.poetry.dependencies]
21 | python = "^3.7"
22 | httptools = "^0.6.1"
23 | 
24 | [tool.poetry.dev-dependencies]
25 | pytest = "^6.2.4"
26 | mypy = "^0.910"
27 | black = "^21.6b0"
28 | 
29 | [build-system]
30 | requires = ["poetry-core>=1.0.0"]
31 | build-backend = "poetry.core.masonry.api"
32 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | # This is a shim to let GitHub detect the package; the build is done with poetry
4 | 
5 | import setuptools
6 | 
7 | if __name__ == "__main__":
8 |     setuptools.setup(name="mugen")
9 | 
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PeterDing/mugen/70b79eaf47bb3e1ab0905a9eeee0a083d9766793/tests/__init__.py
--------------------------------------------------------------------------------
/tests/tests.py:
--------------------------------------------------------------------------------
 1 | import asyncio
 2 | import mugen
 3 | 
 4 | 
 5 | def test():
 6 |     loop = asyncio.get_event_loop()
 7 | 
 8 |     async def test_recycle():
 9 |         session = mugen.session(recycle=False)
10 |         await session.get("http://baidu.com")
11 |         assert len(session.connection_pool) == 0
12 | 
13 |         session = mugen.session(recycle=True)
14 |         await session.get("http://baidu.com")
15 |         assert len(session.connection_pool) == 1
16 | 
17 |     loop.run_until_complete(test_recycle())
18 | 
19 |     async def test_head():
20 |         resp = await mugen.head("http://httpbin.org")
21 |         assert len(resp.headers.items()) > 0
22 |         assert len(resp.text) == 0
23 | 
24 |     loop.run_until_complete(test_head())
25 | 
26 |     async def test_get():
27 |         resp = await mugen.get("http://www.baidu.com/")
28 |         assert resp.text.startswith("<!DOCTYPE html>")
29 | 
30 |     loop.run_until_complete(test_get())
31 | 
32 |     async def test_post():
33 |         resp = await mugen.post("http://httpbin.org/post", data={"k": "v"})
34 |         assert resp.json()["form"] == {"k": "v"}
35 | 
36 |     loop.run_until_complete(test_post())
37 | 
38 |     async def test_cookies():
39 |         resp = await mugen.get("http://httpbin.org/cookies/set?k2=v2&k1=v1")
40 |         assert resp.cookies.get_dict() == {"k1": "v1", "k2": "v2"}
41 | 
42 |     loop.run_until_complete(test_cookies())
43 | 
44 |     async def test_session_cookies():
45 |         ss = mugen.session()
46 |         resp = await ss.get(
47 |             "http://httpbin.org/cookies/set?k2=v2&k1=v1", cookies={"a": 1, "b": 2}
48 |         )
49 | 
50 |         assert resp.cookies.get_dict() == {"k1": "v1", "k2": "v2", "a": 1, "b": 2}
51 |         assert ss.cookies.get_dict() == {"k1": "v1", "k2": "v2", "a": 1, "b": 2}
52 | 
53 |     loop.run_until_complete(test_session_cookies())
54 | 
55 |     async def test_timeout():
56 |         try:
57 |             await mugen.get("http://httpbin.org/ip", timeout=0.01)
58 |             assert False, "No timeout"
59 |         except Exception as err:
60 |             assert isinstance(err, asyncio.TimeoutError)
61 | 
62 |     loop.run_until_complete(test_timeout())
63 | 
--------------------------------------------------------------------------------
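Responses expose `status_code`, `headers`, `cookies`, `text` and `json()`; a closing sketch in the style of the tests above (the endpoint is illustrative):

```python
import asyncio
import mugen

async def main():
    resp = await mugen.get("http://httpbin.org/get", params={"k": "v"})
    print(resp.status_code)                  # 200 on success
    print(resp.headers.get("Content-Type"))  # header lookup is case-insensitive
    print(resp.json()["args"])               # {'k': 'v'}

asyncio.get_event_loop().run_until_complete(main())
```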