├── .gitignore ├── index.json ├── indexit.py ├── myrequests ├── __init__.py ├── adapters.py ├── api.py ├── auth.py ├── cacert.pem ├── certs.py ├── compat.py ├── cookies.py ├── exceptions.py ├── hooks.py ├── models.py ├── packages │ ├── README.rst │ ├── __init__.py │ ├── chardet │ │ ├── __init__.py │ │ ├── big5freq.py │ │ ├── big5prober.py │ │ ├── chardetect.py │ │ ├── chardistribution.py │ │ ├── charsetgroupprober.py │ │ ├── charsetprober.py │ │ ├── codingstatemachine.py │ │ ├── compat.py │ │ ├── constants.py │ │ ├── cp949prober.py │ │ ├── escprober.py │ │ ├── escsm.py │ │ ├── eucjpprober.py │ │ ├── euckrfreq.py │ │ ├── euckrprober.py │ │ ├── euctwfreq.py │ │ ├── euctwprober.py │ │ ├── gb2312freq.py │ │ ├── gb2312prober.py │ │ ├── hebrewprober.py │ │ ├── jisfreq.py │ │ ├── jpcntx.py │ │ ├── langbulgarianmodel.py │ │ ├── langcyrillicmodel.py │ │ ├── langgreekmodel.py │ │ ├── langhebrewmodel.py │ │ ├── langhungarianmodel.py │ │ ├── langthaimodel.py │ │ ├── latin1prober.py │ │ ├── mbcharsetprober.py │ │ ├── mbcsgroupprober.py │ │ ├── mbcssm.py │ │ ├── sbcharsetprober.py │ │ ├── sbcsgroupprober.py │ │ ├── sjisprober.py │ │ ├── universaldetector.py │ │ └── utf8prober.py │ └── urllib3 │ │ ├── __init__.py │ │ ├── _collections.py │ │ ├── connection.py │ │ ├── connectionpool.py │ │ ├── connectionpool.py~ │ │ ├── contrib │ │ ├── __init__.py │ │ ├── ntlmpool.py │ │ └── pyopenssl.py │ │ ├── exceptions.py │ │ ├── fields.py │ │ ├── filepost.py │ │ ├── packages │ │ ├── __init__.py │ │ ├── ordered_dict.py │ │ ├── six.py │ │ └── ssl_match_hostname │ │ │ ├── __init__.py │ │ │ └── _implementation.py │ │ ├── poolmanager.py │ │ ├── request.py │ │ ├── response.py │ │ └── util │ │ ├── __init__.py │ │ ├── connection.py │ │ ├── request.py │ │ ├── response.py │ │ ├── retry.py │ │ ├── ssl_.py │ │ ├── timeout.py │ │ └── url.py ├── sessions.py ├── status_codes.py ├── structures.py └── utils.py ├── providers.txt ├── pyhp_server.py ├── pymultihash ├── __init__.py ├── base58.py └── pyMultiHash.py ├── readme.md └── web ├── cacheit.pyhp ├── css ├── main.css └── main.css~ ├── index.html ├── index.pyhp ├── js └── live_bg.js └── searchit.pyhp /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/* 2 | *.pyc 3 | -------------------------------------------------------------------------------- /index.json: -------------------------------------------------------------------------------- 1 | {"247940973485701776233985278752134197441944278805763437625145466167693730995444243438226614686605966479783911911996581878271923915637803741338416641610741239559262819947011238740134130313980511122350382048892227490289890084566802467610334359632997812141965131758284948715859770985812894985921710465263132240674766285595154613536524252194884890217922002050210870166744686477405957132438088180508354900132687284119912120786878482238512930053732296817721373564454808178461189879135868479116859955661811119507610265849435135932338825491685587634584796718256593241893564973945616470443599542612899069648352258652151810380975563158421876455745052846118094939082635782441474986264995056655326928252930161911709570059183186718288712625942616127340986835098518340902113509786553902": "QmUvYGTjMKarxnjg7Gg9AV8dF2eHBbgqVvtDdaL7ujm62Y"} -------------------------------------------------------------------------------- /indexit.py: -------------------------------------------------------------------------------- 1 | import pymultihash as pmh 2 | import re 3 | from bs4 import BeautifulSoup 4 | import myrequests as requests 5 | import base64 6 | 7 | IPFSGateway = "http://blamestross.com/ipfs/" 8 | 9 | INDEX_PATH = "index.json" 10 | 11 | 12 | def onecount(bloomint): 13 | count = 0 14 | while bloomint>0: 15 | count += bloomint % 2 16 | bloomint //= 2 17 | return count 18 | 19 | 20 | def generateBloomFilter(wordlist): 21 | f = 0 22 | j = 0 23 | for w in wordlist: 24 | hashInt = 0 25 | hashVal = pmh.genHash(w, 0x12) 26 | for i in range(0, 10): 27 | try: 28 | tmpInt = 2**256-1 29 | for j in range(0,10): 30 | tmpInt &= pmh.parseHash(hashVal) 31 | hashVal = pmh.genHash(hashVal, 0x12) 32 | hashInt = (hashInt << 256) | tmpInt 33 | 34 | except Exception as e: 35 | print("error ",e) 36 | print(hashVal, w, i, j, len(wordlist)) 37 | hashVal = pmh.genHash(hashVal, 0x12) 38 | f |= hashInt 39 | 40 | j += 1 41 | return f 42 | 43 | 44 | def wordInFilter(bloomInt, testWord): 45 | hashVal = pmh.genHash(testWord, 0x12) 46 | hashInt = pmh.parseHash(hashVal) 47 | return (bloomInt & hashInt) == hashInt 48 | 49 | 50 | def filterInFilter(bloomInt, testInt): 51 | return (bloomInt & testInt) == testInt 52 | 53 | 54 | def tokenizeHTML(html): 55 | raw = BeautifulSoup(html, 'html.parser').get_text() 56 | wordlist = map(lambda x: x.strip().lower(), re.split(r'[ \n\t]', raw)) 57 | longlist = filter(lambda x: len(x) > 1, wordlist) 58 | return list(set(longlist)) 59 | 60 | 61 | def indexFile(IPFSHash): 62 | path = IPFSGateway+IPFSHash 63 | req = requests.get(path) 64 | print("got request") 65 | rawText = req.text 66 | bloom = generateBloomFilter(tokenizeHTML(rawText)) 67 | return bloom 68 | -------------------------------------------------------------------------------- /myrequests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # __ 4 | # /__) _ _ _ _ _/ _ 5 | # / ( (- (/ (/ (- _) / _) 6 | # / 7 | 8 | """ 9 | Requests HTTP library 10 | ~~~~~~~~~~~~~~~~~~~~~ 11 | 12 | Requests is an HTTP library, written in Python, for human beings. Basic GET 13 | usage: 14 | 15 | >>> import requests 16 | >>> r = requests.get('https://www.python.org') 17 | >>> r.status_code 18 | 200 19 | >>> 'Python is a programming language' in r.content 20 | True 21 | 22 | ... or POST: 23 | 24 | >>> payload = dict(key1='value1', key2='value2') 25 | >>> r = requests.post('http://httpbin.org/post', data=payload) 26 | >>> print(r.text) 27 | { 28 | ... 29 | "form": { 30 | "key2": "value2", 31 | "key1": "value1" 32 | }, 33 | ... 34 | } 35 | 36 | The other HTTP methods are supported - see `requests.api`. Full documentation 37 | is at . 38 | 39 | :copyright: (c) 2015 by Kenneth Reitz. 40 | :license: Apache 2.0, see LICENSE for more details. 41 | 42 | """ 43 | 44 | __title__ = 'requests' 45 | __version__ = '2.7.0' 46 | __build__ = 0x020700 47 | __author__ = 'Kenneth Reitz' 48 | __license__ = 'Apache 2.0' 49 | __copyright__ = 'Copyright 2015 Kenneth Reitz' 50 | 51 | # Attempt to enable urllib3's SNI support, if possible 52 | try: 53 | from .packages.urllib3.contrib import pyopenssl 54 | pyopenssl.inject_into_urllib3() 55 | except ImportError: 56 | pass 57 | 58 | from . import utils 59 | from .models import Request, Response, PreparedRequest 60 | from .api import request, get, head, post, patch, put, delete, options 61 | from .sessions import session, Session 62 | from .status_codes import codes 63 | from .exceptions import ( 64 | RequestException, Timeout, URLRequired, 65 | TooManyRedirects, HTTPError, ConnectionError 66 | ) 67 | 68 | # Set default logging handler to avoid "No handler found" warnings. 69 | import logging 70 | try: # Python 2.7+ 71 | from logging import NullHandler 72 | except ImportError: 73 | class NullHandler(logging.Handler): 74 | def emit(self, record): 75 | pass 76 | 77 | logging.getLogger(__name__).addHandler(NullHandler()) 78 | -------------------------------------------------------------------------------- /myrequests/api.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | requests.api 5 | ~~~~~~~~~~~~ 6 | 7 | This module implements the Requests API. 8 | 9 | :copyright: (c) 2012 by Kenneth Reitz. 10 | :license: Apache2, see LICENSE for more details. 11 | 12 | """ 13 | 14 | from . import sessions 15 | 16 | 17 | def request(method, url, **kwargs): 18 | """Constructs and sends a :class:`Request `. 19 | 20 | :param method: method for the new :class:`Request` object. 21 | :param url: URL for the new :class:`Request` object. 22 | :param params: (optional) Dictionary or bytes to be sent in the query string for the :class:`Request`. 23 | :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. 24 | :param json: (optional) json data to send in the body of the :class:`Request`. 25 | :param headers: (optional) Dictionary of HTTP Headers to send with the :class:`Request`. 26 | :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`. 27 | :param files: (optional) Dictionary of ``'name': file-like-objects`` (or ``{'name': ('filename', fileobj)}``) for multipart encoding upload. 28 | :param auth: (optional) Auth tuple to enable Basic/Digest/Custom HTTP Auth. 29 | :param timeout: (optional) How long to wait for the server to send data 30 | before giving up, as a float, or a (`connect timeout, read timeout 31 | `_) tuple. 32 | :type timeout: float or tuple 33 | :param allow_redirects: (optional) Boolean. Set to True if POST/PUT/DELETE redirect following is allowed. 34 | :type allow_redirects: bool 35 | :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy. 36 | :param verify: (optional) if ``True``, the SSL cert will be verified. A CA_BUNDLE path can also be provided. 37 | :param stream: (optional) if ``False``, the response content will be immediately downloaded. 38 | :param cert: (optional) if String, path to ssl client cert file (.pem). If Tuple, ('cert', 'key') pair. 39 | :return: :class:`Response ` object 40 | :rtype: requests.Response 41 | 42 | Usage:: 43 | 44 | >>> import requests 45 | >>> req = requests.request('GET', 'http://httpbin.org/get') 46 | 47 | """ 48 | 49 | session = sessions.Session() 50 | response = session.request(method=method, url=url, **kwargs) 51 | # By explicitly closing the session, we avoid leaving sockets open which 52 | # can trigger a ResourceWarning in some cases, and look like a memory leak 53 | # in others. 54 | session.close() 55 | return response 56 | 57 | 58 | def get(url, params=None, **kwargs): 59 | """Sends a GET request. 60 | 61 | :param url: URL for the new :class:`Request` object. 62 | :param params: (optional) Dictionary or bytes to be sent in the query string for the :class:`Request`. 63 | :param \*\*kwargs: Optional arguments that ``request`` takes. 64 | :return: :class:`Response ` object 65 | :rtype: requests.Response 66 | """ 67 | 68 | kwargs.setdefault('allow_redirects', True) 69 | return request('get', url, params=params, **kwargs) 70 | 71 | 72 | def options(url, **kwargs): 73 | """Sends a OPTIONS request. 74 | 75 | :param url: URL for the new :class:`Request` object. 76 | :param \*\*kwargs: Optional arguments that ``request`` takes. 77 | :return: :class:`Response ` object 78 | :rtype: requests.Response 79 | """ 80 | 81 | kwargs.setdefault('allow_redirects', True) 82 | return request('options', url, **kwargs) 83 | 84 | 85 | def head(url, **kwargs): 86 | """Sends a HEAD request. 87 | 88 | :param url: URL for the new :class:`Request` object. 89 | :param \*\*kwargs: Optional arguments that ``request`` takes. 90 | :return: :class:`Response ` object 91 | :rtype: requests.Response 92 | """ 93 | 94 | kwargs.setdefault('allow_redirects', False) 95 | return request('head', url, **kwargs) 96 | 97 | 98 | def post(url, data=None, json=None, **kwargs): 99 | """Sends a POST request. 100 | 101 | :param url: URL for the new :class:`Request` object. 102 | :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. 103 | :param json: (optional) json data to send in the body of the :class:`Request`. 104 | :param \*\*kwargs: Optional arguments that ``request`` takes. 105 | :return: :class:`Response ` object 106 | :rtype: requests.Response 107 | """ 108 | 109 | return request('post', url, data=data, json=json, **kwargs) 110 | 111 | 112 | def put(url, data=None, **kwargs): 113 | """Sends a PUT request. 114 | 115 | :param url: URL for the new :class:`Request` object. 116 | :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. 117 | :param \*\*kwargs: Optional arguments that ``request`` takes. 118 | :return: :class:`Response ` object 119 | :rtype: requests.Response 120 | """ 121 | 122 | return request('put', url, data=data, **kwargs) 123 | 124 | 125 | def patch(url, data=None, **kwargs): 126 | """Sends a PATCH request. 127 | 128 | :param url: URL for the new :class:`Request` object. 129 | :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. 130 | :param \*\*kwargs: Optional arguments that ``request`` takes. 131 | :return: :class:`Response ` object 132 | :rtype: requests.Response 133 | """ 134 | 135 | return request('patch', url, data=data, **kwargs) 136 | 137 | 138 | def delete(url, **kwargs): 139 | """Sends a DELETE request. 140 | 141 | :param url: URL for the new :class:`Request` object. 142 | :param \*\*kwargs: Optional arguments that ``request`` takes. 143 | :return: :class:`Response ` object 144 | :rtype: requests.Response 145 | """ 146 | 147 | return request('delete', url, **kwargs) 148 | -------------------------------------------------------------------------------- /myrequests/auth.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | requests.auth 5 | ~~~~~~~~~~~~~ 6 | 7 | This module contains the authentication handlers for Requests. 8 | """ 9 | 10 | import os 11 | import re 12 | import time 13 | import hashlib 14 | 15 | from base64 import b64encode 16 | 17 | from .compat import urlparse, str 18 | from .cookies import extract_cookies_to_jar 19 | from .utils import parse_dict_header, to_native_string 20 | from .status_codes import codes 21 | 22 | CONTENT_TYPE_FORM_URLENCODED = 'application/x-www-form-urlencoded' 23 | CONTENT_TYPE_MULTI_PART = 'multipart/form-data' 24 | 25 | 26 | def _basic_auth_str(username, password): 27 | """Returns a Basic Auth string.""" 28 | 29 | authstr = 'Basic ' + to_native_string( 30 | b64encode(('%s:%s' % (username, password)).encode('latin1')).strip() 31 | ) 32 | 33 | return authstr 34 | 35 | 36 | class AuthBase(object): 37 | """Base class that all auth implementations derive from""" 38 | 39 | def __call__(self, r): 40 | raise NotImplementedError('Auth hooks must be callable.') 41 | 42 | 43 | class HTTPBasicAuth(AuthBase): 44 | """Attaches HTTP Basic Authentication to the given Request object.""" 45 | def __init__(self, username, password): 46 | self.username = username 47 | self.password = password 48 | 49 | def __call__(self, r): 50 | r.headers['Authorization'] = _basic_auth_str(self.username, self.password) 51 | return r 52 | 53 | 54 | class HTTPProxyAuth(HTTPBasicAuth): 55 | """Attaches HTTP Proxy Authentication to a given Request object.""" 56 | def __call__(self, r): 57 | r.headers['Proxy-Authorization'] = _basic_auth_str(self.username, self.password) 58 | return r 59 | 60 | 61 | class HTTPDigestAuth(AuthBase): 62 | """Attaches HTTP Digest Authentication to the given Request object.""" 63 | def __init__(self, username, password): 64 | self.username = username 65 | self.password = password 66 | self.last_nonce = '' 67 | self.nonce_count = 0 68 | self.chal = {} 69 | self.pos = None 70 | self.num_401_calls = 1 71 | 72 | def build_digest_header(self, method, url): 73 | 74 | realm = self.chal['realm'] 75 | nonce = self.chal['nonce'] 76 | qop = self.chal.get('qop') 77 | algorithm = self.chal.get('algorithm') 78 | opaque = self.chal.get('opaque') 79 | 80 | if algorithm is None: 81 | _algorithm = 'MD5' 82 | else: 83 | _algorithm = algorithm.upper() 84 | # lambdas assume digest modules are imported at the top level 85 | if _algorithm == 'MD5' or _algorithm == 'MD5-SESS': 86 | def md5_utf8(x): 87 | if isinstance(x, str): 88 | x = x.encode('utf-8') 89 | return hashlib.md5(x).hexdigest() 90 | hash_utf8 = md5_utf8 91 | elif _algorithm == 'SHA': 92 | def sha_utf8(x): 93 | if isinstance(x, str): 94 | x = x.encode('utf-8') 95 | return hashlib.sha1(x).hexdigest() 96 | hash_utf8 = sha_utf8 97 | 98 | KD = lambda s, d: hash_utf8("%s:%s" % (s, d)) 99 | 100 | if hash_utf8 is None: 101 | return None 102 | 103 | # XXX not implemented yet 104 | entdig = None 105 | p_parsed = urlparse(url) 106 | #: path is request-uri defined in RFC 2616 which should not be empty 107 | path = p_parsed.path or "/" 108 | if p_parsed.query: 109 | path += '?' + p_parsed.query 110 | 111 | A1 = '%s:%s:%s' % (self.username, realm, self.password) 112 | A2 = '%s:%s' % (method, path) 113 | 114 | HA1 = hash_utf8(A1) 115 | HA2 = hash_utf8(A2) 116 | 117 | if nonce == self.last_nonce: 118 | self.nonce_count += 1 119 | else: 120 | self.nonce_count = 1 121 | ncvalue = '%08x' % self.nonce_count 122 | s = str(self.nonce_count).encode('utf-8') 123 | s += nonce.encode('utf-8') 124 | s += time.ctime().encode('utf-8') 125 | s += os.urandom(8) 126 | 127 | cnonce = (hashlib.sha1(s).hexdigest()[:16]) 128 | if _algorithm == 'MD5-SESS': 129 | HA1 = hash_utf8('%s:%s:%s' % (HA1, nonce, cnonce)) 130 | 131 | if qop is None: 132 | respdig = KD(HA1, "%s:%s" % (nonce, HA2)) 133 | elif qop == 'auth' or 'auth' in qop.split(','): 134 | noncebit = "%s:%s:%s:%s:%s" % ( 135 | nonce, ncvalue, cnonce, 'auth', HA2 136 | ) 137 | respdig = KD(HA1, noncebit) 138 | else: 139 | # XXX handle auth-int. 140 | return None 141 | 142 | self.last_nonce = nonce 143 | 144 | # XXX should the partial digests be encoded too? 145 | base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \ 146 | 'response="%s"' % (self.username, realm, nonce, path, respdig) 147 | if opaque: 148 | base += ', opaque="%s"' % opaque 149 | if algorithm: 150 | base += ', algorithm="%s"' % algorithm 151 | if entdig: 152 | base += ', digest="%s"' % entdig 153 | if qop: 154 | base += ', qop="auth", nc=%s, cnonce="%s"' % (ncvalue, cnonce) 155 | 156 | return 'Digest %s' % (base) 157 | 158 | def handle_redirect(self, r, **kwargs): 159 | """Reset num_401_calls counter on redirects.""" 160 | if r.is_redirect: 161 | self.num_401_calls = 1 162 | 163 | def handle_401(self, r, **kwargs): 164 | """Takes the given response and tries digest-auth, if needed.""" 165 | 166 | if self.pos is not None: 167 | # Rewind the file position indicator of the body to where 168 | # it was to resend the request. 169 | r.request.body.seek(self.pos) 170 | num_401_calls = getattr(self, 'num_401_calls', 1) 171 | s_auth = r.headers.get('www-authenticate', '') 172 | 173 | if 'digest' in s_auth.lower() and num_401_calls < 2: 174 | 175 | self.num_401_calls += 1 176 | pat = re.compile(r'digest ', flags=re.IGNORECASE) 177 | self.chal = parse_dict_header(pat.sub('', s_auth, count=1)) 178 | 179 | # Consume content and release the original connection 180 | # to allow our new request to reuse the same one. 181 | r.content 182 | r.close() 183 | prep = r.request.copy() 184 | extract_cookies_to_jar(prep._cookies, r.request, r.raw) 185 | prep.prepare_cookies(prep._cookies) 186 | 187 | prep.headers['Authorization'] = self.build_digest_header( 188 | prep.method, prep.url) 189 | _r = r.connection.send(prep, **kwargs) 190 | _r.history.append(r) 191 | _r.request = prep 192 | 193 | return _r 194 | 195 | self.num_401_calls = 1 196 | return r 197 | 198 | def __call__(self, r): 199 | # If we have a saved nonce, skip the 401 200 | if self.last_nonce: 201 | r.headers['Authorization'] = self.build_digest_header(r.method, r.url) 202 | try: 203 | self.pos = r.body.tell() 204 | except AttributeError: 205 | # In the case of HTTPDigestAuth being reused and the body of 206 | # the previous request was a file-like object, pos has the 207 | # file position of the previous body. Ensure it's set to 208 | # None. 209 | self.pos = None 210 | r.register_hook('response', self.handle_401) 211 | r.register_hook('response', self.handle_redirect) 212 | return r 213 | -------------------------------------------------------------------------------- /myrequests/certs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | certs.py 6 | ~~~~~~~~ 7 | 8 | This module returns the preferred default CA certificate bundle. 9 | 10 | If you are packaging Requests, e.g., for a Linux distribution or a managed 11 | environment, you can change the definition of where() to return a separately 12 | packaged CA bundle. 13 | """ 14 | import os.path 15 | 16 | try: 17 | from certifi import where 18 | except ImportError: 19 | def where(): 20 | """Return the preferred certificate bundle.""" 21 | # vendored bundle inside Requests 22 | return os.path.join(os.path.dirname(__file__), 'cacert.pem') 23 | 24 | if __name__ == '__main__': 25 | print(where()) 26 | -------------------------------------------------------------------------------- /myrequests/compat.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | pythoncompat 5 | """ 6 | 7 | from .packages import chardet 8 | 9 | import sys 10 | 11 | # ------- 12 | # Pythons 13 | # ------- 14 | 15 | # Syntax sugar. 16 | _ver = sys.version_info 17 | 18 | #: Python 2.x? 19 | is_py2 = (_ver[0] == 2) 20 | 21 | #: Python 3.x? 22 | is_py3 = (_ver[0] == 3) 23 | 24 | try: 25 | import simplejson as json 26 | except (ImportError, SyntaxError): 27 | # simplejson does not support Python 3.2, it throws a SyntaxError 28 | # because of u'...' Unicode literals. 29 | import json 30 | 31 | # --------- 32 | # Specifics 33 | # --------- 34 | 35 | if is_py2: 36 | from urllib import quote, unquote, quote_plus, unquote_plus, urlencode, getproxies, proxy_bypass 37 | from urlparse import urlparse, urlunparse, urljoin, urlsplit, urldefrag 38 | from urllib2 import parse_http_list 39 | import cookielib 40 | from Cookie import Morsel 41 | from StringIO import StringIO 42 | from .packages.urllib3.packages.ordered_dict import OrderedDict 43 | 44 | builtin_str = str 45 | bytes = str 46 | str = unicode 47 | basestring = basestring 48 | numeric_types = (int, long, float) 49 | 50 | elif is_py3: 51 | from urllib.parse import urlparse, urlunparse, urljoin, urlsplit, urlencode, quote, unquote, quote_plus, unquote_plus, urldefrag 52 | from urllib.request import parse_http_list, getproxies, proxy_bypass 53 | from http import cookiejar as cookielib 54 | from http.cookies import Morsel 55 | from io import StringIO 56 | from collections import OrderedDict 57 | 58 | builtin_str = str 59 | str = str 60 | bytes = bytes 61 | basestring = (str, bytes) 62 | numeric_types = (int, float) 63 | -------------------------------------------------------------------------------- /myrequests/exceptions.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | requests.exceptions 5 | ~~~~~~~~~~~~~~~~~~~ 6 | 7 | This module contains the set of Requests' exceptions. 8 | 9 | """ 10 | from .packages.urllib3.exceptions import HTTPError as BaseHTTPError 11 | 12 | 13 | class RequestException(IOError): 14 | """There was an ambiguous exception that occurred while handling your 15 | request.""" 16 | 17 | def __init__(self, *args, **kwargs): 18 | """ 19 | Initialize RequestException with `request` and `response` objects. 20 | """ 21 | response = kwargs.pop('response', None) 22 | self.response = response 23 | self.request = kwargs.pop('request', None) 24 | if (response is not None and not self.request and 25 | hasattr(response, 'request')): 26 | self.request = self.response.request 27 | super(RequestException, self).__init__(*args, **kwargs) 28 | 29 | 30 | class HTTPError(RequestException): 31 | """An HTTP error occurred.""" 32 | 33 | 34 | class ConnectionError(RequestException): 35 | """A Connection error occurred.""" 36 | 37 | 38 | class ProxyError(ConnectionError): 39 | """A proxy error occurred.""" 40 | 41 | 42 | class SSLError(ConnectionError): 43 | """An SSL error occurred.""" 44 | 45 | 46 | class Timeout(RequestException): 47 | """The request timed out. 48 | 49 | Catching this error will catch both 50 | :exc:`~requests.exceptions.ConnectTimeout` and 51 | :exc:`~requests.exceptions.ReadTimeout` errors. 52 | """ 53 | 54 | 55 | class ConnectTimeout(ConnectionError, Timeout): 56 | """The request timed out while trying to connect to the remote server. 57 | 58 | Requests that produced this error are safe to retry. 59 | """ 60 | 61 | 62 | class ReadTimeout(Timeout): 63 | """The server did not send any data in the allotted amount of time.""" 64 | 65 | 66 | class URLRequired(RequestException): 67 | """A valid URL is required to make a request.""" 68 | 69 | 70 | class TooManyRedirects(RequestException): 71 | """Too many redirects.""" 72 | 73 | 74 | class MissingSchema(RequestException, ValueError): 75 | """The URL schema (e.g. http or https) is missing.""" 76 | 77 | 78 | class InvalidSchema(RequestException, ValueError): 79 | """See defaults.py for valid schemas.""" 80 | 81 | 82 | class InvalidURL(RequestException, ValueError): 83 | """ The URL provided was somehow invalid. """ 84 | 85 | 86 | class ChunkedEncodingError(RequestException): 87 | """The server declared chunked encoding but sent an invalid chunk.""" 88 | 89 | 90 | class ContentDecodingError(RequestException, BaseHTTPError): 91 | """Failed to decode response content""" 92 | 93 | 94 | class StreamConsumedError(RequestException, TypeError): 95 | """The content for this response was already consumed""" 96 | 97 | 98 | class RetryError(RequestException): 99 | """Custom retries logic failed""" 100 | -------------------------------------------------------------------------------- /myrequests/hooks.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | requests.hooks 5 | ~~~~~~~~~~~~~~ 6 | 7 | This module provides the capabilities for the Requests hooks system. 8 | 9 | Available hooks: 10 | 11 | ``response``: 12 | The response generated from a Request. 13 | 14 | """ 15 | 16 | 17 | HOOKS = ['response'] 18 | 19 | 20 | def default_hooks(): 21 | hooks = {} 22 | for event in HOOKS: 23 | hooks[event] = [] 24 | return hooks 25 | 26 | # TODO: response is the only one 27 | 28 | 29 | def dispatch_hook(key, hooks, hook_data, **kwargs): 30 | """Dispatches a hook dictionary on a given piece of data.""" 31 | 32 | hooks = hooks or dict() 33 | 34 | if key in hooks: 35 | hooks = hooks.get(key) 36 | 37 | if hasattr(hooks, '__call__'): 38 | hooks = [hooks] 39 | 40 | for hook in hooks: 41 | _hook_data = hook(hook_data, **kwargs) 42 | if _hook_data is not None: 43 | hook_data = _hook_data 44 | 45 | return hook_data 46 | -------------------------------------------------------------------------------- /myrequests/packages/README.rst: -------------------------------------------------------------------------------- 1 | If you are planning to submit a pull request to requests with any changes in 2 | this library do not go any further. These are independent libraries which we 3 | vendor into requests. Any changes necessary to these libraries must be made in 4 | them and submitted as separate pull requests to those libraries. 5 | 6 | urllib3 pull requests go here: https://github.com/shazow/urllib3 7 | 8 | chardet pull requests go here: https://github.com/chardet/chardet 9 | -------------------------------------------------------------------------------- /myrequests/packages/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from . import urllib3 4 | -------------------------------------------------------------------------------- /myrequests/packages/chardet/__init__.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # This library is free software; you can redistribute it and/or 3 | # modify it under the terms of the GNU Lesser General Public 4 | # License as published by the Free Software Foundation; either 5 | # version 2.1 of the License, or (at your option) any later version. 6 | # 7 | # This library is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 10 | # Lesser General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU Lesser General Public 13 | # License along with this library; if not, write to the Free Software 14 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 15 | # 02110-1301 USA 16 | ######################### END LICENSE BLOCK ######################### 17 | 18 | __version__ = "2.3.0" 19 | from sys import version_info 20 | 21 | 22 | def detect(aBuf): 23 | if ((version_info < (3, 0) and isinstance(aBuf, unicode)) or 24 | (version_info >= (3, 0) and not isinstance(aBuf, bytes))): 25 | raise ValueError('Expected a bytes object, not a unicode object') 26 | 27 | from . import universaldetector 28 | u = universaldetector.UniversalDetector() 29 | u.reset() 30 | u.feed(aBuf) 31 | u.close() 32 | return u.result 33 | -------------------------------------------------------------------------------- /myrequests/packages/chardet/big5prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Communicator client code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import Big5DistributionAnalysis 31 | from .mbcssm import Big5SMModel 32 | 33 | 34 | class Big5Prober(MultiByteCharSetProber): 35 | def __init__(self): 36 | MultiByteCharSetProber.__init__(self) 37 | self._mCodingSM = CodingStateMachine(Big5SMModel) 38 | self._mDistributionAnalyzer = Big5DistributionAnalysis() 39 | self.reset() 40 | 41 | def get_charset_name(self): 42 | return "Big5" 43 | -------------------------------------------------------------------------------- /myrequests/packages/chardet/chardetect.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Script which takes one or more file paths and reports on their detected 4 | encodings 5 | 6 | Example:: 7 | 8 | % chardetect somefile someotherfile 9 | somefile: windows-1252 with confidence 0.5 10 | someotherfile: ascii with confidence 1.0 11 | 12 | If no paths are provided, it takes its input from stdin. 13 | 14 | """ 15 | 16 | from __future__ import absolute_import, print_function, unicode_literals 17 | 18 | import argparse 19 | import sys 20 | from io import open 21 | 22 | from chardet import __version__ 23 | from chardet.universaldetector import UniversalDetector 24 | 25 | 26 | def description_of(lines, name='stdin'): 27 | """ 28 | Return a string describing the probable encoding of a file or 29 | list of strings. 30 | 31 | :param lines: The lines to get the encoding of. 32 | :type lines: Iterable of bytes 33 | :param name: Name of file or collection of lines 34 | :type name: str 35 | """ 36 | u = UniversalDetector() 37 | for line in lines: 38 | u.feed(line) 39 | u.close() 40 | result = u.result 41 | if result['encoding']: 42 | return '{0}: {1} with confidence {2}'.format(name, result['encoding'], 43 | result['confidence']) 44 | else: 45 | return '{0}: no result'.format(name) 46 | 47 | 48 | def main(argv=None): 49 | ''' 50 | Handles command line arguments and gets things started. 51 | 52 | :param argv: List of arguments, as if specified on the command-line. 53 | If None, ``sys.argv[1:]`` is used instead. 54 | :type argv: list of str 55 | ''' 56 | # Get command line arguments 57 | parser = argparse.ArgumentParser( 58 | description="Takes one or more file paths and reports their detected \ 59 | encodings", 60 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, 61 | conflict_handler='resolve') 62 | parser.add_argument('input', 63 | help='File whose encoding we would like to determine.', 64 | type=argparse.FileType('rb'), nargs='*', 65 | default=[sys.stdin]) 66 | parser.add_argument('--version', action='version', 67 | version='%(prog)s {0}'.format(__version__)) 68 | args = parser.parse_args(argv) 69 | 70 | for f in args.input: 71 | if f.isatty(): 72 | print("You are running chardetect interactively. Press " + 73 | "CTRL-D twice at the start of a blank line to signal the " + 74 | "end of your input. If you want help, run chardetect " + 75 | "--help\n", file=sys.stderr) 76 | print(description_of(f, f.name)) 77 | 78 | 79 | if __name__ == '__main__': 80 | main() 81 | -------------------------------------------------------------------------------- /myrequests/packages/chardet/chardistribution.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Communicator client code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .euctwfreq import (EUCTWCharToFreqOrder, EUCTW_TABLE_SIZE, 29 | EUCTW_TYPICAL_DISTRIBUTION_RATIO) 30 | from .euckrfreq import (EUCKRCharToFreqOrder, EUCKR_TABLE_SIZE, 31 | EUCKR_TYPICAL_DISTRIBUTION_RATIO) 32 | from .gb2312freq import (GB2312CharToFreqOrder, GB2312_TABLE_SIZE, 33 | GB2312_TYPICAL_DISTRIBUTION_RATIO) 34 | from .big5freq import (Big5CharToFreqOrder, BIG5_TABLE_SIZE, 35 | BIG5_TYPICAL_DISTRIBUTION_RATIO) 36 | from .jisfreq import (JISCharToFreqOrder, JIS_TABLE_SIZE, 37 | JIS_TYPICAL_DISTRIBUTION_RATIO) 38 | from .compat import wrap_ord 39 | 40 | ENOUGH_DATA_THRESHOLD = 1024 41 | SURE_YES = 0.99 42 | SURE_NO = 0.01 43 | MINIMUM_DATA_THRESHOLD = 3 44 | 45 | 46 | class CharDistributionAnalysis: 47 | def __init__(self): 48 | # Mapping table to get frequency order from char order (get from 49 | # GetOrder()) 50 | self._mCharToFreqOrder = None 51 | self._mTableSize = None # Size of above table 52 | # This is a constant value which varies from language to language, 53 | # used in calculating confidence. See 54 | # http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html 55 | # for further detail. 56 | self._mTypicalDistributionRatio = None 57 | self.reset() 58 | 59 | def reset(self): 60 | """reset analyser, clear any state""" 61 | # If this flag is set to True, detection is done and conclusion has 62 | # been made 63 | self._mDone = False 64 | self._mTotalChars = 0 # Total characters encountered 65 | # The number of characters whose frequency order is less than 512 66 | self._mFreqChars = 0 67 | 68 | def feed(self, aBuf, aCharLen): 69 | """feed a character with known length""" 70 | if aCharLen == 2: 71 | # we only care about 2-bytes character in our distribution analysis 72 | order = self.get_order(aBuf) 73 | else: 74 | order = -1 75 | if order >= 0: 76 | self._mTotalChars += 1 77 | # order is valid 78 | if order < self._mTableSize: 79 | if 512 > self._mCharToFreqOrder[order]: 80 | self._mFreqChars += 1 81 | 82 | def get_confidence(self): 83 | """return confidence based on existing data""" 84 | # if we didn't receive any character in our consideration range, 85 | # return negative answer 86 | if self._mTotalChars <= 0 or self._mFreqChars <= MINIMUM_DATA_THRESHOLD: 87 | return SURE_NO 88 | 89 | if self._mTotalChars != self._mFreqChars: 90 | r = (self._mFreqChars / ((self._mTotalChars - self._mFreqChars) 91 | * self._mTypicalDistributionRatio)) 92 | if r < SURE_YES: 93 | return r 94 | 95 | # normalize confidence (we don't want to be 100% sure) 96 | return SURE_YES 97 | 98 | def got_enough_data(self): 99 | # It is not necessary to receive all data to draw conclusion. 100 | # For charset detection, certain amount of data is enough 101 | return self._mTotalChars > ENOUGH_DATA_THRESHOLD 102 | 103 | def get_order(self, aBuf): 104 | # We do not handle characters based on the original encoding string, 105 | # but convert this encoding string to a number, here called order. 106 | # This allows multiple encodings of a language to share one frequency 107 | # table. 108 | return -1 109 | 110 | 111 | class EUCTWDistributionAnalysis(CharDistributionAnalysis): 112 | def __init__(self): 113 | CharDistributionAnalysis.__init__(self) 114 | self._mCharToFreqOrder = EUCTWCharToFreqOrder 115 | self._mTableSize = EUCTW_TABLE_SIZE 116 | self._mTypicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO 117 | 118 | def get_order(self, aBuf): 119 | # for euc-TW encoding, we are interested 120 | # first byte range: 0xc4 -- 0xfe 121 | # second byte range: 0xa1 -- 0xfe 122 | # no validation needed here. State machine has done that 123 | first_char = wrap_ord(aBuf[0]) 124 | if first_char >= 0xC4: 125 | return 94 * (first_char - 0xC4) + wrap_ord(aBuf[1]) - 0xA1 126 | else: 127 | return -1 128 | 129 | 130 | class EUCKRDistributionAnalysis(CharDistributionAnalysis): 131 | def __init__(self): 132 | CharDistributionAnalysis.__init__(self) 133 | self._mCharToFreqOrder = EUCKRCharToFreqOrder 134 | self._mTableSize = EUCKR_TABLE_SIZE 135 | self._mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO 136 | 137 | def get_order(self, aBuf): 138 | # for euc-KR encoding, we are interested 139 | # first byte range: 0xb0 -- 0xfe 140 | # second byte range: 0xa1 -- 0xfe 141 | # no validation needed here. State machine has done that 142 | first_char = wrap_ord(aBuf[0]) 143 | if first_char >= 0xB0: 144 | return 94 * (first_char - 0xB0) + wrap_ord(aBuf[1]) - 0xA1 145 | else: 146 | return -1 147 | 148 | 149 | class GB2312DistributionAnalysis(CharDistributionAnalysis): 150 | def __init__(self): 151 | CharDistributionAnalysis.__init__(self) 152 | self._mCharToFreqOrder = GB2312CharToFreqOrder 153 | self._mTableSize = GB2312_TABLE_SIZE 154 | self._mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO 155 | 156 | def get_order(self, aBuf): 157 | # for GB2312 encoding, we are interested 158 | # first byte range: 0xb0 -- 0xfe 159 | # second byte range: 0xa1 -- 0xfe 160 | # no validation needed here. State machine has done that 161 | first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1]) 162 | if (first_char >= 0xB0) and (second_char >= 0xA1): 163 | return 94 * (first_char - 0xB0) + second_char - 0xA1 164 | else: 165 | return -1 166 | 167 | 168 | class Big5DistributionAnalysis(CharDistributionAnalysis): 169 | def __init__(self): 170 | CharDistributionAnalysis.__init__(self) 171 | self._mCharToFreqOrder = Big5CharToFreqOrder 172 | self._mTableSize = BIG5_TABLE_SIZE 173 | self._mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO 174 | 175 | def get_order(self, aBuf): 176 | # for big5 encoding, we are interested 177 | # first byte range: 0xa4 -- 0xfe 178 | # second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe 179 | # no validation needed here. State machine has done that 180 | first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1]) 181 | if first_char >= 0xA4: 182 | if second_char >= 0xA1: 183 | return 157 * (first_char - 0xA4) + second_char - 0xA1 + 63 184 | else: 185 | return 157 * (first_char - 0xA4) + second_char - 0x40 186 | else: 187 | return -1 188 | 189 | 190 | class SJISDistributionAnalysis(CharDistributionAnalysis): 191 | def __init__(self): 192 | CharDistributionAnalysis.__init__(self) 193 | self._mCharToFreqOrder = JISCharToFreqOrder 194 | self._mTableSize = JIS_TABLE_SIZE 195 | self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO 196 | 197 | def get_order(self, aBuf): 198 | # for sjis encoding, we are interested 199 | # first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe 200 | # second byte range: 0x40 -- 0x7e, 0x81 -- oxfe 201 | # no validation needed here. State machine has done that 202 | first_char, second_char = wrap_ord(aBuf[0]), wrap_ord(aBuf[1]) 203 | if (first_char >= 0x81) and (first_char <= 0x9F): 204 | order = 188 * (first_char - 0x81) 205 | elif (first_char >= 0xE0) and (first_char <= 0xEF): 206 | order = 188 * (first_char - 0xE0 + 31) 207 | else: 208 | return -1 209 | order = order + second_char - 0x40 210 | if second_char > 0x7F: 211 | order = -1 212 | return order 213 | 214 | 215 | class EUCJPDistributionAnalysis(CharDistributionAnalysis): 216 | def __init__(self): 217 | CharDistributionAnalysis.__init__(self) 218 | self._mCharToFreqOrder = JISCharToFreqOrder 219 | self._mTableSize = JIS_TABLE_SIZE 220 | self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO 221 | 222 | def get_order(self, aBuf): 223 | # for euc-JP encoding, we are interested 224 | # first byte range: 0xa0 -- 0xfe 225 | # second byte range: 0xa1 -- 0xfe 226 | # no validation needed here. State machine has done that 227 | char = wrap_ord(aBuf[0]) 228 | if char >= 0xA0: 229 | return 94 * (char - 0xA1) + wrap_ord(aBuf[1]) - 0xa1 230 | else: 231 | return -1 232 | -------------------------------------------------------------------------------- /myrequests/packages/chardet/charsetgroupprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Communicator client code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from . import constants 29 | import sys 30 | from .charsetprober import CharSetProber 31 | 32 | 33 | class CharSetGroupProber(CharSetProber): 34 | def __init__(self): 35 | CharSetProber.__init__(self) 36 | self._mActiveNum = 0 37 | self._mProbers = [] 38 | self._mBestGuessProber = None 39 | 40 | def reset(self): 41 | CharSetProber.reset(self) 42 | self._mActiveNum = 0 43 | for prober in self._mProbers: 44 | if prober: 45 | prober.reset() 46 | prober.active = True 47 | self._mActiveNum += 1 48 | self._mBestGuessProber = None 49 | 50 | def get_charset_name(self): 51 | if not self._mBestGuessProber: 52 | self.get_confidence() 53 | if not self._mBestGuessProber: 54 | return None 55 | # self._mBestGuessProber = self._mProbers[0] 56 | return self._mBestGuessProber.get_charset_name() 57 | 58 | def feed(self, aBuf): 59 | for prober in self._mProbers: 60 | if not prober: 61 | continue 62 | if not prober.active: 63 | continue 64 | st = prober.feed(aBuf) 65 | if not st: 66 | continue 67 | if st == constants.eFoundIt: 68 | self._mBestGuessProber = prober 69 | return self.get_state() 70 | elif st == constants.eNotMe: 71 | prober.active = False 72 | self._mActiveNum -= 1 73 | if self._mActiveNum <= 0: 74 | self._mState = constants.eNotMe 75 | return self.get_state() 76 | return self.get_state() 77 | 78 | def get_confidence(self): 79 | st = self.get_state() 80 | if st == constants.eFoundIt: 81 | return 0.99 82 | elif st == constants.eNotMe: 83 | return 0.01 84 | bestConf = 0.0 85 | self._mBestGuessProber = None 86 | for prober in self._mProbers: 87 | if not prober: 88 | continue 89 | if not prober.active: 90 | if constants._debug: 91 | sys.stderr.write(prober.get_charset_name() 92 | + ' not active\n') 93 | continue 94 | cf = prober.get_confidence() 95 | if constants._debug: 96 | sys.stderr.write('%s confidence = %s\n' % 97 | (prober.get_charset_name(), cf)) 98 | if bestConf < cf: 99 | bestConf = cf 100 | self._mBestGuessProber = prober 101 | if not self._mBestGuessProber: 102 | return 0.0 103 | return bestConf 104 | # else: 105 | # self._mBestGuessProber = self._mProbers[0] 106 | # return self._mBestGuessProber.get_confidence() 107 | -------------------------------------------------------------------------------- /myrequests/packages/chardet/charsetprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # 13 | # This library is free software; you can redistribute it and/or 14 | # modify it under the terms of the GNU Lesser General Public 15 | # License as published by the Free Software Foundation; either 16 | # version 2.1 of the License, or (at your option) any later version. 17 | # 18 | # This library is distributed in the hope that it will be useful, 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 | # Lesser General Public License for more details. 22 | # 23 | # You should have received a copy of the GNU Lesser General Public 24 | # License along with this library; if not, write to the Free Software 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 26 | # 02110-1301 USA 27 | ######################### END LICENSE BLOCK ######################### 28 | 29 | from . import constants 30 | import re 31 | 32 | 33 | class CharSetProber: 34 | def __init__(self): 35 | pass 36 | 37 | def reset(self): 38 | self._mState = constants.eDetecting 39 | 40 | def get_charset_name(self): 41 | return None 42 | 43 | def feed(self, aBuf): 44 | pass 45 | 46 | def get_state(self): 47 | return self._mState 48 | 49 | def get_confidence(self): 50 | return 0.0 51 | 52 | def filter_high_bit_only(self, aBuf): 53 | aBuf = re.sub(b'([\x00-\x7F])+', b' ', aBuf) 54 | return aBuf 55 | 56 | def filter_without_english_letters(self, aBuf): 57 | aBuf = re.sub(b'([A-Za-z])+', b' ', aBuf) 58 | return aBuf 59 | 60 | def filter_with_english_letters(self, aBuf): 61 | # TODO 62 | return aBuf 63 | -------------------------------------------------------------------------------- /myrequests/packages/chardet/codingstatemachine.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .constants import eStart 29 | from .compat import wrap_ord 30 | 31 | 32 | class CodingStateMachine: 33 | def __init__(self, sm): 34 | self._mModel = sm 35 | self._mCurrentBytePos = 0 36 | self._mCurrentCharLen = 0 37 | self.reset() 38 | 39 | def reset(self): 40 | self._mCurrentState = eStart 41 | 42 | def next_state(self, c): 43 | # for each byte we get its class 44 | # if it is first byte, we also get byte length 45 | # PY3K: aBuf is a byte stream, so c is an int, not a byte 46 | byteCls = self._mModel['classTable'][wrap_ord(c)] 47 | if self._mCurrentState == eStart: 48 | self._mCurrentBytePos = 0 49 | self._mCurrentCharLen = self._mModel['charLenTable'][byteCls] 50 | # from byte's class and stateTable, we get its next state 51 | curr_state = (self._mCurrentState * self._mModel['classFactor'] 52 | + byteCls) 53 | self._mCurrentState = self._mModel['stateTable'][curr_state] 54 | self._mCurrentBytePos += 1 55 | return self._mCurrentState 56 | 57 | def get_current_charlen(self): 58 | return self._mCurrentCharLen 59 | 60 | def get_coding_state_machine(self): 61 | return self._mModel['name'] 62 | -------------------------------------------------------------------------------- /myrequests/packages/chardet/compat.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # Contributor(s): 3 | # Ian Cordasco - port to Python 4 | # 5 | # This library is free software; you can redistribute it and/or 6 | # modify it under the terms of the GNU Lesser General Public 7 | # License as published by the Free Software Foundation; either 8 | # version 2.1 of the License, or (at your option) any later version. 9 | # 10 | # This library is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 | # Lesser General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU Lesser General Public 16 | # License along with this library; if not, write to the Free Software 17 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 18 | # 02110-1301 USA 19 | ######################### END LICENSE BLOCK ######################### 20 | 21 | import sys 22 | 23 | 24 | if sys.version_info < (3, 0): 25 | base_str = (str, unicode) 26 | else: 27 | base_str = (bytes, str) 28 | 29 | 30 | def wrap_ord(a): 31 | if sys.version_info < (3, 0) and isinstance(a, base_str): 32 | return ord(a) 33 | else: 34 | return a 35 | -------------------------------------------------------------------------------- /myrequests/packages/chardet/constants.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # 13 | # This library is free software; you can redistribute it and/or 14 | # modify it under the terms of the GNU Lesser General Public 15 | # License as published by the Free Software Foundation; either 16 | # version 2.1 of the License, or (at your option) any later version. 17 | # 18 | # This library is distributed in the hope that it will be useful, 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 | # Lesser General Public License for more details. 22 | # 23 | # You should have received a copy of the GNU Lesser General Public 24 | # License along with this library; if not, write to the Free Software 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 26 | # 02110-1301 USA 27 | ######################### END LICENSE BLOCK ######################### 28 | 29 | _debug = 0 30 | 31 | eDetecting = 0 32 | eFoundIt = 1 33 | eNotMe = 2 34 | 35 | eStart = 0 36 | eError = 1 37 | eItsMe = 2 38 | 39 | SHORTCUT_THRESHOLD = 0.95 40 | -------------------------------------------------------------------------------- /myrequests/packages/chardet/cp949prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import EUCKRDistributionAnalysis 31 | from .mbcssm import CP949SMModel 32 | 33 | 34 | class CP949Prober(MultiByteCharSetProber): 35 | def __init__(self): 36 | MultiByteCharSetProber.__init__(self) 37 | self._mCodingSM = CodingStateMachine(CP949SMModel) 38 | # NOTE: CP949 is a superset of EUC-KR, so the distribution should be 39 | # not different. 40 | self._mDistributionAnalyzer = EUCKRDistributionAnalysis() 41 | self.reset() 42 | 43 | def get_charset_name(self): 44 | return "CP949" 45 | -------------------------------------------------------------------------------- /myrequests/packages/chardet/escprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from . import constants 29 | from .escsm import (HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel, 30 | ISO2022KRSMModel) 31 | from .charsetprober import CharSetProber 32 | from .codingstatemachine import CodingStateMachine 33 | from .compat import wrap_ord 34 | 35 | 36 | class EscCharSetProber(CharSetProber): 37 | def __init__(self): 38 | CharSetProber.__init__(self) 39 | self._mCodingSM = [ 40 | CodingStateMachine(HZSMModel), 41 | CodingStateMachine(ISO2022CNSMModel), 42 | CodingStateMachine(ISO2022JPSMModel), 43 | CodingStateMachine(ISO2022KRSMModel) 44 | ] 45 | self.reset() 46 | 47 | def reset(self): 48 | CharSetProber.reset(self) 49 | for codingSM in self._mCodingSM: 50 | if not codingSM: 51 | continue 52 | codingSM.active = True 53 | codingSM.reset() 54 | self._mActiveSM = len(self._mCodingSM) 55 | self._mDetectedCharset = None 56 | 57 | def get_charset_name(self): 58 | return self._mDetectedCharset 59 | 60 | def get_confidence(self): 61 | if self._mDetectedCharset: 62 | return 0.99 63 | else: 64 | return 0.00 65 | 66 | def feed(self, aBuf): 67 | for c in aBuf: 68 | # PY3K: aBuf is a byte array, so c is an int, not a byte 69 | for codingSM in self._mCodingSM: 70 | if not codingSM: 71 | continue 72 | if not codingSM.active: 73 | continue 74 | codingState = codingSM.next_state(wrap_ord(c)) 75 | if codingState == constants.eError: 76 | codingSM.active = False 77 | self._mActiveSM -= 1 78 | if self._mActiveSM <= 0: 79 | self._mState = constants.eNotMe 80 | return self.get_state() 81 | elif codingState == constants.eItsMe: 82 | self._mState = constants.eFoundIt 83 | self._mDetectedCharset = codingSM.get_coding_state_machine() # nopep8 84 | return self.get_state() 85 | 86 | return self.get_state() 87 | -------------------------------------------------------------------------------- /myrequests/packages/chardet/escsm.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .constants import eStart, eError, eItsMe 29 | 30 | HZ_cls = ( 31 | 1,0,0,0,0,0,0,0, # 00 - 07 32 | 0,0,0,0,0,0,0,0, # 08 - 0f 33 | 0,0,0,0,0,0,0,0, # 10 - 17 34 | 0,0,0,1,0,0,0,0, # 18 - 1f 35 | 0,0,0,0,0,0,0,0, # 20 - 27 36 | 0,0,0,0,0,0,0,0, # 28 - 2f 37 | 0,0,0,0,0,0,0,0, # 30 - 37 38 | 0,0,0,0,0,0,0,0, # 38 - 3f 39 | 0,0,0,0,0,0,0,0, # 40 - 47 40 | 0,0,0,0,0,0,0,0, # 48 - 4f 41 | 0,0,0,0,0,0,0,0, # 50 - 57 42 | 0,0,0,0,0,0,0,0, # 58 - 5f 43 | 0,0,0,0,0,0,0,0, # 60 - 67 44 | 0,0,0,0,0,0,0,0, # 68 - 6f 45 | 0,0,0,0,0,0,0,0, # 70 - 77 46 | 0,0,0,4,0,5,2,0, # 78 - 7f 47 | 1,1,1,1,1,1,1,1, # 80 - 87 48 | 1,1,1,1,1,1,1,1, # 88 - 8f 49 | 1,1,1,1,1,1,1,1, # 90 - 97 50 | 1,1,1,1,1,1,1,1, # 98 - 9f 51 | 1,1,1,1,1,1,1,1, # a0 - a7 52 | 1,1,1,1,1,1,1,1, # a8 - af 53 | 1,1,1,1,1,1,1,1, # b0 - b7 54 | 1,1,1,1,1,1,1,1, # b8 - bf 55 | 1,1,1,1,1,1,1,1, # c0 - c7 56 | 1,1,1,1,1,1,1,1, # c8 - cf 57 | 1,1,1,1,1,1,1,1, # d0 - d7 58 | 1,1,1,1,1,1,1,1, # d8 - df 59 | 1,1,1,1,1,1,1,1, # e0 - e7 60 | 1,1,1,1,1,1,1,1, # e8 - ef 61 | 1,1,1,1,1,1,1,1, # f0 - f7 62 | 1,1,1,1,1,1,1,1, # f8 - ff 63 | ) 64 | 65 | HZ_st = ( 66 | eStart,eError, 3,eStart,eStart,eStart,eError,eError,# 00-07 67 | eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f 68 | eItsMe,eItsMe,eError,eError,eStart,eStart, 4,eError,# 10-17 69 | 5,eError, 6,eError, 5, 5, 4,eError,# 18-1f 70 | 4,eError, 4, 4, 4,eError, 4,eError,# 20-27 71 | 4,eItsMe,eStart,eStart,eStart,eStart,eStart,eStart,# 28-2f 72 | ) 73 | 74 | HZCharLenTable = (0, 0, 0, 0, 0, 0) 75 | 76 | HZSMModel = {'classTable': HZ_cls, 77 | 'classFactor': 6, 78 | 'stateTable': HZ_st, 79 | 'charLenTable': HZCharLenTable, 80 | 'name': "HZ-GB-2312"} 81 | 82 | ISO2022CN_cls = ( 83 | 2,0,0,0,0,0,0,0, # 00 - 07 84 | 0,0,0,0,0,0,0,0, # 08 - 0f 85 | 0,0,0,0,0,0,0,0, # 10 - 17 86 | 0,0,0,1,0,0,0,0, # 18 - 1f 87 | 0,0,0,0,0,0,0,0, # 20 - 27 88 | 0,3,0,0,0,0,0,0, # 28 - 2f 89 | 0,0,0,0,0,0,0,0, # 30 - 37 90 | 0,0,0,0,0,0,0,0, # 38 - 3f 91 | 0,0,0,4,0,0,0,0, # 40 - 47 92 | 0,0,0,0,0,0,0,0, # 48 - 4f 93 | 0,0,0,0,0,0,0,0, # 50 - 57 94 | 0,0,0,0,0,0,0,0, # 58 - 5f 95 | 0,0,0,0,0,0,0,0, # 60 - 67 96 | 0,0,0,0,0,0,0,0, # 68 - 6f 97 | 0,0,0,0,0,0,0,0, # 70 - 77 98 | 0,0,0,0,0,0,0,0, # 78 - 7f 99 | 2,2,2,2,2,2,2,2, # 80 - 87 100 | 2,2,2,2,2,2,2,2, # 88 - 8f 101 | 2,2,2,2,2,2,2,2, # 90 - 97 102 | 2,2,2,2,2,2,2,2, # 98 - 9f 103 | 2,2,2,2,2,2,2,2, # a0 - a7 104 | 2,2,2,2,2,2,2,2, # a8 - af 105 | 2,2,2,2,2,2,2,2, # b0 - b7 106 | 2,2,2,2,2,2,2,2, # b8 - bf 107 | 2,2,2,2,2,2,2,2, # c0 - c7 108 | 2,2,2,2,2,2,2,2, # c8 - cf 109 | 2,2,2,2,2,2,2,2, # d0 - d7 110 | 2,2,2,2,2,2,2,2, # d8 - df 111 | 2,2,2,2,2,2,2,2, # e0 - e7 112 | 2,2,2,2,2,2,2,2, # e8 - ef 113 | 2,2,2,2,2,2,2,2, # f0 - f7 114 | 2,2,2,2,2,2,2,2, # f8 - ff 115 | ) 116 | 117 | ISO2022CN_st = ( 118 | eStart, 3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07 119 | eStart,eError,eError,eError,eError,eError,eError,eError,# 08-0f 120 | eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17 121 | eItsMe,eItsMe,eItsMe,eError,eError,eError, 4,eError,# 18-1f 122 | eError,eError,eError,eItsMe,eError,eError,eError,eError,# 20-27 123 | 5, 6,eError,eError,eError,eError,eError,eError,# 28-2f 124 | eError,eError,eError,eItsMe,eError,eError,eError,eError,# 30-37 125 | eError,eError,eError,eError,eError,eItsMe,eError,eStart,# 38-3f 126 | ) 127 | 128 | ISO2022CNCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0) 129 | 130 | ISO2022CNSMModel = {'classTable': ISO2022CN_cls, 131 | 'classFactor': 9, 132 | 'stateTable': ISO2022CN_st, 133 | 'charLenTable': ISO2022CNCharLenTable, 134 | 'name': "ISO-2022-CN"} 135 | 136 | ISO2022JP_cls = ( 137 | 2,0,0,0,0,0,0,0, # 00 - 07 138 | 0,0,0,0,0,0,2,2, # 08 - 0f 139 | 0,0,0,0,0,0,0,0, # 10 - 17 140 | 0,0,0,1,0,0,0,0, # 18 - 1f 141 | 0,0,0,0,7,0,0,0, # 20 - 27 142 | 3,0,0,0,0,0,0,0, # 28 - 2f 143 | 0,0,0,0,0,0,0,0, # 30 - 37 144 | 0,0,0,0,0,0,0,0, # 38 - 3f 145 | 6,0,4,0,8,0,0,0, # 40 - 47 146 | 0,9,5,0,0,0,0,0, # 48 - 4f 147 | 0,0,0,0,0,0,0,0, # 50 - 57 148 | 0,0,0,0,0,0,0,0, # 58 - 5f 149 | 0,0,0,0,0,0,0,0, # 60 - 67 150 | 0,0,0,0,0,0,0,0, # 68 - 6f 151 | 0,0,0,0,0,0,0,0, # 70 - 77 152 | 0,0,0,0,0,0,0,0, # 78 - 7f 153 | 2,2,2,2,2,2,2,2, # 80 - 87 154 | 2,2,2,2,2,2,2,2, # 88 - 8f 155 | 2,2,2,2,2,2,2,2, # 90 - 97 156 | 2,2,2,2,2,2,2,2, # 98 - 9f 157 | 2,2,2,2,2,2,2,2, # a0 - a7 158 | 2,2,2,2,2,2,2,2, # a8 - af 159 | 2,2,2,2,2,2,2,2, # b0 - b7 160 | 2,2,2,2,2,2,2,2, # b8 - bf 161 | 2,2,2,2,2,2,2,2, # c0 - c7 162 | 2,2,2,2,2,2,2,2, # c8 - cf 163 | 2,2,2,2,2,2,2,2, # d0 - d7 164 | 2,2,2,2,2,2,2,2, # d8 - df 165 | 2,2,2,2,2,2,2,2, # e0 - e7 166 | 2,2,2,2,2,2,2,2, # e8 - ef 167 | 2,2,2,2,2,2,2,2, # f0 - f7 168 | 2,2,2,2,2,2,2,2, # f8 - ff 169 | ) 170 | 171 | ISO2022JP_st = ( 172 | eStart, 3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07 173 | eStart,eStart,eError,eError,eError,eError,eError,eError,# 08-0f 174 | eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17 175 | eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,# 18-1f 176 | eError, 5,eError,eError,eError, 4,eError,eError,# 20-27 177 | eError,eError,eError, 6,eItsMe,eError,eItsMe,eError,# 28-2f 178 | eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,# 30-37 179 | eError,eError,eError,eItsMe,eError,eError,eError,eError,# 38-3f 180 | eError,eError,eError,eError,eItsMe,eError,eStart,eStart,# 40-47 181 | ) 182 | 183 | ISO2022JPCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0) 184 | 185 | ISO2022JPSMModel = {'classTable': ISO2022JP_cls, 186 | 'classFactor': 10, 187 | 'stateTable': ISO2022JP_st, 188 | 'charLenTable': ISO2022JPCharLenTable, 189 | 'name': "ISO-2022-JP"} 190 | 191 | ISO2022KR_cls = ( 192 | 2,0,0,0,0,0,0,0, # 00 - 07 193 | 0,0,0,0,0,0,0,0, # 08 - 0f 194 | 0,0,0,0,0,0,0,0, # 10 - 17 195 | 0,0,0,1,0,0,0,0, # 18 - 1f 196 | 0,0,0,0,3,0,0,0, # 20 - 27 197 | 0,4,0,0,0,0,0,0, # 28 - 2f 198 | 0,0,0,0,0,0,0,0, # 30 - 37 199 | 0,0,0,0,0,0,0,0, # 38 - 3f 200 | 0,0,0,5,0,0,0,0, # 40 - 47 201 | 0,0,0,0,0,0,0,0, # 48 - 4f 202 | 0,0,0,0,0,0,0,0, # 50 - 57 203 | 0,0,0,0,0,0,0,0, # 58 - 5f 204 | 0,0,0,0,0,0,0,0, # 60 - 67 205 | 0,0,0,0,0,0,0,0, # 68 - 6f 206 | 0,0,0,0,0,0,0,0, # 70 - 77 207 | 0,0,0,0,0,0,0,0, # 78 - 7f 208 | 2,2,2,2,2,2,2,2, # 80 - 87 209 | 2,2,2,2,2,2,2,2, # 88 - 8f 210 | 2,2,2,2,2,2,2,2, # 90 - 97 211 | 2,2,2,2,2,2,2,2, # 98 - 9f 212 | 2,2,2,2,2,2,2,2, # a0 - a7 213 | 2,2,2,2,2,2,2,2, # a8 - af 214 | 2,2,2,2,2,2,2,2, # b0 - b7 215 | 2,2,2,2,2,2,2,2, # b8 - bf 216 | 2,2,2,2,2,2,2,2, # c0 - c7 217 | 2,2,2,2,2,2,2,2, # c8 - cf 218 | 2,2,2,2,2,2,2,2, # d0 - d7 219 | 2,2,2,2,2,2,2,2, # d8 - df 220 | 2,2,2,2,2,2,2,2, # e0 - e7 221 | 2,2,2,2,2,2,2,2, # e8 - ef 222 | 2,2,2,2,2,2,2,2, # f0 - f7 223 | 2,2,2,2,2,2,2,2, # f8 - ff 224 | ) 225 | 226 | ISO2022KR_st = ( 227 | eStart, 3,eError,eStart,eStart,eStart,eError,eError,# 00-07 228 | eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f 229 | eItsMe,eItsMe,eError,eError,eError, 4,eError,eError,# 10-17 230 | eError,eError,eError,eError, 5,eError,eError,eError,# 18-1f 231 | eError,eError,eError,eItsMe,eStart,eStart,eStart,eStart,# 20-27 232 | ) 233 | 234 | ISO2022KRCharLenTable = (0, 0, 0, 0, 0, 0) 235 | 236 | ISO2022KRSMModel = {'classTable': ISO2022KR_cls, 237 | 'classFactor': 6, 238 | 'stateTable': ISO2022KR_st, 239 | 'charLenTable': ISO2022KRCharLenTable, 240 | 'name': "ISO-2022-KR"} 241 | 242 | # flake8: noqa 243 | -------------------------------------------------------------------------------- /myrequests/packages/chardet/eucjpprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | import sys 29 | from . import constants 30 | from .mbcharsetprober import MultiByteCharSetProber 31 | from .codingstatemachine import CodingStateMachine 32 | from .chardistribution import EUCJPDistributionAnalysis 33 | from .jpcntx import EUCJPContextAnalysis 34 | from .mbcssm import EUCJPSMModel 35 | 36 | 37 | class EUCJPProber(MultiByteCharSetProber): 38 | def __init__(self): 39 | MultiByteCharSetProber.__init__(self) 40 | self._mCodingSM = CodingStateMachine(EUCJPSMModel) 41 | self._mDistributionAnalyzer = EUCJPDistributionAnalysis() 42 | self._mContextAnalyzer = EUCJPContextAnalysis() 43 | self.reset() 44 | 45 | def reset(self): 46 | MultiByteCharSetProber.reset(self) 47 | self._mContextAnalyzer.reset() 48 | 49 | def get_charset_name(self): 50 | return "EUC-JP" 51 | 52 | def feed(self, aBuf): 53 | aLen = len(aBuf) 54 | for i in range(0, aLen): 55 | # PY3K: aBuf is a byte array, so aBuf[i] is an int, not a byte 56 | codingState = self._mCodingSM.next_state(aBuf[i]) 57 | if codingState == constants.eError: 58 | if constants._debug: 59 | sys.stderr.write(self.get_charset_name() 60 | + ' prober hit error at byte ' + str(i) 61 | + '\n') 62 | self._mState = constants.eNotMe 63 | break 64 | elif codingState == constants.eItsMe: 65 | self._mState = constants.eFoundIt 66 | break 67 | elif codingState == constants.eStart: 68 | charLen = self._mCodingSM.get_current_charlen() 69 | if i == 0: 70 | self._mLastChar[1] = aBuf[0] 71 | self._mContextAnalyzer.feed(self._mLastChar, charLen) 72 | self._mDistributionAnalyzer.feed(self._mLastChar, charLen) 73 | else: 74 | self._mContextAnalyzer.feed(aBuf[i - 1:i + 1], charLen) 75 | self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1], 76 | charLen) 77 | 78 | self._mLastChar[0] = aBuf[aLen - 1] 79 | 80 | if self.get_state() == constants.eDetecting: 81 | if (self._mContextAnalyzer.got_enough_data() and 82 | (self.get_confidence() > constants.SHORTCUT_THRESHOLD)): 83 | self._mState = constants.eFoundIt 84 | 85 | return self.get_state() 86 | 87 | def get_confidence(self): 88 | contxtCf = self._mContextAnalyzer.get_confidence() 89 | distribCf = self._mDistributionAnalyzer.get_confidence() 90 | return max(contxtCf, distribCf) 91 | -------------------------------------------------------------------------------- /myrequests/packages/chardet/euckrprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import EUCKRDistributionAnalysis 31 | from .mbcssm import EUCKRSMModel 32 | 33 | 34 | class EUCKRProber(MultiByteCharSetProber): 35 | def __init__(self): 36 | MultiByteCharSetProber.__init__(self) 37 | self._mCodingSM = CodingStateMachine(EUCKRSMModel) 38 | self._mDistributionAnalyzer = EUCKRDistributionAnalysis() 39 | self.reset() 40 | 41 | def get_charset_name(self): 42 | return "EUC-KR" 43 | -------------------------------------------------------------------------------- /myrequests/packages/chardet/euctwprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import EUCTWDistributionAnalysis 31 | from .mbcssm import EUCTWSMModel 32 | 33 | class EUCTWProber(MultiByteCharSetProber): 34 | def __init__(self): 35 | MultiByteCharSetProber.__init__(self) 36 | self._mCodingSM = CodingStateMachine(EUCTWSMModel) 37 | self._mDistributionAnalyzer = EUCTWDistributionAnalysis() 38 | self.reset() 39 | 40 | def get_charset_name(self): 41 | return "EUC-TW" 42 | -------------------------------------------------------------------------------- /myrequests/packages/chardet/gb2312prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import GB2312DistributionAnalysis 31 | from .mbcssm import GB2312SMModel 32 | 33 | class GB2312Prober(MultiByteCharSetProber): 34 | def __init__(self): 35 | MultiByteCharSetProber.__init__(self) 36 | self._mCodingSM = CodingStateMachine(GB2312SMModel) 37 | self._mDistributionAnalyzer = GB2312DistributionAnalysis() 38 | self.reset() 39 | 40 | def get_charset_name(self): 41 | return "GB2312" 42 | -------------------------------------------------------------------------------- /myrequests/packages/chardet/latin1prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # 13 | # This library is free software; you can redistribute it and/or 14 | # modify it under the terms of the GNU Lesser General Public 15 | # License as published by the Free Software Foundation; either 16 | # version 2.1 of the License, or (at your option) any later version. 17 | # 18 | # This library is distributed in the hope that it will be useful, 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 | # Lesser General Public License for more details. 22 | # 23 | # You should have received a copy of the GNU Lesser General Public 24 | # License along with this library; if not, write to the Free Software 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 26 | # 02110-1301 USA 27 | ######################### END LICENSE BLOCK ######################### 28 | 29 | from .charsetprober import CharSetProber 30 | from .constants import eNotMe 31 | from .compat import wrap_ord 32 | 33 | FREQ_CAT_NUM = 4 34 | 35 | UDF = 0 # undefined 36 | OTH = 1 # other 37 | ASC = 2 # ascii capital letter 38 | ASS = 3 # ascii small letter 39 | ACV = 4 # accent capital vowel 40 | ACO = 5 # accent capital other 41 | ASV = 6 # accent small vowel 42 | ASO = 7 # accent small other 43 | CLASS_NUM = 8 # total classes 44 | 45 | Latin1_CharToClass = ( 46 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 00 - 07 47 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 08 - 0F 48 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 10 - 17 49 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 18 - 1F 50 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 20 - 27 51 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 28 - 2F 52 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 30 - 37 53 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 38 - 3F 54 | OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 40 - 47 55 | ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 48 - 4F 56 | ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 50 - 57 57 | ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, # 58 - 5F 58 | OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 60 - 67 59 | ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 68 - 6F 60 | ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 70 - 77 61 | ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, # 78 - 7F 62 | OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH, # 80 - 87 63 | OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF, # 88 - 8F 64 | UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 90 - 97 65 | OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO, # 98 - 9F 66 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A0 - A7 67 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A8 - AF 68 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B0 - B7 69 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B8 - BF 70 | ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO, # C0 - C7 71 | ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, # C8 - CF 72 | ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH, # D0 - D7 73 | ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO, # D8 - DF 74 | ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO, # E0 - E7 75 | ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, # E8 - EF 76 | ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH, # F0 - F7 77 | ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, # F8 - FF 78 | ) 79 | 80 | # 0 : illegal 81 | # 1 : very unlikely 82 | # 2 : normal 83 | # 3 : very likely 84 | Latin1ClassModel = ( 85 | # UDF OTH ASC ASS ACV ACO ASV ASO 86 | 0, 0, 0, 0, 0, 0, 0, 0, # UDF 87 | 0, 3, 3, 3, 3, 3, 3, 3, # OTH 88 | 0, 3, 3, 3, 3, 3, 3, 3, # ASC 89 | 0, 3, 3, 3, 1, 1, 3, 3, # ASS 90 | 0, 3, 3, 3, 1, 2, 1, 2, # ACV 91 | 0, 3, 3, 3, 3, 3, 3, 3, # ACO 92 | 0, 3, 1, 3, 1, 1, 1, 3, # ASV 93 | 0, 3, 1, 3, 1, 1, 3, 3, # ASO 94 | ) 95 | 96 | 97 | class Latin1Prober(CharSetProber): 98 | def __init__(self): 99 | CharSetProber.__init__(self) 100 | self.reset() 101 | 102 | def reset(self): 103 | self._mLastCharClass = OTH 104 | self._mFreqCounter = [0] * FREQ_CAT_NUM 105 | CharSetProber.reset(self) 106 | 107 | def get_charset_name(self): 108 | return "windows-1252" 109 | 110 | def feed(self, aBuf): 111 | aBuf = self.filter_with_english_letters(aBuf) 112 | for c in aBuf: 113 | charClass = Latin1_CharToClass[wrap_ord(c)] 114 | freq = Latin1ClassModel[(self._mLastCharClass * CLASS_NUM) 115 | + charClass] 116 | if freq == 0: 117 | self._mState = eNotMe 118 | break 119 | self._mFreqCounter[freq] += 1 120 | self._mLastCharClass = charClass 121 | 122 | return self.get_state() 123 | 124 | def get_confidence(self): 125 | if self.get_state() == eNotMe: 126 | return 0.01 127 | 128 | total = sum(self._mFreqCounter) 129 | if total < 0.01: 130 | confidence = 0.0 131 | else: 132 | confidence = ((self._mFreqCounter[3] - self._mFreqCounter[1] * 20.0) 133 | / total) 134 | if confidence < 0.0: 135 | confidence = 0.0 136 | # lower the confidence of latin1 so that other more accurate 137 | # detector can take priority. 138 | confidence = confidence * 0.73 139 | return confidence 140 | -------------------------------------------------------------------------------- /myrequests/packages/chardet/mbcharsetprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # Proofpoint, Inc. 13 | # 14 | # This library is free software; you can redistribute it and/or 15 | # modify it under the terms of the GNU Lesser General Public 16 | # License as published by the Free Software Foundation; either 17 | # version 2.1 of the License, or (at your option) any later version. 18 | # 19 | # This library is distributed in the hope that it will be useful, 20 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 21 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 | # Lesser General Public License for more details. 23 | # 24 | # You should have received a copy of the GNU Lesser General Public 25 | # License along with this library; if not, write to the Free Software 26 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 27 | # 02110-1301 USA 28 | ######################### END LICENSE BLOCK ######################### 29 | 30 | import sys 31 | from . import constants 32 | from .charsetprober import CharSetProber 33 | 34 | 35 | class MultiByteCharSetProber(CharSetProber): 36 | def __init__(self): 37 | CharSetProber.__init__(self) 38 | self._mDistributionAnalyzer = None 39 | self._mCodingSM = None 40 | self._mLastChar = [0, 0] 41 | 42 | def reset(self): 43 | CharSetProber.reset(self) 44 | if self._mCodingSM: 45 | self._mCodingSM.reset() 46 | if self._mDistributionAnalyzer: 47 | self._mDistributionAnalyzer.reset() 48 | self._mLastChar = [0, 0] 49 | 50 | def get_charset_name(self): 51 | pass 52 | 53 | def feed(self, aBuf): 54 | aLen = len(aBuf) 55 | for i in range(0, aLen): 56 | codingState = self._mCodingSM.next_state(aBuf[i]) 57 | if codingState == constants.eError: 58 | if constants._debug: 59 | sys.stderr.write(self.get_charset_name() 60 | + ' prober hit error at byte ' + str(i) 61 | + '\n') 62 | self._mState = constants.eNotMe 63 | break 64 | elif codingState == constants.eItsMe: 65 | self._mState = constants.eFoundIt 66 | break 67 | elif codingState == constants.eStart: 68 | charLen = self._mCodingSM.get_current_charlen() 69 | if i == 0: 70 | self._mLastChar[1] = aBuf[0] 71 | self._mDistributionAnalyzer.feed(self._mLastChar, charLen) 72 | else: 73 | self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1], 74 | charLen) 75 | 76 | self._mLastChar[0] = aBuf[aLen - 1] 77 | 78 | if self.get_state() == constants.eDetecting: 79 | if (self._mDistributionAnalyzer.got_enough_data() and 80 | (self.get_confidence() > constants.SHORTCUT_THRESHOLD)): 81 | self._mState = constants.eFoundIt 82 | 83 | return self.get_state() 84 | 85 | def get_confidence(self): 86 | return self._mDistributionAnalyzer.get_confidence() 87 | -------------------------------------------------------------------------------- /myrequests/packages/chardet/mbcsgroupprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # Proofpoint, Inc. 13 | # 14 | # This library is free software; you can redistribute it and/or 15 | # modify it under the terms of the GNU Lesser General Public 16 | # License as published by the Free Software Foundation; either 17 | # version 2.1 of the License, or (at your option) any later version. 18 | # 19 | # This library is distributed in the hope that it will be useful, 20 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 21 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 | # Lesser General Public License for more details. 23 | # 24 | # You should have received a copy of the GNU Lesser General Public 25 | # License along with this library; if not, write to the Free Software 26 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 27 | # 02110-1301 USA 28 | ######################### END LICENSE BLOCK ######################### 29 | 30 | from .charsetgroupprober import CharSetGroupProber 31 | from .utf8prober import UTF8Prober 32 | from .sjisprober import SJISProber 33 | from .eucjpprober import EUCJPProber 34 | from .gb2312prober import GB2312Prober 35 | from .euckrprober import EUCKRProber 36 | from .cp949prober import CP949Prober 37 | from .big5prober import Big5Prober 38 | from .euctwprober import EUCTWProber 39 | 40 | 41 | class MBCSGroupProber(CharSetGroupProber): 42 | def __init__(self): 43 | CharSetGroupProber.__init__(self) 44 | self._mProbers = [ 45 | UTF8Prober(), 46 | SJISProber(), 47 | EUCJPProber(), 48 | GB2312Prober(), 49 | EUCKRProber(), 50 | CP949Prober(), 51 | Big5Prober(), 52 | EUCTWProber() 53 | ] 54 | self.reset() 55 | -------------------------------------------------------------------------------- /myrequests/packages/chardet/sbcharsetprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # 13 | # This library is free software; you can redistribute it and/or 14 | # modify it under the terms of the GNU Lesser General Public 15 | # License as published by the Free Software Foundation; either 16 | # version 2.1 of the License, or (at your option) any later version. 17 | # 18 | # This library is distributed in the hope that it will be useful, 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 | # Lesser General Public License for more details. 22 | # 23 | # You should have received a copy of the GNU Lesser General Public 24 | # License along with this library; if not, write to the Free Software 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 26 | # 02110-1301 USA 27 | ######################### END LICENSE BLOCK ######################### 28 | 29 | import sys 30 | from . import constants 31 | from .charsetprober import CharSetProber 32 | from .compat import wrap_ord 33 | 34 | SAMPLE_SIZE = 64 35 | SB_ENOUGH_REL_THRESHOLD = 1024 36 | POSITIVE_SHORTCUT_THRESHOLD = 0.95 37 | NEGATIVE_SHORTCUT_THRESHOLD = 0.05 38 | SYMBOL_CAT_ORDER = 250 39 | NUMBER_OF_SEQ_CAT = 4 40 | POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1 41 | #NEGATIVE_CAT = 0 42 | 43 | 44 | class SingleByteCharSetProber(CharSetProber): 45 | def __init__(self, model, reversed=False, nameProber=None): 46 | CharSetProber.__init__(self) 47 | self._mModel = model 48 | # TRUE if we need to reverse every pair in the model lookup 49 | self._mReversed = reversed 50 | # Optional auxiliary prober for name decision 51 | self._mNameProber = nameProber 52 | self.reset() 53 | 54 | def reset(self): 55 | CharSetProber.reset(self) 56 | # char order of last character 57 | self._mLastOrder = 255 58 | self._mSeqCounters = [0] * NUMBER_OF_SEQ_CAT 59 | self._mTotalSeqs = 0 60 | self._mTotalChar = 0 61 | # characters that fall in our sampling range 62 | self._mFreqChar = 0 63 | 64 | def get_charset_name(self): 65 | if self._mNameProber: 66 | return self._mNameProber.get_charset_name() 67 | else: 68 | return self._mModel['charsetName'] 69 | 70 | def feed(self, aBuf): 71 | if not self._mModel['keepEnglishLetter']: 72 | aBuf = self.filter_without_english_letters(aBuf) 73 | aLen = len(aBuf) 74 | if not aLen: 75 | return self.get_state() 76 | for c in aBuf: 77 | order = self._mModel['charToOrderMap'][wrap_ord(c)] 78 | if order < SYMBOL_CAT_ORDER: 79 | self._mTotalChar += 1 80 | if order < SAMPLE_SIZE: 81 | self._mFreqChar += 1 82 | if self._mLastOrder < SAMPLE_SIZE: 83 | self._mTotalSeqs += 1 84 | if not self._mReversed: 85 | i = (self._mLastOrder * SAMPLE_SIZE) + order 86 | model = self._mModel['precedenceMatrix'][i] 87 | else: # reverse the order of the letters in the lookup 88 | i = (order * SAMPLE_SIZE) + self._mLastOrder 89 | model = self._mModel['precedenceMatrix'][i] 90 | self._mSeqCounters[model] += 1 91 | self._mLastOrder = order 92 | 93 | if self.get_state() == constants.eDetecting: 94 | if self._mTotalSeqs > SB_ENOUGH_REL_THRESHOLD: 95 | cf = self.get_confidence() 96 | if cf > POSITIVE_SHORTCUT_THRESHOLD: 97 | if constants._debug: 98 | sys.stderr.write('%s confidence = %s, we have a' 99 | 'winner\n' % 100 | (self._mModel['charsetName'], cf)) 101 | self._mState = constants.eFoundIt 102 | elif cf < NEGATIVE_SHORTCUT_THRESHOLD: 103 | if constants._debug: 104 | sys.stderr.write('%s confidence = %s, below negative' 105 | 'shortcut threshhold %s\n' % 106 | (self._mModel['charsetName'], cf, 107 | NEGATIVE_SHORTCUT_THRESHOLD)) 108 | self._mState = constants.eNotMe 109 | 110 | return self.get_state() 111 | 112 | def get_confidence(self): 113 | r = 0.01 114 | if self._mTotalSeqs > 0: 115 | r = ((1.0 * self._mSeqCounters[POSITIVE_CAT]) / self._mTotalSeqs 116 | / self._mModel['mTypicalPositiveRatio']) 117 | r = r * self._mFreqChar / self._mTotalChar 118 | if r >= 1.0: 119 | r = 0.99 120 | return r 121 | -------------------------------------------------------------------------------- /myrequests/packages/chardet/sbcsgroupprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # 13 | # This library is free software; you can redistribute it and/or 14 | # modify it under the terms of the GNU Lesser General Public 15 | # License as published by the Free Software Foundation; either 16 | # version 2.1 of the License, or (at your option) any later version. 17 | # 18 | # This library is distributed in the hope that it will be useful, 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 | # Lesser General Public License for more details. 22 | # 23 | # You should have received a copy of the GNU Lesser General Public 24 | # License along with this library; if not, write to the Free Software 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 26 | # 02110-1301 USA 27 | ######################### END LICENSE BLOCK ######################### 28 | 29 | from .charsetgroupprober import CharSetGroupProber 30 | from .sbcharsetprober import SingleByteCharSetProber 31 | from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel, 32 | Latin5CyrillicModel, MacCyrillicModel, 33 | Ibm866Model, Ibm855Model) 34 | from .langgreekmodel import Latin7GreekModel, Win1253GreekModel 35 | from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel 36 | from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel 37 | from .langthaimodel import TIS620ThaiModel 38 | from .langhebrewmodel import Win1255HebrewModel 39 | from .hebrewprober import HebrewProber 40 | 41 | 42 | class SBCSGroupProber(CharSetGroupProber): 43 | def __init__(self): 44 | CharSetGroupProber.__init__(self) 45 | self._mProbers = [ 46 | SingleByteCharSetProber(Win1251CyrillicModel), 47 | SingleByteCharSetProber(Koi8rModel), 48 | SingleByteCharSetProber(Latin5CyrillicModel), 49 | SingleByteCharSetProber(MacCyrillicModel), 50 | SingleByteCharSetProber(Ibm866Model), 51 | SingleByteCharSetProber(Ibm855Model), 52 | SingleByteCharSetProber(Latin7GreekModel), 53 | SingleByteCharSetProber(Win1253GreekModel), 54 | SingleByteCharSetProber(Latin5BulgarianModel), 55 | SingleByteCharSetProber(Win1251BulgarianModel), 56 | SingleByteCharSetProber(Latin2HungarianModel), 57 | SingleByteCharSetProber(Win1250HungarianModel), 58 | SingleByteCharSetProber(TIS620ThaiModel), 59 | ] 60 | hebrewProber = HebrewProber() 61 | logicalHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, 62 | False, hebrewProber) 63 | visualHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, True, 64 | hebrewProber) 65 | hebrewProber.set_model_probers(logicalHebrewProber, visualHebrewProber) 66 | self._mProbers.extend([hebrewProber, logicalHebrewProber, 67 | visualHebrewProber]) 68 | 69 | self.reset() 70 | -------------------------------------------------------------------------------- /myrequests/packages/chardet/sjisprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | import sys 29 | from .mbcharsetprober import MultiByteCharSetProber 30 | from .codingstatemachine import CodingStateMachine 31 | from .chardistribution import SJISDistributionAnalysis 32 | from .jpcntx import SJISContextAnalysis 33 | from .mbcssm import SJISSMModel 34 | from . import constants 35 | 36 | 37 | class SJISProber(MultiByteCharSetProber): 38 | def __init__(self): 39 | MultiByteCharSetProber.__init__(self) 40 | self._mCodingSM = CodingStateMachine(SJISSMModel) 41 | self._mDistributionAnalyzer = SJISDistributionAnalysis() 42 | self._mContextAnalyzer = SJISContextAnalysis() 43 | self.reset() 44 | 45 | def reset(self): 46 | MultiByteCharSetProber.reset(self) 47 | self._mContextAnalyzer.reset() 48 | 49 | def get_charset_name(self): 50 | return self._mContextAnalyzer.get_charset_name() 51 | 52 | def feed(self, aBuf): 53 | aLen = len(aBuf) 54 | for i in range(0, aLen): 55 | codingState = self._mCodingSM.next_state(aBuf[i]) 56 | if codingState == constants.eError: 57 | if constants._debug: 58 | sys.stderr.write(self.get_charset_name() 59 | + ' prober hit error at byte ' + str(i) 60 | + '\n') 61 | self._mState = constants.eNotMe 62 | break 63 | elif codingState == constants.eItsMe: 64 | self._mState = constants.eFoundIt 65 | break 66 | elif codingState == constants.eStart: 67 | charLen = self._mCodingSM.get_current_charlen() 68 | if i == 0: 69 | self._mLastChar[1] = aBuf[0] 70 | self._mContextAnalyzer.feed(self._mLastChar[2 - charLen:], 71 | charLen) 72 | self._mDistributionAnalyzer.feed(self._mLastChar, charLen) 73 | else: 74 | self._mContextAnalyzer.feed(aBuf[i + 1 - charLen:i + 3 75 | - charLen], charLen) 76 | self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1], 77 | charLen) 78 | 79 | self._mLastChar[0] = aBuf[aLen - 1] 80 | 81 | if self.get_state() == constants.eDetecting: 82 | if (self._mContextAnalyzer.got_enough_data() and 83 | (self.get_confidence() > constants.SHORTCUT_THRESHOLD)): 84 | self._mState = constants.eFoundIt 85 | 86 | return self.get_state() 87 | 88 | def get_confidence(self): 89 | contxtCf = self._mContextAnalyzer.get_confidence() 90 | distribCf = self._mDistributionAnalyzer.get_confidence() 91 | return max(contxtCf, distribCf) 92 | -------------------------------------------------------------------------------- /myrequests/packages/chardet/universaldetector.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # 13 | # This library is free software; you can redistribute it and/or 14 | # modify it under the terms of the GNU Lesser General Public 15 | # License as published by the Free Software Foundation; either 16 | # version 2.1 of the License, or (at your option) any later version. 17 | # 18 | # This library is distributed in the hope that it will be useful, 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 | # Lesser General Public License for more details. 22 | # 23 | # You should have received a copy of the GNU Lesser General Public 24 | # License along with this library; if not, write to the Free Software 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 26 | # 02110-1301 USA 27 | ######################### END LICENSE BLOCK ######################### 28 | 29 | from . import constants 30 | import sys 31 | import codecs 32 | from .latin1prober import Latin1Prober # windows-1252 33 | from .mbcsgroupprober import MBCSGroupProber # multi-byte character sets 34 | from .sbcsgroupprober import SBCSGroupProber # single-byte character sets 35 | from .escprober import EscCharSetProber # ISO-2122, etc. 36 | import re 37 | 38 | MINIMUM_THRESHOLD = 0.20 39 | ePureAscii = 0 40 | eEscAscii = 1 41 | eHighbyte = 2 42 | 43 | 44 | class UniversalDetector: 45 | def __init__(self): 46 | self._highBitDetector = re.compile(b'[\x80-\xFF]') 47 | self._escDetector = re.compile(b'(\033|~{)') 48 | self._mEscCharSetProber = None 49 | self._mCharSetProbers = [] 50 | self.reset() 51 | 52 | def reset(self): 53 | self.result = {'encoding': None, 'confidence': 0.0} 54 | self.done = False 55 | self._mStart = True 56 | self._mGotData = False 57 | self._mInputState = ePureAscii 58 | self._mLastChar = b'' 59 | if self._mEscCharSetProber: 60 | self._mEscCharSetProber.reset() 61 | for prober in self._mCharSetProbers: 62 | prober.reset() 63 | 64 | def feed(self, aBuf): 65 | if self.done: 66 | return 67 | 68 | aLen = len(aBuf) 69 | if not aLen: 70 | return 71 | 72 | if not self._mGotData: 73 | # If the data starts with BOM, we know it is UTF 74 | if aBuf[:3] == codecs.BOM_UTF8: 75 | # EF BB BF UTF-8 with BOM 76 | self.result = {'encoding': "UTF-8-SIG", 'confidence': 1.0} 77 | elif aBuf[:4] == codecs.BOM_UTF32_LE: 78 | # FF FE 00 00 UTF-32, little-endian BOM 79 | self.result = {'encoding': "UTF-32LE", 'confidence': 1.0} 80 | elif aBuf[:4] == codecs.BOM_UTF32_BE: 81 | # 00 00 FE FF UTF-32, big-endian BOM 82 | self.result = {'encoding': "UTF-32BE", 'confidence': 1.0} 83 | elif aBuf[:4] == b'\xFE\xFF\x00\x00': 84 | # FE FF 00 00 UCS-4, unusual octet order BOM (3412) 85 | self.result = { 86 | 'encoding': "X-ISO-10646-UCS-4-3412", 87 | 'confidence': 1.0 88 | } 89 | elif aBuf[:4] == b'\x00\x00\xFF\xFE': 90 | # 00 00 FF FE UCS-4, unusual octet order BOM (2143) 91 | self.result = { 92 | 'encoding': "X-ISO-10646-UCS-4-2143", 93 | 'confidence': 1.0 94 | } 95 | elif aBuf[:2] == codecs.BOM_LE: 96 | # FF FE UTF-16, little endian BOM 97 | self.result = {'encoding': "UTF-16LE", 'confidence': 1.0} 98 | elif aBuf[:2] == codecs.BOM_BE: 99 | # FE FF UTF-16, big endian BOM 100 | self.result = {'encoding': "UTF-16BE", 'confidence': 1.0} 101 | 102 | self._mGotData = True 103 | if self.result['encoding'] and (self.result['confidence'] > 0.0): 104 | self.done = True 105 | return 106 | 107 | if self._mInputState == ePureAscii: 108 | if self._highBitDetector.search(aBuf): 109 | self._mInputState = eHighbyte 110 | elif ((self._mInputState == ePureAscii) and 111 | self._escDetector.search(self._mLastChar + aBuf)): 112 | self._mInputState = eEscAscii 113 | 114 | self._mLastChar = aBuf[-1:] 115 | 116 | if self._mInputState == eEscAscii: 117 | if not self._mEscCharSetProber: 118 | self._mEscCharSetProber = EscCharSetProber() 119 | if self._mEscCharSetProber.feed(aBuf) == constants.eFoundIt: 120 | self.result = {'encoding': self._mEscCharSetProber.get_charset_name(), 121 | 'confidence': self._mEscCharSetProber.get_confidence()} 122 | self.done = True 123 | elif self._mInputState == eHighbyte: 124 | if not self._mCharSetProbers: 125 | self._mCharSetProbers = [MBCSGroupProber(), SBCSGroupProber(), 126 | Latin1Prober()] 127 | for prober in self._mCharSetProbers: 128 | if prober.feed(aBuf) == constants.eFoundIt: 129 | self.result = {'encoding': prober.get_charset_name(), 130 | 'confidence': prober.get_confidence()} 131 | self.done = True 132 | break 133 | 134 | def close(self): 135 | if self.done: 136 | return 137 | if not self._mGotData: 138 | if constants._debug: 139 | sys.stderr.write('no data received!\n') 140 | return 141 | self.done = True 142 | 143 | if self._mInputState == ePureAscii: 144 | self.result = {'encoding': 'ascii', 'confidence': 1.0} 145 | return self.result 146 | 147 | if self._mInputState == eHighbyte: 148 | proberConfidence = None 149 | maxProberConfidence = 0.0 150 | maxProber = None 151 | for prober in self._mCharSetProbers: 152 | if not prober: 153 | continue 154 | proberConfidence = prober.get_confidence() 155 | if proberConfidence > maxProberConfidence: 156 | maxProberConfidence = proberConfidence 157 | maxProber = prober 158 | if maxProber and (maxProberConfidence > MINIMUM_THRESHOLD): 159 | self.result = {'encoding': maxProber.get_charset_name(), 160 | 'confidence': maxProber.get_confidence()} 161 | return self.result 162 | 163 | if constants._debug: 164 | sys.stderr.write('no probers hit minimum threshhold\n') 165 | for prober in self._mCharSetProbers[0].mProbers: 166 | if not prober: 167 | continue 168 | sys.stderr.write('%s confidence = %s\n' % 169 | (prober.get_charset_name(), 170 | prober.get_confidence())) 171 | -------------------------------------------------------------------------------- /myrequests/packages/chardet/utf8prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from . import constants 29 | from .charsetprober import CharSetProber 30 | from .codingstatemachine import CodingStateMachine 31 | from .mbcssm import UTF8SMModel 32 | 33 | ONE_CHAR_PROB = 0.5 34 | 35 | 36 | class UTF8Prober(CharSetProber): 37 | def __init__(self): 38 | CharSetProber.__init__(self) 39 | self._mCodingSM = CodingStateMachine(UTF8SMModel) 40 | self.reset() 41 | 42 | def reset(self): 43 | CharSetProber.reset(self) 44 | self._mCodingSM.reset() 45 | self._mNumOfMBChar = 0 46 | 47 | def get_charset_name(self): 48 | return "utf-8" 49 | 50 | def feed(self, aBuf): 51 | for c in aBuf: 52 | codingState = self._mCodingSM.next_state(c) 53 | if codingState == constants.eError: 54 | self._mState = constants.eNotMe 55 | break 56 | elif codingState == constants.eItsMe: 57 | self._mState = constants.eFoundIt 58 | break 59 | elif codingState == constants.eStart: 60 | if self._mCodingSM.get_current_charlen() >= 2: 61 | self._mNumOfMBChar += 1 62 | 63 | if self.get_state() == constants.eDetecting: 64 | if self.get_confidence() > constants.SHORTCUT_THRESHOLD: 65 | self._mState = constants.eFoundIt 66 | 67 | return self.get_state() 68 | 69 | def get_confidence(self): 70 | unlike = 0.99 71 | if self._mNumOfMBChar < 6: 72 | for i in range(0, self._mNumOfMBChar): 73 | unlike = unlike * ONE_CHAR_PROB 74 | return 1.0 - unlike 75 | else: 76 | return unlike 77 | -------------------------------------------------------------------------------- /myrequests/packages/urllib3/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | urllib3 - Thread-safe connection pooling and re-using. 3 | """ 4 | 5 | __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' 6 | __license__ = 'MIT' 7 | __version__ = '1.10.4' 8 | 9 | 10 | from .connectionpool import ( 11 | HTTPConnectionPool, 12 | HTTPSConnectionPool, 13 | connection_from_url 14 | ) 15 | 16 | from . import exceptions 17 | from .filepost import encode_multipart_formdata 18 | from .poolmanager import PoolManager, ProxyManager, proxy_from_url 19 | from .response import HTTPResponse 20 | from .util.request import make_headers 21 | from .util.url import get_host 22 | from .util.timeout import Timeout 23 | from .util.retry import Retry 24 | 25 | 26 | # Set default logging handler to avoid "No handler found" warnings. 27 | import logging 28 | try: # Python 2.7+ 29 | from logging import NullHandler 30 | except ImportError: 31 | class NullHandler(logging.Handler): 32 | def emit(self, record): 33 | pass 34 | 35 | logging.getLogger(__name__).addHandler(NullHandler()) 36 | 37 | def add_stderr_logger(level=logging.DEBUG): 38 | """ 39 | Helper for quickly adding a StreamHandler to the logger. Useful for 40 | debugging. 41 | 42 | Returns the handler after adding it. 43 | """ 44 | # This method needs to be in this __init__.py to get the __name__ correct 45 | # even if urllib3 is vendored within another package. 46 | logger = logging.getLogger(__name__) 47 | handler = logging.StreamHandler() 48 | handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s')) 49 | logger.addHandler(handler) 50 | logger.setLevel(level) 51 | logger.debug('Added a stderr logging handler to logger: %s' % __name__) 52 | return handler 53 | 54 | # ... Clean up. 55 | del NullHandler 56 | 57 | 58 | import warnings 59 | # SecurityWarning's always go off by default. 60 | warnings.simplefilter('always', exceptions.SecurityWarning, append=True) 61 | # InsecurePlatformWarning's don't vary between requests, so we keep it default. 62 | warnings.simplefilter('default', exceptions.InsecurePlatformWarning, 63 | append=True) 64 | 65 | def disable_warnings(category=exceptions.HTTPWarning): 66 | """ 67 | Helper for quickly disabling all urllib3 warnings. 68 | """ 69 | warnings.simplefilter('ignore', category) 70 | -------------------------------------------------------------------------------- /myrequests/packages/urllib3/connection.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import sys 3 | import socket 4 | from socket import timeout as SocketTimeout 5 | import warnings 6 | from .packages import six 7 | 8 | try: # Python 3 9 | from http.client import HTTPConnection as _HTTPConnection, HTTPException 10 | except ImportError: 11 | from httplib import HTTPConnection as _HTTPConnection, HTTPException 12 | 13 | 14 | class DummyConnection(object): 15 | "Used to detect a failed ConnectionCls import." 16 | pass 17 | 18 | 19 | try: # Compiled with SSL? 20 | HTTPSConnection = DummyConnection 21 | import ssl 22 | BaseSSLError = ssl.SSLError 23 | except (ImportError, AttributeError): # Platform-specific: No SSL. 24 | ssl = None 25 | 26 | class BaseSSLError(BaseException): 27 | pass 28 | 29 | 30 | try: # Python 3: 31 | # Not a no-op, we're adding this to the namespace so it can be imported. 32 | ConnectionError = ConnectionError 33 | except NameError: # Python 2: 34 | class ConnectionError(Exception): 35 | pass 36 | 37 | 38 | from .exceptions import ( 39 | ConnectTimeoutError, 40 | SystemTimeWarning, 41 | SecurityWarning, 42 | ) 43 | from .packages.ssl_match_hostname import match_hostname 44 | 45 | from .util.ssl_ import ( 46 | resolve_cert_reqs, 47 | resolve_ssl_version, 48 | ssl_wrap_socket, 49 | assert_fingerprint, 50 | ) 51 | 52 | 53 | from .util import connection 54 | 55 | port_by_scheme = { 56 | 'http': 80, 57 | 'https': 443, 58 | } 59 | 60 | RECENT_DATE = datetime.date(2014, 1, 1) 61 | 62 | 63 | class HTTPConnection(_HTTPConnection, object): 64 | """ 65 | Based on httplib.HTTPConnection but provides an extra constructor 66 | backwards-compatibility layer between older and newer Pythons. 67 | 68 | Additional keyword parameters are used to configure attributes of the connection. 69 | Accepted parameters include: 70 | 71 | - ``strict``: See the documentation on :class:`urllib3.connectionpool.HTTPConnectionPool` 72 | - ``source_address``: Set the source address for the current connection. 73 | 74 | .. note:: This is ignored for Python 2.6. It is only applied for 2.7 and 3.x 75 | 76 | - ``socket_options``: Set specific options on the underlying socket. If not specified, then 77 | defaults are loaded from ``HTTPConnection.default_socket_options`` which includes disabling 78 | Nagle's algorithm (sets TCP_NODELAY to 1) unless the connection is behind a proxy. 79 | 80 | For example, if you wish to enable TCP Keep Alive in addition to the defaults, 81 | you might pass:: 82 | 83 | HTTPConnection.default_socket_options + [ 84 | (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1), 85 | ] 86 | 87 | Or you may want to disable the defaults by passing an empty list (e.g., ``[]``). 88 | """ 89 | 90 | default_port = port_by_scheme['http'] 91 | 92 | #: Disable Nagle's algorithm by default. 93 | #: ``[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]`` 94 | default_socket_options = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)] 95 | 96 | #: Whether this connection verifies the host's certificate. 97 | is_verified = False 98 | 99 | def __init__(self, *args, **kw): 100 | if six.PY3: # Python 3 101 | kw.pop('strict', None) 102 | 103 | # Pre-set source_address in case we have an older Python like 2.6. 104 | self.source_address = kw.get('source_address') 105 | 106 | if sys.version_info < (2, 7): # Python 2.6 107 | # _HTTPConnection on Python 2.6 will balk at this keyword arg, but 108 | # not newer versions. We can still use it when creating a 109 | # connection though, so we pop it *after* we have saved it as 110 | # self.source_address. 111 | kw.pop('source_address', None) 112 | 113 | #: The socket options provided by the user. If no options are 114 | #: provided, we use the default options. 115 | self.socket_options = kw.pop('socket_options', self.default_socket_options) 116 | 117 | # Superclass also sets self.source_address in Python 2.7+. 118 | _HTTPConnection.__init__(self, *args, **kw) 119 | 120 | def _new_conn(self): 121 | """ Establish a socket connection and set nodelay settings on it. 122 | 123 | :return: New socket connection. 124 | """ 125 | extra_kw = {} 126 | if self.source_address: 127 | extra_kw['source_address'] = self.source_address 128 | 129 | if self.socket_options: 130 | extra_kw['socket_options'] = self.socket_options 131 | 132 | try: 133 | conn = connection.create_connection( 134 | (self.host, self.port), self.timeout, **extra_kw) 135 | 136 | except SocketTimeout: 137 | raise ConnectTimeoutError( 138 | self, "Connection to %s timed out. (connect timeout=%s)" % 139 | (self.host, self.timeout)) 140 | 141 | return conn 142 | 143 | def _prepare_conn(self, conn): 144 | self.sock = conn 145 | # the _tunnel_host attribute was added in python 2.6.3 (via 146 | # http://hg.python.org/cpython/rev/0f57b30a152f) so pythons 2.6(0-2) do 147 | # not have them. 148 | if getattr(self, '_tunnel_host', None): 149 | # TODO: Fix tunnel so it doesn't depend on self.sock state. 150 | self._tunnel() 151 | # Mark this connection as not reusable 152 | self.auto_open = 0 153 | 154 | def connect(self): 155 | conn = self._new_conn() 156 | self._prepare_conn(conn) 157 | 158 | 159 | class HTTPSConnection(HTTPConnection): 160 | default_port = port_by_scheme['https'] 161 | 162 | def __init__(self, host, port=None, key_file=None, cert_file=None, 163 | strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, **kw): 164 | 165 | HTTPConnection.__init__(self, host, port, strict=strict, 166 | timeout=timeout, **kw) 167 | 168 | self.key_file = key_file 169 | self.cert_file = cert_file 170 | 171 | # Required property for Google AppEngine 1.9.0 which otherwise causes 172 | # HTTPS requests to go out as HTTP. (See Issue #356) 173 | self._protocol = 'https' 174 | 175 | def connect(self): 176 | conn = self._new_conn() 177 | self._prepare_conn(conn) 178 | self.sock = ssl.wrap_socket(conn, self.key_file, self.cert_file) 179 | 180 | 181 | class VerifiedHTTPSConnection(HTTPSConnection): 182 | """ 183 | Based on httplib.HTTPSConnection but wraps the socket with 184 | SSL certification. 185 | """ 186 | cert_reqs = None 187 | ca_certs = None 188 | ssl_version = None 189 | assert_fingerprint = None 190 | 191 | def set_cert(self, key_file=None, cert_file=None, 192 | cert_reqs=None, ca_certs=None, 193 | assert_hostname=None, assert_fingerprint=None): 194 | 195 | self.key_file = key_file 196 | self.cert_file = cert_file 197 | self.cert_reqs = cert_reqs 198 | self.ca_certs = ca_certs 199 | self.assert_hostname = assert_hostname 200 | self.assert_fingerprint = assert_fingerprint 201 | 202 | def connect(self): 203 | # Add certificate verification 204 | conn = self._new_conn() 205 | 206 | resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs) 207 | resolved_ssl_version = resolve_ssl_version(self.ssl_version) 208 | 209 | hostname = self.host 210 | if getattr(self, '_tunnel_host', None): 211 | # _tunnel_host was added in Python 2.6.3 212 | # (See: http://hg.python.org/cpython/rev/0f57b30a152f) 213 | 214 | self.sock = conn 215 | # Calls self._set_hostport(), so self.host is 216 | # self._tunnel_host below. 217 | self._tunnel() 218 | # Mark this connection as not reusable 219 | self.auto_open = 0 220 | 221 | # Override the host with the one we're requesting data from. 222 | hostname = self._tunnel_host 223 | 224 | is_time_off = datetime.date.today() < RECENT_DATE 225 | if is_time_off: 226 | warnings.warn(( 227 | 'System time is way off (before {0}). This will probably ' 228 | 'lead to SSL verification errors').format(RECENT_DATE), 229 | SystemTimeWarning 230 | ) 231 | 232 | # Wrap socket using verification with the root certs in 233 | # trusted_root_certs 234 | self.sock = ssl_wrap_socket(conn, self.key_file, self.cert_file, 235 | cert_reqs=resolved_cert_reqs, 236 | ca_certs=self.ca_certs, 237 | server_hostname=hostname, 238 | ssl_version=resolved_ssl_version) 239 | 240 | if self.assert_fingerprint: 241 | assert_fingerprint(self.sock.getpeercert(binary_form=True), 242 | self.assert_fingerprint) 243 | elif resolved_cert_reqs != ssl.CERT_NONE \ 244 | and self.assert_hostname is not False: 245 | cert = self.sock.getpeercert() 246 | if not cert.get('subjectAltName', ()): 247 | warnings.warn(( 248 | 'Certificate has no `subjectAltName`, falling back to check for a `commonName` for now. ' 249 | 'This feature is being removed by major browsers and deprecated by RFC 2818. ' 250 | '(See https://github.com/shazow/urllib3/issues/497 for details.)'), 251 | SecurityWarning 252 | ) 253 | match_hostname(cert, self.assert_hostname or hostname) 254 | 255 | self.is_verified = (resolved_cert_reqs == ssl.CERT_REQUIRED 256 | or self.assert_fingerprint is not None) 257 | 258 | 259 | if ssl: 260 | # Make a copy for testing. 261 | UnverifiedHTTPSConnection = HTTPSConnection 262 | HTTPSConnection = VerifiedHTTPSConnection 263 | else: 264 | HTTPSConnection = DummyConnection 265 | -------------------------------------------------------------------------------- /myrequests/packages/urllib3/contrib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BrendanBenshoof/cachewarmer/2472f51a4032326ba76a6c2865a75c5a86a6b659/myrequests/packages/urllib3/contrib/__init__.py -------------------------------------------------------------------------------- /myrequests/packages/urllib3/contrib/ntlmpool.py: -------------------------------------------------------------------------------- 1 | """ 2 | NTLM authenticating pool, contributed by erikcederstran 3 | 4 | Issue #10, see: http://code.google.com/p/urllib3/issues/detail?id=10 5 | """ 6 | 7 | try: 8 | from http.client import HTTPSConnection 9 | except ImportError: 10 | from httplib import HTTPSConnection 11 | from logging import getLogger 12 | from ntlm import ntlm 13 | 14 | from urllib3 import HTTPSConnectionPool 15 | 16 | 17 | log = getLogger(__name__) 18 | 19 | 20 | class NTLMConnectionPool(HTTPSConnectionPool): 21 | """ 22 | Implements an NTLM authentication version of an urllib3 connection pool 23 | """ 24 | 25 | scheme = 'https' 26 | 27 | def __init__(self, user, pw, authurl, *args, **kwargs): 28 | """ 29 | authurl is a random URL on the server that is protected by NTLM. 30 | user is the Windows user, probably in the DOMAIN\\username format. 31 | pw is the password for the user. 32 | """ 33 | super(NTLMConnectionPool, self).__init__(*args, **kwargs) 34 | self.authurl = authurl 35 | self.rawuser = user 36 | user_parts = user.split('\\', 1) 37 | self.domain = user_parts[0].upper() 38 | self.user = user_parts[1] 39 | self.pw = pw 40 | 41 | def _new_conn(self): 42 | # Performs the NTLM handshake that secures the connection. The socket 43 | # must be kept open while requests are performed. 44 | self.num_connections += 1 45 | log.debug('Starting NTLM HTTPS connection no. %d: https://%s%s' % 46 | (self.num_connections, self.host, self.authurl)) 47 | 48 | headers = {} 49 | headers['Connection'] = 'Keep-Alive' 50 | req_header = 'Authorization' 51 | resp_header = 'www-authenticate' 52 | 53 | conn = HTTPSConnection(host=self.host, port=self.port) 54 | 55 | # Send negotiation message 56 | headers[req_header] = ( 57 | 'NTLM %s' % ntlm.create_NTLM_NEGOTIATE_MESSAGE(self.rawuser)) 58 | log.debug('Request headers: %s' % headers) 59 | conn.request('GET', self.authurl, None, headers) 60 | res = conn.getresponse() 61 | reshdr = dict(res.getheaders()) 62 | log.debug('Response status: %s %s' % (res.status, res.reason)) 63 | log.debug('Response headers: %s' % reshdr) 64 | log.debug('Response data: %s [...]' % res.read(100)) 65 | 66 | # Remove the reference to the socket, so that it can not be closed by 67 | # the response object (we want to keep the socket open) 68 | res.fp = None 69 | 70 | # Server should respond with a challenge message 71 | auth_header_values = reshdr[resp_header].split(', ') 72 | auth_header_value = None 73 | for s in auth_header_values: 74 | if s[:5] == 'NTLM ': 75 | auth_header_value = s[5:] 76 | if auth_header_value is None: 77 | raise Exception('Unexpected %s response header: %s' % 78 | (resp_header, reshdr[resp_header])) 79 | 80 | # Send authentication message 81 | ServerChallenge, NegotiateFlags = \ 82 | ntlm.parse_NTLM_CHALLENGE_MESSAGE(auth_header_value) 83 | auth_msg = ntlm.create_NTLM_AUTHENTICATE_MESSAGE(ServerChallenge, 84 | self.user, 85 | self.domain, 86 | self.pw, 87 | NegotiateFlags) 88 | headers[req_header] = 'NTLM %s' % auth_msg 89 | log.debug('Request headers: %s' % headers) 90 | conn.request('GET', self.authurl, None, headers) 91 | res = conn.getresponse() 92 | log.debug('Response status: %s %s' % (res.status, res.reason)) 93 | log.debug('Response headers: %s' % dict(res.getheaders())) 94 | log.debug('Response data: %s [...]' % res.read()[:100]) 95 | if res.status != 200: 96 | if res.status == 401: 97 | raise Exception('Server rejected request: wrong ' 98 | 'username or password') 99 | raise Exception('Wrong server response: %s %s' % 100 | (res.status, res.reason)) 101 | 102 | res.fp = None 103 | log.debug('Connection established') 104 | return conn 105 | 106 | def urlopen(self, method, url, body=None, headers=None, retries=3, 107 | redirect=True, assert_same_host=True): 108 | if headers is None: 109 | headers = {} 110 | headers['Connection'] = 'Keep-Alive' 111 | return super(NTLMConnectionPool, self).urlopen(method, url, body, 112 | headers, retries, 113 | redirect, 114 | assert_same_host) 115 | -------------------------------------------------------------------------------- /myrequests/packages/urllib3/exceptions.py: -------------------------------------------------------------------------------- 1 | 2 | ## Base Exceptions 3 | 4 | class HTTPError(Exception): 5 | "Base exception used by this module." 6 | pass 7 | 8 | class HTTPWarning(Warning): 9 | "Base warning used by this module." 10 | pass 11 | 12 | 13 | 14 | class PoolError(HTTPError): 15 | "Base exception for errors caused within a pool." 16 | def __init__(self, pool, message): 17 | self.pool = pool 18 | HTTPError.__init__(self, "%s: %s" % (pool, message)) 19 | 20 | def __reduce__(self): 21 | # For pickling purposes. 22 | return self.__class__, (None, None) 23 | 24 | 25 | class RequestError(PoolError): 26 | "Base exception for PoolErrors that have associated URLs." 27 | def __init__(self, pool, url, message): 28 | self.url = url 29 | PoolError.__init__(self, pool, message) 30 | 31 | def __reduce__(self): 32 | # For pickling purposes. 33 | return self.__class__, (None, self.url, None) 34 | 35 | 36 | class SSLError(HTTPError): 37 | "Raised when SSL certificate fails in an HTTPS connection." 38 | pass 39 | 40 | 41 | class ProxyError(HTTPError): 42 | "Raised when the connection to a proxy fails." 43 | pass 44 | 45 | 46 | class DecodeError(HTTPError): 47 | "Raised when automatic decoding based on Content-Type fails." 48 | pass 49 | 50 | 51 | class ProtocolError(HTTPError): 52 | "Raised when something unexpected happens mid-request/response." 53 | pass 54 | 55 | 56 | #: Renamed to ProtocolError but aliased for backwards compatibility. 57 | ConnectionError = ProtocolError 58 | 59 | 60 | ## Leaf Exceptions 61 | 62 | class MaxRetryError(RequestError): 63 | """Raised when the maximum number of retries is exceeded. 64 | 65 | :param pool: The connection pool 66 | :type pool: :class:`~urllib3.connectionpool.HTTPConnectionPool` 67 | :param string url: The requested Url 68 | :param exceptions.Exception reason: The underlying error 69 | 70 | """ 71 | 72 | def __init__(self, pool, url, reason=None): 73 | self.reason = reason 74 | 75 | message = "Max retries exceeded with url: %s (Caused by %r)" % ( 76 | url, reason) 77 | 78 | RequestError.__init__(self, pool, url, message) 79 | 80 | 81 | class HostChangedError(RequestError): 82 | "Raised when an existing pool gets a request for a foreign host." 83 | 84 | def __init__(self, pool, url, retries=3): 85 | message = "Tried to open a foreign host with url: %s" % url 86 | RequestError.__init__(self, pool, url, message) 87 | self.retries = retries 88 | 89 | 90 | class TimeoutStateError(HTTPError): 91 | """ Raised when passing an invalid state to a timeout """ 92 | pass 93 | 94 | 95 | class TimeoutError(HTTPError): 96 | """ Raised when a socket timeout error occurs. 97 | 98 | Catching this error will catch both :exc:`ReadTimeoutErrors 99 | ` and :exc:`ConnectTimeoutErrors `. 100 | """ 101 | pass 102 | 103 | 104 | class ReadTimeoutError(TimeoutError, RequestError): 105 | "Raised when a socket timeout occurs while receiving data from a server" 106 | pass 107 | 108 | 109 | # This timeout error does not have a URL attached and needs to inherit from the 110 | # base HTTPError 111 | class ConnectTimeoutError(TimeoutError): 112 | "Raised when a socket timeout occurs while connecting to a server" 113 | pass 114 | 115 | 116 | class EmptyPoolError(PoolError): 117 | "Raised when a pool runs out of connections and no more are allowed." 118 | pass 119 | 120 | 121 | class ClosedPoolError(PoolError): 122 | "Raised when a request enters a pool after the pool has been closed." 123 | pass 124 | 125 | 126 | class LocationValueError(ValueError, HTTPError): 127 | "Raised when there is something wrong with a given URL input." 128 | pass 129 | 130 | 131 | class LocationParseError(LocationValueError): 132 | "Raised when get_host or similar fails to parse the URL input." 133 | 134 | def __init__(self, location): 135 | message = "Failed to parse: %s" % location 136 | HTTPError.__init__(self, message) 137 | 138 | self.location = location 139 | 140 | 141 | class ResponseError(HTTPError): 142 | "Used as a container for an error reason supplied in a MaxRetryError." 143 | GENERIC_ERROR = 'too many error responses' 144 | SPECIFIC_ERROR = 'too many {status_code} error responses' 145 | 146 | 147 | class SecurityWarning(HTTPWarning): 148 | "Warned when perfoming security reducing actions" 149 | pass 150 | 151 | 152 | class InsecureRequestWarning(SecurityWarning): 153 | "Warned when making an unverified HTTPS request." 154 | pass 155 | 156 | 157 | class SystemTimeWarning(SecurityWarning): 158 | "Warned when system time is suspected to be wrong" 159 | pass 160 | 161 | 162 | class InsecurePlatformWarning(SecurityWarning): 163 | "Warned when certain SSL configuration is not available on a platform." 164 | pass 165 | 166 | 167 | class ResponseNotChunked(ProtocolError, ValueError): 168 | "Response needs to be chunked in order to read it as chunks." 169 | pass 170 | -------------------------------------------------------------------------------- /myrequests/packages/urllib3/fields.py: -------------------------------------------------------------------------------- 1 | import email.utils 2 | import mimetypes 3 | 4 | from .packages import six 5 | 6 | 7 | def guess_content_type(filename, default='application/octet-stream'): 8 | """ 9 | Guess the "Content-Type" of a file. 10 | 11 | :param filename: 12 | The filename to guess the "Content-Type" of using :mod:`mimetypes`. 13 | :param default: 14 | If no "Content-Type" can be guessed, default to `default`. 15 | """ 16 | if filename: 17 | return mimetypes.guess_type(filename)[0] or default 18 | return default 19 | 20 | 21 | def format_header_param(name, value): 22 | """ 23 | Helper function to format and quote a single header parameter. 24 | 25 | Particularly useful for header parameters which might contain 26 | non-ASCII values, like file names. This follows RFC 2231, as 27 | suggested by RFC 2388 Section 4.4. 28 | 29 | :param name: 30 | The name of the parameter, a string expected to be ASCII only. 31 | :param value: 32 | The value of the parameter, provided as a unicode string. 33 | """ 34 | if not any(ch in value for ch in '"\\\r\n'): 35 | result = '%s="%s"' % (name, value) 36 | try: 37 | result.encode('ascii') 38 | except UnicodeEncodeError: 39 | pass 40 | else: 41 | return result 42 | if not six.PY3: # Python 2: 43 | value = value.encode('utf-8') 44 | value = email.utils.encode_rfc2231(value, 'utf-8') 45 | value = '%s*=%s' % (name, value) 46 | return value 47 | 48 | 49 | class RequestField(object): 50 | """ 51 | A data container for request body parameters. 52 | 53 | :param name: 54 | The name of this request field. 55 | :param data: 56 | The data/value body. 57 | :param filename: 58 | An optional filename of the request field. 59 | :param headers: 60 | An optional dict-like object of headers to initially use for the field. 61 | """ 62 | def __init__(self, name, data, filename=None, headers=None): 63 | self._name = name 64 | self._filename = filename 65 | self.data = data 66 | self.headers = {} 67 | if headers: 68 | self.headers = dict(headers) 69 | 70 | @classmethod 71 | def from_tuples(cls, fieldname, value): 72 | """ 73 | A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters. 74 | 75 | Supports constructing :class:`~urllib3.fields.RequestField` from 76 | parameter of key/value strings AND key/filetuple. A filetuple is a 77 | (filename, data, MIME type) tuple where the MIME type is optional. 78 | For example:: 79 | 80 | 'foo': 'bar', 81 | 'fakefile': ('foofile.txt', 'contents of foofile'), 82 | 'realfile': ('barfile.txt', open('realfile').read()), 83 | 'typedfile': ('bazfile.bin', open('bazfile').read(), 'image/jpeg'), 84 | 'nonamefile': 'contents of nonamefile field', 85 | 86 | Field names and filenames must be unicode. 87 | """ 88 | if isinstance(value, tuple): 89 | if len(value) == 3: 90 | filename, data, content_type = value 91 | else: 92 | filename, data = value 93 | content_type = guess_content_type(filename) 94 | else: 95 | filename = None 96 | content_type = None 97 | data = value 98 | 99 | request_param = cls(fieldname, data, filename=filename) 100 | request_param.make_multipart(content_type=content_type) 101 | 102 | return request_param 103 | 104 | def _render_part(self, name, value): 105 | """ 106 | Overridable helper function to format a single header parameter. 107 | 108 | :param name: 109 | The name of the parameter, a string expected to be ASCII only. 110 | :param value: 111 | The value of the parameter, provided as a unicode string. 112 | """ 113 | return format_header_param(name, value) 114 | 115 | def _render_parts(self, header_parts): 116 | """ 117 | Helper function to format and quote a single header. 118 | 119 | Useful for single headers that are composed of multiple items. E.g., 120 | 'Content-Disposition' fields. 121 | 122 | :param header_parts: 123 | A sequence of (k, v) typles or a :class:`dict` of (k, v) to format 124 | as `k1="v1"; k2="v2"; ...`. 125 | """ 126 | parts = [] 127 | iterable = header_parts 128 | if isinstance(header_parts, dict): 129 | iterable = header_parts.items() 130 | 131 | for name, value in iterable: 132 | if value: 133 | parts.append(self._render_part(name, value)) 134 | 135 | return '; '.join(parts) 136 | 137 | def render_headers(self): 138 | """ 139 | Renders the headers for this request field. 140 | """ 141 | lines = [] 142 | 143 | sort_keys = ['Content-Disposition', 'Content-Type', 'Content-Location'] 144 | for sort_key in sort_keys: 145 | if self.headers.get(sort_key, False): 146 | lines.append('%s: %s' % (sort_key, self.headers[sort_key])) 147 | 148 | for header_name, header_value in self.headers.items(): 149 | if header_name not in sort_keys: 150 | if header_value: 151 | lines.append('%s: %s' % (header_name, header_value)) 152 | 153 | lines.append('\r\n') 154 | return '\r\n'.join(lines) 155 | 156 | def make_multipart(self, content_disposition=None, content_type=None, 157 | content_location=None): 158 | """ 159 | Makes this request field into a multipart request field. 160 | 161 | This method overrides "Content-Disposition", "Content-Type" and 162 | "Content-Location" headers to the request parameter. 163 | 164 | :param content_type: 165 | The 'Content-Type' of the request body. 166 | :param content_location: 167 | The 'Content-Location' of the request body. 168 | 169 | """ 170 | self.headers['Content-Disposition'] = content_disposition or 'form-data' 171 | self.headers['Content-Disposition'] += '; '.join([ 172 | '', self._render_parts( 173 | (('name', self._name), ('filename', self._filename)) 174 | ) 175 | ]) 176 | self.headers['Content-Type'] = content_type 177 | self.headers['Content-Location'] = content_location 178 | -------------------------------------------------------------------------------- /myrequests/packages/urllib3/filepost.py: -------------------------------------------------------------------------------- 1 | import codecs 2 | 3 | from uuid import uuid4 4 | from io import BytesIO 5 | 6 | from .packages import six 7 | from .packages.six import b 8 | from .fields import RequestField 9 | 10 | writer = codecs.lookup('utf-8')[3] 11 | 12 | 13 | def choose_boundary(): 14 | """ 15 | Our embarassingly-simple replacement for mimetools.choose_boundary. 16 | """ 17 | return uuid4().hex 18 | 19 | 20 | def iter_field_objects(fields): 21 | """ 22 | Iterate over fields. 23 | 24 | Supports list of (k, v) tuples and dicts, and lists of 25 | :class:`~urllib3.fields.RequestField`. 26 | 27 | """ 28 | if isinstance(fields, dict): 29 | i = six.iteritems(fields) 30 | else: 31 | i = iter(fields) 32 | 33 | for field in i: 34 | if isinstance(field, RequestField): 35 | yield field 36 | else: 37 | yield RequestField.from_tuples(*field) 38 | 39 | 40 | def iter_fields(fields): 41 | """ 42 | .. deprecated:: 1.6 43 | 44 | Iterate over fields. 45 | 46 | The addition of :class:`~urllib3.fields.RequestField` makes this function 47 | obsolete. Instead, use :func:`iter_field_objects`, which returns 48 | :class:`~urllib3.fields.RequestField` objects. 49 | 50 | Supports list of (k, v) tuples and dicts. 51 | """ 52 | if isinstance(fields, dict): 53 | return ((k, v) for k, v in six.iteritems(fields)) 54 | 55 | return ((k, v) for k, v in fields) 56 | 57 | 58 | def encode_multipart_formdata(fields, boundary=None): 59 | """ 60 | Encode a dictionary of ``fields`` using the multipart/form-data MIME format. 61 | 62 | :param fields: 63 | Dictionary of fields or list of (key, :class:`~urllib3.fields.RequestField`). 64 | 65 | :param boundary: 66 | If not specified, then a random boundary will be generated using 67 | :func:`mimetools.choose_boundary`. 68 | """ 69 | body = BytesIO() 70 | if boundary is None: 71 | boundary = choose_boundary() 72 | 73 | for field in iter_field_objects(fields): 74 | body.write(b('--%s\r\n' % (boundary))) 75 | 76 | writer(body).write(field.render_headers()) 77 | data = field.data 78 | 79 | if isinstance(data, int): 80 | data = str(data) # Backwards compatibility 81 | 82 | if isinstance(data, six.text_type): 83 | writer(body).write(data) 84 | else: 85 | body.write(data) 86 | 87 | body.write(b'\r\n') 88 | 89 | body.write(b('--%s--\r\n' % (boundary))) 90 | 91 | content_type = str('multipart/form-data; boundary=%s' % boundary) 92 | 93 | return body.getvalue(), content_type 94 | -------------------------------------------------------------------------------- /myrequests/packages/urllib3/packages/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from . import ssl_match_hostname 4 | 5 | -------------------------------------------------------------------------------- /myrequests/packages/urllib3/packages/ordered_dict.py: -------------------------------------------------------------------------------- 1 | # Backport of OrderedDict() class that runs on Python 2.4, 2.5, 2.6, 2.7 and pypy. 2 | # Passes Python2.7's test suite and incorporates all the latest updates. 3 | # Copyright 2009 Raymond Hettinger, released under the MIT License. 4 | # http://code.activestate.com/recipes/576693/ 5 | try: 6 | from thread import get_ident as _get_ident 7 | except ImportError: 8 | from dummy_thread import get_ident as _get_ident 9 | 10 | try: 11 | from _abcoll import KeysView, ValuesView, ItemsView 12 | except ImportError: 13 | pass 14 | 15 | 16 | class OrderedDict(dict): 17 | 'Dictionary that remembers insertion order' 18 | # An inherited dict maps keys to values. 19 | # The inherited dict provides __getitem__, __len__, __contains__, and get. 20 | # The remaining methods are order-aware. 21 | # Big-O running times for all methods are the same as for regular dictionaries. 22 | 23 | # The internal self.__map dictionary maps keys to links in a doubly linked list. 24 | # The circular doubly linked list starts and ends with a sentinel element. 25 | # The sentinel element never gets deleted (this simplifies the algorithm). 26 | # Each link is stored as a list of length three: [PREV, NEXT, KEY]. 27 | 28 | def __init__(self, *args, **kwds): 29 | '''Initialize an ordered dictionary. Signature is the same as for 30 | regular dictionaries, but keyword arguments are not recommended 31 | because their insertion order is arbitrary. 32 | 33 | ''' 34 | if len(args) > 1: 35 | raise TypeError('expected at most 1 arguments, got %d' % len(args)) 36 | try: 37 | self.__root 38 | except AttributeError: 39 | self.__root = root = [] # sentinel node 40 | root[:] = [root, root, None] 41 | self.__map = {} 42 | self.__update(*args, **kwds) 43 | 44 | def __setitem__(self, key, value, dict_setitem=dict.__setitem__): 45 | 'od.__setitem__(i, y) <==> od[i]=y' 46 | # Setting a new item creates a new link which goes at the end of the linked 47 | # list, and the inherited dictionary is updated with the new key/value pair. 48 | if key not in self: 49 | root = self.__root 50 | last = root[0] 51 | last[1] = root[0] = self.__map[key] = [last, root, key] 52 | dict_setitem(self, key, value) 53 | 54 | def __delitem__(self, key, dict_delitem=dict.__delitem__): 55 | 'od.__delitem__(y) <==> del od[y]' 56 | # Deleting an existing item uses self.__map to find the link which is 57 | # then removed by updating the links in the predecessor and successor nodes. 58 | dict_delitem(self, key) 59 | link_prev, link_next, key = self.__map.pop(key) 60 | link_prev[1] = link_next 61 | link_next[0] = link_prev 62 | 63 | def __iter__(self): 64 | 'od.__iter__() <==> iter(od)' 65 | root = self.__root 66 | curr = root[1] 67 | while curr is not root: 68 | yield curr[2] 69 | curr = curr[1] 70 | 71 | def __reversed__(self): 72 | 'od.__reversed__() <==> reversed(od)' 73 | root = self.__root 74 | curr = root[0] 75 | while curr is not root: 76 | yield curr[2] 77 | curr = curr[0] 78 | 79 | def clear(self): 80 | 'od.clear() -> None. Remove all items from od.' 81 | try: 82 | for node in self.__map.itervalues(): 83 | del node[:] 84 | root = self.__root 85 | root[:] = [root, root, None] 86 | self.__map.clear() 87 | except AttributeError: 88 | pass 89 | dict.clear(self) 90 | 91 | def popitem(self, last=True): 92 | '''od.popitem() -> (k, v), return and remove a (key, value) pair. 93 | Pairs are returned in LIFO order if last is true or FIFO order if false. 94 | 95 | ''' 96 | if not self: 97 | raise KeyError('dictionary is empty') 98 | root = self.__root 99 | if last: 100 | link = root[0] 101 | link_prev = link[0] 102 | link_prev[1] = root 103 | root[0] = link_prev 104 | else: 105 | link = root[1] 106 | link_next = link[1] 107 | root[1] = link_next 108 | link_next[0] = root 109 | key = link[2] 110 | del self.__map[key] 111 | value = dict.pop(self, key) 112 | return key, value 113 | 114 | # -- the following methods do not depend on the internal structure -- 115 | 116 | def keys(self): 117 | 'od.keys() -> list of keys in od' 118 | return list(self) 119 | 120 | def values(self): 121 | 'od.values() -> list of values in od' 122 | return [self[key] for key in self] 123 | 124 | def items(self): 125 | 'od.items() -> list of (key, value) pairs in od' 126 | return [(key, self[key]) for key in self] 127 | 128 | def iterkeys(self): 129 | 'od.iterkeys() -> an iterator over the keys in od' 130 | return iter(self) 131 | 132 | def itervalues(self): 133 | 'od.itervalues -> an iterator over the values in od' 134 | for k in self: 135 | yield self[k] 136 | 137 | def iteritems(self): 138 | 'od.iteritems -> an iterator over the (key, value) items in od' 139 | for k in self: 140 | yield (k, self[k]) 141 | 142 | def update(*args, **kwds): 143 | '''od.update(E, **F) -> None. Update od from dict/iterable E and F. 144 | 145 | If E is a dict instance, does: for k in E: od[k] = E[k] 146 | If E has a .keys() method, does: for k in E.keys(): od[k] = E[k] 147 | Or if E is an iterable of items, does: for k, v in E: od[k] = v 148 | In either case, this is followed by: for k, v in F.items(): od[k] = v 149 | 150 | ''' 151 | if len(args) > 2: 152 | raise TypeError('update() takes at most 2 positional ' 153 | 'arguments (%d given)' % (len(args),)) 154 | elif not args: 155 | raise TypeError('update() takes at least 1 argument (0 given)') 156 | self = args[0] 157 | # Make progressively weaker assumptions about "other" 158 | other = () 159 | if len(args) == 2: 160 | other = args[1] 161 | if isinstance(other, dict): 162 | for key in other: 163 | self[key] = other[key] 164 | elif hasattr(other, 'keys'): 165 | for key in other.keys(): 166 | self[key] = other[key] 167 | else: 168 | for key, value in other: 169 | self[key] = value 170 | for key, value in kwds.items(): 171 | self[key] = value 172 | 173 | __update = update # let subclasses override update without breaking __init__ 174 | 175 | __marker = object() 176 | 177 | def pop(self, key, default=__marker): 178 | '''od.pop(k[,d]) -> v, remove specified key and return the corresponding value. 179 | If key is not found, d is returned if given, otherwise KeyError is raised. 180 | 181 | ''' 182 | if key in self: 183 | result = self[key] 184 | del self[key] 185 | return result 186 | if default is self.__marker: 187 | raise KeyError(key) 188 | return default 189 | 190 | def setdefault(self, key, default=None): 191 | 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od' 192 | if key in self: 193 | return self[key] 194 | self[key] = default 195 | return default 196 | 197 | def __repr__(self, _repr_running={}): 198 | 'od.__repr__() <==> repr(od)' 199 | call_key = id(self), _get_ident() 200 | if call_key in _repr_running: 201 | return '...' 202 | _repr_running[call_key] = 1 203 | try: 204 | if not self: 205 | return '%s()' % (self.__class__.__name__,) 206 | return '%s(%r)' % (self.__class__.__name__, self.items()) 207 | finally: 208 | del _repr_running[call_key] 209 | 210 | def __reduce__(self): 211 | 'Return state information for pickling' 212 | items = [[k, self[k]] for k in self] 213 | inst_dict = vars(self).copy() 214 | for k in vars(OrderedDict()): 215 | inst_dict.pop(k, None) 216 | if inst_dict: 217 | return (self.__class__, (items,), inst_dict) 218 | return self.__class__, (items,) 219 | 220 | def copy(self): 221 | 'od.copy() -> a shallow copy of od' 222 | return self.__class__(self) 223 | 224 | @classmethod 225 | def fromkeys(cls, iterable, value=None): 226 | '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S 227 | and values equal to v (which defaults to None). 228 | 229 | ''' 230 | d = cls() 231 | for key in iterable: 232 | d[key] = value 233 | return d 234 | 235 | def __eq__(self, other): 236 | '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive 237 | while comparison to a regular mapping is order-insensitive. 238 | 239 | ''' 240 | if isinstance(other, OrderedDict): 241 | return len(self)==len(other) and self.items() == other.items() 242 | return dict.__eq__(self, other) 243 | 244 | def __ne__(self, other): 245 | return not self == other 246 | 247 | # -- the following methods are only used in Python 2.7 -- 248 | 249 | def viewkeys(self): 250 | "od.viewkeys() -> a set-like object providing a view on od's keys" 251 | return KeysView(self) 252 | 253 | def viewvalues(self): 254 | "od.viewvalues() -> an object providing a view on od's values" 255 | return ValuesView(self) 256 | 257 | def viewitems(self): 258 | "od.viewitems() -> a set-like object providing a view on od's items" 259 | return ItemsView(self) 260 | -------------------------------------------------------------------------------- /myrequests/packages/urllib3/packages/ssl_match_hostname/__init__.py: -------------------------------------------------------------------------------- 1 | try: 2 | # Python 3.2+ 3 | from ssl import CertificateError, match_hostname 4 | except ImportError: 5 | try: 6 | # Backport of the function from a pypi module 7 | from backports.ssl_match_hostname import CertificateError, match_hostname 8 | except ImportError: 9 | # Our vendored copy 10 | from ._implementation import CertificateError, match_hostname 11 | 12 | # Not needed, but documenting what we provide. 13 | __all__ = ('CertificateError', 'match_hostname') 14 | -------------------------------------------------------------------------------- /myrequests/packages/urllib3/packages/ssl_match_hostname/_implementation.py: -------------------------------------------------------------------------------- 1 | """The match_hostname() function from Python 3.3.3, essential when using SSL.""" 2 | 3 | # Note: This file is under the PSF license as the code comes from the python 4 | # stdlib. http://docs.python.org/3/license.html 5 | 6 | import re 7 | 8 | __version__ = '3.4.0.2' 9 | 10 | class CertificateError(ValueError): 11 | pass 12 | 13 | 14 | def _dnsname_match(dn, hostname, max_wildcards=1): 15 | """Matching according to RFC 6125, section 6.4.3 16 | 17 | http://tools.ietf.org/html/rfc6125#section-6.4.3 18 | """ 19 | pats = [] 20 | if not dn: 21 | return False 22 | 23 | # Ported from python3-syntax: 24 | # leftmost, *remainder = dn.split(r'.') 25 | parts = dn.split(r'.') 26 | leftmost = parts[0] 27 | remainder = parts[1:] 28 | 29 | wildcards = leftmost.count('*') 30 | if wildcards > max_wildcards: 31 | # Issue #17980: avoid denials of service by refusing more 32 | # than one wildcard per fragment. A survey of established 33 | # policy among SSL implementations showed it to be a 34 | # reasonable choice. 35 | raise CertificateError( 36 | "too many wildcards in certificate DNS name: " + repr(dn)) 37 | 38 | # speed up common case w/o wildcards 39 | if not wildcards: 40 | return dn.lower() == hostname.lower() 41 | 42 | # RFC 6125, section 6.4.3, subitem 1. 43 | # The client SHOULD NOT attempt to match a presented identifier in which 44 | # the wildcard character comprises a label other than the left-most label. 45 | if leftmost == '*': 46 | # When '*' is a fragment by itself, it matches a non-empty dotless 47 | # fragment. 48 | pats.append('[^.]+') 49 | elif leftmost.startswith('xn--') or hostname.startswith('xn--'): 50 | # RFC 6125, section 6.4.3, subitem 3. 51 | # The client SHOULD NOT attempt to match a presented identifier 52 | # where the wildcard character is embedded within an A-label or 53 | # U-label of an internationalized domain name. 54 | pats.append(re.escape(leftmost)) 55 | else: 56 | # Otherwise, '*' matches any dotless string, e.g. www* 57 | pats.append(re.escape(leftmost).replace(r'\*', '[^.]*')) 58 | 59 | # add the remaining fragments, ignore any wildcards 60 | for frag in remainder: 61 | pats.append(re.escape(frag)) 62 | 63 | pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) 64 | return pat.match(hostname) 65 | 66 | 67 | def match_hostname(cert, hostname): 68 | """Verify that *cert* (in decoded format as returned by 69 | SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125 70 | rules are followed, but IP addresses are not accepted for *hostname*. 71 | 72 | CertificateError is raised on failure. On success, the function 73 | returns nothing. 74 | """ 75 | if not cert: 76 | raise ValueError("empty or no certificate") 77 | dnsnames = [] 78 | san = cert.get('subjectAltName', ()) 79 | for key, value in san: 80 | if key == 'DNS': 81 | if _dnsname_match(value, hostname): 82 | return 83 | dnsnames.append(value) 84 | if not dnsnames: 85 | # The subject is only checked when there is no dNSName entry 86 | # in subjectAltName 87 | for sub in cert.get('subject', ()): 88 | for key, value in sub: 89 | # XXX according to RFC 2818, the most specific Common Name 90 | # must be used. 91 | if key == 'commonName': 92 | if _dnsname_match(value, hostname): 93 | return 94 | dnsnames.append(value) 95 | if len(dnsnames) > 1: 96 | raise CertificateError("hostname %r " 97 | "doesn't match either of %s" 98 | % (hostname, ', '.join(map(repr, dnsnames)))) 99 | elif len(dnsnames) == 1: 100 | raise CertificateError("hostname %r " 101 | "doesn't match %r" 102 | % (hostname, dnsnames[0])) 103 | else: 104 | raise CertificateError("no appropriate commonName or " 105 | "subjectAltName fields were found") 106 | -------------------------------------------------------------------------------- /myrequests/packages/urllib3/poolmanager.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | try: # Python 3 4 | from urllib.parse import urljoin 5 | except ImportError: 6 | from urlparse import urljoin 7 | 8 | from ._collections import RecentlyUsedContainer 9 | from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool 10 | from .connectionpool import port_by_scheme 11 | from .exceptions import LocationValueError, MaxRetryError 12 | from .request import RequestMethods 13 | from .util.url import parse_url 14 | from .util.retry import Retry 15 | 16 | 17 | __all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url'] 18 | 19 | 20 | pool_classes_by_scheme = { 21 | 'http': HTTPConnectionPool, 22 | 'https': HTTPSConnectionPool, 23 | } 24 | 25 | log = logging.getLogger(__name__) 26 | 27 | SSL_KEYWORDS = ('key_file', 'cert_file', 'cert_reqs', 'ca_certs', 28 | 'ssl_version') 29 | 30 | 31 | class PoolManager(RequestMethods): 32 | """ 33 | Allows for arbitrary requests while transparently keeping track of 34 | necessary connection pools for you. 35 | 36 | :param num_pools: 37 | Number of connection pools to cache before discarding the least 38 | recently used pool. 39 | 40 | :param headers: 41 | Headers to include with all requests, unless other headers are given 42 | explicitly. 43 | 44 | :param \**connection_pool_kw: 45 | Additional parameters are used to create fresh 46 | :class:`urllib3.connectionpool.ConnectionPool` instances. 47 | 48 | Example:: 49 | 50 | >>> manager = PoolManager(num_pools=2) 51 | >>> r = manager.request('GET', 'http://google.com/') 52 | >>> r = manager.request('GET', 'http://google.com/mail') 53 | >>> r = manager.request('GET', 'http://yahoo.com/') 54 | >>> len(manager.pools) 55 | 2 56 | 57 | """ 58 | 59 | proxy = None 60 | 61 | def __init__(self, num_pools=10, headers=None, **connection_pool_kw): 62 | RequestMethods.__init__(self, headers) 63 | self.connection_pool_kw = connection_pool_kw 64 | self.pools = RecentlyUsedContainer(num_pools, 65 | dispose_func=lambda p: p.close()) 66 | 67 | def __enter__(self): 68 | return self 69 | 70 | def __exit__(self, exc_type, exc_val, exc_tb): 71 | self.clear() 72 | # Return False to re-raise any potential exceptions 73 | return False 74 | 75 | def _new_pool(self, scheme, host, port): 76 | """ 77 | Create a new :class:`ConnectionPool` based on host, port and scheme. 78 | 79 | This method is used to actually create the connection pools handed out 80 | by :meth:`connection_from_url` and companion methods. It is intended 81 | to be overridden for customization. 82 | """ 83 | pool_cls = pool_classes_by_scheme[scheme] 84 | kwargs = self.connection_pool_kw 85 | if scheme == 'http': 86 | kwargs = self.connection_pool_kw.copy() 87 | for kw in SSL_KEYWORDS: 88 | kwargs.pop(kw, None) 89 | 90 | return pool_cls(host, port, **kwargs) 91 | 92 | def clear(self): 93 | """ 94 | Empty our store of pools and direct them all to close. 95 | 96 | This will not affect in-flight connections, but they will not be 97 | re-used after completion. 98 | """ 99 | self.pools.clear() 100 | 101 | def connection_from_host(self, host, port=None, scheme='http'): 102 | """ 103 | Get a :class:`ConnectionPool` based on the host, port, and scheme. 104 | 105 | If ``port`` isn't given, it will be derived from the ``scheme`` using 106 | ``urllib3.connectionpool.port_by_scheme``. 107 | """ 108 | 109 | if not host: 110 | raise LocationValueError("No host specified.") 111 | 112 | scheme = scheme or 'http' 113 | port = port or port_by_scheme.get(scheme, 80) 114 | pool_key = (scheme, host, port) 115 | 116 | with self.pools.lock: 117 | # If the scheme, host, or port doesn't match existing open 118 | # connections, open a new ConnectionPool. 119 | pool = self.pools.get(pool_key) 120 | if pool: 121 | return pool 122 | 123 | # Make a fresh ConnectionPool of the desired type 124 | pool = self._new_pool(scheme, host, port) 125 | self.pools[pool_key] = pool 126 | 127 | return pool 128 | 129 | def connection_from_url(self, url): 130 | """ 131 | Similar to :func:`urllib3.connectionpool.connection_from_url` but 132 | doesn't pass any additional parameters to the 133 | :class:`urllib3.connectionpool.ConnectionPool` constructor. 134 | 135 | Additional parameters are taken from the :class:`.PoolManager` 136 | constructor. 137 | """ 138 | u = parse_url(url) 139 | return self.connection_from_host(u.host, port=u.port, scheme=u.scheme) 140 | 141 | def urlopen(self, method, url, redirect=True, **kw): 142 | """ 143 | Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen` 144 | with custom cross-host redirect logic and only sends the request-uri 145 | portion of the ``url``. 146 | 147 | The given ``url`` parameter must be absolute, such that an appropriate 148 | :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it. 149 | """ 150 | u = parse_url(url) 151 | conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme) 152 | 153 | kw['assert_same_host'] = False 154 | kw['redirect'] = False 155 | if 'headers' not in kw: 156 | kw['headers'] = self.headers 157 | 158 | if self.proxy is not None and u.scheme == "http": 159 | response = conn.urlopen(method, url, **kw) 160 | else: 161 | response = conn.urlopen(method, u.request_uri, **kw) 162 | 163 | redirect_location = redirect and response.get_redirect_location() 164 | if not redirect_location: 165 | return response 166 | 167 | # Support relative URLs for redirecting. 168 | redirect_location = urljoin(url, redirect_location) 169 | 170 | # RFC 7231, Section 6.4.4 171 | if response.status == 303: 172 | method = 'GET' 173 | 174 | retries = kw.get('retries') 175 | if not isinstance(retries, Retry): 176 | retries = Retry.from_int(retries, redirect=redirect) 177 | 178 | try: 179 | retries = retries.increment(method, url, response=response, _pool=conn) 180 | except MaxRetryError: 181 | if retries.raise_on_redirect: 182 | raise 183 | return response 184 | 185 | kw['retries'] = retries 186 | kw['redirect'] = redirect 187 | 188 | log.info("Redirecting %s -> %s" % (url, redirect_location)) 189 | return self.urlopen(method, redirect_location, **kw) 190 | 191 | 192 | class ProxyManager(PoolManager): 193 | """ 194 | Behaves just like :class:`PoolManager`, but sends all requests through 195 | the defined proxy, using the CONNECT method for HTTPS URLs. 196 | 197 | :param proxy_url: 198 | The URL of the proxy to be used. 199 | 200 | :param proxy_headers: 201 | A dictionary contaning headers that will be sent to the proxy. In case 202 | of HTTP they are being sent with each request, while in the 203 | HTTPS/CONNECT case they are sent only once. Could be used for proxy 204 | authentication. 205 | 206 | Example: 207 | >>> proxy = urllib3.ProxyManager('http://localhost:3128/') 208 | >>> r1 = proxy.request('GET', 'http://google.com/') 209 | >>> r2 = proxy.request('GET', 'http://httpbin.org/') 210 | >>> len(proxy.pools) 211 | 1 212 | >>> r3 = proxy.request('GET', 'https://httpbin.org/') 213 | >>> r4 = proxy.request('GET', 'https://twitter.com/') 214 | >>> len(proxy.pools) 215 | 3 216 | 217 | """ 218 | 219 | def __init__(self, proxy_url, num_pools=10, headers=None, 220 | proxy_headers=None, **connection_pool_kw): 221 | 222 | if isinstance(proxy_url, HTTPConnectionPool): 223 | proxy_url = '%s://%s:%i' % (proxy_url.scheme, proxy_url.host, 224 | proxy_url.port) 225 | proxy = parse_url(proxy_url) 226 | if not proxy.port: 227 | port = port_by_scheme.get(proxy.scheme, 80) 228 | proxy = proxy._replace(port=port) 229 | 230 | assert proxy.scheme in ("http", "https"), \ 231 | 'Not supported proxy scheme %s' % proxy.scheme 232 | 233 | self.proxy = proxy 234 | self.proxy_headers = proxy_headers or {} 235 | 236 | connection_pool_kw['_proxy'] = self.proxy 237 | connection_pool_kw['_proxy_headers'] = self.proxy_headers 238 | 239 | super(ProxyManager, self).__init__( 240 | num_pools, headers, **connection_pool_kw) 241 | 242 | def connection_from_host(self, host, port=None, scheme='http'): 243 | if scheme == "https": 244 | return super(ProxyManager, self).connection_from_host( 245 | host, port, scheme) 246 | 247 | return super(ProxyManager, self).connection_from_host( 248 | self.proxy.host, self.proxy.port, self.proxy.scheme) 249 | 250 | def _set_proxy_headers(self, url, headers=None): 251 | """ 252 | Sets headers needed by proxies: specifically, the Accept and Host 253 | headers. Only sets headers not provided by the user. 254 | """ 255 | headers_ = {'Accept': '*/*'} 256 | 257 | netloc = parse_url(url).netloc 258 | if netloc: 259 | headers_['Host'] = netloc 260 | 261 | if headers: 262 | headers_.update(headers) 263 | return headers_ 264 | 265 | def urlopen(self, method, url, redirect=True, **kw): 266 | "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute." 267 | u = parse_url(url) 268 | 269 | if u.scheme == "http": 270 | # For proxied HTTPS requests, httplib sets the necessary headers 271 | # on the CONNECT to the proxy. For HTTP, we'll definitely 272 | # need to set 'Host' at the very least. 273 | headers = kw.get('headers', self.headers) 274 | kw['headers'] = self._set_proxy_headers(url, headers) 275 | 276 | return super(ProxyManager, self).urlopen(method, url, redirect=redirect, **kw) 277 | 278 | 279 | def proxy_from_url(url, **kw): 280 | return ProxyManager(proxy_url=url, **kw) 281 | -------------------------------------------------------------------------------- /myrequests/packages/urllib3/request.py: -------------------------------------------------------------------------------- 1 | try: 2 | from urllib.parse import urlencode 3 | except ImportError: 4 | from urllib import urlencode 5 | 6 | from .filepost import encode_multipart_formdata 7 | 8 | 9 | __all__ = ['RequestMethods'] 10 | 11 | 12 | class RequestMethods(object): 13 | """ 14 | Convenience mixin for classes who implement a :meth:`urlopen` method, such 15 | as :class:`~urllib3.connectionpool.HTTPConnectionPool` and 16 | :class:`~urllib3.poolmanager.PoolManager`. 17 | 18 | Provides behavior for making common types of HTTP request methods and 19 | decides which type of request field encoding to use. 20 | 21 | Specifically, 22 | 23 | :meth:`.request_encode_url` is for sending requests whose fields are 24 | encoded in the URL (such as GET, HEAD, DELETE). 25 | 26 | :meth:`.request_encode_body` is for sending requests whose fields are 27 | encoded in the *body* of the request using multipart or www-form-urlencoded 28 | (such as for POST, PUT, PATCH). 29 | 30 | :meth:`.request` is for making any kind of request, it will look up the 31 | appropriate encoding format and use one of the above two methods to make 32 | the request. 33 | 34 | Initializer parameters: 35 | 36 | :param headers: 37 | Headers to include with all requests, unless other headers are given 38 | explicitly. 39 | """ 40 | 41 | _encode_url_methods = set(['DELETE', 'GET', 'HEAD', 'OPTIONS']) 42 | 43 | def __init__(self, headers=None): 44 | self.headers = headers or {} 45 | 46 | def urlopen(self, method, url, body=None, headers=None, 47 | encode_multipart=True, multipart_boundary=None, 48 | **kw): # Abstract 49 | raise NotImplemented("Classes extending RequestMethods must implement " 50 | "their own ``urlopen`` method.") 51 | 52 | def request(self, method, url, fields=None, headers=None, **urlopen_kw): 53 | """ 54 | Make a request using :meth:`urlopen` with the appropriate encoding of 55 | ``fields`` based on the ``method`` used. 56 | 57 | This is a convenience method that requires the least amount of manual 58 | effort. It can be used in most situations, while still having the 59 | option to drop down to more specific methods when necessary, such as 60 | :meth:`request_encode_url`, :meth:`request_encode_body`, 61 | or even the lowest level :meth:`urlopen`. 62 | """ 63 | method = method.upper() 64 | 65 | if method in self._encode_url_methods: 66 | return self.request_encode_url(method, url, fields=fields, 67 | headers=headers, 68 | **urlopen_kw) 69 | else: 70 | return self.request_encode_body(method, url, fields=fields, 71 | headers=headers, 72 | **urlopen_kw) 73 | 74 | def request_encode_url(self, method, url, fields=None, **urlopen_kw): 75 | """ 76 | Make a request using :meth:`urlopen` with the ``fields`` encoded in 77 | the url. This is useful for request methods like GET, HEAD, DELETE, etc. 78 | """ 79 | if fields: 80 | url += '?' + urlencode(fields) 81 | return self.urlopen(method, url, **urlopen_kw) 82 | 83 | def request_encode_body(self, method, url, fields=None, headers=None, 84 | encode_multipart=True, multipart_boundary=None, 85 | **urlopen_kw): 86 | """ 87 | Make a request using :meth:`urlopen` with the ``fields`` encoded in 88 | the body. This is useful for request methods like POST, PUT, PATCH, etc. 89 | 90 | When ``encode_multipart=True`` (default), then 91 | :meth:`urllib3.filepost.encode_multipart_formdata` is used to encode 92 | the payload with the appropriate content type. Otherwise 93 | :meth:`urllib.urlencode` is used with the 94 | 'application/x-www-form-urlencoded' content type. 95 | 96 | Multipart encoding must be used when posting files, and it's reasonably 97 | safe to use it in other times too. However, it may break request 98 | signing, such as with OAuth. 99 | 100 | Supports an optional ``fields`` parameter of key/value strings AND 101 | key/filetuple. A filetuple is a (filename, data, MIME type) tuple where 102 | the MIME type is optional. For example:: 103 | 104 | fields = { 105 | 'foo': 'bar', 106 | 'fakefile': ('foofile.txt', 'contents of foofile'), 107 | 'realfile': ('barfile.txt', open('realfile').read()), 108 | 'typedfile': ('bazfile.bin', open('bazfile').read(), 109 | 'image/jpeg'), 110 | 'nonamefile': 'contents of nonamefile field', 111 | } 112 | 113 | When uploading a file, providing a filename (the first parameter of the 114 | tuple) is optional but recommended to best mimick behavior of browsers. 115 | 116 | Note that if ``headers`` are supplied, the 'Content-Type' header will 117 | be overwritten because it depends on the dynamic random boundary string 118 | which is used to compose the body of the request. The random boundary 119 | string can be explicitly set with the ``multipart_boundary`` parameter. 120 | """ 121 | if headers is None: 122 | headers = self.headers 123 | 124 | extra_kw = {'headers': {}} 125 | 126 | if fields: 127 | if 'body' in urlopen_kw: 128 | raise TypeError('request got values for both \'fields\' and \'body\', can only specify one.') 129 | 130 | if encode_multipart: 131 | body, content_type = encode_multipart_formdata(fields, boundary=multipart_boundary) 132 | else: 133 | body, content_type = urlencode(fields), 'application/x-www-form-urlencoded' 134 | 135 | extra_kw['body'] = body 136 | extra_kw['headers'] = {'Content-Type': content_type} 137 | 138 | extra_kw['headers'].update(headers) 139 | extra_kw.update(urlopen_kw) 140 | 141 | return self.urlopen(method, url, **extra_kw) 142 | -------------------------------------------------------------------------------- /myrequests/packages/urllib3/util/__init__.py: -------------------------------------------------------------------------------- 1 | # For backwards compatibility, provide imports that used to be here. 2 | from .connection import is_connection_dropped 3 | from .request import make_headers 4 | from .response import is_fp_closed 5 | from .ssl_ import ( 6 | SSLContext, 7 | HAS_SNI, 8 | assert_fingerprint, 9 | resolve_cert_reqs, 10 | resolve_ssl_version, 11 | ssl_wrap_socket, 12 | ) 13 | from .timeout import ( 14 | current_time, 15 | Timeout, 16 | ) 17 | 18 | from .retry import Retry 19 | from .url import ( 20 | get_host, 21 | parse_url, 22 | split_first, 23 | Url, 24 | ) 25 | -------------------------------------------------------------------------------- /myrequests/packages/urllib3/util/connection.py: -------------------------------------------------------------------------------- 1 | import socket 2 | try: 3 | from select import poll, POLLIN 4 | except ImportError: # `poll` doesn't exist on OSX and other platforms 5 | poll = False 6 | try: 7 | from select import select 8 | except ImportError: # `select` doesn't exist on AppEngine. 9 | select = False 10 | 11 | 12 | def is_connection_dropped(conn): # Platform-specific 13 | """ 14 | Returns True if the connection is dropped and should be closed. 15 | 16 | :param conn: 17 | :class:`httplib.HTTPConnection` object. 18 | 19 | Note: For platforms like AppEngine, this will always return ``False`` to 20 | let the platform handle connection recycling transparently for us. 21 | """ 22 | sock = getattr(conn, 'sock', False) 23 | if sock is False: # Platform-specific: AppEngine 24 | return False 25 | if sock is None: # Connection already closed (such as by httplib). 26 | return True 27 | 28 | if not poll: 29 | if not select: # Platform-specific: AppEngine 30 | return False 31 | 32 | try: 33 | return select([sock], [], [], 0.0)[0] 34 | except socket.error: 35 | return True 36 | 37 | # This version is better on platforms that support it. 38 | p = poll() 39 | p.register(sock, POLLIN) 40 | for (fno, ev) in p.poll(0.0): 41 | if fno == sock.fileno(): 42 | # Either data is buffered (bad), or the connection is dropped. 43 | return True 44 | 45 | 46 | # This function is copied from socket.py in the Python 2.7 standard 47 | # library test suite. Added to its signature is only `socket_options`. 48 | def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, 49 | source_address=None, socket_options=None): 50 | """Connect to *address* and return the socket object. 51 | 52 | Convenience function. Connect to *address* (a 2-tuple ``(host, 53 | port)``) and return the socket object. Passing the optional 54 | *timeout* parameter will set the timeout on the socket instance 55 | before attempting to connect. If no *timeout* is supplied, the 56 | global default timeout setting returned by :func:`getdefaulttimeout` 57 | is used. If *source_address* is set it must be a tuple of (host, port) 58 | for the socket to bind as a source address before making the connection. 59 | An host of '' or port 0 tells the OS to use the default. 60 | """ 61 | 62 | host, port = address 63 | err = None 64 | for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM): 65 | af, socktype, proto, canonname, sa = res 66 | sock = None 67 | try: 68 | sock = socket.socket(af, socktype, proto) 69 | 70 | # If provided, set socket level options before connecting. 71 | # This is the only addition urllib3 makes to this function. 72 | _set_socket_options(sock, socket_options) 73 | 74 | if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT: 75 | sock.settimeout(timeout) 76 | if source_address: 77 | sock.bind(source_address) 78 | sock.connect(sa) 79 | return sock 80 | 81 | except socket.error as _: 82 | err = _ 83 | if sock is not None: 84 | sock.close() 85 | sock = None 86 | 87 | if err is not None: 88 | raise err 89 | else: 90 | raise socket.error("getaddrinfo returns an empty list") 91 | 92 | 93 | def _set_socket_options(sock, options): 94 | if options is None: 95 | return 96 | 97 | for opt in options: 98 | sock.setsockopt(*opt) 99 | -------------------------------------------------------------------------------- /myrequests/packages/urllib3/util/request.py: -------------------------------------------------------------------------------- 1 | from base64 import b64encode 2 | 3 | from ..packages.six import b 4 | 5 | ACCEPT_ENCODING = 'gzip,deflate' 6 | 7 | 8 | def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, 9 | basic_auth=None, proxy_basic_auth=None, disable_cache=None): 10 | """ 11 | Shortcuts for generating request headers. 12 | 13 | :param keep_alive: 14 | If ``True``, adds 'connection: keep-alive' header. 15 | 16 | :param accept_encoding: 17 | Can be a boolean, list, or string. 18 | ``True`` translates to 'gzip,deflate'. 19 | List will get joined by comma. 20 | String will be used as provided. 21 | 22 | :param user_agent: 23 | String representing the user-agent you want, such as 24 | "python-urllib3/0.6" 25 | 26 | :param basic_auth: 27 | Colon-separated username:password string for 'authorization: basic ...' 28 | auth header. 29 | 30 | :param proxy_basic_auth: 31 | Colon-separated username:password string for 'proxy-authorization: basic ...' 32 | auth header. 33 | 34 | :param disable_cache: 35 | If ``True``, adds 'cache-control: no-cache' header. 36 | 37 | Example:: 38 | 39 | >>> make_headers(keep_alive=True, user_agent="Batman/1.0") 40 | {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} 41 | >>> make_headers(accept_encoding=True) 42 | {'accept-encoding': 'gzip,deflate'} 43 | """ 44 | headers = {} 45 | if accept_encoding: 46 | if isinstance(accept_encoding, str): 47 | pass 48 | elif isinstance(accept_encoding, list): 49 | accept_encoding = ','.join(accept_encoding) 50 | else: 51 | accept_encoding = ACCEPT_ENCODING 52 | headers['accept-encoding'] = accept_encoding 53 | 54 | if user_agent: 55 | headers['user-agent'] = user_agent 56 | 57 | if keep_alive: 58 | headers['connection'] = 'keep-alive' 59 | 60 | if basic_auth: 61 | headers['authorization'] = 'Basic ' + \ 62 | b64encode(b(basic_auth)).decode('utf-8') 63 | 64 | if proxy_basic_auth: 65 | headers['proxy-authorization'] = 'Basic ' + \ 66 | b64encode(b(proxy_basic_auth)).decode('utf-8') 67 | 68 | if disable_cache: 69 | headers['cache-control'] = 'no-cache' 70 | 71 | return headers 72 | -------------------------------------------------------------------------------- /myrequests/packages/urllib3/util/response.py: -------------------------------------------------------------------------------- 1 | def is_fp_closed(obj): 2 | """ 3 | Checks whether a given file-like object is closed. 4 | 5 | :param obj: 6 | The file-like object to check. 7 | """ 8 | 9 | try: 10 | # Check via the official file-like-object way. 11 | return obj.closed 12 | except AttributeError: 13 | pass 14 | 15 | try: 16 | # Check if the object is a container for another file-like object that 17 | # gets released on exhaustion (e.g. HTTPResponse). 18 | return obj.fp is None 19 | except AttributeError: 20 | pass 21 | 22 | raise ValueError("Unable to determine whether fp is closed.") 23 | -------------------------------------------------------------------------------- /myrequests/packages/urllib3/util/timeout.py: -------------------------------------------------------------------------------- 1 | # The default socket timeout, used by httplib to indicate that no timeout was 2 | # specified by the user 3 | from socket import _GLOBAL_DEFAULT_TIMEOUT 4 | import time 5 | 6 | from ..exceptions import TimeoutStateError 7 | 8 | # A sentinel value to indicate that no timeout was specified by the user in 9 | # urllib3 10 | _Default = object() 11 | 12 | def current_time(): 13 | """ 14 | Retrieve the current time. This function is mocked out in unit testing. 15 | """ 16 | return time.time() 17 | 18 | 19 | class Timeout(object): 20 | """ Timeout configuration. 21 | 22 | Timeouts can be defined as a default for a pool:: 23 | 24 | timeout = Timeout(connect=2.0, read=7.0) 25 | http = PoolManager(timeout=timeout) 26 | response = http.request('GET', 'http://example.com/') 27 | 28 | Or per-request (which overrides the default for the pool):: 29 | 30 | response = http.request('GET', 'http://example.com/', timeout=Timeout(10)) 31 | 32 | Timeouts can be disabled by setting all the parameters to ``None``:: 33 | 34 | no_timeout = Timeout(connect=None, read=None) 35 | response = http.request('GET', 'http://example.com/, timeout=no_timeout) 36 | 37 | 38 | :param total: 39 | This combines the connect and read timeouts into one; the read timeout 40 | will be set to the time leftover from the connect attempt. In the 41 | event that both a connect timeout and a total are specified, or a read 42 | timeout and a total are specified, the shorter timeout will be applied. 43 | 44 | Defaults to None. 45 | 46 | :type total: integer, float, or None 47 | 48 | :param connect: 49 | The maximum amount of time to wait for a connection attempt to a server 50 | to succeed. Omitting the parameter will default the connect timeout to 51 | the system default, probably `the global default timeout in socket.py 52 | `_. 53 | None will set an infinite timeout for connection attempts. 54 | 55 | :type connect: integer, float, or None 56 | 57 | :param read: 58 | The maximum amount of time to wait between consecutive 59 | read operations for a response from the server. Omitting 60 | the parameter will default the read timeout to the system 61 | default, probably `the global default timeout in socket.py 62 | `_. 63 | None will set an infinite timeout. 64 | 65 | :type read: integer, float, or None 66 | 67 | .. note:: 68 | 69 | Many factors can affect the total amount of time for urllib3 to return 70 | an HTTP response. 71 | 72 | For example, Python's DNS resolver does not obey the timeout specified 73 | on the socket. Other factors that can affect total request time include 74 | high CPU load, high swap, the program running at a low priority level, 75 | or other behaviors. 76 | 77 | In addition, the read and total timeouts only measure the time between 78 | read operations on the socket connecting the client and the server, 79 | not the total amount of time for the request to return a complete 80 | response. For most requests, the timeout is raised because the server 81 | has not sent the first byte in the specified time. This is not always 82 | the case; if a server streams one byte every fifteen seconds, a timeout 83 | of 20 seconds will not trigger, even though the request will take 84 | several minutes to complete. 85 | 86 | If your goal is to cut off any request after a set amount of wall clock 87 | time, consider having a second "watcher" thread to cut off a slow 88 | request. 89 | """ 90 | 91 | #: A sentinel object representing the default timeout value 92 | DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT 93 | 94 | def __init__(self, total=None, connect=_Default, read=_Default): 95 | self._connect = self._validate_timeout(connect, 'connect') 96 | self._read = self._validate_timeout(read, 'read') 97 | self.total = self._validate_timeout(total, 'total') 98 | self._start_connect = None 99 | 100 | def __str__(self): 101 | return '%s(connect=%r, read=%r, total=%r)' % ( 102 | type(self).__name__, self._connect, self._read, self.total) 103 | 104 | @classmethod 105 | def _validate_timeout(cls, value, name): 106 | """ Check that a timeout attribute is valid. 107 | 108 | :param value: The timeout value to validate 109 | :param name: The name of the timeout attribute to validate. This is 110 | used to specify in error messages. 111 | :return: The validated and casted version of the given value. 112 | :raises ValueError: If the type is not an integer or a float, or if it 113 | is a numeric value less than zero. 114 | """ 115 | if value is _Default: 116 | return cls.DEFAULT_TIMEOUT 117 | 118 | if value is None or value is cls.DEFAULT_TIMEOUT: 119 | return value 120 | 121 | try: 122 | float(value) 123 | except (TypeError, ValueError): 124 | raise ValueError("Timeout value %s was %s, but it must be an " 125 | "int or float." % (name, value)) 126 | 127 | try: 128 | if value < 0: 129 | raise ValueError("Attempted to set %s timeout to %s, but the " 130 | "timeout cannot be set to a value less " 131 | "than 0." % (name, value)) 132 | except TypeError: # Python 3 133 | raise ValueError("Timeout value %s was %s, but it must be an " 134 | "int or float." % (name, value)) 135 | 136 | return value 137 | 138 | @classmethod 139 | def from_float(cls, timeout): 140 | """ Create a new Timeout from a legacy timeout value. 141 | 142 | The timeout value used by httplib.py sets the same timeout on the 143 | connect(), and recv() socket requests. This creates a :class:`Timeout` 144 | object that sets the individual timeouts to the ``timeout`` value 145 | passed to this function. 146 | 147 | :param timeout: The legacy timeout value. 148 | :type timeout: integer, float, sentinel default object, or None 149 | :return: Timeout object 150 | :rtype: :class:`Timeout` 151 | """ 152 | return Timeout(read=timeout, connect=timeout) 153 | 154 | def clone(self): 155 | """ Create a copy of the timeout object 156 | 157 | Timeout properties are stored per-pool but each request needs a fresh 158 | Timeout object to ensure each one has its own start/stop configured. 159 | 160 | :return: a copy of the timeout object 161 | :rtype: :class:`Timeout` 162 | """ 163 | # We can't use copy.deepcopy because that will also create a new object 164 | # for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to 165 | # detect the user default. 166 | return Timeout(connect=self._connect, read=self._read, 167 | total=self.total) 168 | 169 | def start_connect(self): 170 | """ Start the timeout clock, used during a connect() attempt 171 | 172 | :raises urllib3.exceptions.TimeoutStateError: if you attempt 173 | to start a timer that has been started already. 174 | """ 175 | if self._start_connect is not None: 176 | raise TimeoutStateError("Timeout timer has already been started.") 177 | self._start_connect = current_time() 178 | return self._start_connect 179 | 180 | def get_connect_duration(self): 181 | """ Gets the time elapsed since the call to :meth:`start_connect`. 182 | 183 | :return: Elapsed time. 184 | :rtype: float 185 | :raises urllib3.exceptions.TimeoutStateError: if you attempt 186 | to get duration for a timer that hasn't been started. 187 | """ 188 | if self._start_connect is None: 189 | raise TimeoutStateError("Can't get connect duration for timer " 190 | "that has not started.") 191 | return current_time() - self._start_connect 192 | 193 | @property 194 | def connect_timeout(self): 195 | """ Get the value to use when setting a connection timeout. 196 | 197 | This will be a positive float or integer, the value None 198 | (never timeout), or the default system timeout. 199 | 200 | :return: Connect timeout. 201 | :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None 202 | """ 203 | if self.total is None: 204 | return self._connect 205 | 206 | if self._connect is None or self._connect is self.DEFAULT_TIMEOUT: 207 | return self.total 208 | 209 | return min(self._connect, self.total) 210 | 211 | @property 212 | def read_timeout(self): 213 | """ Get the value for the read timeout. 214 | 215 | This assumes some time has elapsed in the connection timeout and 216 | computes the read timeout appropriately. 217 | 218 | If self.total is set, the read timeout is dependent on the amount of 219 | time taken by the connect timeout. If the connection time has not been 220 | established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be 221 | raised. 222 | 223 | :return: Value to use for the read timeout. 224 | :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None 225 | :raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect` 226 | has not yet been called on this object. 227 | """ 228 | if (self.total is not None and 229 | self.total is not self.DEFAULT_TIMEOUT and 230 | self._read is not None and 231 | self._read is not self.DEFAULT_TIMEOUT): 232 | # In case the connect timeout has not yet been established. 233 | if self._start_connect is None: 234 | return self._read 235 | return max(0, min(self.total - self.get_connect_duration(), 236 | self._read)) 237 | elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT: 238 | return max(0, self.total - self.get_connect_duration()) 239 | else: 240 | return self._read 241 | -------------------------------------------------------------------------------- /myrequests/packages/urllib3/util/url.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | 3 | from ..exceptions import LocationParseError 4 | 5 | 6 | url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'] 7 | 8 | 9 | class Url(namedtuple('Url', url_attrs)): 10 | """ 11 | Datastructure for representing an HTTP URL. Used as a return value for 12 | :func:`parse_url`. 13 | """ 14 | slots = () 15 | 16 | def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, 17 | query=None, fragment=None): 18 | if path and not path.startswith('/'): 19 | path = '/' + path 20 | return super(Url, cls).__new__(cls, scheme, auth, host, port, path, 21 | query, fragment) 22 | 23 | @property 24 | def hostname(self): 25 | """For backwards-compatibility with urlparse. We're nice like that.""" 26 | return self.host 27 | 28 | @property 29 | def request_uri(self): 30 | """Absolute path including the query string.""" 31 | uri = self.path or '/' 32 | 33 | if self.query is not None: 34 | uri += '?' + self.query 35 | 36 | return uri 37 | 38 | @property 39 | def netloc(self): 40 | """Network location including host and port""" 41 | if self.port: 42 | return '%s:%d' % (self.host, self.port) 43 | return self.host 44 | 45 | @property 46 | def url(self): 47 | """ 48 | Convert self into a url 49 | 50 | This function should more or less round-trip with :func:`.parse_url`. The 51 | returned url may not be exactly the same as the url inputted to 52 | :func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls 53 | with a blank port will have : removed). 54 | 55 | Example: :: 56 | 57 | >>> U = parse_url('http://google.com/mail/') 58 | >>> U.url 59 | 'http://google.com/mail/' 60 | >>> Url('http', 'username:password', 'host.com', 80, 61 | ... '/path', 'query', 'fragment').url 62 | 'http://username:password@host.com:80/path?query#fragment' 63 | """ 64 | scheme, auth, host, port, path, query, fragment = self 65 | url = '' 66 | 67 | # We use "is not None" we want things to happen with empty strings (or 0 port) 68 | if scheme is not None: 69 | url += scheme + '://' 70 | if auth is not None: 71 | url += auth + '@' 72 | if host is not None: 73 | url += host 74 | if port is not None: 75 | url += ':' + str(port) 76 | if path is not None: 77 | url += path 78 | if query is not None: 79 | url += '?' + query 80 | if fragment is not None: 81 | url += '#' + fragment 82 | 83 | return url 84 | 85 | def __str__(self): 86 | return self.url 87 | 88 | def split_first(s, delims): 89 | """ 90 | Given a string and an iterable of delimiters, split on the first found 91 | delimiter. Return two split parts and the matched delimiter. 92 | 93 | If not found, then the first part is the full input string. 94 | 95 | Example:: 96 | 97 | >>> split_first('foo/bar?baz', '?/=') 98 | ('foo', 'bar?baz', '/') 99 | >>> split_first('foo/bar?baz', '123') 100 | ('foo/bar?baz', '', None) 101 | 102 | Scales linearly with number of delims. Not ideal for large number of delims. 103 | """ 104 | min_idx = None 105 | min_delim = None 106 | for d in delims: 107 | idx = s.find(d) 108 | if idx < 0: 109 | continue 110 | 111 | if min_idx is None or idx < min_idx: 112 | min_idx = idx 113 | min_delim = d 114 | 115 | if min_idx is None or min_idx < 0: 116 | return s, '', None 117 | 118 | return s[:min_idx], s[min_idx+1:], min_delim 119 | 120 | 121 | def parse_url(url): 122 | """ 123 | Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is 124 | performed to parse incomplete urls. Fields not provided will be None. 125 | 126 | Partly backwards-compatible with :mod:`urlparse`. 127 | 128 | Example:: 129 | 130 | >>> parse_url('http://google.com/mail/') 131 | Url(scheme='http', host='google.com', port=None, path='/mail/', ...) 132 | >>> parse_url('google.com:80') 133 | Url(scheme=None, host='google.com', port=80, path=None, ...) 134 | >>> parse_url('/foo?bar') 135 | Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...) 136 | """ 137 | 138 | # While this code has overlap with stdlib's urlparse, it is much 139 | # simplified for our needs and less annoying. 140 | # Additionally, this implementations does silly things to be optimal 141 | # on CPython. 142 | 143 | if not url: 144 | # Empty 145 | return Url() 146 | 147 | scheme = None 148 | auth = None 149 | host = None 150 | port = None 151 | path = None 152 | fragment = None 153 | query = None 154 | 155 | # Scheme 156 | if '://' in url: 157 | scheme, url = url.split('://', 1) 158 | 159 | # Find the earliest Authority Terminator 160 | # (http://tools.ietf.org/html/rfc3986#section-3.2) 161 | url, path_, delim = split_first(url, ['/', '?', '#']) 162 | 163 | if delim: 164 | # Reassemble the path 165 | path = delim + path_ 166 | 167 | # Auth 168 | if '@' in url: 169 | # Last '@' denotes end of auth part 170 | auth, url = url.rsplit('@', 1) 171 | 172 | # IPv6 173 | if url and url[0] == '[': 174 | host, url = url.split(']', 1) 175 | host += ']' 176 | 177 | # Port 178 | if ':' in url: 179 | _host, port = url.split(':', 1) 180 | 181 | if not host: 182 | host = _host 183 | 184 | if port: 185 | # If given, ports must be integers. 186 | if not port.isdigit(): 187 | raise LocationParseError(url) 188 | port = int(port) 189 | else: 190 | # Blank ports are cool, too. (rfc3986#section-3.2.3) 191 | port = None 192 | 193 | elif not host and url: 194 | host = url 195 | 196 | if not path: 197 | return Url(scheme, auth, host, port, path, query, fragment) 198 | 199 | # Fragment 200 | if '#' in path: 201 | path, fragment = path.split('#', 1) 202 | 203 | # Query 204 | if '?' in path: 205 | path, query = path.split('?', 1) 206 | 207 | return Url(scheme, auth, host, port, path, query, fragment) 208 | 209 | def get_host(url): 210 | """ 211 | Deprecated. Use :func:`.parse_url` instead. 212 | """ 213 | p = parse_url(url) 214 | return p.scheme or 'http', p.hostname, p.port 215 | -------------------------------------------------------------------------------- /myrequests/status_codes.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .structures import LookupDict 4 | 5 | _codes = { 6 | 7 | # Informational. 8 | 100: ('continue',), 9 | 101: ('switching_protocols',), 10 | 102: ('processing',), 11 | 103: ('checkpoint',), 12 | 122: ('uri_too_long', 'request_uri_too_long'), 13 | 200: ('ok', 'okay', 'all_ok', 'all_okay', 'all_good', '\\o/', '✓'), 14 | 201: ('created',), 15 | 202: ('accepted',), 16 | 203: ('non_authoritative_info', 'non_authoritative_information'), 17 | 204: ('no_content',), 18 | 205: ('reset_content', 'reset'), 19 | 206: ('partial_content', 'partial'), 20 | 207: ('multi_status', 'multiple_status', 'multi_stati', 'multiple_stati'), 21 | 208: ('already_reported',), 22 | 226: ('im_used',), 23 | 24 | # Redirection. 25 | 300: ('multiple_choices',), 26 | 301: ('moved_permanently', 'moved', '\\o-'), 27 | 302: ('found',), 28 | 303: ('see_other', 'other'), 29 | 304: ('not_modified',), 30 | 305: ('use_proxy',), 31 | 306: ('switch_proxy',), 32 | 307: ('temporary_redirect', 'temporary_moved', 'temporary'), 33 | 308: ('permanent_redirect', 34 | 'resume_incomplete', 'resume',), # These 2 to be removed in 3.0 35 | 36 | # Client Error. 37 | 400: ('bad_request', 'bad'), 38 | 401: ('unauthorized',), 39 | 402: ('payment_required', 'payment'), 40 | 403: ('forbidden',), 41 | 404: ('not_found', '-o-'), 42 | 405: ('method_not_allowed', 'not_allowed'), 43 | 406: ('not_acceptable',), 44 | 407: ('proxy_authentication_required', 'proxy_auth', 'proxy_authentication'), 45 | 408: ('request_timeout', 'timeout'), 46 | 409: ('conflict',), 47 | 410: ('gone',), 48 | 411: ('length_required',), 49 | 412: ('precondition_failed', 'precondition'), 50 | 413: ('request_entity_too_large',), 51 | 414: ('request_uri_too_large',), 52 | 415: ('unsupported_media_type', 'unsupported_media', 'media_type'), 53 | 416: ('requested_range_not_satisfiable', 'requested_range', 'range_not_satisfiable'), 54 | 417: ('expectation_failed',), 55 | 418: ('im_a_teapot', 'teapot', 'i_am_a_teapot'), 56 | 422: ('unprocessable_entity', 'unprocessable'), 57 | 423: ('locked',), 58 | 424: ('failed_dependency', 'dependency'), 59 | 425: ('unordered_collection', 'unordered'), 60 | 426: ('upgrade_required', 'upgrade'), 61 | 428: ('precondition_required', 'precondition'), 62 | 429: ('too_many_requests', 'too_many'), 63 | 431: ('header_fields_too_large', 'fields_too_large'), 64 | 444: ('no_response', 'none'), 65 | 449: ('retry_with', 'retry'), 66 | 450: ('blocked_by_windows_parental_controls', 'parental_controls'), 67 | 451: ('unavailable_for_legal_reasons', 'legal_reasons'), 68 | 499: ('client_closed_request',), 69 | 70 | # Server Error. 71 | 500: ('internal_server_error', 'server_error', '/o\\', '✗'), 72 | 501: ('not_implemented',), 73 | 502: ('bad_gateway',), 74 | 503: ('service_unavailable', 'unavailable'), 75 | 504: ('gateway_timeout',), 76 | 505: ('http_version_not_supported', 'http_version'), 77 | 506: ('variant_also_negotiates',), 78 | 507: ('insufficient_storage',), 79 | 509: ('bandwidth_limit_exceeded', 'bandwidth'), 80 | 510: ('not_extended',), 81 | } 82 | 83 | codes = LookupDict(name='status_codes') 84 | 85 | for (code, titles) in list(_codes.items()): 86 | for title in titles: 87 | setattr(codes, title, code) 88 | if not title.startswith('\\'): 89 | setattr(codes, title.upper(), code) 90 | -------------------------------------------------------------------------------- /myrequests/structures.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | requests.structures 5 | ~~~~~~~~~~~~~~~~~~~ 6 | 7 | Data structures that power Requests. 8 | 9 | """ 10 | 11 | import collections 12 | 13 | 14 | class CaseInsensitiveDict(collections.MutableMapping): 15 | """ 16 | A case-insensitive ``dict``-like object. 17 | 18 | Implements all methods and operations of 19 | ``collections.MutableMapping`` as well as dict's ``copy``. Also 20 | provides ``lower_items``. 21 | 22 | All keys are expected to be strings. The structure remembers the 23 | case of the last key to be set, and ``iter(instance)``, 24 | ``keys()``, ``items()``, ``iterkeys()``, and ``iteritems()`` 25 | will contain case-sensitive keys. However, querying and contains 26 | testing is case insensitive:: 27 | 28 | cid = CaseInsensitiveDict() 29 | cid['Accept'] = 'application/json' 30 | cid['aCCEPT'] == 'application/json' # True 31 | list(cid) == ['Accept'] # True 32 | 33 | For example, ``headers['content-encoding']`` will return the 34 | value of a ``'Content-Encoding'`` response header, regardless 35 | of how the header name was originally stored. 36 | 37 | If the constructor, ``.update``, or equality comparison 38 | operations are given keys that have equal ``.lower()``s, the 39 | behavior is undefined. 40 | 41 | """ 42 | def __init__(self, data=None, **kwargs): 43 | self._store = dict() 44 | if data is None: 45 | data = {} 46 | self.update(data, **kwargs) 47 | 48 | def __setitem__(self, key, value): 49 | # Use the lowercased key for lookups, but store the actual 50 | # key alongside the value. 51 | self._store[key.lower()] = (key, value) 52 | 53 | def __getitem__(self, key): 54 | return self._store[key.lower()][1] 55 | 56 | def __delitem__(self, key): 57 | del self._store[key.lower()] 58 | 59 | def __iter__(self): 60 | return (casedkey for casedkey, mappedvalue in self._store.values()) 61 | 62 | def __len__(self): 63 | return len(self._store) 64 | 65 | def lower_items(self): 66 | """Like iteritems(), but with all lowercase keys.""" 67 | return ( 68 | (lowerkey, keyval[1]) 69 | for (lowerkey, keyval) 70 | in self._store.items() 71 | ) 72 | 73 | def __eq__(self, other): 74 | if isinstance(other, collections.Mapping): 75 | other = CaseInsensitiveDict(other) 76 | else: 77 | return NotImplemented 78 | # Compare insensitively 79 | return dict(self.lower_items()) == dict(other.lower_items()) 80 | 81 | # Copy is required 82 | def copy(self): 83 | return CaseInsensitiveDict(self._store.values()) 84 | 85 | def __repr__(self): 86 | return str(dict(self.items())) 87 | 88 | class LookupDict(dict): 89 | """Dictionary lookup object.""" 90 | 91 | def __init__(self, name=None): 92 | self.name = name 93 | super(LookupDict, self).__init__() 94 | 95 | def __repr__(self): 96 | return '' % (self.name) 97 | 98 | def __getitem__(self, key): 99 | # We allow fall-through here, so values default to None 100 | 101 | return self.__dict__.get(key, None) 102 | 103 | def get(self, key, default=None): 104 | return self.__dict__.get(key, default) 105 | -------------------------------------------------------------------------------- /providers.txt: -------------------------------------------------------------------------------- 1 | http://rx14.co.uk/ipfs/ 2 | https://ipfs.io/ipfs/ 3 | https://xmine128.tk/ipfs/ 4 | https://upload.global/ipfs/ 5 | https://ipfs.jes.xxx/ipfs/ 6 | https://siderus.io/ipfs/ 7 | -------------------------------------------------------------------------------- /pyhp_server.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import http.server 4 | import urllib.parse as urlparse 5 | from io import StringIO 6 | from socketserver import ThreadingMixIn 7 | 8 | PATH = "web" 9 | write_header = """__out='' 10 | def write(text): 11 | global __out 12 | __out += text 13 | 14 | """ 15 | context = {'__out': ''} 16 | 17 | 18 | mypath = os.path.join(os.path.dirname(os.path.abspath(__file__)), "lib") 19 | sys.path.append(mypath) 20 | 21 | 22 | class ThreadingHTTPServer(ThreadingMixIn, http.server.HTTPServer): 23 | pass 24 | 25 | 26 | class Handler(http.server.BaseHTTPRequestHandler): 27 | 28 | def handle_error(self, code, message): 29 | self.send_response(code) 30 | self.wfile.write(bytes(""" 31 | 32 | 33 | %d : %s 34 | 35 | 36 |

%d : %s

37 | 38 | 39 | """ % (code, message, code, message), "UTF-8")) 40 | 41 | def do_GET(self): 42 | global PATH, context 43 | self.do_POST() 44 | 45 | def do_POST(self): 46 | global PATH, context 47 | mypath = self.path.split('?', 1) 48 | if mypath[0] == "/": 49 | mypath[0] = "/index.pyhp" 50 | filename = PATH + mypath[0] 51 | print(filename) 52 | data = "" 53 | args = {} 54 | if 'Content-Length' in self.headers.keys(): 55 | length = int(self.headers['Content-Length']) 56 | args = urlparse.parse_qs(self.rfile.read(length).decode('utf-8')) 57 | elif len(mypath) > 1: 58 | args = urlparse.parse_qs(mypath[1]) 59 | try: 60 | with open(filename, "r") as fp: 61 | data = fp.read() 62 | except Exception: 63 | return self.handle_error(404, "file %s not found" % filename) 64 | self.send_response(200) 65 | #self.send_header("Content-type", "text/html") 66 | self.end_headers() 67 | context['args'] = args 68 | self.wfile.write(bytes(parse_file(data, context),"UTF-8")) 69 | 70 | 71 | def run_while_true(port=8080, server_class=ThreadingHTTPServer, 72 | handler_class=Handler): 73 | """ 74 | This assumes that keep_running() is a function of no arguments which 75 | is tested initially and after each request. If its return value 76 | is true, the server continues. 77 | """ 78 | server_address = ('', port) 79 | httpd = server_class(server_address, handler_class) 80 | while True: 81 | httpd.handle_request() 82 | 83 | 84 | def parse_file(text, context): 85 | i = 0 86 | mode = "html" 87 | open_index = -1 88 | while(i < len(text)): 89 | if mode == "html": 90 | if text[i] == "<": 91 | if text[i + 1] == "?": 92 | i = i + 1 93 | mode = "pyhp" 94 | open_index = i + 1 95 | if mode == "pyhp": 96 | if text[i] == "?": 97 | if text[i + 1] == ">": 98 | # print text[open_index:i] 99 | ret = compile(write_header + text[open_index:i], "", "exec") 100 | context['out'] = "" 101 | try: 102 | exec(ret, context, {}) 103 | except Exception as E: 104 | return str(E) 105 | text = text[:open_index - 2] + context['__out'] + text[i + 2:] 106 | return parse_file(text, context) 107 | i = i + 1 108 | return text 109 | 110 | 111 | if __name__ == "__main__": 112 | if sys.argv[1:]: 113 | port = int(sys.argv[1]) 114 | else: 115 | port = 8000 116 | run_while_true(port) 117 | -------------------------------------------------------------------------------- /pymultihash/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .pyMultiHash import * 3 | -------------------------------------------------------------------------------- /pymultihash/base58.py: -------------------------------------------------------------------------------- 1 | """ base58 encoding / decoding functions """ 2 | """Shamelessly stolen from https://gist.github.com/ianoxley/865912""" 3 | 4 | alphabet = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz' 5 | base_count = len(alphabet) 6 | 7 | def encode(num): 8 | """ Returns num in a base58-encoded string """ 9 | encode = '' 10 | 11 | if (num < 0): 12 | return '' 13 | 14 | while (num >= base_count): 15 | mod = num % base_count 16 | encode = alphabet[mod] + encode 17 | num = num // base_count 18 | 19 | if (num): 20 | encode = alphabet[num] + encode 21 | 22 | return encode 23 | 24 | def decode(s): 25 | """ Decodes the base58-encoded string s into an integer """ 26 | decoded = 0 27 | multi = 1 28 | s = s[::-1] 29 | for char in s: 30 | decoded += multi * alphabet.index(char) 31 | multi = multi * base_count 32 | 33 | return decoded 34 | -------------------------------------------------------------------------------- /pymultihash/pyMultiHash.py: -------------------------------------------------------------------------------- 1 | """ 2 | pyMultihash is a python implementation of the Multihash standard: https://github.com/jbenet/multihash 3 | 4 | """ 5 | 6 | import hashlib 7 | from . import base58 8 | import binascii 9 | 10 | """ 11 | These first two methods are kinda inefficient, but python is not really designed to mess with bytes 12 | """ 13 | def int_to_byte_array(big_int): 14 | array = [] 15 | while big_int > 1: 16 | array.append(big_int%256) 17 | big_int = big_int // 256 18 | return array[::-1] 19 | 20 | def bytes_to_long(bytestr): 21 | assert(len(bytestr)>0) 22 | thing = bytes(bytestr) 23 | return int( binascii.hexlify(thing), 16) 24 | 25 | 26 | """ 27 | the main event! 28 | """ 29 | def parseHash(hashstr): 30 | hashint = base58.decode(hashstr) 31 | hashbytes = int_to_byte_array(hashint) 32 | if len(hashbytes) < 3: 33 | raise Exception("Multihash must be at least 3 bytes") 34 | 35 | hash_func_id = hashbytes[0] 36 | hash_length = int(hashbytes[1]) 37 | hash_contents = hashbytes[2:hash_length+2] 38 | 39 | return bytes_to_long(hash_contents) 40 | 41 | def genHash(bytestr,func_id): 42 | hashfunc = None 43 | if func_id == 0x11: 44 | #function is sha1 45 | hashfunc = hashlib.sha1() 46 | elif func_id == 0x12: 47 | #function is sha256 48 | hashfunc = hashlib.sha256() 49 | elif func_id == 0x13: 50 | #function is sha512 51 | hashfunc = hashlib.sha512() 52 | else: 53 | raise Exception("Requested hash is not supported") 54 | bytestr = bytes(bytestr,"UTF-8") 55 | hashfunc.update(bytestr) 56 | data = hashfunc.digest() 57 | size = hashfunc.digest_size 58 | bytestr = b''+func_id.to_bytes(1,"big")+size.to_bytes(1,"big")+data 59 | return base58.encode(bytes_to_long(bytestr)) 60 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | CacheWarmer 2 | ======================= 3 | 4 | CacheWarmer is a fast and hacky solution to allow people to donate ipfs gateways to cache other people's content. 5 | 6 | It practically just wgets the requested hash on a list of addresses (and aborts to avoid getting your file gumming up my ram) 7 | 8 | If you want to donate your public ipfs gateway, make a PR adding it to providers.txt 9 | 10 | If you want to run your own instance of cachewarmer (you will have to manually pull to get updates), it should only require python3. 11 | 12 | After cloning the repo, in a screen session run: 13 | 14 | ``` 15 | python3 pyhp_server.py 8001 16 | 17 | ``` 18 | -------------------------------------------------------------------------------- /web/cacheit.pyhp: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 |
    6 | 5: 15 | selection = random.sample(lines,5) 16 | else: 17 | selection = lines 18 | for l in selection: 19 | l = l.rstrip() 20 | r = None 21 | try: 22 | print("polling",l) 23 | r = requests.get(l+addr, timeout=(0.5, .5)) 24 | except Exception: 25 | pass 26 | print("done polling",l) 27 | write("
  • "+l+addr+" has been polled
  • ") 28 | 29 | if "indexit" in args.keys() and args["indexit"][0]=="on": 30 | import indexit 31 | import json 32 | bloom = indexit.indexFile(addr) 33 | 34 | if bloom: 35 | index = {} 36 | with open("index.json","r") as fp: 37 | index = json.load(fp) 38 | with open("index.json","w") as fp: 39 | if str(bloom) not in index.keys(): 40 | index[str(bloom)]=addr 41 | json.dump(index,fp) 42 | write("

    %s has been indexed with filter %d using %d/256 ones

    "%( addr, bloom, indexit.onecount(bloom))) 43 | else: 44 | print("bloom was none") 45 | 46 | 47 | ?> 48 | 49 |
50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /web/css/main.css: -------------------------------------------------------------------------------- 1 | .content 2 | { 3 | min-width: 500px; 4 | } 5 | 6 | .window_wrapper 7 | { 8 | width: 800px; 9 | height: 420px; 10 | background-color: rgba(255,255,255,0.92); 11 | border: 2px solid #457BF7; 12 | border-radius: 5px; 13 | position: absolute; 14 | left: 0px; 15 | right: 0px; 16 | top: 0px; 17 | bottom: 0px; 18 | margin: auto; 19 | overflow: hidden; 20 | } 21 | -------------------------------------------------------------------------------- /web/css/main.css~: -------------------------------------------------------------------------------- 1 | .content 2 | { 3 | min-width: 500px; 4 | } 5 | 6 | .window_wrapper 7 | { 8 | width: 800px; 9 | height: 420px; 10 | background-color: rgba(255,255,255,0.92); 11 | border: 2px solid #457BF7; 12 | border-radius: 5px; 13 | position: absolute; 14 | left: 0px; 15 | right: 0px; 16 | top: 0px; 17 | bottom: 0px; 18 | margin: auto; 19 | overflow: hidden; 20 | } 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /web/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |

Pin Box

4 |

5 | This site offers a simple way to donate ipfs caching to otherusers. 6 | Feel free to add your own public gateway by creating an issue or pull request at the 7 | Github Repo 8 | 9 |

10 | 11 |
12 | IPFS Hash:
13 | 14 | 15 |
16 |

17 | It is important to note, that this essentially just sends a request to each of the donated gateways. 18 | The owners of the gateways have donated thier resources, and may not be reliable. 19 | In fact I'm not going to promise that any of this works. 20 | 21 |

22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /web/index.pyhp: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 8 | 9 | 10 | 11 | 12 | 13 |

CacheWarmer

14 |

15 | This site offers a simple way to donate ipfs caching to otherusers. 16 | Feel free to add your own public gateway by creating an issue or pull request at the 17 | Github Repo 18 | 19 |

20 | 21 |

22 | Current Providers are: 23 |

    24 | "+l+"") 28 | 29 | ?> 30 |
31 | 32 |

33 | 34 |
35 | IPFS Hash: 36 | 37 |
38 | Index it: 39 |
40 | 41 |
42 |

43 | It is important to note, that this essentially just sends a request to each of the donated gateways. 44 | The owners of the gateways have donated thier resources, and may not be reliable. 45 | In fact I'm not going to promise that any of this works. 46 | 47 |

48 | 49 |

Experimental Search:

50 |
51 | Search Terms 52 | 53 |
54 | 55 |
56 |

**Note that this is kinda experimental, uses bloom filters, and has false positives

57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /web/js/live_bg.js: -------------------------------------------------------------------------------- 1 | $(document).ready(function() { 2 | init(); 3 | //$('.bg_canvas').css('display', 'none'); 4 | }); 5 | 6 | var rad = 250; //minimal radius 7 | var trad = 550; //maximal radius 8 | var pts = []; //points array 9 | var c; //canvas 10 | var $$; //context 11 | 12 | var cww = window.innerWidth; 13 | var cwh = window.innerHeight; 14 | 15 | var offsetX = cww / 2; //x offset 16 | var offsetY = cwh / 2; //y offset 17 | var max = 65; //max object parts 18 | 19 | function init() { 20 | c = document.getElementById('bg_canvas'); 21 | //c.height = window.innerHeight; 22 | 23 | $$ = c.getContext('2d'); 24 | var angle = 0; 25 | var speed = 0; 26 | var dist = 0; 27 | for (var i = 0; i < 120; ++i) { 28 | angle = Math.random() * Math.PI * 2; 29 | speed = Math.random() * 2; 30 | dist = Math.random() * rad; 31 | pts.push({ 32 | x:Math.sin(angle) * dist, 33 | y:Math.cos(angle) * dist, 34 | incx:Math.sin(angle) * speed, 35 | incy:Math.cos(angle) * speed, 36 | speed:speed 37 | }); 38 | } 39 | draw(); 40 | } 41 | 42 | function In() { 43 | trad = 160; 44 | } 45 | 46 | function Out() { 47 | trad = 300; 48 | } 49 | 50 | function draw() { 51 | rad += (trad - rad) * .01; 52 | 53 | var i = 0; 54 | var j = 0; 55 | var l = pts.length; 56 | var part = null; 57 | var part2 = null; 58 | var dx = 0; 59 | var dy = 0; 60 | //canvasWidth = window.innerWidth; 61 | //canvasHeight = window.innerHeight; 62 | 63 | var canvasWidth = 1920; 64 | var canvasHeight = 1080; 65 | 66 | $$.fillStyle = "rgba(255, 255, 255, 1)"; 67 | $$.fillRect(0, 0, canvasHeight * 2, canvasWidth * 3); 68 | $$.strokeStyle = 'rgba(15, 91, 214, 1)'; 69 | 70 | for (i = 0; i < l; ++i) { 71 | part = pts[i]; 72 | for (j = i + 1; j < l; ++j) { 73 | part2 = pts[j]; 74 | dx = part.x - part2.x; 75 | dy = part.y - part2.y; 76 | var dif = Math.sqrt(dx * dx + dy * dy); 77 | if (dif < max) { 78 | $$.lineWidth = (max - dif) * 0.05; 79 | $$.beginPath(); 80 | $$.moveTo(offsetX + part.x * 2, offsetY + part.y * 2); 81 | $$.lineTo(offsetX + part2.x * 2, offsetY + part2.y * 2); 82 | $$.stroke(); 83 | } 84 | } 85 | 86 | // move current obj 87 | part.x += part.incx; 88 | part.y += part.incy; 89 | 90 | var ptDist = Math.sqrt((part.x * part.x) + (part.y * part.y)) 91 | if (ptDist > rad) { 92 | var mp = ( 1 / ptDist ) * 100; 93 | part.x = -part.x * mp; 94 | part.y = -part.y * mp; 95 | part.incx = (Math.random() - 0.5) * part.speed; 96 | part.incy = (Math.random() - 0.5) * part.speed; 97 | } 98 | } 99 | requestAnimFrame(draw); 100 | } 101 | 102 | window.requestAnimFrame = (function() { 103 | return window.requestAnimationFrame || 104 | window.webkitRequestAnimationFrame || 105 | window.mozRequestAnimationFrame || 106 | window.oRequestAnimationFrame || 107 | window.msRequestAnimationFrame || 108 | function(callback, element) { 109 | window.setTimeout(callback, 2000 / 60); 110 | }; 111 | })(); 112 | -------------------------------------------------------------------------------- /web/searchit.pyhp: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Results: 6 |
    7 | 0): 25 | for res in output: 26 | write("""
  • %s
  • """%(res,res)) 27 | else: 28 | write("

    Sorry, no hits

    ") 29 | 30 | 31 | ?> 32 | 33 |
34 | 35 | 36 | 37 | --------------------------------------------------------------------------------