├── .gitignore
├── index.json
├── indexit.py
├── myrequests
├── __init__.py
├── adapters.py
├── api.py
├── auth.py
├── cacert.pem
├── certs.py
├── compat.py
├── cookies.py
├── exceptions.py
├── hooks.py
├── models.py
├── packages
│ ├── README.rst
│ ├── __init__.py
│ ├── chardet
│ │ ├── __init__.py
│ │ ├── big5freq.py
│ │ ├── big5prober.py
│ │ ├── chardetect.py
│ │ ├── chardistribution.py
│ │ ├── charsetgroupprober.py
│ │ ├── charsetprober.py
│ │ ├── codingstatemachine.py
│ │ ├── compat.py
│ │ ├── constants.py
│ │ ├── cp949prober.py
│ │ ├── escprober.py
│ │ ├── escsm.py
│ │ ├── eucjpprober.py
│ │ ├── euckrfreq.py
│ │ ├── euckrprober.py
│ │ ├── euctwfreq.py
│ │ ├── euctwprober.py
│ │ ├── gb2312freq.py
│ │ ├── gb2312prober.py
│ │ ├── hebrewprober.py
│ │ ├── jisfreq.py
│ │ ├── jpcntx.py
│ │ ├── langbulgarianmodel.py
│ │ ├── langcyrillicmodel.py
│ │ ├── langgreekmodel.py
│ │ ├── langhebrewmodel.py
│ │ ├── langhungarianmodel.py
│ │ ├── langthaimodel.py
│ │ ├── latin1prober.py
│ │ ├── mbcharsetprober.py
│ │ ├── mbcsgroupprober.py
│ │ ├── mbcssm.py
│ │ ├── sbcharsetprober.py
│ │ ├── sbcsgroupprober.py
│ │ ├── sjisprober.py
│ │ ├── universaldetector.py
│ │ └── utf8prober.py
│ └── urllib3
│ │ ├── __init__.py
│ │ ├── _collections.py
│ │ ├── connection.py
│ │ ├── connectionpool.py
│ │ ├── connectionpool.py~
│ │ ├── contrib
│ │ ├── __init__.py
│ │ ├── ntlmpool.py
│ │ └── pyopenssl.py
│ │ ├── exceptions.py
│ │ ├── fields.py
│ │ ├── filepost.py
│ │ ├── packages
│ │ ├── __init__.py
│ │ ├── ordered_dict.py
│ │ ├── six.py
│ │ └── ssl_match_hostname
│ │ │ ├── __init__.py
│ │ │ └── _implementation.py
│ │ ├── poolmanager.py
│ │ ├── request.py
│ │ ├── response.py
│ │ └── util
│ │ ├── __init__.py
│ │ ├── connection.py
│ │ ├── request.py
│ │ ├── response.py
│ │ ├── retry.py
│ │ ├── ssl_.py
│ │ ├── timeout.py
│ │ └── url.py
├── sessions.py
├── status_codes.py
├── structures.py
└── utils.py
├── providers.txt
├── pyhp_server.py
├── pymultihash
├── __init__.py
├── base58.py
└── pyMultiHash.py
├── readme.md
└── web
├── cacheit.pyhp
├── css
├── main.css
└── main.css~
├── index.html
├── index.pyhp
├── js
└── live_bg.js
└── searchit.pyhp
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/*
2 | *.pyc
3 |
--------------------------------------------------------------------------------
/index.json:
--------------------------------------------------------------------------------
1 | {"247940973485701776233985278752134197441944278805763437625145466167693730995444243438226614686605966479783911911996581878271923915637803741338416641610741239559262819947011238740134130313980511122350382048892227490289890084566802467610334359632997812141965131758284948715859770985812894985921710465263132240674766285595154613536524252194884890217922002050210870166744686477405957132438088180508354900132687284119912120786878482238512930053732296817721373564454808178461189879135868479116859955661811119507610265849435135932338825491685587634584796718256593241893564973945616470443599542612899069648352258652151810380975563158421876455745052846118094939082635782441474986264995056655326928252930161911709570059183186718288712625942616127340986835098518340902113509786553902": "QmUvYGTjMKarxnjg7Gg9AV8dF2eHBbgqVvtDdaL7ujm62Y"}
--------------------------------------------------------------------------------
/indexit.py:
--------------------------------------------------------------------------------
1 | import pymultihash as pmh
2 | import re
3 | from bs4 import BeautifulSoup
4 | import myrequests as requests
5 | import base64
6 |
7 | IPFSGateway = "http://blamestross.com/ipfs/"
8 |
9 | INDEX_PATH = "index.json"
10 |
11 |
def onecount(bloomint):
    """Return the number of set bits (population count) of *bloomint*.

    :param bloomint: arbitrary-precision integer bloom filter value.
    :return: count of 1-bits; 0 for any non-positive input (matching the
        original loop, which never entered for values <= 0).
    """
    if bloomint <= 0:
        return 0
    # bin() renders the integer in base 2; counting '1' digits is the
    # popcount (int.bit_count() only exists on Python 3.10+).
    return bin(bloomint).count('1')
18 |
19 |
def generateBloomFilter(wordlist):
    """Build a bloom-filter integer over *wordlist*.

    For every word, ten 256-bit slices are derived from a chained multihash
    (each slice is the AND of ten successive hashes, which thins its bits)
    and concatenated into a per-word pattern; the filter is the OR of all
    per-word patterns.

    Fixes: the original reused ``j`` both as a per-word counter and as the
    inner loop variable, so the counter was dead and the error report could
    print a stale round number carried over from a previous word.

    :param wordlist: iterable of strings to index.
    :return: arbitrary-precision int whose set bits encode the words.
    """
    bloom = 0
    for word in wordlist:
        word_bits = 0
        hash_val = pmh.genHash(word, 0x12)
        for slice_no in range(10):
            round_no = 0  # visible to the error report below
            try:
                slice_bits = 2**256 - 1
                for round_no in range(10):
                    # AND successive hashes so each slice keeps few bits set.
                    slice_bits &= pmh.parseHash(hash_val)
                    hash_val = pmh.genHash(hash_val, 0x12)
                word_bits = (word_bits << 256) | slice_bits

            except Exception as e:
                # Best-effort: report the failure and advance the hash chain
                # so the remaining slices can still be computed.
                print("error ", e)
                print(hash_val, word, slice_no, round_no, len(wordlist))
                hash_val = pmh.genHash(hash_val, 0x12)
        bloom |= word_bits

    return bloom
42 |
43 |
def wordInFilter(bloomInt, testWord):
    """Test whether *testWord* may be present in the filter *bloomInt*.

    The per-word bit pattern is rebuilt with ``generateBloomFilter`` so the
    membership probe uses exactly the bits that indexing set. The previous
    implementation checked a single raw 256-bit hash, but indexing never
    sets those bits directly (it ORs concatenated AND-ed hash slices), so
    words written by ``generateBloomFilter`` were not reliably found.

    :param bloomInt: filter built by :func:`generateBloomFilter`.
    :param testWord: word to probe for.
    :return: True if the word may be in the filter (bloom semantics: false
        positives possible, false negatives not).
    """
    hashInt = generateBloomFilter([testWord])
    return (bloomInt & hashInt) == hashInt
48 |
49 |
def filterInFilter(bloomInt, testInt):
    """True when every set bit of *testInt* is also set in *bloomInt*."""
    # testInt is a subset of bloomInt exactly when OR-ing it in is a no-op.
    return (bloomInt | testInt) == bloomInt
52 |
53 |
def tokenizeHTML(html):
    """Extract the set of searchable words from an HTML document.

    :param html: HTML source text.
    :return: list of unique, lower-cased words of length > 1, in no
        particular order.
    """
    raw = BeautifulSoup(html, 'html.parser').get_text()
    # str.split() with no argument splits on any whitespace run (including
    # \r and \f, which the old r'[ \n\t]' regex missed) and never yields
    # empty tokens, so no per-token strip() is needed.
    words = (token.lower() for token in raw.split())
    return list({word for word in words if len(word) > 1})
59 |
60 |
def indexFile(IPFSHash):
    """Fetch a document from the IPFS gateway and build its bloom filter.

    :param IPFSHash: content hash appended to ``IPFSGateway`` to form a URL.
    :return: bloom-filter integer over the document's words.
    """
    url = IPFSGateway + IPFSHash
    response = requests.get(url)
    print("got request")
    return generateBloomFilter(tokenizeHTML(response.text))
68 |
--------------------------------------------------------------------------------
/myrequests/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # __
4 | # /__) _ _ _ _ _/ _
5 | # / ( (- (/ (/ (- _) / _)
6 | # /
7 |
8 | """
9 | Requests HTTP library
10 | ~~~~~~~~~~~~~~~~~~~~~
11 |
12 | Requests is an HTTP library, written in Python, for human beings. Basic GET
13 | usage:
14 |
15 | >>> import requests
16 | >>> r = requests.get('https://www.python.org')
17 | >>> r.status_code
18 | 200
19 | >>> 'Python is a programming language' in r.content
20 | True
21 |
22 | ... or POST:
23 |
24 | >>> payload = dict(key1='value1', key2='value2')
25 | >>> r = requests.post('http://httpbin.org/post', data=payload)
26 | >>> print(r.text)
27 | {
28 | ...
29 | "form": {
30 | "key2": "value2",
31 | "key1": "value1"
32 | },
33 | ...
34 | }
35 |
36 | The other HTTP methods are supported - see `requests.api`. Full documentation
37 | is at <http://python-requests.org>.
38 |
39 | :copyright: (c) 2015 by Kenneth Reitz.
40 | :license: Apache 2.0, see LICENSE for more details.
41 |
42 | """
43 |
44 | __title__ = 'requests'
45 | __version__ = '2.7.0'
46 | __build__ = 0x020700
47 | __author__ = 'Kenneth Reitz'
48 | __license__ = 'Apache 2.0'
49 | __copyright__ = 'Copyright 2015 Kenneth Reitz'
50 |
51 | # Attempt to enable urllib3's SNI support, if possible
52 | try:
53 | from .packages.urllib3.contrib import pyopenssl
54 | pyopenssl.inject_into_urllib3()
55 | except ImportError:
56 | pass
57 |
58 | from . import utils
59 | from .models import Request, Response, PreparedRequest
60 | from .api import request, get, head, post, patch, put, delete, options
61 | from .sessions import session, Session
62 | from .status_codes import codes
63 | from .exceptions import (
64 | RequestException, Timeout, URLRequired,
65 | TooManyRedirects, HTTPError, ConnectionError
66 | )
67 |
68 | # Set default logging handler to avoid "No handler found" warnings.
69 | import logging
70 | try: # Python 2.7+
71 | from logging import NullHandler
72 | except ImportError:
73 | class NullHandler(logging.Handler):
74 | def emit(self, record):
75 | pass
76 |
77 | logging.getLogger(__name__).addHandler(NullHandler())
78 |
--------------------------------------------------------------------------------
/myrequests/api.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | """
4 | requests.api
5 | ~~~~~~~~~~~~
6 |
7 | This module implements the Requests API.
8 |
9 | :copyright: (c) 2012 by Kenneth Reitz.
10 | :license: Apache2, see LICENSE for more details.
11 |
12 | """
13 |
14 | from . import sessions
15 |
16 |
def request(method, url, **kwargs):
    """Constructs and sends a :class:`Request <Request>`.

    :param method: method for the new :class:`Request` object.
    :param url: URL for the new :class:`Request` object.
    :param params: (optional) Dictionary or bytes to be sent in the query string for the :class:`Request`.
    :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`.
    :param json: (optional) json data to send in the body of the :class:`Request`.
    :param headers: (optional) Dictionary of HTTP Headers to send with the :class:`Request`.
    :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`.
    :param files: (optional) Dictionary of ``'name': file-like-objects`` (or ``{'name': ('filename', fileobj)}``) for multipart encoding upload.
    :param auth: (optional) Auth tuple to enable Basic/Digest/Custom HTTP Auth.
    :param timeout: (optional) How long to wait for the server to send data
        before giving up, as a float, or a (connect timeout, read timeout) tuple.
    :type timeout: float or tuple
    :param allow_redirects: (optional) Boolean. Set to True if POST/PUT/DELETE redirect following is allowed.
    :type allow_redirects: bool
    :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy.
    :param verify: (optional) if ``True``, the SSL cert will be verified. A CA_BUNDLE path can also be provided.
    :param stream: (optional) if ``False``, the response content will be immediately downloaded.
    :param cert: (optional) if String, path to ssl client cert file (.pem). If Tuple, ('cert', 'key') pair.
    :return: :class:`Response <Response>` object
    :rtype: requests.Response

    Usage::

      >>> import requests
      >>> req = requests.request('GET', 'http://httpbin.org/get')

    """

    # A throwaway Session gives this one-shot call full cookie/redirect
    # handling without leaking state between calls.
    session = sessions.Session()
    response = session.request(method=method, url=url, **kwargs)
    # By explicitly closing the session, we avoid leaving sockets open which
    # can trigger a ResourceWarning in some cases, and look like a memory leak
    # in others.
    session.close()
    return response
56 |
57 |
def get(url, params=None, **kwargs):
    """Send a GET request.

    :param url: URL for the new :class:`Request` object.
    :param params: (optional) dictionary or bytes sent in the query string.
    :param \*\*kwargs: optional arguments that ``request`` takes.
    :return: :class:`Response <Response>` object
    :rtype: requests.Response
    """
    # GET follows redirects unless the caller explicitly opts out.
    if 'allow_redirects' not in kwargs:
        kwargs['allow_redirects'] = True
    return request('get', url, params=params, **kwargs)
70 |
71 |
def options(url, **kwargs):
    """Send an OPTIONS request.

    :param url: URL for the new :class:`Request` object.
    :param \*\*kwargs: optional arguments that ``request`` takes.
    :return: :class:`Response <Response>` object
    :rtype: requests.Response
    """
    # OPTIONS follows redirects unless the caller explicitly opts out.
    if 'allow_redirects' not in kwargs:
        kwargs['allow_redirects'] = True
    return request('options', url, **kwargs)
83 |
84 |
def head(url, **kwargs):
    """Send a HEAD request.

    :param url: URL for the new :class:`Request` object.
    :param \*\*kwargs: optional arguments that ``request`` takes.
    :return: :class:`Response <Response>` object
    :rtype: requests.Response
    """
    # HEAD does not follow redirects by default.
    if 'allow_redirects' not in kwargs:
        kwargs['allow_redirects'] = False
    return request('head', url, **kwargs)
96 |
97 |
def post(url, data=None, json=None, **kwargs):
    """Send a POST request.

    :param url: URL for the new :class:`Request` object.
    :param data: (optional) dictionary, bytes, or file-like object for the body.
    :param json: (optional) json data to send in the body.
    :param \*\*kwargs: optional arguments that ``request`` takes.
    :return: :class:`Response <Response>` object
    :rtype: requests.Response
    """
    response = request('post', url, data=data, json=json, **kwargs)
    return response
110 |
111 |
def put(url, data=None, **kwargs):
    """Send a PUT request.

    :param url: URL for the new :class:`Request` object.
    :param data: (optional) dictionary, bytes, or file-like object for the body.
    :param \*\*kwargs: optional arguments that ``request`` takes.
    :return: :class:`Response <Response>` object
    :rtype: requests.Response
    """
    response = request('put', url, data=data, **kwargs)
    return response
123 |
124 |
def patch(url, data=None, **kwargs):
    """Send a PATCH request.

    :param url: URL for the new :class:`Request` object.
    :param data: (optional) dictionary, bytes, or file-like object for the body.
    :param \*\*kwargs: optional arguments that ``request`` takes.
    :return: :class:`Response <Response>` object
    :rtype: requests.Response
    """
    response = request('patch', url, data=data, **kwargs)
    return response
136 |
137 |
def delete(url, **kwargs):
    """Send a DELETE request.

    :param url: URL for the new :class:`Request` object.
    :param \*\*kwargs: optional arguments that ``request`` takes.
    :return: :class:`Response <Response>` object
    :rtype: requests.Response
    """
    response = request('delete', url, **kwargs)
    return response
148 |
--------------------------------------------------------------------------------
/myrequests/auth.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | """
4 | requests.auth
5 | ~~~~~~~~~~~~~
6 |
7 | This module contains the authentication handlers for Requests.
8 | """
9 |
10 | import os
11 | import re
12 | import time
13 | import hashlib
14 |
15 | from base64 import b64encode
16 |
17 | from .compat import urlparse, str
18 | from .cookies import extract_cookies_to_jar
19 | from .utils import parse_dict_header, to_native_string
20 | from .status_codes import codes
21 |
22 | CONTENT_TYPE_FORM_URLENCODED = 'application/x-www-form-urlencoded'
23 | CONTENT_TYPE_MULTI_PART = 'multipart/form-data'
24 |
25 |
def _basic_auth_str(username, password):
    """Returns a Basic Auth string."""
    # RFC 7617: base64 of "user:password"; latin1 mirrors the historical
    # encoding browsers used for this header.
    credentials = ('%s:%s' % (username, password)).encode('latin1')
    token = b64encode(credentials).strip()
    return 'Basic ' + to_native_string(token)
34 |
35 |
class AuthBase(object):
    """Base class that all auth implementations derive from"""

    def __call__(self, r):
        # Subclasses must attach their credentials to *r* and return it.
        raise NotImplementedError('Auth hooks must be callable.')
41 |
42 |
class HTTPBasicAuth(AuthBase):
    """Attaches HTTP Basic Authentication to the given Request object."""

    def __init__(self, username, password):
        self.username = username
        self.password = password

    def __call__(self, r):
        auth_header = _basic_auth_str(self.username, self.password)
        r.headers['Authorization'] = auth_header
        return r
52 |
53 |
class HTTPProxyAuth(HTTPBasicAuth):
    """Attaches HTTP Proxy Authentication to a given Request object."""

    def __call__(self, r):
        # Same credentials format as Basic auth, but on the proxy header.
        auth_header = _basic_auth_str(self.username, self.password)
        r.headers['Proxy-Authorization'] = auth_header
        return r
59 |
60 |
class HTTPDigestAuth(AuthBase):
    """Attaches HTTP Digest Authentication to the given Request object."""
    def __init__(self, username, password):
        self.username = username
        self.password = password
        # Last server nonce seen; non-empty also means "already authed once".
        self.last_nonce = ''
        # RFC 2617 "nc": how many times the current nonce has been used.
        self.nonce_count = 0
        # Parsed challenge parameters from the WWW-Authenticate header.
        self.chal = {}
        # File position of the request body, for rewinding on 401 retry.
        self.pos = None
        # Guards against looping forever on repeated 401 responses.
        self.num_401_calls = 1

    def build_digest_header(self, method, url):
        """Build the value of the ``Authorization: Digest ...`` header from
        the stored challenge (``self.chal``) per RFC 2617.

        Returns None when the challenge cannot be satisfied (unsupported
        qop, or no usable hash algorithm).
        """
        realm = self.chal['realm']
        nonce = self.chal['nonce']
        qop = self.chal.get('qop')
        algorithm = self.chal.get('algorithm')
        opaque = self.chal.get('opaque')

        if algorithm is None:
            _algorithm = 'MD5'
        else:
            _algorithm = algorithm.upper()
        # lambdas assume digest modules are imported at the top level
        if _algorithm == 'MD5' or _algorithm == 'MD5-SESS':
            def md5_utf8(x):
                if isinstance(x, str):
                    x = x.encode('utf-8')
                return hashlib.md5(x).hexdigest()
            hash_utf8 = md5_utf8
        elif _algorithm == 'SHA':
            def sha_utf8(x):
                if isinstance(x, str):
                    x = x.encode('utf-8')
                return hashlib.sha1(x).hexdigest()
            hash_utf8 = sha_utf8

        # KD ("keyed digest") as defined in RFC 2617 section 3.2.1.
        KD = lambda s, d: hash_utf8("%s:%s" % (s, d))

        # NOTE(review): if the server names an algorithm other than
        # MD5/MD5-SESS/SHA, hash_utf8 is never bound and this check raises
        # NameError rather than returning None — confirm intended behavior.
        if hash_utf8 is None:
            return None

        # XXX not implemented yet
        entdig = None
        p_parsed = urlparse(url)
        #: path is request-uri defined in RFC 2616 which should not be empty
        path = p_parsed.path or "/"
        if p_parsed.query:
            path += '?' + p_parsed.query

        A1 = '%s:%s:%s' % (self.username, realm, self.password)
        A2 = '%s:%s' % (method, path)

        HA1 = hash_utf8(A1)
        HA2 = hash_utf8(A2)

        # Reusing a nonce increments nc; a fresh nonce resets it to 1.
        if nonce == self.last_nonce:
            self.nonce_count += 1
        else:
            self.nonce_count = 1
        ncvalue = '%08x' % self.nonce_count
        # Client nonce: hash of counter + server nonce + time + random bytes.
        s = str(self.nonce_count).encode('utf-8')
        s += nonce.encode('utf-8')
        s += time.ctime().encode('utf-8')
        s += os.urandom(8)

        cnonce = (hashlib.sha1(s).hexdigest()[:16])
        if _algorithm == 'MD5-SESS':
            HA1 = hash_utf8('%s:%s:%s' % (HA1, nonce, cnonce))

        if qop is None:
            respdig = KD(HA1, "%s:%s" % (nonce, HA2))
        elif qop == 'auth' or 'auth' in qop.split(','):
            noncebit = "%s:%s:%s:%s:%s" % (
                nonce, ncvalue, cnonce, 'auth', HA2
            )
            respdig = KD(HA1, noncebit)
        else:
            # XXX handle auth-int.
            return None

        self.last_nonce = nonce

        # XXX should the partial digests be encoded too?
        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
            'response="%s"' % (self.username, realm, nonce, path, respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if algorithm:
            base += ', algorithm="%s"' % algorithm
        if entdig:
            base += ', digest="%s"' % entdig
        if qop:
            base += ', qop="auth", nc=%s, cnonce="%s"' % (ncvalue, cnonce)

        return 'Digest %s' % (base)

    def handle_redirect(self, r, **kwargs):
        """Reset num_401_calls counter on redirects."""
        if r.is_redirect:
            self.num_401_calls = 1

    def handle_401(self, r, **kwargs):
        """Takes the given response and tries digest-auth, if needed."""

        if self.pos is not None:
            # Rewind the file position indicator of the body to where
            # it was to resend the request.
            r.request.body.seek(self.pos)
        num_401_calls = getattr(self, 'num_401_calls', 1)
        s_auth = r.headers.get('www-authenticate', '')

        # Only retry once per challenge: a second 401 means bad credentials.
        if 'digest' in s_auth.lower() and num_401_calls < 2:

            self.num_401_calls += 1
            pat = re.compile(r'digest ', flags=re.IGNORECASE)
            self.chal = parse_dict_header(pat.sub('', s_auth, count=1))

            # Consume content and release the original connection
            # to allow our new request to reuse the same one.
            r.content
            r.close()
            prep = r.request.copy()
            extract_cookies_to_jar(prep._cookies, r.request, r.raw)
            prep.prepare_cookies(prep._cookies)

            prep.headers['Authorization'] = self.build_digest_header(
                prep.method, prep.url)
            _r = r.connection.send(prep, **kwargs)
            _r.history.append(r)
            _r.request = prep

            return _r

        self.num_401_calls = 1
        return r

    def __call__(self, r):
        """Attach digest auth (when a nonce is cached) and register the
        401/redirect response hooks on the request."""
        # If we have a saved nonce, skip the 401
        if self.last_nonce:
            r.headers['Authorization'] = self.build_digest_header(r.method, r.url)
        try:
            self.pos = r.body.tell()
        except AttributeError:
            # In the case of HTTPDigestAuth being reused and the body of
            # the previous request was a file-like object, pos has the
            # file position of the previous body. Ensure it's set to
            # None.
            self.pos = None
        r.register_hook('response', self.handle_401)
        r.register_hook('response', self.handle_redirect)
        return r
213 |
--------------------------------------------------------------------------------
/myrequests/certs.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | certs.py
6 | ~~~~~~~~
7 |
8 | This module returns the preferred default CA certificate bundle.
9 |
10 | If you are packaging Requests, e.g., for a Linux distribution or a managed
11 | environment, you can change the definition of where() to return a separately
12 | packaged CA bundle.
13 | """
14 | import os.path
15 |
try:
    # Prefer the certifi package's (regularly updated) CA bundle when it
    # is installed; its where() has the same signature as the fallback.
    from certifi import where
except ImportError:
    def where():
        """Return the preferred certificate bundle."""
        # vendored bundle inside Requests
        return os.path.join(os.path.dirname(__file__), 'cacert.pem')

if __name__ == '__main__':
    # Running this module directly prints the bundle path in use.
    print(where())
26 |
--------------------------------------------------------------------------------
/myrequests/compat.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | """
4 | pythoncompat
5 | """
6 |
7 | from .packages import chardet
8 |
9 | import sys
10 |
11 | # -------
12 | # Pythons
13 | # -------
14 |
15 | # Syntax sugar.
16 | _ver = sys.version_info
17 |
18 | #: Python 2.x?
19 | is_py2 = (_ver[0] == 2)
20 |
21 | #: Python 3.x?
22 | is_py3 = (_ver[0] == 3)
23 |
24 | try:
25 | import simplejson as json
26 | except (ImportError, SyntaxError):
27 | # simplejson does not support Python 3.2, it throws a SyntaxError
28 | # because of u'...' Unicode literals.
29 | import json
30 |
31 | # ---------
32 | # Specifics
33 | # ---------
34 |
35 | if is_py2:
36 | from urllib import quote, unquote, quote_plus, unquote_plus, urlencode, getproxies, proxy_bypass
37 | from urlparse import urlparse, urlunparse, urljoin, urlsplit, urldefrag
38 | from urllib2 import parse_http_list
39 | import cookielib
40 | from Cookie import Morsel
41 | from StringIO import StringIO
42 | from .packages.urllib3.packages.ordered_dict import OrderedDict
43 |
44 | builtin_str = str
45 | bytes = str
46 | str = unicode
47 | basestring = basestring
48 | numeric_types = (int, long, float)
49 |
50 | elif is_py3:
51 | from urllib.parse import urlparse, urlunparse, urljoin, urlsplit, urlencode, quote, unquote, quote_plus, unquote_plus, urldefrag
52 | from urllib.request import parse_http_list, getproxies, proxy_bypass
53 | from http import cookiejar as cookielib
54 | from http.cookies import Morsel
55 | from io import StringIO
56 | from collections import OrderedDict
57 |
58 | builtin_str = str
59 | str = str
60 | bytes = bytes
61 | basestring = (str, bytes)
62 | numeric_types = (int, float)
63 |
--------------------------------------------------------------------------------
/myrequests/exceptions.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | """
4 | requests.exceptions
5 | ~~~~~~~~~~~~~~~~~~~
6 |
7 | This module contains the set of Requests' exceptions.
8 |
9 | """
10 | from .packages.urllib3.exceptions import HTTPError as BaseHTTPError
11 |
12 |
class RequestException(IOError):
    """There was an ambiguous exception that occurred while handling your
    request."""

    def __init__(self, *args, **kwargs):
        """Initialize RequestException, capturing the optional `request`
        and `response` keyword arguments."""
        self.response = kwargs.pop('response', None)
        self.request = kwargs.pop('request', None)
        # Fall back to the request recorded on the response object when no
        # explicit request was supplied.
        no_request = not self.request
        if self.response is not None and no_request and hasattr(self.response, 'request'):
            self.request = self.response.request
        super(RequestException, self).__init__(*args, **kwargs)
28 |
29 |
30 | class HTTPError(RequestException):
31 | """An HTTP error occurred."""
32 |
33 |
34 | class ConnectionError(RequestException):
35 | """A Connection error occurred."""
36 |
37 |
38 | class ProxyError(ConnectionError):
39 | """A proxy error occurred."""
40 |
41 |
42 | class SSLError(ConnectionError):
43 | """An SSL error occurred."""
44 |
45 |
46 | class Timeout(RequestException):
47 | """The request timed out.
48 |
49 | Catching this error will catch both
50 | :exc:`~requests.exceptions.ConnectTimeout` and
51 | :exc:`~requests.exceptions.ReadTimeout` errors.
52 | """
53 |
54 |
55 | class ConnectTimeout(ConnectionError, Timeout):
56 | """The request timed out while trying to connect to the remote server.
57 |
58 | Requests that produced this error are safe to retry.
59 | """
60 |
61 |
62 | class ReadTimeout(Timeout):
63 | """The server did not send any data in the allotted amount of time."""
64 |
65 |
66 | class URLRequired(RequestException):
67 | """A valid URL is required to make a request."""
68 |
69 |
70 | class TooManyRedirects(RequestException):
71 | """Too many redirects."""
72 |
73 |
74 | class MissingSchema(RequestException, ValueError):
75 | """The URL schema (e.g. http or https) is missing."""
76 |
77 |
78 | class InvalidSchema(RequestException, ValueError):
79 | """See defaults.py for valid schemas."""
80 |
81 |
82 | class InvalidURL(RequestException, ValueError):
83 | """ The URL provided was somehow invalid. """
84 |
85 |
86 | class ChunkedEncodingError(RequestException):
87 | """The server declared chunked encoding but sent an invalid chunk."""
88 |
89 |
90 | class ContentDecodingError(RequestException, BaseHTTPError):
91 | """Failed to decode response content"""
92 |
93 |
94 | class StreamConsumedError(RequestException, TypeError):
95 | """The content for this response was already consumed"""
96 |
97 |
98 | class RetryError(RequestException):
99 | """Custom retries logic failed"""
100 |
--------------------------------------------------------------------------------
/myrequests/hooks.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | """
4 | requests.hooks
5 | ~~~~~~~~~~~~~~
6 |
7 | This module provides the capabilities for the Requests hooks system.
8 |
9 | Available hooks:
10 |
11 | ``response``:
12 | The response generated from a Request.
13 |
14 | """
15 |
16 |
17 | HOOKS = ['response']
18 |
19 |
def default_hooks():
    """Return a fresh hook registry: one empty list per known event."""
    return {event: [] for event in HOOKS}
25 |
26 | # TODO: response is the only one
27 |
28 |
def dispatch_hook(key, hooks, hook_data, **kwargs):
    """Dispatches a hook dictionary on a given piece of data.

    Each hook registered under *key* is called in order with *hook_data*;
    a hook may transform the data by returning a non-None value, which is
    then fed to the next hook.
    """
    registered = (hooks or {}).get(key, [])
    # A single bare callable is treated as a one-element hook list.
    if callable(registered):
        registered = [registered]
    for hook in registered:
        result = hook(hook_data, **kwargs)
        if result is not None:
            hook_data = result
    return hook_data
46 |
--------------------------------------------------------------------------------
/myrequests/packages/README.rst:
--------------------------------------------------------------------------------
1 | If you are planning to submit a pull request to requests with any changes in
2 | this library do not go any further. These are independent libraries which we
3 | vendor into requests. Any changes necessary to these libraries must be made in
4 | them and submitted as separate pull requests to those libraries.
5 |
6 | urllib3 pull requests go here: https://github.com/shazow/urllib3
7 |
8 | chardet pull requests go here: https://github.com/chardet/chardet
9 |
--------------------------------------------------------------------------------
/myrequests/packages/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 |
3 | from . import urllib3
4 |
--------------------------------------------------------------------------------
/myrequests/packages/chardet/__init__.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # This library is free software; you can redistribute it and/or
3 | # modify it under the terms of the GNU Lesser General Public
4 | # License as published by the Free Software Foundation; either
5 | # version 2.1 of the License, or (at your option) any later version.
6 | #
7 | # This library is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10 | # Lesser General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Lesser General Public
13 | # License along with this library; if not, write to the Free Software
14 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
15 | # 02110-1301 USA
16 | ######################### END LICENSE BLOCK #########################
17 |
18 | __version__ = "2.3.0"
19 | from sys import version_info
20 |
21 |
def detect(aBuf):
    """Detect the character encoding of *aBuf*.

    :param aBuf: the byte string to examine.
    :return: the detector's result dict (``encoding`` and ``confidence``).
    :raises ValueError: if *aBuf* is a unicode/str object rather than bytes.
    """
    if ((version_info < (3, 0) and isinstance(aBuf, unicode)) or
        (version_info >= (3, 0) and not isinstance(aBuf, bytes))):
        raise ValueError('Expected a bytes object, not a unicode object')

    # Local import keeps 'import chardet' cheap until detection is used.
    from . import universaldetector
    u = universaldetector.UniversalDetector()
    u.reset()
    u.feed(aBuf)
    u.close()
    return u.result
33 |
--------------------------------------------------------------------------------
/myrequests/packages/chardet/big5prober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is Mozilla Communicator client code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import Big5DistributionAnalysis
31 | from .mbcssm import Big5SMModel
32 |
33 |
class Big5Prober(MultiByteCharSetProber):
    """Charset prober for the Big5 (traditional Chinese) encoding."""

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        # State machine that validates Big5 byte sequences.
        self._mCodingSM = CodingStateMachine(Big5SMModel)
        # Distribution analyzer scores character-frequency plausibility.
        self._mDistributionAnalyzer = Big5DistributionAnalysis()
        self.reset()

    def get_charset_name(self):
        # Canonical encoding name reported to callers.
        return "Big5"
43 |
--------------------------------------------------------------------------------
/myrequests/packages/chardet/chardetect.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """
3 | Script which takes one or more file paths and reports on their detected
4 | encodings
5 |
6 | Example::
7 |
8 | % chardetect somefile someotherfile
9 | somefile: windows-1252 with confidence 0.5
10 | someotherfile: ascii with confidence 1.0
11 |
12 | If no paths are provided, it takes its input from stdin.
13 |
14 | """
15 |
16 | from __future__ import absolute_import, print_function, unicode_literals
17 |
18 | import argparse
19 | import sys
20 | from io import open
21 |
22 | from chardet import __version__
23 | from chardet.universaldetector import UniversalDetector
24 |
25 |
def description_of(lines, name='stdin'):
    """
    Return a string describing the probable encoding of a file or
    list of strings.

    :param lines: The lines to get the encoding of.
    :type lines: Iterable of bytes
    :param name: Name of file or collection of lines
    :type name: str
    """
    u = UniversalDetector()
    for line in lines:
        u.feed(line)
        # Stop reading as soon as the detector has reached a conclusion
        # (e.g. right after a BOM) instead of consuming the whole input.
        if u.done:
            break
    u.close()
    result = u.result
    if result['encoding']:
        return '{0}: {1} with confidence {2}'.format(name, result['encoding'],
                                                     result['confidence'])
    else:
        return '{0}: no result'.format(name)
46 |
47 |
def main(argv=None):
    '''
    Handles command line arguments and gets things started.

    :param argv: List of arguments, as if specified on the command-line.
                 If None, ``sys.argv[1:]`` is used instead.
    :type argv: list of str
    '''
    # Build and run the command-line parser.
    parser = argparse.ArgumentParser(
        description="Takes one or more file paths and reports their detected encodings",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        conflict_handler='resolve')
    parser.add_argument('input',
                        help='File whose encoding we would like to determine.',
                        type=argparse.FileType('rb'), nargs='*',
                        default=[sys.stdin])
    parser.add_argument('--version', action='version',
                        version='%(prog)s {0}'.format(__version__))
    args = parser.parse_args(argv)

    for f in args.input:
        if f.isatty():
            # Warn interactive users how to terminate stdin input.
            print("You are running chardetect interactively. Press "
                  "CTRL-D twice at the start of a blank line to signal the "
                  "end of your input. If you want help, run chardetect "
                  "--help\n", file=sys.stderr)
        print(description_of(f, f.name))


if __name__ == '__main__':
    main()
81 |
--------------------------------------------------------------------------------
/myrequests/packages/chardet/chardistribution.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is Mozilla Communicator client code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from .euctwfreq import (EUCTWCharToFreqOrder, EUCTW_TABLE_SIZE,
29 | EUCTW_TYPICAL_DISTRIBUTION_RATIO)
30 | from .euckrfreq import (EUCKRCharToFreqOrder, EUCKR_TABLE_SIZE,
31 | EUCKR_TYPICAL_DISTRIBUTION_RATIO)
32 | from .gb2312freq import (GB2312CharToFreqOrder, GB2312_TABLE_SIZE,
33 | GB2312_TYPICAL_DISTRIBUTION_RATIO)
34 | from .big5freq import (Big5CharToFreqOrder, BIG5_TABLE_SIZE,
35 | BIG5_TYPICAL_DISTRIBUTION_RATIO)
36 | from .jisfreq import (JISCharToFreqOrder, JIS_TABLE_SIZE,
37 | JIS_TYPICAL_DISTRIBUTION_RATIO)
38 | from .compat import wrap_ord
39 |
# Once this many analysable characters have been seen, got_enough_data()
# reports that a conclusion can be drawn.
ENOUGH_DATA_THRESHOLD = 1024
SURE_YES = 0.99  # confidence ceiling returned by get_confidence()
SURE_NO = 0.01   # confidence floor returned by get_confidence()
# get_confidence() answers SURE_NO until more than this many high-frequency
# characters have been counted.
MINIMUM_DATA_THRESHOLD = 3
44 |
45 |
class CharDistributionAnalysis:
    """Base class for per-language character frequency analysis.

    Subclasses supply a char-order -> frequency-order table plus a typical
    distribution ratio for their language, and implement get_order().
    """

    def __init__(self):
        # Mapping table to get frequency order from char order (obtained
        # from get_order()).
        self._mCharToFreqOrder = None
        self._mTableSize = None  # Size of above table
        # This is a constant value which varies from language to language,
        # used in calculating confidence. See
        # http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html
        # for further detail.
        self._mTypicalDistributionRatio = None
        self.reset()

    def reset(self):
        """reset analyser, clear any state"""
        # Set to True once detection is done and a conclusion has been made.
        self._mDone = False
        self._mTotalChars = 0  # Total characters encountered
        # Count of characters whose frequency order is below 512.
        self._mFreqChars = 0

    def feed(self, aBuf, aCharLen):
        """feed a character with known length"""
        # Only 2-byte characters participate in distribution analysis.
        order = self.get_order(aBuf) if aCharLen == 2 else -1
        if order < 0:
            return
        self._mTotalChars += 1
        # order is valid
        if order < self._mTableSize and self._mCharToFreqOrder[order] < 512:
            self._mFreqChars += 1

    def get_confidence(self):
        """return confidence based on existing data"""
        # Without characters in our consideration range, answer negatively.
        if self._mTotalChars <= 0 or self._mFreqChars <= MINIMUM_DATA_THRESHOLD:
            return SURE_NO

        if self._mTotalChars != self._mFreqChars:
            r = (self._mFreqChars /
                 ((self._mTotalChars - self._mFreqChars)
                  * self._mTypicalDistributionRatio))
            if r < SURE_YES:
                return r

        # normalize confidence (we don't want to be 100% sure)
        return SURE_YES

    def got_enough_data(self):
        # It is not necessary to receive all data to draw a conclusion;
        # a certain amount of data is enough for charset detection.
        return self._mTotalChars > ENOUGH_DATA_THRESHOLD

    def get_order(self, aBuf):
        # Subclasses convert the raw encoded character into a numeric
        # "order" so multiple encodings of a language can share one
        # frequency table.  -1 means "not usable".
        return -1
109 |
110 |
class EUCTWDistributionAnalysis(CharDistributionAnalysis):
    """Distribution analysis specialised for EUC-TW."""

    def __init__(self):
        CharDistributionAnalysis.__init__(self)
        self._mCharToFreqOrder = EUCTWCharToFreqOrder
        self._mTableSize = EUCTW_TABLE_SIZE
        self._mTypicalDistributionRatio = EUCTW_TYPICAL_DISTRIBUTION_RATIO

    def get_order(self, aBuf):
        # EUC-TW: first byte 0xc4-0xfe, second byte 0xa1-0xfe.
        # No validation needed here; the state machine has done that.
        lead = wrap_ord(aBuf[0])
        if lead < 0xC4:
            return -1
        return 94 * (lead - 0xC4) + wrap_ord(aBuf[1]) - 0xA1
128 |
129 |
class EUCKRDistributionAnalysis(CharDistributionAnalysis):
    """Distribution analysis specialised for EUC-KR."""

    def __init__(self):
        CharDistributionAnalysis.__init__(self)
        self._mCharToFreqOrder = EUCKRCharToFreqOrder
        self._mTableSize = EUCKR_TABLE_SIZE
        self._mTypicalDistributionRatio = EUCKR_TYPICAL_DISTRIBUTION_RATIO

    def get_order(self, aBuf):
        # EUC-KR: first byte 0xb0-0xfe, second byte 0xa1-0xfe.
        # No validation needed here; the state machine has done that.
        lead = wrap_ord(aBuf[0])
        if lead < 0xB0:
            return -1
        return 94 * (lead - 0xB0) + wrap_ord(aBuf[1]) - 0xA1
147 |
148 |
class GB2312DistributionAnalysis(CharDistributionAnalysis):
    """Distribution analysis specialised for GB2312."""

    def __init__(self):
        CharDistributionAnalysis.__init__(self)
        self._mCharToFreqOrder = GB2312CharToFreqOrder
        self._mTableSize = GB2312_TABLE_SIZE
        self._mTypicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO

    def get_order(self, aBuf):
        # GB2312: first byte 0xb0-0xfe, second byte 0xa1-0xfe.
        # No validation needed here; the state machine has done that.
        lead = wrap_ord(aBuf[0])
        trail = wrap_ord(aBuf[1])
        if lead >= 0xB0 and trail >= 0xA1:
            return 94 * (lead - 0xB0) + trail - 0xA1
        return -1
166 |
167 |
class Big5DistributionAnalysis(CharDistributionAnalysis):
    """Distribution analysis specialised for Big5."""

    def __init__(self):
        CharDistributionAnalysis.__init__(self)
        self._mCharToFreqOrder = Big5CharToFreqOrder
        self._mTableSize = BIG5_TABLE_SIZE
        self._mTypicalDistributionRatio = BIG5_TYPICAL_DISTRIBUTION_RATIO

    def get_order(self, aBuf):
        # Big5: first byte 0xa4-0xfe; second byte 0x40-0x7e or 0xa1-0xfe.
        # No validation needed here; the state machine has done that.
        lead = wrap_ord(aBuf[0])
        trail = wrap_ord(aBuf[1])
        if lead < 0xA4:
            return -1
        # The two trail-byte ranges map onto one contiguous 157-wide row.
        if trail >= 0xA1:
            return 157 * (lead - 0xA4) + trail - 0xA1 + 63
        return 157 * (lead - 0xA4) + trail - 0x40
188 |
189 |
class SJISDistributionAnalysis(CharDistributionAnalysis):
    """Distribution analysis specialised for Shift-JIS."""

    def __init__(self):
        CharDistributionAnalysis.__init__(self)
        self._mCharToFreqOrder = JISCharToFreqOrder
        self._mTableSize = JIS_TABLE_SIZE
        self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO

    def get_order(self, aBuf):
        # SJIS: first byte 0x81-0x9f or 0xe0-0xfe; second byte 0x40-0x7e
        # or 0x81-0xfe.  No validation needed here; the state machine has
        # done that.
        lead = wrap_ord(aBuf[0])
        trail = wrap_ord(aBuf[1])
        if 0x81 <= lead <= 0x9F:
            order = 188 * (lead - 0x81)
        elif 0xE0 <= lead <= 0xEF:
            order = 188 * (lead - 0xE0 + 31)
        else:
            return -1
        order += trail - 0x40
        # Trail bytes above 0x7f are not counted toward the distribution.
        if trail > 0x7F:
            order = -1
        return order
213 |
214 |
class EUCJPDistributionAnalysis(CharDistributionAnalysis):
    """Distribution analysis specialised for EUC-JP."""

    def __init__(self):
        CharDistributionAnalysis.__init__(self)
        self._mCharToFreqOrder = JISCharToFreqOrder
        self._mTableSize = JIS_TABLE_SIZE
        self._mTypicalDistributionRatio = JIS_TYPICAL_DISTRIBUTION_RATIO

    def get_order(self, aBuf):
        # EUC-JP: first byte 0xa0-0xfe, second byte 0xa1-0xfe.
        # No validation needed here; the state machine has done that.
        lead = wrap_ord(aBuf[0])
        if lead < 0xA0:
            return -1
        return 94 * (lead - 0xA1) + wrap_ord(aBuf[1]) - 0xa1
232 |
--------------------------------------------------------------------------------
/myrequests/packages/chardet/charsetgroupprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is Mozilla Communicator client code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from . import constants
29 | import sys
30 | from .charsetprober import CharSetProber
31 |
32 |
class CharSetGroupProber(CharSetProber):
    """Prober that multiplexes a group of child probers.

    Feeds every still-active child and reports the highest-confidence
    guess among them.
    """

    def __init__(self):
        CharSetProber.__init__(self)
        self._mActiveNum = 0
        self._mProbers = []
        self._mBestGuessProber = None

    def reset(self):
        CharSetProber.reset(self)
        self._mActiveNum = 0
        for prober in self._mProbers:
            if not prober:
                continue
            prober.reset()
            prober.active = True
            self._mActiveNum += 1
        self._mBestGuessProber = None

    def get_charset_name(self):
        if not self._mBestGuessProber:
            # Recompute child confidences to (re)select a best guess.
            self.get_confidence()
            if not self._mBestGuessProber:
                return None
        return self._mBestGuessProber.get_charset_name()

    def feed(self, aBuf):
        for prober in self._mProbers:
            if not prober or not prober.active:
                continue
            st = prober.feed(aBuf)
            if not st:
                continue
            if st == constants.eFoundIt:
                # A child is certain: adopt it and stop.
                self._mBestGuessProber = prober
                return self.get_state()
            if st == constants.eNotMe:
                # Child ruled itself out; deactivate it.
                prober.active = False
                self._mActiveNum -= 1
                if self._mActiveNum <= 0:
                    # All children gave up: the group gives up too.
                    self._mState = constants.eNotMe
                    return self.get_state()
        return self.get_state()

    def get_confidence(self):
        st = self.get_state()
        if st == constants.eFoundIt:
            return 0.99
        if st == constants.eNotMe:
            return 0.01
        bestConf = 0.0
        self._mBestGuessProber = None
        for prober in self._mProbers:
            if not prober:
                continue
            if not prober.active:
                if constants._debug:
                    sys.stderr.write(prober.get_charset_name()
                                     + ' not active\n')
                continue
            cf = prober.get_confidence()
            if constants._debug:
                sys.stderr.write('%s confidence = %s\n' %
                                 (prober.get_charset_name(), cf))
            if cf > bestConf:
                bestConf = cf
                self._mBestGuessProber = prober
        if not self._mBestGuessProber:
            return 0.0
        return bestConf
107 |
--------------------------------------------------------------------------------
/myrequests/packages/chardet/charsetprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is Mozilla Universal charset detector code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 2001
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | # Shy Shalom - original C code
12 | #
13 | # This library is free software; you can redistribute it and/or
14 | # modify it under the terms of the GNU Lesser General Public
15 | # License as published by the Free Software Foundation; either
16 | # version 2.1 of the License, or (at your option) any later version.
17 | #
18 | # This library is distributed in the hope that it will be useful,
19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 | # Lesser General Public License for more details.
22 | #
23 | # You should have received a copy of the GNU Lesser General Public
24 | # License along with this library; if not, write to the Free Software
25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26 | # 02110-1301 USA
27 | ######################### END LICENSE BLOCK #########################
28 |
29 | from . import constants
30 | import re
31 |
32 |
class CharSetProber:
    """Abstract base class for all charset probers.

    Subclasses override feed() / get_confidence() / get_charset_name();
    the filter_* helpers preprocess byte buffers before analysis.
    """

    def __init__(self):
        pass

    def reset(self):
        # Return to the initial "still detecting" state.
        self._mState = constants.eDetecting

    def get_charset_name(self):
        # The base class identifies no charset.
        return None

    def feed(self, aBuf):
        pass

    def get_state(self):
        return self._mState

    def get_confidence(self):
        return 0.0

    def filter_high_bit_only(self, aBuf):
        # Collapse each run of ASCII bytes into a single space, keeping
        # only high-bit bytes for analysis.
        return re.sub(b'([\x00-\x7F])+', b' ', aBuf)

    def filter_without_english_letters(self, aBuf):
        # Collapse each run of Latin letters into a single space.
        return re.sub(b'([A-Za-z])+', b' ', aBuf)

    def filter_with_english_letters(self, aBuf):
        # TODO
        return aBuf
63 |
--------------------------------------------------------------------------------
/myrequests/packages/chardet/codingstatemachine.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from .constants import eStart
29 | from .compat import wrap_ord
30 |
31 |
class CodingStateMachine:
    """Drives a byte-class state machine described by a model dict.

    The model supplies 'classTable' (byte -> class), 'classFactor',
    'stateTable' (state * factor + class -> next state), 'charLenTable'
    (class -> expected character length) and a display 'name'.
    """

    def __init__(self, sm):
        self._mModel = sm
        self._mCurrentBytePos = 0
        self._mCurrentCharLen = 0
        self.reset()

    def reset(self):
        self._mCurrentState = eStart

    def next_state(self, c):
        # For each byte we get its class; if it is a first byte, we also
        # get the byte length of the character.
        # PY3K: aBuf is a byte stream, so c is an int, not a byte
        byte_class = self._mModel['classTable'][wrap_ord(c)]
        if self._mCurrentState == eStart:
            self._mCurrentBytePos = 0
            self._mCurrentCharLen = self._mModel['charLenTable'][byte_class]
        # From the byte's class and the stateTable we get the next state.
        transition = (self._mCurrentState * self._mModel['classFactor']
                      + byte_class)
        self._mCurrentState = self._mModel['stateTable'][transition]
        self._mCurrentBytePos += 1
        return self._mCurrentState

    def get_current_charlen(self):
        return self._mCurrentCharLen

    def get_coding_state_machine(self):
        return self._mModel['name']
62 |
--------------------------------------------------------------------------------
/myrequests/packages/chardet/compat.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # Contributor(s):
3 | # Ian Cordasco - port to Python
4 | #
5 | # This library is free software; you can redistribute it and/or
6 | # modify it under the terms of the GNU Lesser General Public
7 | # License as published by the Free Software Foundation; either
8 | # version 2.1 of the License, or (at your option) any later version.
9 | #
10 | # This library is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 | # Lesser General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU Lesser General Public
16 | # License along with this library; if not, write to the Free Software
17 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
18 | # 02110-1301 USA
19 | ######################### END LICENSE BLOCK #########################
20 |
21 | import sys
22 |
23 |
if sys.version_info < (3, 0):
    base_str = (str, unicode)
else:
    base_str = (bytes, str)


def wrap_ord(a):
    """Return the integer value of a single byte/character.

    On Python 2, indexing a str yields a one-character string that must
    go through ord(); on Python 3, indexing bytes already yields an int,
    so the value is returned unchanged.
    """
    needs_ord = sys.version_info < (3, 0) and isinstance(a, base_str)
    return ord(a) if needs_ord else a
35 |
--------------------------------------------------------------------------------
/myrequests/packages/chardet/constants.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is Mozilla Universal charset detector code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 2001
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | # Shy Shalom - original C code
12 | #
13 | # This library is free software; you can redistribute it and/or
14 | # modify it under the terms of the GNU Lesser General Public
15 | # License as published by the Free Software Foundation; either
16 | # version 2.1 of the License, or (at your option) any later version.
17 | #
18 | # This library is distributed in the hope that it will be useful,
19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 | # Lesser General Public License for more details.
22 | #
23 | # You should have received a copy of the GNU Lesser General Public
24 | # License along with this library; if not, write to the Free Software
25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26 | # 02110-1301 USA
27 | ######################### END LICENSE BLOCK #########################
28 |
# Set truthy to emit per-prober debug traces to stderr
# (see CharSetGroupProber.get_confidence).
_debug = 0

# Prober states returned by CharSetProber.get_state().
eDetecting = 0
eFoundIt = 1
eNotMe = 2

# Coding state machine states (see CodingStateMachine.next_state()).
eStart = 0
eError = 1
eItsMe = 2

# Confidence threshold, presumably for shortcutting detection early —
# its use is outside this module.
SHORTCUT_THRESHOLD = 0.95
40 |
--------------------------------------------------------------------------------
/myrequests/packages/chardet/cp949prober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import EUCKRDistributionAnalysis
31 | from .mbcssm import CP949SMModel
32 |
33 |
class CP949Prober(MultiByteCharSetProber):
    """Multi-byte prober for the CP949 (Korean) encoding."""

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        self._mCodingSM = CodingStateMachine(CP949SMModel)
        # NOTE: CP949 is a superset of EUC-KR, so the distribution should
        # not differ; reuse the EUC-KR analyser.
        self._mDistributionAnalyzer = EUCKRDistributionAnalysis()
        self.reset()

    def get_charset_name(self):
        return "CP949"
45 |
--------------------------------------------------------------------------------
/myrequests/packages/chardet/escprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from . import constants
29 | from .escsm import (HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel,
30 | ISO2022KRSMModel)
31 | from .charsetprober import CharSetProber
32 | from .codingstatemachine import CodingStateMachine
33 | from .compat import wrap_ord
34 |
35 |
class EscCharSetProber(CharSetProber):
    """Prober for escape-sequence based encodings (HZ and ISO-2022-*).

    Runs one coding state machine per candidate encoding; the first
    machine to reach its "its me" state wins.
    """

    def __init__(self):
        CharSetProber.__init__(self)
        self._mCodingSM = [
            CodingStateMachine(HZSMModel),
            CodingStateMachine(ISO2022CNSMModel),
            CodingStateMachine(ISO2022JPSMModel),
            CodingStateMachine(ISO2022KRSMModel)
        ]
        self.reset()

    def reset(self):
        CharSetProber.reset(self)
        for codingSM in self._mCodingSM:
            if not codingSM:
                continue
            codingSM.active = True
            codingSM.reset()
        self._mActiveSM = len(self._mCodingSM)
        self._mDetectedCharset = None

    def get_charset_name(self):
        return self._mDetectedCharset

    def get_confidence(self):
        # Escape sequences are unambiguous once seen.
        return 0.99 if self._mDetectedCharset else 0.00

    def feed(self, aBuf):
        for c in aBuf:
            # PY3K: aBuf is a byte array, so c is an int, not a byte
            for codingSM in self._mCodingSM:
                if not codingSM or not codingSM.active:
                    continue
                codingState = codingSM.next_state(wrap_ord(c))
                if codingState == constants.eError:
                    # This machine cannot match the data; drop it.
                    codingSM.active = False
                    self._mActiveSM -= 1
                    if self._mActiveSM <= 0:
                        self._mState = constants.eNotMe
                        return self.get_state()
                elif codingState == constants.eItsMe:
                    self._mState = constants.eFoundIt
                    self._mDetectedCharset = codingSM.get_coding_state_machine()  # nopep8
                    return self.get_state()

        return self.get_state()
87 |
--------------------------------------------------------------------------------
/myrequests/packages/chardet/escsm.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from .constants import eStart, eError, eItsMe
29 |
# Byte -> character-class map for the HZ-GB-2312 detector.  Per the table
# below: class 4 = '{' (0x7b), class 5 = '}' (0x7d), class 2 = '~' (0x7e)
# — the HZ shift markers — while 0x00, 0x1b and every byte >= 0x80 are
# class 1; everything else is class 0.
HZ_cls = (
1,0,0,0,0,0,0,0, # 00 - 07
0,0,0,0,0,0,0,0, # 08 - 0f
0,0,0,0,0,0,0,0, # 10 - 17
0,0,0,1,0,0,0,0, # 18 - 1f
0,0,0,0,0,0,0,0, # 20 - 27
0,0,0,0,0,0,0,0, # 28 - 2f
0,0,0,0,0,0,0,0, # 30 - 37
0,0,0,0,0,0,0,0, # 38 - 3f
0,0,0,0,0,0,0,0, # 40 - 47
0,0,0,0,0,0,0,0, # 48 - 4f
0,0,0,0,0,0,0,0, # 50 - 57
0,0,0,0,0,0,0,0, # 58 - 5f
0,0,0,0,0,0,0,0, # 60 - 67
0,0,0,0,0,0,0,0, # 68 - 6f
0,0,0,0,0,0,0,0, # 70 - 77
0,0,0,4,0,5,2,0, # 78 - 7f
1,1,1,1,1,1,1,1, # 80 - 87
1,1,1,1,1,1,1,1, # 88 - 8f
1,1,1,1,1,1,1,1, # 90 - 97
1,1,1,1,1,1,1,1, # 98 - 9f
1,1,1,1,1,1,1,1, # a0 - a7
1,1,1,1,1,1,1,1, # a8 - af
1,1,1,1,1,1,1,1, # b0 - b7
1,1,1,1,1,1,1,1, # b8 - bf
1,1,1,1,1,1,1,1, # c0 - c7
1,1,1,1,1,1,1,1, # c8 - cf
1,1,1,1,1,1,1,1, # d0 - d7
1,1,1,1,1,1,1,1, # d8 - df
1,1,1,1,1,1,1,1, # e0 - e7
1,1,1,1,1,1,1,1, # e8 - ef
1,1,1,1,1,1,1,1, # f0 - f7
1,1,1,1,1,1,1,1, # f8 - ff
)

# State transition table, indexed by (state * classFactor + class);
# consumed by CodingStateMachine.next_state().
HZ_st = (
eStart,eError, 3,eStart,eStart,eStart,eError,eError,# 00-07
eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f
eItsMe,eItsMe,eError,eError,eStart,eStart, 4,eError,# 10-17
5,eError, 6,eError, 5, 5, 4,eError,# 18-1f
4,eError, 4, 4, 4,eError, 4,eError,# 20-27
4,eItsMe,eStart,eStart,eStart,eStart,eStart,eStart,# 28-2f
)

# Expected character length per class; all zero for HZ escape sequences.
HZCharLenTable = (0, 0, 0, 0, 0, 0)

# Model dict consumed by CodingStateMachine.
HZSMModel = {'classTable': HZ_cls,
             'classFactor': 6,
             'stateTable': HZ_st,
             'charLenTable': HZCharLenTable,
             'name': "HZ-GB-2312"}
81 |
# Byte -> character-class map for the ISO-2022-CN detector.  Per the table
# below: class 1 = ESC (0x1b), class 3 = ')' (0x29), class 4 = 'C' (0x43)
# — bytes of the ISO-2022-CN designation sequences — while NUL and every
# byte >= 0x80 are class 2; everything else is class 0.
ISO2022CN_cls = (
2,0,0,0,0,0,0,0, # 00 - 07
0,0,0,0,0,0,0,0, # 08 - 0f
0,0,0,0,0,0,0,0, # 10 - 17
0,0,0,1,0,0,0,0, # 18 - 1f
0,0,0,0,0,0,0,0, # 20 - 27
0,3,0,0,0,0,0,0, # 28 - 2f
0,0,0,0,0,0,0,0, # 30 - 37
0,0,0,0,0,0,0,0, # 38 - 3f
0,0,0,4,0,0,0,0, # 40 - 47
0,0,0,0,0,0,0,0, # 48 - 4f
0,0,0,0,0,0,0,0, # 50 - 57
0,0,0,0,0,0,0,0, # 58 - 5f
0,0,0,0,0,0,0,0, # 60 - 67
0,0,0,0,0,0,0,0, # 68 - 6f
0,0,0,0,0,0,0,0, # 70 - 77
0,0,0,0,0,0,0,0, # 78 - 7f
2,2,2,2,2,2,2,2, # 80 - 87
2,2,2,2,2,2,2,2, # 88 - 8f
2,2,2,2,2,2,2,2, # 90 - 97
2,2,2,2,2,2,2,2, # 98 - 9f
2,2,2,2,2,2,2,2, # a0 - a7
2,2,2,2,2,2,2,2, # a8 - af
2,2,2,2,2,2,2,2, # b0 - b7
2,2,2,2,2,2,2,2, # b8 - bf
2,2,2,2,2,2,2,2, # c0 - c7
2,2,2,2,2,2,2,2, # c8 - cf
2,2,2,2,2,2,2,2, # d0 - d7
2,2,2,2,2,2,2,2, # d8 - df
2,2,2,2,2,2,2,2, # e0 - e7
2,2,2,2,2,2,2,2, # e8 - ef
2,2,2,2,2,2,2,2, # f0 - f7
2,2,2,2,2,2,2,2, # f8 - ff
)

# State transition table, indexed by (state * classFactor + class);
# consumed by CodingStateMachine.next_state().
ISO2022CN_st = (
eStart, 3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07
eStart,eError,eError,eError,eError,eError,eError,eError,# 08-0f
eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17
eItsMe,eItsMe,eItsMe,eError,eError,eError, 4,eError,# 18-1f
eError,eError,eError,eItsMe,eError,eError,eError,eError,# 20-27
5, 6,eError,eError,eError,eError,eError,eError,# 28-2f
eError,eError,eError,eItsMe,eError,eError,eError,eError,# 30-37
eError,eError,eError,eError,eError,eItsMe,eError,eStart,# 38-3f
)

# Expected character length per class; all zero for escape sequences.
ISO2022CNCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0)

# Model dict consumed by CodingStateMachine.
ISO2022CNSMModel = {'classTable': ISO2022CN_cls,
                    'classFactor': 9,
                    'stateTable': ISO2022CN_st,
                    'charLenTable': ISO2022CNCharLenTable,
                    'name': "ISO-2022-CN"}
135 |
# Byte-class table for ISO-2022-JP. Distinct classes mark the bytes used in
# its escape sequences: '$' (0x24 -> 7), '(' (0x28 -> 3), '@' (0x40 -> 6),
# 'B' (0x42 -> 4), 'D' (0x44 -> 8), 'I' (0x49 -> 9), 'J' (0x4a -> 5);
# bytes >= 0x80 share class 2.
136 | ISO2022JP_cls = (
137 | 2,0,0,0,0,0,0,0, # 00 - 07
138 | 0,0,0,0,0,0,2,2, # 08 - 0f
139 | 0,0,0,0,0,0,0,0, # 10 - 17
140 | 0,0,0,1,0,0,0,0, # 18 - 1f
141 | 0,0,0,0,7,0,0,0, # 20 - 27
142 | 3,0,0,0,0,0,0,0, # 28 - 2f
143 | 0,0,0,0,0,0,0,0, # 30 - 37
144 | 0,0,0,0,0,0,0,0, # 38 - 3f
145 | 6,0,4,0,8,0,0,0, # 40 - 47
146 | 0,9,5,0,0,0,0,0, # 48 - 4f
147 | 0,0,0,0,0,0,0,0, # 50 - 57
148 | 0,0,0,0,0,0,0,0, # 58 - 5f
149 | 0,0,0,0,0,0,0,0, # 60 - 67
150 | 0,0,0,0,0,0,0,0, # 68 - 6f
151 | 0,0,0,0,0,0,0,0, # 70 - 77
152 | 0,0,0,0,0,0,0,0, # 78 - 7f
153 | 2,2,2,2,2,2,2,2, # 80 - 87
154 | 2,2,2,2,2,2,2,2, # 88 - 8f
155 | 2,2,2,2,2,2,2,2, # 90 - 97
156 | 2,2,2,2,2,2,2,2, # 98 - 9f
157 | 2,2,2,2,2,2,2,2, # a0 - a7
158 | 2,2,2,2,2,2,2,2, # a8 - af
159 | 2,2,2,2,2,2,2,2, # b0 - b7
160 | 2,2,2,2,2,2,2,2, # b8 - bf
161 | 2,2,2,2,2,2,2,2, # c0 - c7
162 | 2,2,2,2,2,2,2,2, # c8 - cf
163 | 2,2,2,2,2,2,2,2, # d0 - d7
164 | 2,2,2,2,2,2,2,2, # d8 - df
165 | 2,2,2,2,2,2,2,2, # e0 - e7
166 | 2,2,2,2,2,2,2,2, # e8 - ef
167 | 2,2,2,2,2,2,2,2, # f0 - f7
168 | 2,2,2,2,2,2,2,2, # f8 - ff
169 | )
170 |
# State-transition table for ISO-2022-JP escape sequences.
171 | ISO2022JP_st = (
172 | eStart, 3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07
173 | eStart,eStart,eError,eError,eError,eError,eError,eError,# 08-0f
174 | eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17
175 | eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,# 18-1f
176 | eError, 5,eError,eError,eError, 4,eError,eError,# 20-27
177 | eError,eError,eError, 6,eItsMe,eError,eItsMe,eError,# 28-2f
178 | eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,# 30-37
179 | eError,eError,eError,eItsMe,eError,eError,eError,eError,# 38-3f
180 | eError,eError,eError,eError,eItsMe,eError,eStart,eStart,# 40-47
181 | )
182 |
# Per-state character lengths (all zero for this model).
183 | ISO2022JPCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
184 |
# Model bundle; classFactor (10) matches the charLenTable length.
185 | ISO2022JPSMModel = {'classTable': ISO2022JP_cls,
186 | 'classFactor': 10,
187 | 'stateTable': ISO2022JP_st,
188 | 'charLenTable': ISO2022JPCharLenTable,
189 | 'name': "ISO-2022-JP"}
190 |
# Byte-class table for ISO-2022-KR. Distinct classes mark the bytes used in
# its escape sequence: ESC (0x1b -> 1), '$' (0x24 -> 3), ')' (0x29 -> 4),
# 'C' (0x43 -> 5); bytes >= 0x80 share class 2.
191 | ISO2022KR_cls = (
192 | 2,0,0,0,0,0,0,0, # 00 - 07
193 | 0,0,0,0,0,0,0,0, # 08 - 0f
194 | 0,0,0,0,0,0,0,0, # 10 - 17
195 | 0,0,0,1,0,0,0,0, # 18 - 1f
196 | 0,0,0,0,3,0,0,0, # 20 - 27
197 | 0,4,0,0,0,0,0,0, # 28 - 2f
198 | 0,0,0,0,0,0,0,0, # 30 - 37
199 | 0,0,0,0,0,0,0,0, # 38 - 3f
200 | 0,0,0,5,0,0,0,0, # 40 - 47
201 | 0,0,0,0,0,0,0,0, # 48 - 4f
202 | 0,0,0,0,0,0,0,0, # 50 - 57
203 | 0,0,0,0,0,0,0,0, # 58 - 5f
204 | 0,0,0,0,0,0,0,0, # 60 - 67
205 | 0,0,0,0,0,0,0,0, # 68 - 6f
206 | 0,0,0,0,0,0,0,0, # 70 - 77
207 | 0,0,0,0,0,0,0,0, # 78 - 7f
208 | 2,2,2,2,2,2,2,2, # 80 - 87
209 | 2,2,2,2,2,2,2,2, # 88 - 8f
210 | 2,2,2,2,2,2,2,2, # 90 - 97
211 | 2,2,2,2,2,2,2,2, # 98 - 9f
212 | 2,2,2,2,2,2,2,2, # a0 - a7
213 | 2,2,2,2,2,2,2,2, # a8 - af
214 | 2,2,2,2,2,2,2,2, # b0 - b7
215 | 2,2,2,2,2,2,2,2, # b8 - bf
216 | 2,2,2,2,2,2,2,2, # c0 - c7
217 | 2,2,2,2,2,2,2,2, # c8 - cf
218 | 2,2,2,2,2,2,2,2, # d0 - d7
219 | 2,2,2,2,2,2,2,2, # d8 - df
220 | 2,2,2,2,2,2,2,2, # e0 - e7
221 | 2,2,2,2,2,2,2,2, # e8 - ef
222 | 2,2,2,2,2,2,2,2, # f0 - f7
223 | 2,2,2,2,2,2,2,2, # f8 - ff
224 | )
225 |
# State-transition table for the ISO-2022-KR escape sequence.
226 | ISO2022KR_st = (
227 | eStart, 3,eError,eStart,eStart,eStart,eError,eError,# 00-07
228 | eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f
229 | eItsMe,eItsMe,eError,eError,eError, 4,eError,eError,# 10-17
230 | eError,eError,eError,eError, 5,eError,eError,eError,# 18-1f
231 | eError,eError,eError,eItsMe,eStart,eStart,eStart,eStart,# 20-27
232 | )
233 |
# Per-state character lengths (all zero for this model).
234 | ISO2022KRCharLenTable = (0, 0, 0, 0, 0, 0)
235 |
# Model bundle; classFactor (6) matches the charLenTable length.
236 | ISO2022KRSMModel = {'classTable': ISO2022KR_cls,
237 | 'classFactor': 6,
238 | 'stateTable': ISO2022KR_st,
239 | 'charLenTable': ISO2022KRCharLenTable,
240 | 'name': "ISO-2022-KR"}
241 |
242 | # flake8: noqa
243 |
--------------------------------------------------------------------------------
/myrequests/packages/chardet/eucjpprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | import sys
29 | from . import constants
30 | from .mbcharsetprober import MultiByteCharSetProber
31 | from .codingstatemachine import CodingStateMachine
32 | from .chardistribution import EUCJPDistributionAnalysis
33 | from .jpcntx import EUCJPContextAnalysis
34 | from .mbcssm import EUCJPSMModel
35 |
36 |
class EUCJPProber(MultiByteCharSetProber):
    """Charset prober for EUC-JP.

    Extends the generic multi-byte prober with a context analyzer in
    addition to the character-distribution analyzer; the reported
    confidence is the larger of the two analyzers' scores.
    """

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        self._mCodingSM = CodingStateMachine(EUCJPSMModel)
        self._mDistributionAnalyzer = EUCJPDistributionAnalysis()
        self._mContextAnalyzer = EUCJPContextAnalysis()
        self.reset()

    def reset(self):
        MultiByteCharSetProber.reset(self)
        self._mContextAnalyzer.reset()

    def get_charset_name(self):
        return "EUC-JP"

    def feed(self, aBuf):
        # PY3K: aBuf is a byte array, so each element is an int, not a byte
        for pos, cur_byte in enumerate(aBuf):
            coding_state = self._mCodingSM.next_state(cur_byte)
            if coding_state == constants.eError:
                if constants._debug:
                    sys.stderr.write(self.get_charset_name()
                                     + ' prober hit error at byte ' + str(pos)
                                     + '\n')
                self._mState = constants.eNotMe
                break
            if coding_state == constants.eItsMe:
                self._mState = constants.eFoundIt
                break
            if coding_state == constants.eStart:
                char_len = self._mCodingSM.get_current_charlen()
                if pos == 0:
                    self._mLastChar[1] = aBuf[0]
                    self._mContextAnalyzer.feed(self._mLastChar, char_len)
                    self._mDistributionAnalyzer.feed(self._mLastChar, char_len)
                else:
                    last_pair = aBuf[pos - 1:pos + 1]
                    self._mContextAnalyzer.feed(last_pair, char_len)
                    self._mDistributionAnalyzer.feed(last_pair, char_len)

        self._mLastChar[0] = aBuf[-1]

        if self.get_state() == constants.eDetecting:
            if (self._mContextAnalyzer.got_enough_data() and
                    self.get_confidence() > constants.SHORTCUT_THRESHOLD):
                self._mState = constants.eFoundIt

        return self.get_state()

    def get_confidence(self):
        return max(self._mContextAnalyzer.get_confidence(),
                   self._mDistributionAnalyzer.get_confidence())
91 |
--------------------------------------------------------------------------------
/myrequests/packages/chardet/euckrprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import EUCKRDistributionAnalysis
31 | from .mbcssm import EUCKRSMModel
32 |
33 |
class EUCKRProber(MultiByteCharSetProber):
    """Charset prober for EUC-KR.

    Relies entirely on the shared multi-byte feed() loop; only the state
    machine and distribution analyzer are encoding-specific.
    """

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        self._mDistributionAnalyzer = EUCKRDistributionAnalysis()
        self._mCodingSM = CodingStateMachine(EUCKRSMModel)
        self.reset()

    def get_charset_name(self):
        return "EUC-KR"
43 |
--------------------------------------------------------------------------------
/myrequests/packages/chardet/euctwprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import EUCTWDistributionAnalysis
31 | from .mbcssm import EUCTWSMModel
32 |
class EUCTWProber(MultiByteCharSetProber):
    """Charset prober for EUC-TW.

    Relies entirely on the shared multi-byte feed() loop; only the state
    machine and distribution analyzer are encoding-specific.
    """

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        self._mDistributionAnalyzer = EUCTWDistributionAnalysis()
        self._mCodingSM = CodingStateMachine(EUCTWSMModel)
        self.reset()

    def get_charset_name(self):
        return "EUC-TW"
42 |
--------------------------------------------------------------------------------
/myrequests/packages/chardet/gb2312prober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import GB2312DistributionAnalysis
31 | from .mbcssm import GB2312SMModel
32 |
class GB2312Prober(MultiByteCharSetProber):
    """Charset prober for GB2312.

    Relies entirely on the shared multi-byte feed() loop; only the state
    machine and distribution analyzer are encoding-specific.
    """

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        self._mDistributionAnalyzer = GB2312DistributionAnalysis()
        self._mCodingSM = CodingStateMachine(GB2312SMModel)
        self.reset()

    def get_charset_name(self):
        return "GB2312"
42 |
--------------------------------------------------------------------------------
/myrequests/packages/chardet/latin1prober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is Mozilla Universal charset detector code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 2001
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | # Shy Shalom - original C code
12 | #
13 | # This library is free software; you can redistribute it and/or
14 | # modify it under the terms of the GNU Lesser General Public
15 | # License as published by the Free Software Foundation; either
16 | # version 2.1 of the License, or (at your option) any later version.
17 | #
18 | # This library is distributed in the hope that it will be useful,
19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 | # Lesser General Public License for more details.
22 | #
23 | # You should have received a copy of the GNU Lesser General Public
24 | # License along with this library; if not, write to the Free Software
25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26 | # 02110-1301 USA
27 | ######################### END LICENSE BLOCK #########################
28 |
29 | from .charsetprober import CharSetProber
30 | from .constants import eNotMe
31 | from .compat import wrap_ord
32 |
# Number of frequency categories tracked by Latin1Prober._mFreqCounter
# (indices 0-3; see Latin1ClassModel legend below).
33 | FREQ_CAT_NUM = 4
34 |
35 | UDF = 0 # undefined
36 | OTH = 1 # other
37 | ASC = 2 # ascii capital letter
38 | ASS = 3 # ascii small letter
39 | ACV = 4 # accent capital vowel
40 | ACO = 5 # accent capital other
41 | ASV = 6 # accent small vowel
42 | ASO = 7 # accent small other
43 | CLASS_NUM = 8 # total classes
44 |
# Maps each byte 0x00-0xFF to one of the CLASS_NUM character classes above.
45 | Latin1_CharToClass = (
46 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 00 - 07
47 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 08 - 0F
48 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 10 - 17
49 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 18 - 1F
50 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 20 - 27
51 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 28 - 2F
52 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 30 - 37
53 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 38 - 3F
54 | OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 40 - 47
55 | ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 48 - 4F
56 | ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 50 - 57
57 | ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, # 58 - 5F
58 | OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 60 - 67
59 | ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 68 - 6F
60 | ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 70 - 77
61 | ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, # 78 - 7F
62 | OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH, # 80 - 87
63 | OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF, # 88 - 8F
64 | UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 90 - 97
65 | OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO, # 98 - 9F
66 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A0 - A7
67 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A8 - AF
68 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B0 - B7
69 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B8 - BF
70 | ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO, # C0 - C7
71 | ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, # C8 - CF
72 | ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH, # D0 - D7
73 | ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO, # D8 - DF
74 | ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO, # E0 - E7
75 | ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, # E8 - EF
76 | ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH, # F0 - F7
77 | ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, # F8 - FF
78 | )
79 |
80 | # 0 : illegal
81 | # 1 : very unlikely
82 | # 2 : normal
83 | # 3 : very likely
# CLASS_NUM x CLASS_NUM pair-likelihood matrix. Latin1Prober.feed() indexes
# it as previous_class * CLASS_NUM + current_class; a 0 entry rules the
# charset out entirely.
84 | Latin1ClassModel = (
85 | # UDF OTH ASC ASS ACV ACO ASV ASO
86 | 0, 0, 0, 0, 0, 0, 0, 0, # UDF
87 | 0, 3, 3, 3, 3, 3, 3, 3, # OTH
88 | 0, 3, 3, 3, 3, 3, 3, 3, # ASC
89 | 0, 3, 3, 3, 1, 1, 3, 3, # ASS
90 | 0, 3, 3, 3, 1, 2, 1, 2, # ACV
91 | 0, 3, 3, 3, 3, 3, 3, 3, # ACO
92 | 0, 3, 1, 3, 1, 1, 1, 3, # ASV
93 | 0, 3, 1, 3, 1, 1, 3, 3, # ASO
94 | )
95 |
96 |
class Latin1Prober(CharSetProber):
    """Prober for Latin-1-style single-byte text (reported as windows-1252).

    Scores adjacent character-class pairs against Latin1ClassModel and
    accumulates a count per likelihood category.
    """

    def __init__(self):
        CharSetProber.__init__(self)
        self.reset()

    def reset(self):
        self._mLastCharClass = OTH
        self._mFreqCounter = [0] * FREQ_CAT_NUM
        CharSetProber.reset(self)

    def get_charset_name(self):
        return "windows-1252"

    def feed(self, aBuf):
        aBuf = self.filter_with_english_letters(aBuf)
        for cur in aBuf:
            cur_class = Latin1_CharToClass[wrap_ord(cur)]
            category = Latin1ClassModel[self._mLastCharClass * CLASS_NUM
                                        + cur_class]
            if category == 0:
                # Illegal pair for Latin-1 -- rule this charset out.
                self._mState = eNotMe
                break
            self._mFreqCounter[category] += 1
            self._mLastCharClass = cur_class

        return self.get_state()

    def get_confidence(self):
        if self.get_state() == eNotMe:
            return 0.01

        total = sum(self._mFreqCounter)
        if total < 0.01:
            return 0.0
        confidence = (self._mFreqCounter[3]
                      - self._mFreqCounter[1] * 20.0) / total
        if confidence < 0.0:
            confidence = 0.0
        # lower the confidence of latin1 so that other more accurate
        # detector can take priority.
        return confidence * 0.73
140 |
--------------------------------------------------------------------------------
/myrequests/packages/chardet/mbcharsetprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is Mozilla Universal charset detector code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 2001
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | # Shy Shalom - original C code
12 | # Proofpoint, Inc.
13 | #
14 | # This library is free software; you can redistribute it and/or
15 | # modify it under the terms of the GNU Lesser General Public
16 | # License as published by the Free Software Foundation; either
17 | # version 2.1 of the License, or (at your option) any later version.
18 | #
19 | # This library is distributed in the hope that it will be useful,
20 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
21 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 | # Lesser General Public License for more details.
23 | #
24 | # You should have received a copy of the GNU Lesser General Public
25 | # License along with this library; if not, write to the Free Software
26 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
27 | # 02110-1301 USA
28 | ######################### END LICENSE BLOCK #########################
29 |
30 | import sys
31 | from . import constants
32 | from .charsetprober import CharSetProber
33 |
34 |
class MultiByteCharSetProber(CharSetProber):
    """Common base for multi-byte charset probers.

    Subclasses install a coding state machine and a distribution analyzer
    in their __init__; this class drives both from feed().
    """

    def __init__(self):
        CharSetProber.__init__(self)
        self._mDistributionAnalyzer = None
        self._mCodingSM = None
        # Rolling two-byte window: [previous buffer's last byte, first byte]
        self._mLastChar = [0, 0]

    def reset(self):
        CharSetProber.reset(self)
        if self._mCodingSM:
            self._mCodingSM.reset()
        if self._mDistributionAnalyzer:
            self._mDistributionAnalyzer.reset()
        self._mLastChar = [0, 0]

    def get_charset_name(self):
        # Abstract: concrete probers return their encoding's name.
        pass

    def feed(self, aBuf):
        for pos, cur_byte in enumerate(aBuf):
            coding_state = self._mCodingSM.next_state(cur_byte)
            if coding_state == constants.eError:
                if constants._debug:
                    sys.stderr.write(self.get_charset_name()
                                     + ' prober hit error at byte ' + str(pos)
                                     + '\n')
                self._mState = constants.eNotMe
                break
            if coding_state == constants.eItsMe:
                self._mState = constants.eFoundIt
                break
            if coding_state == constants.eStart:
                char_len = self._mCodingSM.get_current_charlen()
                if pos == 0:
                    self._mLastChar[1] = aBuf[0]
                    self._mDistributionAnalyzer.feed(self._mLastChar, char_len)
                else:
                    self._mDistributionAnalyzer.feed(aBuf[pos - 1:pos + 1],
                                                     char_len)

        self._mLastChar[0] = aBuf[-1]

        if self.get_state() == constants.eDetecting:
            if (self._mDistributionAnalyzer.got_enough_data() and
                    self.get_confidence() > constants.SHORTCUT_THRESHOLD):
                self._mState = constants.eFoundIt

        return self.get_state()

    def get_confidence(self):
        return self._mDistributionAnalyzer.get_confidence()
87 |
--------------------------------------------------------------------------------
/myrequests/packages/chardet/mbcsgroupprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is Mozilla Universal charset detector code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 2001
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | # Shy Shalom - original C code
12 | # Proofpoint, Inc.
13 | #
14 | # This library is free software; you can redistribute it and/or
15 | # modify it under the terms of the GNU Lesser General Public
16 | # License as published by the Free Software Foundation; either
17 | # version 2.1 of the License, or (at your option) any later version.
18 | #
19 | # This library is distributed in the hope that it will be useful,
20 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
21 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 | # Lesser General Public License for more details.
23 | #
24 | # You should have received a copy of the GNU Lesser General Public
25 | # License along with this library; if not, write to the Free Software
26 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
27 | # 02110-1301 USA
28 | ######################### END LICENSE BLOCK #########################
29 |
30 | from .charsetgroupprober import CharSetGroupProber
31 | from .utf8prober import UTF8Prober
32 | from .sjisprober import SJISProber
33 | from .eucjpprober import EUCJPProber
34 | from .gb2312prober import GB2312Prober
35 | from .euckrprober import EUCKRProber
36 | from .cp949prober import CP949Prober
37 | from .big5prober import Big5Prober
38 | from .euctwprober import EUCTWProber
39 |
40 |
class MBCSGroupProber(CharSetGroupProber):
    """Group prober bundling one instance of each multi-byte charset prober."""

    def __init__(self):
        CharSetGroupProber.__init__(self)
        prober_classes = (
            UTF8Prober,
            SJISProber,
            EUCJPProber,
            GB2312Prober,
            EUCKRProber,
            CP949Prober,
            Big5Prober,
            EUCTWProber,
        )
        self._mProbers = [klass() for klass in prober_classes]
        self.reset()
55 |
--------------------------------------------------------------------------------
/myrequests/packages/chardet/sbcharsetprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is Mozilla Universal charset detector code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 2001
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | # Shy Shalom - original C code
12 | #
13 | # This library is free software; you can redistribute it and/or
14 | # modify it under the terms of the GNU Lesser General Public
15 | # License as published by the Free Software Foundation; either
16 | # version 2.1 of the License, or (at your option) any later version.
17 | #
18 | # This library is distributed in the hope that it will be useful,
19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 | # Lesser General Public License for more details.
22 | #
23 | # You should have received a copy of the GNU Lesser General Public
24 | # License along with this library; if not, write to the Free Software
25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26 | # 02110-1301 USA
27 | ######################### END LICENSE BLOCK #########################
28 |
29 | import sys
30 | from . import constants
31 | from .charsetprober import CharSetProber
32 | from .compat import wrap_ord
33 |
34 | SAMPLE_SIZE = 64
35 | SB_ENOUGH_REL_THRESHOLD = 1024
36 | POSITIVE_SHORTCUT_THRESHOLD = 0.95
37 | NEGATIVE_SHORTCUT_THRESHOLD = 0.05
38 | SYMBOL_CAT_ORDER = 250
39 | NUMBER_OF_SEQ_CAT = 4
40 | POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1
41 | #NEGATIVE_CAT = 0
42 |
43 |
class SingleByteCharSetProber(CharSetProber):
    """Prober for a single-byte charset described by a language model.

    The model is a dict with 'charToOrderMap', 'precedenceMatrix',
    'mTypicalPositiveRatio', 'keepEnglishLetter' and 'charsetName' keys.
    Confidence comes from how often adjacent frequent characters form
    "positive" sequences according to the precedence matrix.
    """

    def __init__(self, model, reversed=False, nameProber=None):
        # NOTE: 'reversed' shadows the builtin, but renaming it would change
        # the public signature, so it is kept as-is.
        CharSetProber.__init__(self)
        self._mModel = model
        # TRUE if we need to reverse every pair in the model lookup
        self._mReversed = reversed
        # Optional auxiliary prober for name decision
        self._mNameProber = nameProber
        self.reset()

    def reset(self):
        CharSetProber.reset(self)
        # char order of last character
        self._mLastOrder = 255
        self._mSeqCounters = [0] * NUMBER_OF_SEQ_CAT
        self._mTotalSeqs = 0
        self._mTotalChar = 0
        # characters that fall in our sampling range
        self._mFreqChar = 0

    def get_charset_name(self):
        if self._mNameProber:
            return self._mNameProber.get_charset_name()
        else:
            return self._mModel['charsetName']

    def feed(self, aBuf):
        if not self._mModel['keepEnglishLetter']:
            aBuf = self.filter_without_english_letters(aBuf)
        aLen = len(aBuf)
        if not aLen:
            return self.get_state()
        for c in aBuf:
            order = self._mModel['charToOrderMap'][wrap_ord(c)]
            if order < SYMBOL_CAT_ORDER:
                self._mTotalChar += 1
            if order < SAMPLE_SIZE:
                self._mFreqChar += 1
                if self._mLastOrder < SAMPLE_SIZE:
                    self._mTotalSeqs += 1
                    if not self._mReversed:
                        i = (self._mLastOrder * SAMPLE_SIZE) + order
                    else:  # reverse the order of the letters in the lookup
                        i = (order * SAMPLE_SIZE) + self._mLastOrder
                    self._mSeqCounters[self._mModel['precedenceMatrix'][i]] += 1
            self._mLastOrder = order

        if self.get_state() == constants.eDetecting:
            if self._mTotalSeqs > SB_ENOUGH_REL_THRESHOLD:
                cf = self.get_confidence()
                if cf > POSITIVE_SHORTCUT_THRESHOLD:
                    if constants._debug:
                        # BUGFIX: the implicit literal concatenation lacked a
                        # separating space ("we have awinner").
                        sys.stderr.write('%s confidence = %s, we have a '
                                         'winner\n' %
                                         (self._mModel['charsetName'], cf))
                    self._mState = constants.eFoundIt
                elif cf < NEGATIVE_SHORTCUT_THRESHOLD:
                    if constants._debug:
                        # BUGFIX: missing separating space ("negativeshortcut")
                        # and the "threshhold" typo.
                        sys.stderr.write('%s confidence = %s, below negative '
                                         'shortcut threshold %s\n' %
                                         (self._mModel['charsetName'], cf,
                                          NEGATIVE_SHORTCUT_THRESHOLD))
                    self._mState = constants.eNotMe

        return self.get_state()

    def get_confidence(self):
        r = 0.01
        if self._mTotalSeqs > 0:
            r = ((1.0 * self._mSeqCounters[POSITIVE_CAT]) / self._mTotalSeqs
                 / self._mModel['mTypicalPositiveRatio'])
            r = r * self._mFreqChar / self._mTotalChar
            if r >= 1.0:
                r = 0.99
        return r
121 |
--------------------------------------------------------------------------------
/myrequests/packages/chardet/sbcsgroupprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is Mozilla Universal charset detector code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 2001
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | # Shy Shalom - original C code
12 | #
13 | # This library is free software; you can redistribute it and/or
14 | # modify it under the terms of the GNU Lesser General Public
15 | # License as published by the Free Software Foundation; either
16 | # version 2.1 of the License, or (at your option) any later version.
17 | #
18 | # This library is distributed in the hope that it will be useful,
19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 | # Lesser General Public License for more details.
22 | #
23 | # You should have received a copy of the GNU Lesser General Public
24 | # License along with this library; if not, write to the Free Software
25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26 | # 02110-1301 USA
27 | ######################### END LICENSE BLOCK #########################
28 |
29 | from .charsetgroupprober import CharSetGroupProber
30 | from .sbcharsetprober import SingleByteCharSetProber
31 | from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel,
32 | Latin5CyrillicModel, MacCyrillicModel,
33 | Ibm866Model, Ibm855Model)
34 | from .langgreekmodel import Latin7GreekModel, Win1253GreekModel
35 | from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel
36 | from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel
37 | from .langthaimodel import TIS620ThaiModel
38 | from .langhebrewmodel import Win1255HebrewModel
39 | from .hebrewprober import HebrewProber
40 |
41 |
class SBCSGroupProber(CharSetGroupProber):
    """Group prober bundling every single-byte charset model plus the
    Hebrew logical/visual prober pair."""

    def __init__(self):
        CharSetGroupProber.__init__(self)
        # One SingleByteCharSetProber per plain language model, in the
        # same priority order as before.
        plain_models = (
            Win1251CyrillicModel, Koi8rModel, Latin5CyrillicModel,
            MacCyrillicModel, Ibm866Model, Ibm855Model,
            Latin7GreekModel, Win1253GreekModel,
            Latin5BulgarianModel, Win1251BulgarianModel,
            Latin2HungarianModel, Win1250HungarianModel,
            TIS620ThaiModel,
        )
        self._mProbers = [SingleByteCharSetProber(m) for m in plain_models]

        # Hebrew needs a coordinating prober that arbitrates between the
        # logical (Win1255) and visual (reversed) interpretations.
        hebrew_prober = HebrewProber()
        logical_hebrew = SingleByteCharSetProber(Win1255HebrewModel,
                                                 False, hebrew_prober)
        visual_hebrew = SingleByteCharSetProber(Win1255HebrewModel, True,
                                                hebrew_prober)
        hebrew_prober.set_model_probers(logical_hebrew, visual_hebrew)
        self._mProbers.extend([hebrew_prober, logical_hebrew, visual_hebrew])

        self.reset()
--------------------------------------------------------------------------------
/myrequests/packages/chardet/sjisprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | import sys
29 | from .mbcharsetprober import MultiByteCharSetProber
30 | from .codingstatemachine import CodingStateMachine
31 | from .chardistribution import SJISDistributionAnalysis
32 | from .jpcntx import SJISContextAnalysis
33 | from .mbcssm import SJISSMModel
34 | from . import constants
35 |
36 |
class SJISProber(MultiByteCharSetProber):
    """Prober for the Shift_JIS family of Japanese encodings.

    Pairs a byte-level coding state machine with two statistical
    analyzers (character distribution and two-character context);
    overall confidence is the max of the two.
    """

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        # Validates byte sequences against the Shift_JIS state machine.
        self._mCodingSM = CodingStateMachine(SJISSMModel)
        self._mDistributionAnalyzer = SJISDistributionAnalysis()
        self._mContextAnalyzer = SJISContextAnalysis()
        self.reset()

    def reset(self):
        MultiByteCharSetProber.reset(self)
        self._mContextAnalyzer.reset()

    def get_charset_name(self):
        # Delegated to the context analyzer, which picks the exact variant
        # name (presumably SHIFT_JIS vs CP932 -- confirm in jpcntx).
        return self._mContextAnalyzer.get_charset_name()

    def feed(self, aBuf):
        """Consume a chunk of bytes and return the prober state."""
        aLen = len(aBuf)
        for i in range(0, aLen):
            codingState = self._mCodingSM.next_state(aBuf[i])
            if codingState == constants.eError:
                if constants._debug:
                    sys.stderr.write(self.get_charset_name()
                                     + ' prober hit error at byte ' + str(i)
                                     + '\n')
                self._mState = constants.eNotMe
                break
            elif codingState == constants.eItsMe:
                self._mState = constants.eFoundIt
                break
            elif codingState == constants.eStart:
                # A complete character just ended; hand it to the analyzers.
                charLen = self._mCodingSM.get_current_charlen()
                if i == 0:
                    # The character straddles the previous chunk: rebuild it
                    # from the byte(s) saved in _mLastChar.
                    self._mLastChar[1] = aBuf[0]
                    self._mContextAnalyzer.feed(self._mLastChar[2 - charLen:],
                                                charLen)
                    self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
                else:
                    # NOTE(review): the context slice is offset one byte from
                    # the distribution slice -- inherited as-is from upstream.
                    self._mContextAnalyzer.feed(aBuf[i + 1 - charLen:i + 3
                                                     - charLen], charLen)
                    self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
                                                     charLen)

        # Remember the final byte in case a character spans chunks.
        self._mLastChar[0] = aBuf[aLen - 1]

        if self.get_state() == constants.eDetecting:
            # Shortcut: stop early once the context analyzer is confident.
            if (self._mContextAnalyzer.got_enough_data() and
                    (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
                self._mState = constants.eFoundIt

        return self.get_state()

    def get_confidence(self):
        # Trust whichever analyzer is more confident.
        contxtCf = self._mContextAnalyzer.get_confidence()
        distribCf = self._mDistributionAnalyzer.get_confidence()
        return max(contxtCf, distribCf)
92 |
--------------------------------------------------------------------------------
/myrequests/packages/chardet/universaldetector.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is Mozilla Universal charset detector code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 2001
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | # Shy Shalom - original C code
12 | #
13 | # This library is free software; you can redistribute it and/or
14 | # modify it under the terms of the GNU Lesser General Public
15 | # License as published by the Free Software Foundation; either
16 | # version 2.1 of the License, or (at your option) any later version.
17 | #
18 | # This library is distributed in the hope that it will be useful,
19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 | # Lesser General Public License for more details.
22 | #
23 | # You should have received a copy of the GNU Lesser General Public
24 | # License along with this library; if not, write to the Free Software
25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26 | # 02110-1301 USA
27 | ######################### END LICENSE BLOCK #########################
28 |
29 | from . import constants
30 | import sys
31 | import codecs
32 | from .latin1prober import Latin1Prober # windows-1252
33 | from .mbcsgroupprober import MBCSGroupProber # multi-byte character sets
34 | from .sbcsgroupprober import SBCSGroupProber # single-byte character sets
35 | from .escprober import EscCharSetProber # ISO-2122, etc.
36 | import re
37 |
#: A prober must beat this confidence for its verdict to be reported.
MINIMUM_THRESHOLD = 0.20
# Input-state machine values for the detector.
ePureAscii = 0
eEscAscii = 1
eHighbyte = 2


class UniversalDetector:
    """Incremental character-encoding detector.

    Feed byte chunks with :meth:`feed`, then call :meth:`close`; the
    verdict is in ``self.result`` (``{'encoding': ..., 'confidence': ...}``).
    A BOM short-circuits detection; otherwise data is routed to the
    escape-sequence prober or the byte-frequency prober groups.
    """

    def __init__(self):
        # Any byte with the high bit set rules out pure ASCII.
        self._highBitDetector = re.compile(b'[\x80-\xFF]')
        # ESC (ISO-2022 family) or '~{' (HZ-GB-2312) marks escape encodings.
        self._escDetector = re.compile(b'(\033|~{)')
        self._mEscCharSetProber = None
        self._mCharSetProbers = []
        self.reset()

    def reset(self):
        """Restore the initial state so this detector can be reused."""
        self.result = {'encoding': None, 'confidence': 0.0}
        self.done = False
        self._mStart = True
        self._mGotData = False
        self._mInputState = ePureAscii
        self._mLastChar = b''
        if self._mEscCharSetProber:
            self._mEscCharSetProber.reset()
        for prober in self._mCharSetProbers:
            prober.reset()

    def feed(self, aBuf):
        """Consume one chunk of bytes; may set ``self.done``/``self.result``."""
        if self.done:
            return

        aLen = len(aBuf)
        if not aLen:
            return

        if not self._mGotData:
            # If the data starts with BOM, we know it is UTF
            if aBuf[:3] == codecs.BOM_UTF8:
                # EF BB BF  UTF-8 with BOM
                self.result = {'encoding': "UTF-8-SIG", 'confidence': 1.0}
            elif aBuf[:4] == codecs.BOM_UTF32_LE:
                # FF FE 00 00  UTF-32, little-endian BOM
                self.result = {'encoding': "UTF-32LE", 'confidence': 1.0}
            elif aBuf[:4] == codecs.BOM_UTF32_BE:
                # 00 00 FE FF  UTF-32, big-endian BOM
                self.result = {'encoding': "UTF-32BE", 'confidence': 1.0}
            elif aBuf[:4] == b'\xFE\xFF\x00\x00':
                # FE FF 00 00  UCS-4, unusual octet order BOM (3412)
                self.result = {
                    'encoding': "X-ISO-10646-UCS-4-3412",
                    'confidence': 1.0
                }
            elif aBuf[:4] == b'\x00\x00\xFF\xFE':
                # 00 00 FF FE  UCS-4, unusual octet order BOM (2143)
                self.result = {
                    'encoding': "X-ISO-10646-UCS-4-2143",
                    'confidence': 1.0
                }
            elif aBuf[:2] == codecs.BOM_LE:
                # FF FE  UTF-16, little endian BOM
                self.result = {'encoding': "UTF-16LE", 'confidence': 1.0}
            elif aBuf[:2] == codecs.BOM_BE:
                # FE FF  UTF-16, big endian BOM
                self.result = {'encoding': "UTF-16BE", 'confidence': 1.0}

            self._mGotData = True
            if self.result['encoding'] and (self.result['confidence'] > 0.0):
                self.done = True
                return

        if self._mInputState == ePureAscii:
            if self._highBitDetector.search(aBuf):
                self._mInputState = eHighbyte
            elif ((self._mInputState == ePureAscii) and
                    self._escDetector.search(self._mLastChar + aBuf)):
                self._mInputState = eEscAscii

        # Keep the final byte so an escape split across chunks is still seen.
        self._mLastChar = aBuf[-1:]

        if self._mInputState == eEscAscii:
            if not self._mEscCharSetProber:
                self._mEscCharSetProber = EscCharSetProber()
            if self._mEscCharSetProber.feed(aBuf) == constants.eFoundIt:
                self.result = {'encoding': self._mEscCharSetProber.get_charset_name(),
                               'confidence': self._mEscCharSetProber.get_confidence()}
                self.done = True
        elif self._mInputState == eHighbyte:
            if not self._mCharSetProbers:
                # Probers are created lazily, only once high-bit data appears.
                self._mCharSetProbers = [MBCSGroupProber(), SBCSGroupProber(),
                                         Latin1Prober()]
            for prober in self._mCharSetProbers:
                if prober.feed(aBuf) == constants.eFoundIt:
                    self.result = {'encoding': prober.get_charset_name(),
                                   'confidence': prober.get_confidence()}
                    self.done = True
                    break

    def close(self):
        """Finalize detection; returns ``self.result`` when conclusive."""
        if self.done:
            return
        if not self._mGotData:
            if constants._debug:
                sys.stderr.write('no data received!\n')
            return
        self.done = True

        if self._mInputState == ePureAscii:
            self.result = {'encoding': 'ascii', 'confidence': 1.0}
            return self.result

        if self._mInputState == eHighbyte:
            proberConfidence = None
            maxProberConfidence = 0.0
            maxProber = None
            for prober in self._mCharSetProbers:
                if not prober:
                    continue
                proberConfidence = prober.get_confidence()
                if proberConfidence > maxProberConfidence:
                    maxProberConfidence = proberConfidence
                    maxProber = prober
            if maxProber and (maxProberConfidence > MINIMUM_THRESHOLD):
                self.result = {'encoding': maxProber.get_charset_name(),
                               'confidence': maxProber.get_confidence()}
                return self.result

        if constants._debug:
            sys.stderr.write('no probers hit minimum threshold\n')
            # BUG FIX: group probers store their children in `_mProbers`
            # (see SBCSGroupProber); the old `mProbers` attribute raised
            # AttributeError whenever this debug branch ran.
            for prober in self._mCharSetProbers[0]._mProbers:
                if not prober:
                    continue
                sys.stderr.write('%s confidence = %s\n' %
                                 (prober.get_charset_name(),
                                  prober.get_confidence()))
171 |
--------------------------------------------------------------------------------
/myrequests/packages/chardet/utf8prober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from . import constants
29 | from .charsetprober import CharSetProber
30 | from .codingstatemachine import CodingStateMachine
31 | from .mbcssm import UTF8SMModel
32 |
33 | ONE_CHAR_PROB = 0.5
34 |
35 |
class UTF8Prober(CharSetProber):
    """Prober that recognizes UTF-8 via its byte-sequence state machine.

    Confidence grows with every valid multi-byte character observed.
    """

    def __init__(self):
        CharSetProber.__init__(self)
        self._mCodingSM = CodingStateMachine(UTF8SMModel)
        self.reset()

    def reset(self):
        CharSetProber.reset(self)
        self._mCodingSM.reset()
        # Count of complete multi-byte (>= 2 byte) characters seen so far.
        self._mNumOfMBChar = 0

    def get_charset_name(self):
        return "utf-8"

    def feed(self, aBuf):
        """Consume a chunk of bytes and return the prober state."""
        for byte in aBuf:
            machine_state = self._mCodingSM.next_state(byte)
            if machine_state == constants.eError:
                # Invalid UTF-8 sequence: this is definitely not UTF-8.
                self._mState = constants.eNotMe
                break
            if machine_state == constants.eItsMe:
                self._mState = constants.eFoundIt
                break
            if (machine_state == constants.eStart
                    and self._mCodingSM.get_current_charlen() >= 2):
                self._mNumOfMBChar += 1

        if self.get_state() == constants.eDetecting:
            # Shortcut: commit once confidence clears the threshold.
            if self.get_confidence() > constants.SHORTCUT_THRESHOLD:
                self._mState = constants.eFoundIt

        return self.get_state()

    def get_confidence(self):
        unlike = 0.99
        if self._mNumOfMBChar >= 6:
            return unlike
        # Each multi-byte char halves the "not UTF-8" likelihood
        # (multiplying by ONE_CHAR_PROB = 0.5 is exact in binary floats,
        # so this matches the original per-character loop bit for bit).
        return 1.0 - unlike * (ONE_CHAR_PROB ** self._mNumOfMBChar)
77 |
--------------------------------------------------------------------------------
/myrequests/packages/urllib3/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | urllib3 - Thread-safe connection pooling and re-using.
3 | """
4 |
5 | __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)'
6 | __license__ = 'MIT'
7 | __version__ = '1.10.4'
8 |
9 |
10 | from .connectionpool import (
11 | HTTPConnectionPool,
12 | HTTPSConnectionPool,
13 | connection_from_url
14 | )
15 |
16 | from . import exceptions
17 | from .filepost import encode_multipart_formdata
18 | from .poolmanager import PoolManager, ProxyManager, proxy_from_url
19 | from .response import HTTPResponse
20 | from .util.request import make_headers
21 | from .util.url import get_host
22 | from .util.timeout import Timeout
23 | from .util.retry import Retry
24 |
25 |
# Set default logging handler to avoid "No handler found" warnings.
import logging
try:  # Python 2.7+
    from logging import NullHandler
except ImportError:
    class NullHandler(logging.Handler):
        # Minimal stand-in for logging.NullHandler on Python < 2.7:
        # swallows every record.
        def emit(self, record):
            pass

# Attach the no-op handler to this package's logger so library users who
# haven't configured logging don't see warnings.
logging.getLogger(__name__).addHandler(NullHandler())
36 |
def add_stderr_logger(level=logging.DEBUG):
    """
    Attach a StreamHandler to this package's logger for quick debugging.

    Returns the handler after adding it.
    """
    # This must live in __init__.py so __name__ is correct even when
    # urllib3 is vendored inside another package.
    handler = logging.StreamHandler()
    handler.setFormatter(
        logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
    logger = logging.getLogger(__name__)
    logger.addHandler(handler)
    logger.setLevel(level)
    logger.debug('Added a stderr logging handler to logger: %s' % __name__)
    return handler
53 |
# Clean up: the handler instance is already registered above, so the
# NullHandler class name is no longer needed in the module namespace.
del NullHandler
56 |
57 |
import warnings
# SecurityWarning's always go off by default.
# (append=True adds these filters after any user-installed ones, so a
# user's own filter configuration still takes precedence.)
warnings.simplefilter('always', exceptions.SecurityWarning, append=True)
# InsecurePlatformWarning's don't vary between requests, so we keep it default.
warnings.simplefilter('default', exceptions.InsecurePlatformWarning,
                      append=True)
64 |
def disable_warnings(category=exceptions.HTTPWarning):
    """
    Helper for quickly disabling all urllib3 warnings.
    """
    # Installs an 'ignore' filter for *category* and all of its subclasses.
    warnings.simplefilter('ignore', category)
70 |
--------------------------------------------------------------------------------
/myrequests/packages/urllib3/connection.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | import sys
3 | import socket
4 | from socket import timeout as SocketTimeout
5 | import warnings
6 | from .packages import six
7 |
8 | try: # Python 3
9 | from http.client import HTTPConnection as _HTTPConnection, HTTPException
10 | except ImportError:
11 | from httplib import HTTPConnection as _HTTPConnection, HTTPException
12 |
13 |
class DummyConnection(object):
    """Sentinel class used to detect a failed ConnectionCls import."""
17 |
18 |
try:  # Compiled with SSL?
    # Placeholder so the HTTPSConnection name exists even if the ssl
    # import below fails; replaced at the bottom of this module.
    HTTPSConnection = DummyConnection
    import ssl
    BaseSSLError = ssl.SSLError
except (ImportError, AttributeError):  # Platform-specific: No SSL.
    ssl = None

    class BaseSSLError(BaseException):
        # Stand-in so `except BaseSSLError:` clauses still compile when
        # the ssl module is unavailable; never actually raised here.
        pass


try:  # Python 3:
    # Not a no-op, we're adding this to the namespace so it can be imported.
    ConnectionError = ConnectionError
except NameError:  # Python 2:
    class ConnectionError(Exception):
        # Python 2 has no builtin ConnectionError; define a compatible one.
        pass
36 |
37 |
38 | from .exceptions import (
39 | ConnectTimeoutError,
40 | SystemTimeWarning,
41 | SecurityWarning,
42 | )
43 | from .packages.ssl_match_hostname import match_hostname
44 |
45 | from .util.ssl_ import (
46 | resolve_cert_reqs,
47 | resolve_ssl_version,
48 | ssl_wrap_socket,
49 | assert_fingerprint,
50 | )
51 |
52 |
53 | from .util import connection
54 |
#: Default TCP port for each supported URL scheme.
port_by_scheme = {
    'http': 80,
    'https': 443,
}

# Sanity bound for the system clock: VerifiedHTTPSConnection.connect()
# warns when today's date is earlier than this, since a badly-set clock
# typically produces spurious certificate-validity errors.
RECENT_DATE = datetime.date(2014, 1, 1)
61 |
62 |
class HTTPConnection(_HTTPConnection, object):
    """
    httplib.HTTPConnection subclass providing a constructor
    backwards-compatibility layer between older and newer Pythons.

    Additional keyword parameters configure attributes of the connection:

    - ``strict``: see :class:`urllib3.connectionpool.HTTPConnectionPool`;
      dropped on Python 3, where httplib no longer accepts it.
    - ``source_address``: source address for the current connection.

      .. note:: Ignored for Python 2.6; applied for 2.7 and 3.x.

    - ``socket_options``: options set on the underlying socket. Defaults
      to ``HTTPConnection.default_socket_options``, which disables
      Nagle's algorithm. To add TCP keep-alive on top of the defaults::

          HTTPConnection.default_socket_options + [
              (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1),
          ]

      Pass an empty list (``[]``) to disable the defaults entirely.
    """

    default_port = port_by_scheme['http']

    #: Disable Nagle's algorithm by default.
    #: ``[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]``
    default_socket_options = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]

    #: Whether this connection verifies the host's certificate.
    is_verified = False

    def __init__(self, *args, **kw):
        if six.PY3:  # Python 3's httplib rejects the 'strict' kwarg.
            kw.pop('strict', None)

        # Save source_address before it may be popped: Python 2.6's
        # _HTTPConnection constructor does not accept the keyword.
        self.source_address = kw.get('source_address')
        if sys.version_info < (2, 7):  # Python 2.6
            kw.pop('source_address', None)

        # User-supplied socket options, falling back to the defaults.
        self.socket_options = kw.pop('socket_options',
                                     self.default_socket_options)

        # Superclass also sets self.source_address in Python 2.7+.
        _HTTPConnection.__init__(self, *args, **kw)

    def _new_conn(self):
        """Establish a socket connection and set nodelay settings on it.

        :return: New socket connection.
        :raises ConnectTimeoutError: if the connect attempt times out.
        """
        connect_kw = {}
        if self.source_address:
            connect_kw['source_address'] = self.source_address
        if self.socket_options:
            connect_kw['socket_options'] = self.socket_options

        try:
            return connection.create_connection(
                (self.host, self.port), self.timeout, **connect_kw)
        except SocketTimeout:
            raise ConnectTimeoutError(
                self, "Connection to %s timed out. (connect timeout=%s)" %
                (self.host, self.timeout))

    def _prepare_conn(self, conn):
        """Install *conn* as this connection's socket, tunneling if needed."""
        self.sock = conn
        # _tunnel_host appeared in Python 2.6.3 (via
        # http://hg.python.org/cpython/rev/0f57b30a152f); older 2.6.x lack it.
        if getattr(self, '_tunnel_host', None):
            # TODO: Fix tunnel so it doesn't depend on self.sock state.
            self._tunnel()
            # A tunneled connection is marked as not reusable.
            self.auto_open = 0

    def connect(self):
        self._prepare_conn(self._new_conn())
157 |
158 |
class HTTPSConnection(HTTPConnection):
    """HTTPConnection that wraps its socket with SSL (no verification;
    see VerifiedHTTPSConnection for the verifying variant)."""

    default_port = port_by_scheme['https']

    def __init__(self, host, port=None, key_file=None, cert_file=None,
                 strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, **kw):
        HTTPConnection.__init__(self, host, port, strict=strict,
                                timeout=timeout, **kw)
        # Optional client certificate/key used when wrapping the socket.
        self.key_file = key_file
        self.cert_file = cert_file

        # Required property for Google AppEngine 1.9.0 which otherwise causes
        # HTTPS requests to go out as HTTP. (See Issue #356)
        self._protocol = 'https'

    def connect(self):
        conn = self._new_conn()
        self._prepare_conn(conn)
        # Wrap without certificate verification.
        self.sock = ssl.wrap_socket(conn, self.key_file, self.cert_file)
179 |
180 |
class VerifiedHTTPSConnection(HTTPSConnection):
    """
    Based on httplib.HTTPSConnection but wraps the socket with
    SSL certification.
    """
    # Verification policy; populated via set_cert() before connect().
    cert_reqs = None
    ca_certs = None
    ssl_version = None
    assert_fingerprint = None

    def set_cert(self, key_file=None, cert_file=None,
                 cert_reqs=None, ca_certs=None,
                 assert_hostname=None, assert_fingerprint=None):
        # Record client credentials and the verification policy; no I/O
        # happens here -- everything is applied in connect().
        self.key_file = key_file
        self.cert_file = cert_file
        self.cert_reqs = cert_reqs
        self.ca_certs = ca_certs
        self.assert_hostname = assert_hostname
        self.assert_fingerprint = assert_fingerprint

    def connect(self):
        # Add certificate verification
        conn = self._new_conn()

        # Translate user-friendly cert_reqs / ssl_version values into the
        # ssl-module constants expected by ssl_wrap_socket().
        resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs)
        resolved_ssl_version = resolve_ssl_version(self.ssl_version)

        hostname = self.host
        if getattr(self, '_tunnel_host', None):
            # _tunnel_host was added in Python 2.6.3
            # (See: http://hg.python.org/cpython/rev/0f57b30a152f)

            self.sock = conn
            # Calls self._set_hostport(), so self.host is
            # self._tunnel_host below.
            self._tunnel()
            # Mark this connection as not reusable
            self.auto_open = 0

            # Override the host with the one we're requesting data from.
            hostname = self._tunnel_host

        # A clock set before RECENT_DATE usually means spurious
        # certificate-validity failures; warn instead of failing outright.
        is_time_off = datetime.date.today() < RECENT_DATE
        if is_time_off:
            warnings.warn((
                'System time is way off (before {0}). This will probably '
                'lead to SSL verification errors').format(RECENT_DATE),
                SystemTimeWarning
            )

        # Wrap socket using verification with the root certs in
        # trusted_root_certs
        self.sock = ssl_wrap_socket(conn, self.key_file, self.cert_file,
                                    cert_reqs=resolved_cert_reqs,
                                    ca_certs=self.ca_certs,
                                    server_hostname=hostname,
                                    ssl_version=resolved_ssl_version)

        if self.assert_fingerprint:
            # Fingerprint pinning takes precedence over hostname matching.
            assert_fingerprint(self.sock.getpeercert(binary_form=True),
                               self.assert_fingerprint)
        elif resolved_cert_reqs != ssl.CERT_NONE \
                and self.assert_hostname is not False:
            cert = self.sock.getpeercert()
            if not cert.get('subjectAltName', ()):
                warnings.warn((
                    'Certificate has no `subjectAltName`, falling back to check for a `commonName` for now. '
                    'This feature is being removed by major browsers and deprecated by RFC 2818. '
                    '(See https://github.com/shazow/urllib3/issues/497 for details.)'),
                    SecurityWarning
                )
            match_hostname(cert, self.assert_hostname or hostname)

        # Verified if certs were required, or if a fingerprint was pinned.
        self.is_verified = (resolved_cert_reqs == ssl.CERT_REQUIRED
                            or self.assert_fingerprint is not None)
257 |
258 |
if ssl:
    # Make a copy for testing.
    UnverifiedHTTPSConnection = HTTPSConnection
    # With working SSL, the exported HTTPSConnection verifies certificates.
    HTTPSConnection = VerifiedHTTPSConnection
else:
    # No ssl module available: any attempt to use HTTPS fails loudly.
    HTTPSConnection = DummyConnection
265 |
--------------------------------------------------------------------------------
/myrequests/packages/urllib3/contrib/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BrendanBenshoof/cachewarmer/2472f51a4032326ba76a6c2865a75c5a86a6b659/myrequests/packages/urllib3/contrib/__init__.py
--------------------------------------------------------------------------------
/myrequests/packages/urllib3/contrib/ntlmpool.py:
--------------------------------------------------------------------------------
1 | """
2 | NTLM authenticating pool, contributed by erikcederstran
3 |
4 | Issue #10, see: http://code.google.com/p/urllib3/issues/detail?id=10
5 | """
6 |
7 | try:
8 | from http.client import HTTPSConnection
9 | except ImportError:
10 | from httplib import HTTPSConnection
11 | from logging import getLogger
12 | from ntlm import ntlm
13 |
14 | from urllib3 import HTTPSConnectionPool
15 |
16 |
# Module-level logger used for the NTLM handshake debug output below.
log = getLogger(__name__)
18 |
19 |
class NTLMConnectionPool(HTTPSConnectionPool):
    """
    Implements an NTLM authentication version of an urllib3 connection pool
    """

    scheme = 'https'

    def __init__(self, user, pw, authurl, *args, **kwargs):
        """
        authurl is a random URL on the server that is protected by NTLM.
        user is the Windows user, probably in the DOMAIN\\username format.
        pw is the password for the user.
        """
        super(NTLMConnectionPool, self).__init__(*args, **kwargs)
        self.authurl = authurl
        self.rawuser = user
        # Split 'DOMAIN\\username' at the first backslash only.
        # NOTE(review): assumes the backslash is present; a bare username
        # would raise IndexError below -- confirm callers always pass it.
        user_parts = user.split('\\', 1)
        self.domain = user_parts[0].upper()
        self.user = user_parts[1]
        self.pw = pw

    def _new_conn(self):
        # Performs the NTLM handshake that secures the connection. The socket
        # must be kept open while requests are performed.
        self.num_connections += 1
        log.debug('Starting NTLM HTTPS connection no. %d: https://%s%s' %
                  (self.num_connections, self.host, self.authurl))

        headers = {}
        headers['Connection'] = 'Keep-Alive'
        req_header = 'Authorization'
        resp_header = 'www-authenticate'

        conn = HTTPSConnection(host=self.host, port=self.port)

        # Send negotiation message
        headers[req_header] = (
            'NTLM %s' % ntlm.create_NTLM_NEGOTIATE_MESSAGE(self.rawuser))
        log.debug('Request headers: %s' % headers)
        conn.request('GET', self.authurl, None, headers)
        res = conn.getresponse()
        reshdr = dict(res.getheaders())
        log.debug('Response status: %s %s' % (res.status, res.reason))
        log.debug('Response headers: %s' % reshdr)
        log.debug('Response data: %s [...]' % res.read(100))

        # Remove the reference to the socket, so that it can not be closed by
        # the response object (we want to keep the socket open)
        res.fp = None

        # Server should respond with a challenge message
        auth_header_values = reshdr[resp_header].split(', ')
        auth_header_value = None
        for s in auth_header_values:
            if s[:5] == 'NTLM ':
                auth_header_value = s[5:]
        if auth_header_value is None:
            raise Exception('Unexpected %s response header: %s' %
                            (resp_header, reshdr[resp_header]))

        # Send authentication message
        ServerChallenge, NegotiateFlags = \
            ntlm.parse_NTLM_CHALLENGE_MESSAGE(auth_header_value)
        auth_msg = ntlm.create_NTLM_AUTHENTICATE_MESSAGE(ServerChallenge,
                                                         self.user,
                                                         self.domain,
                                                         self.pw,
                                                         NegotiateFlags)
        headers[req_header] = 'NTLM %s' % auth_msg
        log.debug('Request headers: %s' % headers)
        conn.request('GET', self.authurl, None, headers)
        res = conn.getresponse()
        log.debug('Response status: %s %s' % (res.status, res.reason))
        log.debug('Response headers: %s' % dict(res.getheaders()))
        log.debug('Response data: %s [...]' % res.read()[:100])
        if res.status != 200:
            if res.status == 401:
                raise Exception('Server rejected request: wrong '
                                'username or password')
            raise Exception('Wrong server response: %s %s' %
                            (res.status, res.reason))

        # Keep the authenticated socket open for subsequent requests.
        res.fp = None
        log.debug('Connection established')
        return conn

    def urlopen(self, method, url, body=None, headers=None, retries=3,
                redirect=True, assert_same_host=True):
        # Force keep-alive so the NTLM-authenticated socket gets reused.
        if headers is None:
            headers = {}
        headers['Connection'] = 'Keep-Alive'
        return super(NTLMConnectionPool, self).urlopen(method, url, body,
                                                       headers, retries,
                                                       redirect,
                                                       assert_same_host)
115 |
--------------------------------------------------------------------------------
/myrequests/packages/urllib3/exceptions.py:
--------------------------------------------------------------------------------
1 |
2 | ## Base Exceptions
3 |
class HTTPError(Exception):
    """Base exception used by this module."""
7 |
class HTTPWarning(Warning):
    """Base warning used by this module."""
11 |
12 |
13 |
class PoolError(HTTPError):
    """Base exception for errors caused within a pool."""

    def __init__(self, pool, message):
        self.pool = pool
        HTTPError.__init__(self, "%s: %s" % (pool, message))

    def __reduce__(self):
        # For pickling purposes: pool handles aren't picklable, so they
        # are dropped from the reconstructed exception.
        return self.__class__, (None, None)
23 |
24 |
class RequestError(PoolError):
    """Base exception for PoolErrors that have associated URLs."""

    def __init__(self, pool, url, message):
        self.url = url
        PoolError.__init__(self, pool, message)

    def __reduce__(self):
        # For pickling purposes: keep the URL, drop the pool handle.
        return self.__class__, (None, self.url, None)
34 |
35 |
class SSLError(HTTPError):
    """Raised when SSL certificate fails in an HTTPS connection."""
39 |
40 |
class ProxyError(HTTPError):
    "Raised when the connection to a proxy fails."
    pass
44 |
45 |
class DecodeError(HTTPError):
    "Raised when automatic decoding of a response body based on Content-Type fails."
    pass
49 |
50 |
class ProtocolError(HTTPError):
    "Raised when something unexpected happens mid-request/response."
    pass
54 |
55 |
#: Renamed to ProtocolError but aliased for backwards compatibility.
#: NOTE(review): within this module the name shadows the Python 3 builtin
#: ``ConnectionError``.
ConnectionError = ProtocolError
58 |
59 |
60 | ## Leaf Exceptions
61 |
class MaxRetryError(RequestError):
    """Raised when the maximum number of retries is exceeded.

    :param pool: The connection pool
    :type pool: :class:`~urllib3.connectionpool.HTTPConnectionPool`
    :param string url: The requested Url
    :param exceptions.Exception reason: The underlying error

    """

    def __init__(self, pool, url, reason=None):
        self.reason = reason
        # Embed the underlying cause in the rendered message.
        message = ("Max retries exceeded with url: %s (Caused by %r)"
                   % (url, reason))
        RequestError.__init__(self, pool, url, message)
79 |
80 |
class HostChangedError(RequestError):
    "Raised when an existing pool gets a request for a foreign host."

    def __init__(self, pool, url, retries=3):
        RequestError.__init__(
            self, pool, url,
            "Tried to open a foreign host with url: %s" % url)
        self.retries = retries
88 |
89 |
class TimeoutStateError(HTTPError):
    """Raised when passing an invalid state to a timeout."""
    pass
93 |
94 |
class TimeoutError(HTTPError):
    """Raised when a socket timeout error occurs.

    Catching this error will catch both :exc:`ReadTimeoutError` and
    :exc:`ConnectTimeoutError`.
    """
    pass
102 |
103 |
class ReadTimeoutError(TimeoutError, RequestError):
    "Raised when a socket timeout occurs while receiving data from a server."
    pass
107 |
108 |
# This timeout error does not have a URL attached and needs to inherit from the
# base HTTPError
class ConnectTimeoutError(TimeoutError):
    "Raised when a socket timeout occurs while connecting to a server."
    pass
114 |
115 |
class EmptyPoolError(PoolError):
    "Raised when a pool runs out of connections and no more are allowed."
    pass
119 |
120 |
class ClosedPoolError(PoolError):
    "Raised when a request enters a pool after the pool has been closed."
    pass
124 |
125 |
class LocationValueError(ValueError, HTTPError):
    "Raised when there is something wrong with a given URL input."
    pass
129 |
130 |
class LocationParseError(LocationValueError):
    "Raised when get_host or similar fails to parse the URL input."

    def __init__(self, location):
        # Keep the raw input around for callers that want to inspect it.
        self.location = location
        HTTPError.__init__(self, "Failed to parse: %s" % location)
139 |
140 |
class ResponseError(HTTPError):
    "Used as a container for an error reason supplied in a MaxRetryError."
    # Message templates used by retry logic when a response-status retry
    # limit is exhausted.
    GENERIC_ERROR = 'too many error responses'
    SPECIFIC_ERROR = 'too many {status_code} error responses'
145 |
146 |
class SecurityWarning(HTTPWarning):
    "Warned when performing security-reducing actions."
    pass
150 |
151 |
class InsecureRequestWarning(SecurityWarning):
    "Warned when making an unverified HTTPS request."
    pass
155 |
156 |
class SystemTimeWarning(SecurityWarning):
    "Warned when the system time is suspected to be wrong."
    pass
160 |
161 |
class InsecurePlatformWarning(SecurityWarning):
    "Warned when certain SSL configuration is not available on a platform."
    pass
165 |
166 |
class ResponseNotChunked(ProtocolError, ValueError):
    "Response needs to be chunked in order to read it as chunks."
    pass
170 |
--------------------------------------------------------------------------------
/myrequests/packages/urllib3/fields.py:
--------------------------------------------------------------------------------
1 | import email.utils
2 | import mimetypes
3 |
4 | from .packages import six
5 |
6 |
def guess_content_type(filename, default='application/octet-stream'):
    """
    Guess the "Content-Type" of a file.

    :param filename:
        The filename to guess the "Content-Type" of using :mod:`mimetypes`.
    :param default:
        If no "Content-Type" can be guessed, default to `default`.
    """
    if not filename:
        return default
    guessed, _encoding = mimetypes.guess_type(filename)
    return guessed or default
19 |
20 |
def format_header_param(name, value):
    """
    Helper function to format and quote a single header parameter.

    Particularly useful for header parameters which might contain
    non-ASCII values, like file names. This follows RFC 2231, as
    suggested by RFC 2388 Section 4.4.

    :param name:
        The name of the parameter, a string expected to be ASCII only.
    :param value:
        The value of the parameter, provided as a unicode string.
    """
    needs_encoding = any(ch in value for ch in '"\\\r\n')
    if not needs_encoding:
        candidate = '%s="%s"' % (name, value)
        try:
            candidate.encode('ascii')
        except UnicodeEncodeError:
            # Fall through to the RFC 2231 encoded form below.
            pass
        else:
            return candidate
    if not six.PY3:  # Python 2:
        value = value.encode('utf-8')
    # RFC 2231 extended-parameter form: name*=utf-8''percent-encoded
    value = email.utils.encode_rfc2231(value, 'utf-8')
    return '%s*=%s' % (name, value)
47 |
48 |
class RequestField(object):
    """
    A data container for request body parameters.

    :param name:
        The name of this request field.
    :param data:
        The data/value body.
    :param filename:
        An optional filename of the request field.
    :param headers:
        An optional dict-like object of headers to initially use for the field.
    """
    def __init__(self, name, data, filename=None, headers=None):
        self._name = name
        self._filename = filename
        self.data = data
        self.headers = {}
        if headers:
            # Copy so later mutations of self.headers don't affect the caller.
            self.headers = dict(headers)

    @classmethod
    def from_tuples(cls, fieldname, value):
        """
        A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters.

        Supports constructing :class:`~urllib3.fields.RequestField` from
        parameter of key/value strings AND key/filetuple. A filetuple is a
        (filename, data, MIME type) tuple where the MIME type is optional.
        For example::

            'foo': 'bar',
            'fakefile': ('foofile.txt', 'contents of foofile'),
            'realfile': ('barfile.txt', open('realfile').read()),
            'typedfile': ('bazfile.bin', open('bazfile').read(), 'image/jpeg'),
            'nonamefile': 'contents of nonamefile field',

        Field names and filenames must be unicode.
        """
        if isinstance(value, tuple):
            if len(value) == 3:
                filename, data, content_type = value
            else:
                # Two-element filetuple: derive the MIME type from the name.
                filename, data = value
                content_type = guess_content_type(filename)
        else:
            # Plain key/value: no filename and no explicit content type.
            filename = None
            content_type = None
            data = value

        request_param = cls(fieldname, data, filename=filename)
        request_param.make_multipart(content_type=content_type)

        return request_param

    def _render_part(self, name, value):
        """
        Overridable helper function to format a single header parameter.

        :param name:
            The name of the parameter, a string expected to be ASCII only.
        :param value:
            The value of the parameter, provided as a unicode string.
        """
        return format_header_param(name, value)

    def _render_parts(self, header_parts):
        """
        Helper function to format and quote a single header.

        Useful for single headers that are composed of multiple items. E.g.,
        'Content-Disposition' fields.

        :param header_parts:
            A sequence of (k, v) tuples or a :class:`dict` of (k, v) to format
            as `k1="v1"; k2="v2"; ...`.
        """
        parts = []
        iterable = header_parts
        if isinstance(header_parts, dict):
            iterable = header_parts.items()

        for name, value in iterable:
            # Skip parts with falsy values (e.g. filename=None).
            if value:
                parts.append(self._render_part(name, value))

        return '; '.join(parts)

    def render_headers(self):
        """
        Renders the headers for this request field.

        The well-known headers are emitted first, in a fixed order; any
        remaining headers follow in dict order. A trailing blank line
        terminates the header section (CRLF line endings throughout).
        """
        lines = []

        sort_keys = ['Content-Disposition', 'Content-Type', 'Content-Location']
        for sort_key in sort_keys:
            if self.headers.get(sort_key, False):
                lines.append('%s: %s' % (sort_key, self.headers[sort_key]))

        for header_name, header_value in self.headers.items():
            if header_name not in sort_keys:
                if header_value:
                    lines.append('%s: %s' % (header_name, header_value))

        lines.append('\r\n')
        return '\r\n'.join(lines)

    def make_multipart(self, content_disposition=None, content_type=None,
                       content_location=None):
        """
        Makes this request field into a multipart request field.

        This method overrides "Content-Disposition", "Content-Type" and
        "Content-Location" headers to the request parameter.

        :param content_disposition:
            The 'Content-Disposition' of the request body; defaults to
            'form-data'.
        :param content_type:
            The 'Content-Type' of the request body.
        :param content_location:
            The 'Content-Location' of the request body.

        """
        self.headers['Content-Disposition'] = content_disposition or 'form-data'
        # Append '; name="..."; filename="..."' (falsy parts are skipped).
        self.headers['Content-Disposition'] += '; '.join([
            '', self._render_parts(
                (('name', self._name), ('filename', self._filename))
            )
        ])
        self.headers['Content-Type'] = content_type
        self.headers['Content-Location'] = content_location
178 |
--------------------------------------------------------------------------------
/myrequests/packages/urllib3/filepost.py:
--------------------------------------------------------------------------------
1 | import codecs
2 |
3 | from uuid import uuid4
4 | from io import BytesIO
5 |
6 | from .packages import six
7 | from .packages.six import b
8 | from .fields import RequestField
9 |
10 | writer = codecs.lookup('utf-8')[3]
11 |
12 |
def choose_boundary():
    """
    Our embarassingly-simple replacement for mimetools.choose_boundary.

    Returns a random 32-character hex string.
    """
    token = uuid4()
    return token.hex
18 |
19 |
def iter_field_objects(fields):
    """
    Iterate over fields.

    Supports list of (k, v) tuples and dicts, and lists of
    :class:`~urllib3.fields.RequestField`.

    """
    if isinstance(fields, dict):
        items = six.iteritems(fields)
    else:
        items = iter(fields)

    for item in items:
        # Pass RequestField instances through; promote (k, v) tuples.
        if isinstance(item, RequestField):
            yield item
        else:
            yield RequestField.from_tuples(*item)
38 |
39 |
def iter_fields(fields):
    """
    .. deprecated:: 1.6

    Iterate over fields.

    The addition of :class:`~urllib3.fields.RequestField` makes this function
    obsolete. Instead, use :func:`iter_field_objects`, which returns
    :class:`~urllib3.fields.RequestField` objects.

    Supports list of (k, v) tuples and dicts.
    """
    source = six.iteritems(fields) if isinstance(fields, dict) else fields
    return ((k, v) for k, v in source)
def encode_multipart_formdata(fields, boundary=None):
    """
    Encode a dictionary of ``fields`` using the multipart/form-data MIME format.

    :param fields:
        Dictionary of fields or list of (key, :class:`~urllib3.fields.RequestField`).

    :param boundary:
        If not specified, then a random boundary will be generated using
        :func:`mimetools.choose_boundary`.

    Returns a ``(body_bytes, content_type)`` tuple.
    """
    body = BytesIO()
    if boundary is None:
        boundary = choose_boundary()

    for field in iter_field_objects(fields):
        # Each part opens with the boundary delimiter line.
        body.write(b('--%s\r\n' % (boundary)))

        # Headers are text; route them through the UTF-8 stream writer.
        writer(body).write(field.render_headers())
        data = field.data

        if isinstance(data, int):
            data = str(data)  # Backwards compatibility

        if isinstance(data, six.text_type):
            # Text payloads are encoded as UTF-8; bytes are written raw.
            writer(body).write(data)
        else:
            body.write(data)

        body.write(b'\r\n')

    # Closing delimiter terminates the multipart payload.
    body.write(b('--%s--\r\n' % (boundary)))

    content_type = str('multipart/form-data; boundary=%s' % boundary)

    return body.getvalue(), content_type
94 |
--------------------------------------------------------------------------------
/myrequests/packages/urllib3/packages/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 |
3 | from . import ssl_match_hostname
4 |
5 |
--------------------------------------------------------------------------------
/myrequests/packages/urllib3/packages/ordered_dict.py:
--------------------------------------------------------------------------------
1 | # Backport of OrderedDict() class that runs on Python 2.4, 2.5, 2.6, 2.7 and pypy.
2 | # Passes Python2.7's test suite and incorporates all the latest updates.
3 | # Copyright 2009 Raymond Hettinger, released under the MIT License.
4 | # http://code.activestate.com/recipes/576693/
5 | try:
6 | from thread import get_ident as _get_ident
7 | except ImportError:
8 | from dummy_thread import get_ident as _get_ident
9 |
10 | try:
11 | from _abcoll import KeysView, ValuesView, ItemsView
12 | except ImportError:
13 | pass
14 |
15 |
class OrderedDict(dict):
    'Dictionary that remembers insertion order'
    # An inherited dict maps keys to values.
    # The inherited dict provides __getitem__, __len__, __contains__, and get.
    # The remaining methods are order-aware.
    # Big-O running times for all methods are the same as for regular dictionaries.

    # The internal self.__map dictionary maps keys to links in a doubly linked list.
    # The circular doubly linked list starts and ends with a sentinel element.
    # The sentinel element never gets deleted (this simplifies the algorithm).
    # Each link is stored as a list of length three: [PREV, NEXT, KEY].

    def __init__(self, *args, **kwds):
        '''Initialize an ordered dictionary. Signature is the same as for
        regular dictionaries, but keyword arguments are not recommended
        because their insertion order is arbitrary.

        '''
        if len(args) > 1:
            raise TypeError('expected at most 1 arguments, got %d' % len(args))
        try:
            # Re-initialisation of an existing instance keeps its sentinel.
            self.__root
        except AttributeError:
            self.__root = root = []  # sentinel node
            root[:] = [root, root, None]
            self.__map = {}
        self.__update(*args, **kwds)

    def __setitem__(self, key, value, dict_setitem=dict.__setitem__):
        'od.__setitem__(i, y) <==> od[i]=y'
        # Setting a new item creates a new link which goes at the end of the linked
        # list, and the inherited dictionary is updated with the new key/value pair.
        if key not in self:
            root = self.__root
            last = root[0]
            last[1] = root[0] = self.__map[key] = [last, root, key]
        dict_setitem(self, key, value)

    def __delitem__(self, key, dict_delitem=dict.__delitem__):
        'od.__delitem__(y) <==> del od[y]'
        # Deleting an existing item uses self.__map to find the link which is
        # then removed by updating the links in the predecessor and successor nodes.
        dict_delitem(self, key)
        link_prev, link_next, key = self.__map.pop(key)
        link_prev[1] = link_next
        link_next[0] = link_prev

    def __iter__(self):
        'od.__iter__() <==> iter(od)'
        # Walk the linked list forward from the sentinel.
        root = self.__root
        curr = root[1]
        while curr is not root:
            yield curr[2]
            curr = curr[1]

    def __reversed__(self):
        'od.__reversed__() <==> reversed(od)'
        # Walk the linked list backward from the sentinel.
        root = self.__root
        curr = root[0]
        while curr is not root:
            yield curr[2]
            curr = curr[0]

    def clear(self):
        'od.clear() -> None. Remove all items from od.'
        try:
            # __map/__root may not exist yet if clear() runs before __init__.
            for node in self.__map.itervalues():
                del node[:]
            root = self.__root
            root[:] = [root, root, None]
            self.__map.clear()
        except AttributeError:
            pass
        dict.clear(self)

    def popitem(self, last=True):
        '''od.popitem() -> (k, v), return and remove a (key, value) pair.
        Pairs are returned in LIFO order if last is true or FIFO order if false.

        '''
        if not self:
            raise KeyError('dictionary is empty')
        root = self.__root
        if last:
            # Unlink the node just before the sentinel (newest).
            link = root[0]
            link_prev = link[0]
            link_prev[1] = root
            root[0] = link_prev
        else:
            # Unlink the node just after the sentinel (oldest).
            link = root[1]
            link_next = link[1]
            root[1] = link_next
            link_next[0] = root
        key = link[2]
        del self.__map[key]
        value = dict.pop(self, key)
        return key, value

    # -- the following methods do not depend on the internal structure --

    def keys(self):
        'od.keys() -> list of keys in od'
        return list(self)

    def values(self):
        'od.values() -> list of values in od'
        return [self[key] for key in self]

    def items(self):
        'od.items() -> list of (key, value) pairs in od'
        return [(key, self[key]) for key in self]

    def iterkeys(self):
        'od.iterkeys() -> an iterator over the keys in od'
        return iter(self)

    def itervalues(self):
        'od.itervalues -> an iterator over the values in od'
        for k in self:
            yield self[k]

    def iteritems(self):
        'od.iteritems -> an iterator over the (key, value) items in od'
        for k in self:
            yield (k, self[k])

    # Deliberately no explicit 'self' parameter: allows 'self' to be
    # passed as a keyword key via **kwds.
    def update(*args, **kwds):
        '''od.update(E, **F) -> None. Update od from dict/iterable E and F.

        If E is a dict instance, does: for k in E: od[k] = E[k]
        If E has a .keys() method, does: for k in E.keys(): od[k] = E[k]
        Or if E is an iterable of items, does: for k, v in E: od[k] = v
        In either case, this is followed by: for k, v in F.items(): od[k] = v

        '''
        if len(args) > 2:
            raise TypeError('update() takes at most 2 positional '
                            'arguments (%d given)' % (len(args),))
        elif not args:
            raise TypeError('update() takes at least 1 argument (0 given)')
        self = args[0]
        # Make progressively weaker assumptions about "other"
        other = ()
        if len(args) == 2:
            other = args[1]
        if isinstance(other, dict):
            for key in other:
                self[key] = other[key]
        elif hasattr(other, 'keys'):
            for key in other.keys():
                self[key] = other[key]
        else:
            for key, value in other:
                self[key] = value
        for key, value in kwds.items():
            self[key] = value

    __update = update  # let subclasses override update without breaking __init__

    __marker = object()

    def pop(self, key, default=__marker):
        '''od.pop(k[,d]) -> v, remove specified key and return the corresponding value.
        If key is not found, d is returned if given, otherwise KeyError is raised.

        '''
        if key in self:
            result = self[key]
            del self[key]
            return result
        if default is self.__marker:
            raise KeyError(key)
        return default

    def setdefault(self, key, default=None):
        'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
        if key in self:
            return self[key]
        self[key] = default
        return default

    def __repr__(self, _repr_running={}):
        'od.__repr__() <==> repr(od)'
        # Guard against infinite recursion on self-referential dicts; the
        # key includes the thread id so the guard is per-thread.
        call_key = id(self), _get_ident()
        if call_key in _repr_running:
            return '...'
        _repr_running[call_key] = 1
        try:
            if not self:
                return '%s()' % (self.__class__.__name__,)
            return '%s(%r)' % (self.__class__.__name__, self.items())
        finally:
            del _repr_running[call_key]

    def __reduce__(self):
        'Return state information for pickling'
        items = [[k, self[k]] for k in self]
        inst_dict = vars(self).copy()
        # Drop the bookkeeping attributes; they are rebuilt on unpickle.
        for k in vars(OrderedDict()):
            inst_dict.pop(k, None)
        if inst_dict:
            return (self.__class__, (items,), inst_dict)
        return self.__class__, (items,)

    def copy(self):
        'od.copy() -> a shallow copy of od'
        return self.__class__(self)

    @classmethod
    def fromkeys(cls, iterable, value=None):
        '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
        and values equal to v (which defaults to None).

        '''
        d = cls()
        for key in iterable:
            d[key] = value
        return d

    def __eq__(self, other):
        '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
        while comparison to a regular mapping is order-insensitive.

        '''
        if isinstance(other, OrderedDict):
            return len(self)==len(other) and self.items() == other.items()
        return dict.__eq__(self, other)

    def __ne__(self, other):
        return not self == other

    # -- the following methods are only used in Python 2.7 --
    # NOTE: KeysView/ValuesView/ItemsView come from the optional _abcoll
    # import at module top; if that import failed these raise NameError.

    def viewkeys(self):
        "od.viewkeys() -> a set-like object providing a view on od's keys"
        return KeysView(self)

    def viewvalues(self):
        "od.viewvalues() -> an object providing a view on od's values"
        return ValuesView(self)

    def viewitems(self):
        "od.viewitems() -> a set-like object providing a view on od's items"
        return ItemsView(self)
260 |
--------------------------------------------------------------------------------
/myrequests/packages/urllib3/packages/ssl_match_hostname/__init__.py:
--------------------------------------------------------------------------------
1 | try:
2 | # Python 3.2+
3 | from ssl import CertificateError, match_hostname
4 | except ImportError:
5 | try:
6 | # Backport of the function from a pypi module
7 | from backports.ssl_match_hostname import CertificateError, match_hostname
8 | except ImportError:
9 | # Our vendored copy
10 | from ._implementation import CertificateError, match_hostname
11 |
12 | # Not needed, but documenting what we provide.
13 | __all__ = ('CertificateError', 'match_hostname')
14 |
--------------------------------------------------------------------------------
/myrequests/packages/urllib3/packages/ssl_match_hostname/_implementation.py:
--------------------------------------------------------------------------------
1 | """The match_hostname() function from Python 3.3.3, essential when using SSL."""
2 |
3 | # Note: This file is under the PSF license as the code comes from the python
4 | # stdlib. http://docs.python.org/3/license.html
5 |
6 | import re
7 |
8 | __version__ = '3.4.0.2'
9 |
class CertificateError(ValueError):
    """Raised when a certificate's names fail to match the expected hostname."""
    pass
12 |
13 |
14 | def _dnsname_match(dn, hostname, max_wildcards=1):
15 | """Matching according to RFC 6125, section 6.4.3
16 |
17 | http://tools.ietf.org/html/rfc6125#section-6.4.3
18 | """
19 | pats = []
20 | if not dn:
21 | return False
22 |
23 | # Ported from python3-syntax:
24 | # leftmost, *remainder = dn.split(r'.')
25 | parts = dn.split(r'.')
26 | leftmost = parts[0]
27 | remainder = parts[1:]
28 |
29 | wildcards = leftmost.count('*')
30 | if wildcards > max_wildcards:
31 | # Issue #17980: avoid denials of service by refusing more
32 | # than one wildcard per fragment. A survey of established
33 | # policy among SSL implementations showed it to be a
34 | # reasonable choice.
35 | raise CertificateError(
36 | "too many wildcards in certificate DNS name: " + repr(dn))
37 |
38 | # speed up common case w/o wildcards
39 | if not wildcards:
40 | return dn.lower() == hostname.lower()
41 |
42 | # RFC 6125, section 6.4.3, subitem 1.
43 | # The client SHOULD NOT attempt to match a presented identifier in which
44 | # the wildcard character comprises a label other than the left-most label.
45 | if leftmost == '*':
46 | # When '*' is a fragment by itself, it matches a non-empty dotless
47 | # fragment.
48 | pats.append('[^.]+')
49 | elif leftmost.startswith('xn--') or hostname.startswith('xn--'):
50 | # RFC 6125, section 6.4.3, subitem 3.
51 | # The client SHOULD NOT attempt to match a presented identifier
52 | # where the wildcard character is embedded within an A-label or
53 | # U-label of an internationalized domain name.
54 | pats.append(re.escape(leftmost))
55 | else:
56 | # Otherwise, '*' matches any dotless string, e.g. www*
57 | pats.append(re.escape(leftmost).replace(r'\*', '[^.]*'))
58 |
59 | # add the remaining fragments, ignore any wildcards
60 | for frag in remainder:
61 | pats.append(re.escape(frag))
62 |
63 | pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
64 | return pat.match(hostname)
65 |
66 |
def match_hostname(cert, hostname):
    """Verify that *cert* (in decoded format as returned by
    SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125
    rules are followed, but IP addresses are not accepted for *hostname*.

    CertificateError is raised on failure. On success, the function
    returns nothing.
    """
    if not cert:
        raise ValueError("empty or no certificate")
    dnsnames = []
    san = cert.get('subjectAltName', ())
    # Prefer subjectAltName dNSName entries; any single match succeeds.
    for key, value in san:
        if key == 'DNS':
            if _dnsname_match(value, hostname):
                return
            dnsnames.append(value)
    if not dnsnames:
        # The subject is only checked when there is no dNSName entry
        # in subjectAltName
        for sub in cert.get('subject', ()):
            for key, value in sub:
                # XXX according to RFC 2818, the most specific Common Name
                # must be used.
                if key == 'commonName':
                    if _dnsname_match(value, hostname):
                        return
                    dnsnames.append(value)
    # No candidate matched: report what the certificate did present.
    if len(dnsnames) > 1:
        raise CertificateError("hostname %r "
            "doesn't match either of %s"
            % (hostname, ', '.join(map(repr, dnsnames))))
    elif len(dnsnames) == 1:
        raise CertificateError("hostname %r "
            "doesn't match %r"
            % (hostname, dnsnames[0]))
    else:
        raise CertificateError("no appropriate commonName or "
            "subjectAltName fields were found")
106 |
--------------------------------------------------------------------------------
/myrequests/packages/urllib3/poolmanager.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | try: # Python 3
4 | from urllib.parse import urljoin
5 | except ImportError:
6 | from urlparse import urljoin
7 |
8 | from ._collections import RecentlyUsedContainer
9 | from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool
10 | from .connectionpool import port_by_scheme
11 | from .exceptions import LocationValueError, MaxRetryError
12 | from .request import RequestMethods
13 | from .util.url import parse_url
14 | from .util.retry import Retry
15 |
16 |
17 | __all__ = ['PoolManager', 'ProxyManager', 'proxy_from_url']
18 |
19 |
# Maps a URL scheme to the ConnectionPool class used to service it.
pool_classes_by_scheme = {
    'http': HTTPConnectionPool,
    'https': HTTPSConnectionPool,
}

log = logging.getLogger(__name__)

# Keyword arguments that only make sense for HTTPS pools; stripped from
# connection_pool_kw when building plain HTTP pools (see _new_pool).
SSL_KEYWORDS = ('key_file', 'cert_file', 'cert_reqs', 'ca_certs',
                'ssl_version')
29 |
30 |
class PoolManager(RequestMethods):
    """
    Allows for arbitrary requests while transparently keeping track of
    necessary connection pools for you.

    :param num_pools:
        Number of connection pools to cache before discarding the least
        recently used pool.

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.

    :param \**connection_pool_kw:
        Additional parameters are used to create fresh
        :class:`urllib3.connectionpool.ConnectionPool` instances.

    Example::

        >>> manager = PoolManager(num_pools=2)
        >>> r = manager.request('GET', 'http://google.com/')
        >>> r = manager.request('GET', 'http://google.com/mail')
        >>> r = manager.request('GET', 'http://yahoo.com/')
        >>> len(manager.pools)
        2

    """

    # Overridden by ProxyManager; None means requests go direct.
    proxy = None

    def __init__(self, num_pools=10, headers=None, **connection_pool_kw):
        RequestMethods.__init__(self, headers)
        self.connection_pool_kw = connection_pool_kw
        # LRU cache of pools; evicted pools are closed via dispose_func.
        self.pools = RecentlyUsedContainer(num_pools,
                                           dispose_func=lambda p: p.close())

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.clear()
        # Return False to re-raise any potential exceptions
        return False

    def _new_pool(self, scheme, host, port):
        """
        Create a new :class:`ConnectionPool` based on host, port and scheme.

        This method is used to actually create the connection pools handed out
        by :meth:`connection_from_url` and companion methods. It is intended
        to be overridden for customization.
        """
        pool_cls = pool_classes_by_scheme[scheme]
        kwargs = self.connection_pool_kw
        if scheme == 'http':
            # HTTP pools reject SSL-only kwargs; strip them from a copy.
            kwargs = self.connection_pool_kw.copy()
            for kw in SSL_KEYWORDS:
                kwargs.pop(kw, None)

        return pool_cls(host, port, **kwargs)

    def clear(self):
        """
        Empty our store of pools and direct them all to close.

        This will not affect in-flight connections, but they will not be
        re-used after completion.
        """
        self.pools.clear()

    def connection_from_host(self, host, port=None, scheme='http'):
        """
        Get a :class:`ConnectionPool` based on the host, port, and scheme.

        If ``port`` isn't given, it will be derived from the ``scheme`` using
        ``urllib3.connectionpool.port_by_scheme``.

        :raises LocationValueError: if ``host`` is empty.
        """

        if not host:
            raise LocationValueError("No host specified.")

        scheme = scheme or 'http'
        port = port or port_by_scheme.get(scheme, 80)
        # Pools are keyed on the full (scheme, host, port) triple.
        pool_key = (scheme, host, port)

        with self.pools.lock:
            # If the scheme, host, or port doesn't match existing open
            # connections, open a new ConnectionPool.
            pool = self.pools.get(pool_key)
            if pool:
                return pool

            # Make a fresh ConnectionPool of the desired type
            pool = self._new_pool(scheme, host, port)
            self.pools[pool_key] = pool

        return pool

    def connection_from_url(self, url):
        """
        Similar to :func:`urllib3.connectionpool.connection_from_url` but
        doesn't pass any additional parameters to the
        :class:`urllib3.connectionpool.ConnectionPool` constructor.

        Additional parameters are taken from the :class:`.PoolManager`
        constructor.
        """
        u = parse_url(url)
        return self.connection_from_host(u.host, port=u.port, scheme=u.scheme)

    def urlopen(self, method, url, redirect=True, **kw):
        """
        Same as :meth:`urllib3.connectionpool.HTTPConnectionPool.urlopen`
        with custom cross-host redirect logic and only sends the request-uri
        portion of the ``url``.

        The given ``url`` parameter must be absolute, such that an appropriate
        :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.
        """
        u = parse_url(url)
        conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme)

        # Redirects are followed here (possibly cross-host), so the pool
        # must not second-guess the host or follow them itself.
        kw['assert_same_host'] = False
        kw['redirect'] = False
        if 'headers' not in kw:
            kw['headers'] = self.headers

        if self.proxy is not None and u.scheme == "http":
            # Plain-HTTP proxying requires the absolute URL on the wire.
            response = conn.urlopen(method, url, **kw)
        else:
            response = conn.urlopen(method, u.request_uri, **kw)

        redirect_location = redirect and response.get_redirect_location()
        if not redirect_location:
            return response

        # Support relative URLs for redirecting.
        redirect_location = urljoin(url, redirect_location)

        # RFC 7231, Section 6.4.4: 303 See Other switches the method to GET.
        if response.status == 303:
            method = 'GET'

        retries = kw.get('retries')
        if not isinstance(retries, Retry):
            retries = Retry.from_int(retries, redirect=redirect)

        try:
            retries = retries.increment(method, url, response=response, _pool=conn)
        except MaxRetryError:
            if retries.raise_on_redirect:
                raise
            # Out of redirect budget but not configured to raise: hand the
            # redirect response back to the caller.
            return response

        kw['retries'] = retries
        kw['redirect'] = redirect

        log.info("Redirecting %s -> %s" % (url, redirect_location))
        # Recurse to follow the redirect through the appropriate pool.
        return self.urlopen(method, redirect_location, **kw)
190 |
191 |
class ProxyManager(PoolManager):
    """
    A :class:`PoolManager` that routes every request through a single proxy,
    using the CONNECT method to tunnel HTTPS URLs.

    :param proxy_url:
        The URL of the proxy to be used.

    :param proxy_headers:
        A dictionary containing headers that will be sent to the proxy. For
        plain HTTP these accompany every request, while for HTTPS/CONNECT
        they are sent only once, on the tunnel request. Useful for proxy
        authentication.

    Example:
        >>> proxy = urllib3.ProxyManager('http://localhost:3128/')
        >>> r1 = proxy.request('GET', 'http://google.com/')
        >>> r2 = proxy.request('GET', 'http://httpbin.org/')
        >>> len(proxy.pools)
        1
        >>> r3 = proxy.request('GET', 'https://httpbin.org/')
        >>> r4 = proxy.request('GET', 'https://twitter.com/')
        >>> len(proxy.pools)
        3

    """

    def __init__(self, proxy_url, num_pools=10, headers=None,
                 proxy_headers=None, **connection_pool_kw):

        # A pool object may be given instead of a URL string.
        if isinstance(proxy_url, HTTPConnectionPool):
            proxy_url = '%s://%s:%i' % (proxy_url.scheme, proxy_url.host,
                                        proxy_url.port)

        parsed = parse_url(proxy_url)
        if not parsed.port:
            # Fall back to the scheme's well-known port (80 if unknown).
            parsed = parsed._replace(
                port=port_by_scheme.get(parsed.scheme, 80))

        assert parsed.scheme in ("http", "https"), \
            'Not supported proxy scheme %s' % parsed.scheme

        self.proxy = parsed
        self.proxy_headers = proxy_headers or {}

        # Every pool created by this manager is configured to go through
        # the proxy.
        connection_pool_kw['_proxy'] = self.proxy
        connection_pool_kw['_proxy_headers'] = self.proxy_headers

        super(ProxyManager, self).__init__(
            num_pools, headers, **connection_pool_kw)

    def connection_from_host(self, host, port=None, scheme='http'):
        # Plain HTTP terminates at the proxy itself; only HTTPS gets a
        # pool keyed on the destination host (for the CONNECT tunnel).
        if scheme != "https":
            return super(ProxyManager, self).connection_from_host(
                self.proxy.host, self.proxy.port, self.proxy.scheme)

        return super(ProxyManager, self).connection_from_host(
            host, port, scheme)

    def _set_proxy_headers(self, url, headers=None):
        """
        Return the headers a proxy needs (Accept and Host) merged with any
        user-provided headers; user values always win.
        """
        defaults = {'Accept': '*/*'}

        netloc = parse_url(url).netloc
        if netloc:
            defaults['Host'] = netloc

        if headers:
            defaults.update(headers)
        return defaults

    def urlopen(self, method, url, redirect=True, **kw):
        "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."
        parsed = parse_url(url)

        if parsed.scheme == "http":
            # For proxied HTTPS requests, httplib sets the necessary headers
            # on the CONNECT to the proxy. For HTTP, we'll definitely
            # need to set 'Host' at the very least.
            kw['headers'] = self._set_proxy_headers(
                url, kw.get('headers', self.headers))

        return super(ProxyManager, self).urlopen(
            method, url, redirect=redirect, **kw)
277 |
278 |
def proxy_from_url(url, **kw):
    """Convenience shortcut: build a :class:`ProxyManager` for ``url``."""
    return ProxyManager(proxy_url=url, **kw)
281 |
--------------------------------------------------------------------------------
/myrequests/packages/urllib3/request.py:
--------------------------------------------------------------------------------
1 | try:
2 | from urllib.parse import urlencode
3 | except ImportError:
4 | from urllib import urlencode
5 |
6 | from .filepost import encode_multipart_formdata
7 |
8 |
9 | __all__ = ['RequestMethods']
10 |
11 |
class RequestMethods(object):
    """
    Convenience mixin for classes who implement a :meth:`urlopen` method, such
    as :class:`~urllib3.connectionpool.HTTPConnectionPool` and
    :class:`~urllib3.poolmanager.PoolManager`.

    Provides behavior for making common types of HTTP request methods and
    decides which type of request field encoding to use.

    Specifically,

    :meth:`.request_encode_url` is for sending requests whose fields are
    encoded in the URL (such as GET, HEAD, DELETE).

    :meth:`.request_encode_body` is for sending requests whose fields are
    encoded in the *body* of the request using multipart or www-form-urlencoded
    (such as for POST, PUT, PATCH).

    :meth:`.request` is for making any kind of request, it will look up the
    appropriate encoding format and use one of the above two methods to make
    the request.

    Initializer parameters:

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.
    """

    #: HTTP methods whose ``fields`` are encoded into the URL query string.
    _encode_url_methods = set(['DELETE', 'GET', 'HEAD', 'OPTIONS'])

    def __init__(self, headers=None):
        self.headers = headers or {}

    def urlopen(self, method, url, body=None, headers=None,
                encode_multipart=True, multipart_boundary=None,
                **kw):  # Abstract
        # BUG FIX: this previously did ``raise NotImplemented(...)``.
        # ``NotImplemented`` is the rich-comparison sentinel, not an
        # exception class, so calling it raised a confusing ``TypeError``
        # instead of signalling the missing override.
        raise NotImplementedError("Classes extending RequestMethods must implement "
                                  "their own ``urlopen`` method.")

    def request(self, method, url, fields=None, headers=None, **urlopen_kw):
        """
        Make a request using :meth:`urlopen` with the appropriate encoding of
        ``fields`` based on the ``method`` used.

        This is a convenience method that requires the least amount of manual
        effort. It can be used in most situations, while still having the
        option to drop down to more specific methods when necessary, such as
        :meth:`request_encode_url`, :meth:`request_encode_body`,
        or even the lowest level :meth:`urlopen`.
        """
        method = method.upper()

        if method in self._encode_url_methods:
            return self.request_encode_url(method, url, fields=fields,
                                           headers=headers,
                                           **urlopen_kw)
        else:
            return self.request_encode_body(method, url, fields=fields,
                                            headers=headers,
                                            **urlopen_kw)

    def request_encode_url(self, method, url, fields=None, **urlopen_kw):
        """
        Make a request using :meth:`urlopen` with the ``fields`` encoded in
        the url. This is useful for request methods like GET, HEAD, DELETE, etc.
        """
        if fields:
            url += '?' + urlencode(fields)
        return self.urlopen(method, url, **urlopen_kw)

    def request_encode_body(self, method, url, fields=None, headers=None,
                            encode_multipart=True, multipart_boundary=None,
                            **urlopen_kw):
        """
        Make a request using :meth:`urlopen` with the ``fields`` encoded in
        the body. This is useful for request methods like POST, PUT, PATCH, etc.

        When ``encode_multipart=True`` (default), then
        :meth:`urllib3.filepost.encode_multipart_formdata` is used to encode
        the payload with the appropriate content type. Otherwise
        :meth:`urllib.urlencode` is used with the
        'application/x-www-form-urlencoded' content type.

        Multipart encoding must be used when posting files, and it's reasonably
        safe to use it in other times too. However, it may break request
        signing, such as with OAuth.

        Supports an optional ``fields`` parameter of key/value strings AND
        key/filetuple. A filetuple is a (filename, data, MIME type) tuple where
        the MIME type is optional. For example::

            fields = {
                'foo': 'bar',
                'fakefile': ('foofile.txt', 'contents of foofile'),
                'realfile': ('barfile.txt', open('realfile').read()),
                'typedfile': ('bazfile.bin', open('bazfile').read(),
                              'image/jpeg'),
                'nonamefile': 'contents of nonamefile field',
            }

        When uploading a file, providing a filename (the first parameter of the
        tuple) is optional but recommended to best mimic behavior of browsers.

        Note that if ``headers`` are supplied, the 'Content-Type' header will
        be overwritten because it depends on the dynamic random boundary string
        which is used to compose the body of the request. The random boundary
        string can be explicitly set with the ``multipart_boundary`` parameter.
        """
        if headers is None:
            headers = self.headers

        extra_kw = {'headers': {}}

        if fields:
            if 'body' in urlopen_kw:
                raise TypeError('request got values for both \'fields\' and \'body\', can only specify one.')

            if encode_multipart:
                body, content_type = encode_multipart_formdata(fields, boundary=multipart_boundary)
            else:
                body, content_type = urlencode(fields), 'application/x-www-form-urlencoded'

            extra_kw['body'] = body
            extra_kw['headers'] = {'Content-Type': content_type}

        # User-supplied headers and urlopen kwargs take precedence over the
        # generated Content-Type.
        extra_kw['headers'].update(headers)
        extra_kw.update(urlopen_kw)

        return self.urlopen(method, url, **extra_kw)
142 |
--------------------------------------------------------------------------------
/myrequests/packages/urllib3/util/__init__.py:
--------------------------------------------------------------------------------
1 | # For backwards compatibility, provide imports that used to be here.
2 | from .connection import is_connection_dropped
3 | from .request import make_headers
4 | from .response import is_fp_closed
5 | from .ssl_ import (
6 | SSLContext,
7 | HAS_SNI,
8 | assert_fingerprint,
9 | resolve_cert_reqs,
10 | resolve_ssl_version,
11 | ssl_wrap_socket,
12 | )
13 | from .timeout import (
14 | current_time,
15 | Timeout,
16 | )
17 |
18 | from .retry import Retry
19 | from .url import (
20 | get_host,
21 | parse_url,
22 | split_first,
23 | Url,
24 | )
25 |
--------------------------------------------------------------------------------
/myrequests/packages/urllib3/util/connection.py:
--------------------------------------------------------------------------------
1 | import socket
2 | try:
3 | from select import poll, POLLIN
4 | except ImportError: # `poll` doesn't exist on OSX and other platforms
5 | poll = False
6 | try:
7 | from select import select
8 | except ImportError: # `select` doesn't exist on AppEngine.
9 | select = False
10 |
11 |
def is_connection_dropped(conn):  # Platform-specific
    """
    Returns True if the connection is dropped and should be closed.

    :param conn:
        :class:`httplib.HTTPConnection` object.

    Note: For platforms like AppEngine, this will always return ``False`` to
    let the platform handle connection recycling transparently for us.
    """
    sock = getattr(conn, 'sock', False)
    if sock is False:  # Platform-specific: AppEngine
        return False
    if sock is None:  # Connection already closed (such as by httplib).
        return True

    if not poll:
        if not select:  # Platform-specific: AppEngine
            return False

        try:
            # Readable with no outstanding request means buffered data or
            # EOF: either way the connection should be discarded.
            # BUG FIX: return a bool, not the raw ready-list from select().
            return bool(select([sock], [], [], 0.0)[0])
        except socket.error:
            return True

    # This version is better on platforms that support it.
    p = poll()
    p.register(sock, POLLIN)
    for (fno, ev) in p.poll(0.0):
        if fno == sock.fileno():
            # Either data is buffered (bad), or the connection is dropped.
            return True

    # BUG FIX: the function previously fell off the end here, returning an
    # implicit None; make the documented boolean contract explicit.
    return False
44 |
45 |
46 | # This function is copied from socket.py in the Python 2.7 standard
47 | # library test suite. Added to its signature is only `socket_options`.
48 | def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
49 | source_address=None, socket_options=None):
50 | """Connect to *address* and return the socket object.
51 |
52 | Convenience function. Connect to *address* (a 2-tuple ``(host,
53 | port)``) and return the socket object. Passing the optional
54 | *timeout* parameter will set the timeout on the socket instance
55 | before attempting to connect. If no *timeout* is supplied, the
56 | global default timeout setting returned by :func:`getdefaulttimeout`
57 | is used. If *source_address* is set it must be a tuple of (host, port)
58 | for the socket to bind as a source address before making the connection.
59 | An host of '' or port 0 tells the OS to use the default.
60 | """
61 |
62 | host, port = address
63 | err = None
64 | for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
65 | af, socktype, proto, canonname, sa = res
66 | sock = None
67 | try:
68 | sock = socket.socket(af, socktype, proto)
69 |
70 | # If provided, set socket level options before connecting.
71 | # This is the only addition urllib3 makes to this function.
72 | _set_socket_options(sock, socket_options)
73 |
74 | if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
75 | sock.settimeout(timeout)
76 | if source_address:
77 | sock.bind(source_address)
78 | sock.connect(sa)
79 | return sock
80 |
81 | except socket.error as _:
82 | err = _
83 | if sock is not None:
84 | sock.close()
85 | sock = None
86 |
87 | if err is not None:
88 | raise err
89 | else:
90 | raise socket.error("getaddrinfo returns an empty list")
91 |
92 |
93 | def _set_socket_options(sock, options):
94 | if options is None:
95 | return
96 |
97 | for opt in options:
98 | sock.setsockopt(*opt)
99 |
--------------------------------------------------------------------------------
/myrequests/packages/urllib3/util/request.py:
--------------------------------------------------------------------------------
1 | from base64 import b64encode
2 |
3 | from ..packages.six import b
4 |
ACCEPT_ENCODING = 'gzip,deflate'


def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
                 basic_auth=None, proxy_basic_auth=None, disable_cache=None):
    """
    Build a dict of commonly-needed request headers.

    :param keep_alive:
        If ``True``, adds 'connection: keep-alive' header.

    :param accept_encoding:
        Can be a boolean, list, or string.
        ``True`` translates to 'gzip,deflate'.
        List will get joined by comma.
        String will be used as provided.

    :param user_agent:
        String representing the user-agent you want, such as
        "python-urllib3/0.6"

    :param basic_auth:
        Colon-separated username:password string for 'authorization: basic ...'
        auth header.

    :param proxy_basic_auth:
        Colon-separated username:password string for
        'proxy-authorization: basic ...' auth header.

    :param disable_cache:
        If ``True``, adds 'cache-control: no-cache' header.

    Example::

        >>> make_headers(keep_alive=True, user_agent="Batman/1.0")
        {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'}
        >>> make_headers(accept_encoding=True)
        {'accept-encoding': 'gzip,deflate'}
    """
    result = {}

    if accept_encoding:
        if isinstance(accept_encoding, list):
            accept_encoding = ','.join(accept_encoding)
        elif not isinstance(accept_encoding, str):
            # Any other truthy value (e.g. True) selects the default set.
            accept_encoding = ACCEPT_ENCODING
        result['accept-encoding'] = accept_encoding

    if user_agent:
        result['user-agent'] = user_agent

    if keep_alive:
        result['connection'] = 'keep-alive'

    if basic_auth:
        result['authorization'] = (
            'Basic ' + b64encode(b(basic_auth)).decode('utf-8'))

    if proxy_basic_auth:
        result['proxy-authorization'] = (
            'Basic ' + b64encode(b(proxy_basic_auth)).decode('utf-8'))

    if disable_cache:
        result['cache-control'] = 'no-cache'

    return result
72 |
--------------------------------------------------------------------------------
/myrequests/packages/urllib3/util/response.py:
--------------------------------------------------------------------------------
def is_fp_closed(obj):
    """
    Checks whether a given file-like object is closed.

    :param obj:
        The file-like object to check.
    """
    # The canonical file-object protocol attribute comes first.
    if hasattr(obj, 'closed'):
        return obj.closed

    # Containers for another file-like object (e.g. HTTPResponse) release
    # their inner ``fp`` on exhaustion, so a missing fp means "closed".
    if hasattr(obj, 'fp'):
        return obj.fp is None

    raise ValueError("Unable to determine whether fp is closed.")
23 |
--------------------------------------------------------------------------------
/myrequests/packages/urllib3/util/timeout.py:
--------------------------------------------------------------------------------
1 | # The default socket timeout, used by httplib to indicate that no timeout was
2 | # specified by the user
3 | from socket import _GLOBAL_DEFAULT_TIMEOUT
4 | import time
5 |
6 | from ..exceptions import TimeoutStateError
7 |
8 | # A sentinel value to indicate that no timeout was specified by the user in
9 | # urllib3
10 | _Default = object()
11 |
def current_time():
    """Return the current wall-clock time; mocked out in unit testing."""
    return time.time()
17 |
18 |
class Timeout(object):
    """ Timeout configuration.

    Timeouts can be defined as a default for a pool::

        timeout = Timeout(connect=2.0, read=7.0)
        http = PoolManager(timeout=timeout)
        response = http.request('GET', 'http://example.com/')

    Or per-request (which overrides the default for the pool)::

        response = http.request('GET', 'http://example.com/', timeout=Timeout(10))

    Timeouts can be disabled by setting all the parameters to ``None``::

        no_timeout = Timeout(connect=None, read=None)
        response = http.request('GET', 'http://example.com/, timeout=no_timeout)


    :param total:
        This combines the connect and read timeouts into one; the read timeout
        will be set to the time leftover from the connect attempt. In the
        event that both a connect timeout and a total are specified, or a read
        timeout and a total are specified, the shorter timeout will be applied.

        Defaults to None.

    :type total: integer, float, or None

    :param connect:
        The maximum amount of time to wait for a connection attempt to a server
        to succeed. Omitting the parameter will default the connect timeout to
        the system default, probably `the global default timeout in socket.py
        `_.
        None will set an infinite timeout for connection attempts.

    :type connect: integer, float, or None

    :param read:
        The maximum amount of time to wait between consecutive
        read operations for a response from the server. Omitting
        the parameter will default the read timeout to the system
        default, probably `the global default timeout in socket.py
        `_.
        None will set an infinite timeout.

    :type read: integer, float, or None

    .. note::

        Many factors can affect the total amount of time for urllib3 to return
        an HTTP response.

        For example, Python's DNS resolver does not obey the timeout specified
        on the socket. Other factors that can affect total request time include
        high CPU load, high swap, the program running at a low priority level,
        or other behaviors.

        In addition, the read and total timeouts only measure the time between
        read operations on the socket connecting the client and the server,
        not the total amount of time for the request to return a complete
        response. For most requests, the timeout is raised because the server
        has not sent the first byte in the specified time. This is not always
        the case; if a server streams one byte every fifteen seconds, a timeout
        of 20 seconds will not trigger, even though the request will take
        several minutes to complete.

        If your goal is to cut off any request after a set amount of wall clock
        time, consider having a second "watcher" thread to cut off a slow
        request.
    """

    #: A sentinel object representing the default timeout value
    DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT

    def __init__(self, total=None, connect=_Default, read=_Default):
        # Validate eagerly so bad values fail at construction, not mid-request.
        self._connect = self._validate_timeout(connect, 'connect')
        self._read = self._validate_timeout(read, 'read')
        self.total = self._validate_timeout(total, 'total')
        # Set by start_connect(); used to compute the remaining read budget.
        self._start_connect = None

    def __str__(self):
        return '%s(connect=%r, read=%r, total=%r)' % (
            type(self).__name__, self._connect, self._read, self.total)

    @classmethod
    def _validate_timeout(cls, value, name):
        """ Check that a timeout attribute is valid.

        :param value: The timeout value to validate
        :param name: The name of the timeout attribute to validate. This is
            used to specify in error messages.
        :return: The validated and casted version of the given value.
        :raises ValueError: If the type is not an integer or a float, or if it
            is a numeric value less than zero.
        """
        # _Default means "caller did not pass this parameter" -- distinct
        # from an explicit None (which disables the timeout).
        if value is _Default:
            return cls.DEFAULT_TIMEOUT

        if value is None or value is cls.DEFAULT_TIMEOUT:
            return value

        try:
            float(value)
        except (TypeError, ValueError):
            raise ValueError("Timeout value %s was %s, but it must be an "
                             "int or float." % (name, value))

        try:
            if value < 0:
                raise ValueError("Attempted to set %s timeout to %s, but the "
                                 "timeout cannot be set to a value less "
                                 "than 0." % (name, value))
        except TypeError:  # Python 3: unorderable types raise here
            raise ValueError("Timeout value %s was %s, but it must be an "
                             "int or float." % (name, value))

        return value

    @classmethod
    def from_float(cls, timeout):
        """ Create a new Timeout from a legacy timeout value.

        The timeout value used by httplib.py sets the same timeout on the
        connect(), and recv() socket requests. This creates a :class:`Timeout`
        object that sets the individual timeouts to the ``timeout`` value
        passed to this function.

        :param timeout: The legacy timeout value.
        :type timeout: integer, float, sentinel default object, or None
        :return: Timeout object
        :rtype: :class:`Timeout`
        """
        return Timeout(read=timeout, connect=timeout)

    def clone(self):
        """ Create a copy of the timeout object

        Timeout properties are stored per-pool but each request needs a fresh
        Timeout object to ensure each one has its own start/stop configured.

        :return: a copy of the timeout object
        :rtype: :class:`Timeout`
        """
        # We can't use copy.deepcopy because that will also create a new object
        # for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to
        # detect the user default.
        return Timeout(connect=self._connect, read=self._read,
                       total=self.total)

    def start_connect(self):
        """ Start the timeout clock, used during a connect() attempt

        :raises urllib3.exceptions.TimeoutStateError: if you attempt
            to start a timer that has been started already.
        """
        if self._start_connect is not None:
            raise TimeoutStateError("Timeout timer has already been started.")
        self._start_connect = current_time()
        return self._start_connect

    def get_connect_duration(self):
        """ Gets the time elapsed since the call to :meth:`start_connect`.

        :return: Elapsed time.
        :rtype: float
        :raises urllib3.exceptions.TimeoutStateError: if you attempt
            to get duration for a timer that hasn't been started.
        """
        if self._start_connect is None:
            raise TimeoutStateError("Can't get connect duration for timer "
                                    "that has not started.")
        return current_time() - self._start_connect

    @property
    def connect_timeout(self):
        """ Get the value to use when setting a connection timeout.

        This will be a positive float or integer, the value None
        (never timeout), or the default system timeout.

        :return: Connect timeout.
        :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None
        """
        if self.total is None:
            return self._connect

        # total caps the connect timeout when both are given.
        if self._connect is None or self._connect is self.DEFAULT_TIMEOUT:
            return self.total

        return min(self._connect, self.total)

    @property
    def read_timeout(self):
        """ Get the value for the read timeout.

        This assumes some time has elapsed in the connection timeout and
        computes the read timeout appropriately.

        If self.total is set, the read timeout is dependent on the amount of
        time taken by the connect timeout. If the connection time has not been
        established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be
        raised.

        :return: Value to use for the read timeout.
        :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None
        :raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect`
            has not yet been called on this object.
        """
        # Both total and read are concrete numbers: budget the read timeout
        # by whatever the connect phase left over (never negative).
        if (self.total is not None and
                self.total is not self.DEFAULT_TIMEOUT and
                self._read is not None and
                self._read is not self.DEFAULT_TIMEOUT):
            # In case the connect timeout has not yet been established.
            if self._start_connect is None:
                return self._read
            return max(0, min(self.total - self.get_connect_duration(),
                              self._read))
        elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT:
            return max(0, self.total - self.get_connect_duration())
        else:
            return self._read
241 |
--------------------------------------------------------------------------------
/myrequests/packages/urllib3/util/url.py:
--------------------------------------------------------------------------------
1 | from collections import namedtuple
2 |
3 | from ..exceptions import LocationParseError
4 |
5 |
url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment']


class Url(namedtuple('Url', url_attrs)):
    """
    Datastructure for representing an HTTP URL. Used as a return value for
    :func:`parse_url`.
    """
    # BUG FIX: this was previously ``slots = ()`` -- a plain class attribute
    # with no special meaning. It must be ``__slots__ = ()`` to actually
    # suppress the per-instance ``__dict__`` on this namedtuple subclass.
    __slots__ = ()

    def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None,
                query=None, fragment=None):
        # Normalize relative paths so request_uri/url are always well-formed.
        if path and not path.startswith('/'):
            path = '/' + path
        return super(Url, cls).__new__(cls, scheme, auth, host, port, path,
                                       query, fragment)

    @property
    def hostname(self):
        """For backwards-compatibility with urlparse. We're nice like that."""
        return self.host

    @property
    def request_uri(self):
        """Absolute path including the query string."""
        uri = self.path or '/'

        if self.query is not None:
            uri += '?' + self.query

        return uri

    @property
    def netloc(self):
        """Network location including host and port"""
        if self.port:
            return '%s:%d' % (self.host, self.port)
        return self.host

    @property
    def url(self):
        """
        Convert self into a url

        This function should more or less round-trip with :func:`.parse_url`. The
        returned url may not be exactly the same as the url inputted to
        :func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls
        with a blank port will have : removed).

        Example: ::

            >>> U = parse_url('http://google.com/mail/')
            >>> U.url
            'http://google.com/mail/'
            >>> Url('http', 'username:password', 'host.com', 80,
            ... '/path', 'query', 'fragment').url
            'http://username:password@host.com:80/path?query#fragment'
        """
        scheme, auth, host, port, path, query, fragment = self
        url = ''

        # We use "is not None" so things still happen with empty strings
        # (or a 0 port).
        if scheme is not None:
            url += scheme + '://'
        if auth is not None:
            url += auth + '@'
        if host is not None:
            url += host
        if port is not None:
            url += ':' + str(port)
        if path is not None:
            url += path
        if query is not None:
            url += '?' + query
        if fragment is not None:
            url += '#' + fragment

        return url

    def __str__(self):
        return self.url
87 |
def split_first(s, delims):
    """
    Given a string and an iterable of delimiters, split on the first found
    delimiter. Return two split parts and the matched delimiter.

    If not found, then the first part is the full input string.

    Example::

        >>> split_first('foo/bar?baz', '?/=')
        ('foo', 'bar?baz', '/')
        >>> split_first('foo/bar?baz', '123')
        ('foo/bar?baz', '', None)

    Scales linearly with number of delims. Not ideal for large number of delims.
    """
    best_idx = None
    best_delim = None
    for candidate in delims:
        idx = s.find(candidate)
        # Keep the earliest-occurring delimiter seen so far.
        if idx >= 0 and (best_idx is None or idx < best_idx):
            best_idx = idx
            best_delim = candidate

    if best_idx is None:
        return s, '', None

    return s[:best_idx], s[best_idx + 1:], best_delim
119 |
120 |
def parse_url(url):
    """
    Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
    performed to parse incomplete urls. Fields not provided will be None.

    Partly backwards-compatible with :mod:`urlparse`.

    Example::

        >>> parse_url('http://google.com/mail/')
        Url(scheme='http', host='google.com', port=None, path='/mail/', ...)
        >>> parse_url('google.com:80')
        Url(scheme=None, host='google.com', port=80, path=None, ...)
        >>> parse_url('/foo?bar')
        Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)

    :raises LocationParseError: if a port is present but not numeric.
    """

    # While this code has overlap with stdlib's urlparse, it is much
    # simplified for our needs and less annoying.
    # Additionally, this implementation does silly things to be optimal
    # on CPython.

    if not url:
        # Empty
        return Url()

    scheme = None
    auth = None
    host = None
    port = None
    path = None
    fragment = None
    query = None

    # Scheme
    if '://' in url:
        scheme, url = url.split('://', 1)

    # Find the earliest Authority Terminator
    # (http://tools.ietf.org/html/rfc3986#section-3.2)
    url, path_, delim = split_first(url, ['/', '?', '#'])

    if delim:
        # Reassemble the path (the matched delimiter belongs to it).
        path = delim + path_

    # Auth
    if '@' in url:
        # Last '@' denotes end of auth part
        auth, url = url.rsplit('@', 1)

    # IPv6
    if url and url[0] == '[':
        # Bracketed literal; keep the brackets as part of the host.
        host, url = url.split(']', 1)
        host += ']'

    # Port
    if ':' in url:
        _host, port = url.split(':', 1)

        if not host:
            # Not previously set by the IPv6 branch above.
            host = _host

        if port:
            # If given, ports must be integers.
            if not port.isdigit():
                raise LocationParseError(url)
            port = int(port)
        else:
            # Blank ports are cool, too. (rfc3986#section-3.2.3)
            port = None

    elif not host and url:
        host = url

    if not path:
        return Url(scheme, auth, host, port, path, query, fragment)

    # Fragment
    if '#' in path:
        path, fragment = path.split('#', 1)

    # Query
    if '?' in path:
        path, query = path.split('?', 1)

    return Url(scheme, auth, host, port, path, query, fragment)
208 |
def get_host(url):
    """
    Deprecated. Use :func:`.parse_url` instead.
    """
    parsed = parse_url(url)
    return parsed.scheme or 'http', parsed.hostname, parsed.port
215 |
--------------------------------------------------------------------------------
/myrequests/status_codes.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from .structures import LookupDict
4 |
# Map of HTTP status code -> tuple of snake_case attribute aliases.  The
# loop below exposes each alias (and its upper-case form) as an attribute
# on the `codes` LookupDict, e.g. codes.ok == codes.OK == 200.
_codes = {

    # Informational.
    100: ('continue',),
    101: ('switching_protocols',),
    102: ('processing',),
    103: ('checkpoint',),
    122: ('uri_too_long', 'request_uri_too_long'),
    200: ('ok', 'okay', 'all_ok', 'all_okay', 'all_good', '\\o/', '✓'),
    201: ('created',),
    202: ('accepted',),
    203: ('non_authoritative_info', 'non_authoritative_information'),
    204: ('no_content',),
    205: ('reset_content', 'reset'),
    206: ('partial_content', 'partial'),
    207: ('multi_status', 'multiple_status', 'multi_stati', 'multiple_stati'),
    208: ('already_reported',),
    226: ('im_used',),

    # Redirection.
    300: ('multiple_choices',),
    301: ('moved_permanently', 'moved', '\\o-'),
    302: ('found',),
    303: ('see_other', 'other'),
    304: ('not_modified',),
    305: ('use_proxy',),
    306: ('switch_proxy',),
    307: ('temporary_redirect', 'temporary_moved', 'temporary'),
    308: ('permanent_redirect',
          'resume_incomplete', 'resume',),  # These 2 to be removed in 3.0

    # Client Error.
    400: ('bad_request', 'bad'),
    401: ('unauthorized',),
    402: ('payment_required', 'payment'),
    403: ('forbidden',),
    404: ('not_found', '-o-'),
    405: ('method_not_allowed', 'not_allowed'),
    406: ('not_acceptable',),
    407: ('proxy_authentication_required', 'proxy_auth', 'proxy_authentication'),
    408: ('request_timeout', 'timeout'),
    409: ('conflict',),
    410: ('gone',),
    411: ('length_required',),
    # NOTE(review): alias 'precondition' is defined for both 412 and 428;
    # because 428 is assigned later in the loop below, it wins.
    412: ('precondition_failed', 'precondition'),
    413: ('request_entity_too_large',),
    414: ('request_uri_too_large',),
    415: ('unsupported_media_type', 'unsupported_media', 'media_type'),
    416: ('requested_range_not_satisfiable', 'requested_range', 'range_not_satisfiable'),
    417: ('expectation_failed',),
    418: ('im_a_teapot', 'teapot', 'i_am_a_teapot'),
    422: ('unprocessable_entity', 'unprocessable'),
    423: ('locked',),
    424: ('failed_dependency', 'dependency'),
    425: ('unordered_collection', 'unordered'),
    426: ('upgrade_required', 'upgrade'),
    428: ('precondition_required', 'precondition'),
    429: ('too_many_requests', 'too_many'),
    431: ('header_fields_too_large', 'fields_too_large'),
    444: ('no_response', 'none'),
    449: ('retry_with', 'retry'),
    450: ('blocked_by_windows_parental_controls', 'parental_controls'),
    451: ('unavailable_for_legal_reasons', 'legal_reasons'),
    499: ('client_closed_request',),

    # Server Error.
    500: ('internal_server_error', 'server_error', '/o\\', '✗'),
    501: ('not_implemented',),
    502: ('bad_gateway',),
    503: ('service_unavailable', 'unavailable'),
    504: ('gateway_timeout',),
    505: ('http_version_not_supported', 'http_version'),
    506: ('variant_also_negotiates',),
    507: ('insufficient_storage',),
    509: ('bandwidth_limit_exceeded', 'bandwidth'),
    510: ('not_extended',),
}

codes = LookupDict(name='status_codes')

# Expose every alias as an attribute; aliases that do not start with a
# backslash (i.e. are valid-ish identifiers) also get an upper-case form.
for (code, titles) in list(_codes.items()):
    for title in titles:
        setattr(codes, title, code)
        if not title.startswith('\\'):
            setattr(codes, title.upper(), code)
90 |
--------------------------------------------------------------------------------
/myrequests/structures.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | """
4 | requests.structures
5 | ~~~~~~~~~~~~~~~~~~~
6 |
7 | Data structures that power Requests.
8 |
9 | """
10 |
import collections
import collections.abc
12 |
13 |
class CaseInsensitiveDict(collections.abc.MutableMapping):
    """
    A case-insensitive ``dict``-like object.

    Implements all methods and operations of
    ``collections.abc.MutableMapping`` as well as dict's ``copy``. Also
    provides ``lower_items``.

    All keys are expected to be strings. The structure remembers the
    case of the last key to be set, and ``iter(instance)``,
    ``keys()``, ``items()``, ``iterkeys()``, and ``iteritems()``
    will contain case-sensitive keys. However, querying and contains
    testing is case insensitive::

        cid = CaseInsensitiveDict()
        cid['Accept'] = 'application/json'
        cid['aCCEPT'] == 'application/json'  # True
        list(cid) == ['Accept']  # True

    For example, ``headers['content-encoding']`` will return the
    value of a ``'Content-Encoding'`` response header, regardless
    of how the header name was originally stored.

    If the constructor, ``.update``, or equality comparison
    operations are given keys that have equal ``.lower()``s, the
    behavior is undefined.

    """
    # Bug fix: the ABC aliases on ``collections`` (``collections.Mapping``,
    # ``collections.MutableMapping``) were removed in Python 3.10; the
    # canonical ``collections.abc`` names work on all Python 3 versions.

    def __init__(self, data=None, **kwargs):
        # _store maps lowercased key -> (original-cased key, value).
        self._store = dict()
        if data is None:
            data = {}
        self.update(data, **kwargs)

    def __setitem__(self, key, value):
        # Use the lowercased key for lookups, but store the actual
        # key alongside the value.
        self._store[key.lower()] = (key, value)

    def __getitem__(self, key):
        return self._store[key.lower()][1]

    def __delitem__(self, key):
        del self._store[key.lower()]

    def __iter__(self):
        # Yield the original-cased keys.
        return (casedkey for casedkey, mappedvalue in self._store.values())

    def __len__(self):
        return len(self._store)

    def lower_items(self):
        """Like iteritems(), but with all lowercase keys."""
        return (
            (lowerkey, keyval[1])
            for (lowerkey, keyval)
            in self._store.items()
        )

    def __eq__(self, other):
        if isinstance(other, collections.abc.Mapping):
            other = CaseInsensitiveDict(other)
        else:
            return NotImplemented
        # Compare insensitively
        return dict(self.lower_items()) == dict(other.lower_items())

    # Copy is required
    def copy(self):
        # _store.values() is an iterable of (key, value) pairs, which the
        # constructor's update() accepts.
        return CaseInsensitiveDict(self._store.values())

    def __repr__(self):
        return str(dict(self.items()))
87 |
class LookupDict(dict):
    """Dictionary lookup object.

    Values are stored as instance attributes (via ``setattr``), and
    ``__getitem__``/``get`` read from ``self.__dict__`` rather than the
    underlying dict, defaulting to ``None`` for missing keys.
    """

    def __init__(self, name=None):
        self.name = name
        super(LookupDict, self).__init__()

    def __repr__(self):
        # Bug fix: the original was "'' % (self.name)", which raises
        # TypeError ("not all arguments converted during string
        # formatting"); restore the intended "<lookup '...'>" form.
        return '<lookup \'%s\'>' % (self.name)

    def __getitem__(self, key):
        # We allow fall-through here, so values default to None
        return self.__dict__.get(key, None)

    def get(self, key, default=None):
        return self.__dict__.get(key, default)
105 |
--------------------------------------------------------------------------------
/providers.txt:
--------------------------------------------------------------------------------
1 | http://rx14.co.uk/ipfs/
2 | https://ipfs.io/ipfs/
3 | https://xmine128.tk/ipfs/
4 | https://upload.global/ipfs/
5 | https://ipfs.jes.xxx/ipfs/
6 | https://siderus.io/ipfs/
7 |
--------------------------------------------------------------------------------
/pyhp_server.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import http.server
4 | import urllib.parse as urlparse
5 | from io import StringIO
6 | from socketserver import ThreadingMixIn
7 |
# Document root from which .pyhp / static files are served.
PATH = "web"
# Python prelude prepended to every <? ... ?> code block before exec:
# defines write(), which appends to the module-level '__out' accumulator
# through a `global` statement.
write_header = """__out=''
def write(text):
    global __out
    __out += text

"""
# Shared exec() globals for all pyhp blocks; '__out' collects block output.
context = {'__out': ''}


# Make the bundled ./lib directory importable by page code.
mypath = os.path.join(os.path.dirname(os.path.abspath(__file__)), "lib")
sys.path.append(mypath)
20 |
21 |
class ThreadingHTTPServer(ThreadingMixIn, http.server.HTTPServer):
    # Handle each request in its own thread so one slow client cannot
    # block the others.
    pass
24 |
25 |
class Handler(http.server.BaseHTTPRequestHandler):
    """Serves files under PATH, expanding embedded pyhp blocks via parse_file."""

    def handle_error(self, code, message):
        """Send an HTTP error status plus a small human-readable body."""
        self.send_response(code)
        # Bug fix: without end_headers() the blank line terminating the
        # header section is never sent, so clients parse the body bytes
        # as malformed headers.
        self.end_headers()
        # NOTE(review): this template appears to have lost its HTML markup
        # at some point; left byte-identical apart from the header fix.
        self.wfile.write(bytes("""


%d : %s


%d : %s


""" % (code, message, code, message), "UTF-8"))

    def do_GET(self):
        global PATH, context
        # GET and POST are handled identically; for GET the query string
        # (mypath[1]) supplies the args.
        self.do_POST()

    def do_POST(self):
        global PATH, context
        mypath = self.path.split('?', 1)
        if mypath[0] == "/":
            mypath[0] = "/index.pyhp"
        # SECURITY(review): filename is built from the raw request path with
        # no normalization, so "../" sequences can escape PATH — confirm and
        # sanitize before exposing this server publicly.
        filename = PATH + mypath[0]
        print(filename)
        data = ""
        args = {}
        # POST bodies win over URL query strings when both are present.
        if 'Content-Length' in self.headers.keys():
            length = int(self.headers['Content-Length'])
            args = urlparse.parse_qs(self.rfile.read(length).decode('utf-8'))
        elif len(mypath) > 1:
            args = urlparse.parse_qs(mypath[1])
        try:
            with open(filename, "r") as fp:
                data = fp.read()
        except Exception:
            return self.handle_error(404, "file %s not found" % filename)
        self.send_response(200)
        #self.send_header("Content-type", "text/html")
        self.end_headers()
        context['args'] = args
        self.wfile.write(bytes(parse_file(data, context), "UTF-8"))
69 |
70 |
def run_while_true(port=8080, server_class=ThreadingHTTPServer,
                   handler_class=Handler):
    """
    Bind a threading HTTP server to all interfaces on *port* and serve
    requests forever (the loop has no exit condition).

    NOTE(review): the original docstring described a keep_running()
    predicate that does not exist anywhere in this file; despite the
    function name, the loop is unconditional.
    """
    server_address = ('', port)
    httpd = server_class(server_address, handler_class)
    while True:
        httpd.handle_request()
82 |
83 |
def parse_file(text, context):
    """Expand every ``<? ... ?>`` pyhp block in *text*.

    Each block is compiled with the ``write_header`` prelude prepended,
    exec'd with *context* as globals, and replaced in the page by whatever
    the block wrote via ``write()``; the function then recurses on the
    resulting text until no blocks remain.  On an exception inside a block,
    the exception's string is returned as the whole page.

    SECURITY(review): exec of page-embedded code is the whole point of this
    server, but it means any writable .pyhp file is arbitrary code execution.
    """
    i = 0
    mode = "html"
    open_index = -1
    while(i < len(text)):
        if mode == "html":
            if text[i] == "<":
                # NOTE(review): text[i + 1] will raise IndexError if the page
                # ends with a bare "<" — pre-existing behavior, unchanged.
                if text[i + 1] == "?":
                    i = i + 1
                    mode = "pyhp"
                    open_index = i + 1
        if mode == "pyhp":
            if text[i] == "?":
                if text[i + 1] == ">":
                    ret = compile(write_header + text[open_index:i], "", "exec")
                    # Bug fix: reset the '__out' accumulator that write()
                    # appends to. The original reset the unused 'out' key,
                    # so output of earlier blocks was duplicated into every
                    # later block on the same page.
                    context['__out'] = ""
                    try:
                        exec(ret, context, {})
                    except Exception as E:
                        return str(E)
                    # Splice the block's output over "<?...?>" and recurse.
                    text = text[:open_index - 2] + context['__out'] + text[i + 2:]
                    return parse_file(text, context)
        i = i + 1
    return text
109 |
110 |
if __name__ == "__main__":
    # Optional CLI argument: port number to listen on (default 8000).
    port = int(sys.argv[1]) if sys.argv[1:] else 8000
    run_while_true(port)
117 |
--------------------------------------------------------------------------------
/pymultihash/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from .pyMultiHash import *
3 |
--------------------------------------------------------------------------------
/pymultihash/base58.py:
--------------------------------------------------------------------------------
1 | """ base58 encoding / decoding functions """
2 | """Shamelessly stolen from https://gist.github.com/ianoxley/865912"""
3 |
alphabet = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
base_count = len(alphabet)


def encode(num):
    """Return *num* as a base58-encoded string ('' for num <= 0)."""
    if num < 0:
        return ''
    digits = []
    while num >= base_count:
        num, remainder = divmod(num, base_count)
        digits.append(alphabet[remainder])
    if num:
        digits.append(alphabet[num])
    # Digits were collected least-significant first; reverse for output.
    return ''.join(reversed(digits))


def decode(s):
    """Decode the base58-encoded string *s* into an integer (Horner's method)."""
    decoded = 0
    for char in s:
        decoded = decoded * base_count + alphabet.index(char)
    return decoded
34 |
--------------------------------------------------------------------------------
/pymultihash/pyMultiHash.py:
--------------------------------------------------------------------------------
1 | """
2 | pyMultihash is a python implementation of the Multihash standard: https://github.com/jbenet/multihash
3 |
4 | """
5 |
6 | import hashlib
7 | from . import base58
8 | import binascii
9 |
10 | """
11 | These first two methods are kinda inefficient, but python is not really designed to mess with bytes
12 | """
def int_to_byte_array(big_int):
    """Convert a non-negative integer to a big-endian list of byte values.

    Returns [] for 0.

    Bug fix: the loop condition was ``big_int > 1``, which dropped a
    most-significant byte whose value is 1 (256 -> [0] instead of [1, 0])
    and mapped 1 -> [] instead of [1].
    """
    array = []
    while big_int > 0:
        array.append(big_int % 256)
        big_int = big_int // 256
    # Bytes were collected least-significant first; reverse to big-endian.
    return array[::-1]
19 |
def bytes_to_long(bytestr):
    """Interpret a non-empty byte sequence as a big-endian unsigned integer."""
    assert(len(bytestr)>0)
    # int.from_bytes is the direct form of the hexlify/int(..., 16) dance.
    return int.from_bytes(bytes(bytestr), "big")
24 |
25 |
26 | """
27 | the main event!
28 | """
def parseHash(hashstr):
    """Parse a base58 multihash string and return its digest as an integer.

    Layout (https://github.com/jbenet/multihash): byte 0 is the hash
    function id, byte 1 the declared digest length, the rest the digest.

    Raises Exception when the decoded value is shorter than the 3-byte
    minimum (function id, length, at least one digest byte).
    """
    hashint = base58.decode(hashstr)
    hashbytes = int_to_byte_array(hashint)
    if len(hashbytes) < 3:
        raise Exception("Multihash must be at least 3 bytes")

    # hashbytes[0] (the function id) was assigned to an unused local in the
    # original; it is intentionally ignored here.
    hash_length = int(hashbytes[1])
    hash_contents = hashbytes[2:hash_length+2]

    return bytes_to_long(hash_contents)
40 |
def genHash(bytestr, func_id):
    """Hash the string *bytestr* (UTF-8) with the multihash function
    *func_id* (0x11 sha1, 0x12 sha256, 0x13 sha512) and return the
    base58-encoded multihash.

    Raises Exception for any other func_id.
    """
    # Dispatch table replaces the original if/elif chain.
    constructors = {
        0x11: hashlib.sha1,
        0x12: hashlib.sha256,
        0x13: hashlib.sha512,
    }
    if func_id not in constructors:
        raise Exception("Requested hash is not supported")
    hashfunc = constructors[func_id]()
    hashfunc.update(bytes(bytestr, "UTF-8"))
    digest = hashfunc.digest()
    size = hashfunc.digest_size
    # Multihash layout: <func_id byte><length byte><digest bytes>.
    packed = func_id.to_bytes(1, "big") + size.to_bytes(1, "big") + digest
    return base58.encode(bytes_to_long(packed))
60 |
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | CacheWarmer
2 | =======================
3 |
4 | CacheWarmer is a fast and hacky solution to allow people to donate ipfs gateways to cache other people's content.
5 |
6 | It practically just wgets the requested hash on a list of addresses (and aborts to avoid getting your file gumming up my ram)
7 |
8 | If you want to donate your public ipfs gateway, make a PR adding it to providers.txt
9 |
10 | If you want to run your own instance of cachewarmer (you will have to manually pull to get updates), it should only require python3.
11 |
12 | After cloning the repo, in a screen session run:
13 |
14 | ```
15 | python3 pyhp_server.py 8001
16 |
17 | ```
18 |
--------------------------------------------------------------------------------
/web/cacheit.pyhp:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
50 |
51 |
52 |
53 |
--------------------------------------------------------------------------------
/web/css/main.css:
--------------------------------------------------------------------------------
1 | .content
2 | {
3 | min-width: 500px;
4 | }
5 |
6 | .window_wrapper
7 | {
8 | width: 800px;
9 | height: 420px;
10 | background-color: rgba(255,255,255,0.92);
11 | border: 2px solid #457BF7;
12 | border-radius: 5px;
13 | position: absolute;
14 | left: 0px;
15 | right: 0px;
16 | top: 0px;
17 | bottom: 0px;
18 | margin: auto;
19 | overflow: hidden;
20 | }
21 |
--------------------------------------------------------------------------------
/web/css/main.css~:
--------------------------------------------------------------------------------
1 | .content
2 | {
3 | min-width: 500px;
4 | }
5 |
6 | .window_wrapper
7 | {
8 | width: 800px;
9 | height: 420px;
10 | background-color: rgba(255,255,255,0.92);
11 | border: 2px solid #457BF7;
12 | border-radius: 5px;
13 | position: absolute;
14 | left: 0px;
15 | right: 0px;
16 | top: 0px;
17 | bottom: 0px;
18 | margin: auto;
19 | overflow: hidden;
20 | }
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
--------------------------------------------------------------------------------
/web/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | Pin Box
4 |
This site offers a simple way to donate ipfs caching to other users.
6 | Feel free to add your own public gateway by creating an issue or pull request at the
7 | Github Repo
8 |
9 |
10 |
11 |
16 |
17 | It is important to note, that this essentially just sends a request to each of the donated gateways.
The owners of the gateways have donated their resources, and may not be reliable.
19 | In fact I'm not going to promise that any of this works.
20 |
21 |
22 |
23 |
24 |
25 |
26 |
--------------------------------------------------------------------------------
/web/index.pyhp:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
6 |
8 |
9 |
10 |
11 |
12 |
13 | CacheWarmer
14 |
This site offers a simple way to donate ipfs caching to other users.
16 | Feel free to add your own public gateway by creating an issue or pull request at the
17 | Github Repo
18 |
19 |
20 |
21 |
22 | Current Providers are:
23 |
24 |
25 | with open("providers.txt") as fp:
26 | for l in fp:
27 | write(" "+l+"")
28 |
29 | ?>
30 |
31 |
32 |
33 |
34 |
42 |
43 | It is important to note, that this essentially just sends a request to each of the donated gateways.
The owners of the gateways have donated their resources, and may not be reliable.
45 | In fact I'm not going to promise that any of this works.
46 |
47 |
48 |
49 | Experimental Search:
50 |
56 | **Note that this is kinda experimental, uses bloom filters, and has false positives
57 |
58 |
59 |
60 |
--------------------------------------------------------------------------------
/web/js/live_bg.js:
--------------------------------------------------------------------------------
// Kick off the background animation once the DOM is ready (jQuery).
$(document).ready(function() {
    init();
    //$('.bg_canvas').css('display', 'none');
});
5 |
var rad = 250; //minimal radius (animated toward trad each frame)
var trad = 550; //maximal (target) radius
var pts = []; //points array
var c; //canvas
var $$; //2d rendering context

// Viewport size captured once at load; not updated on window resize.
var cww = window.innerWidth;
var cwh = window.innerHeight;

var offsetX = cww / 2; //x offset (cloud center)
var offsetY = cwh / 2; //y offset (cloud center)
// NOTE(review): despite the original "max object parts" comment, this is
// used in draw() as the maximum distance at which two points are linked.
var max = 65;
18 |
// Populate the point cloud with 120 particles at random angles, speeds and
// distances within `rad`, then start the animation loop.
function init() {
    c = document.getElementById('bg_canvas');
    //c.height = window.innerHeight;

    $$ = c.getContext('2d');
    var angle = 0;
    var speed = 0;
    var dist = 0;
    for (var i = 0; i < 120; ++i) {
        angle = Math.random() * Math.PI * 2;
        speed = Math.random() * 2;
        dist = Math.random() * rad;
        // Each particle moves radially outward along its spawn angle.
        pts.push({
            x:Math.sin(angle) * dist,
            y:Math.cos(angle) * dist,
            incx:Math.sin(angle) * speed,
            incy:Math.cos(angle) * speed,
            speed:speed
        });
    }
    draw();
}
41 |
// Shrink the target radius (presumably a hover-in handler — confirm caller).
function In() {
    trad = 160;
}
45 |
// Grow the target radius back (presumably a hover-out handler).
// NOTE(review): restores 300, not the initial 550, so after the first
// In()/Out() cycle the cloud settles at a smaller radius.
function Out() {
    trad = 300;
}
49 |
// One animation frame: ease `rad` toward `trad`, clear the canvas, draw a
// line between every pair of points closer than `max`, then advance each
// point and re-inject it near the center once it leaves the radius.
function draw() {
    // Exponential easing of the live radius toward the target.
    rad += (trad - rad) * .01;

    var i = 0;
    var j = 0;
    var l = pts.length;
    var part = null;
    var part2 = null;
    var dx = 0;
    var dy = 0;
    //canvasWidth = window.innerWidth;
    //canvasHeight = window.innerHeight;

    var canvasWidth = 1920;
    var canvasHeight = 1080;

    $$.fillStyle = "rgba(255, 255, 255, 1)";
    // NOTE(review): fillRect takes (x, y, width, height); passing
    // canvasHeight*2 as width and canvasWidth*3 as height looks swapped,
    // though the oversized rect still covers the canvas — confirm intent.
    $$.fillRect(0, 0, canvasHeight * 2, canvasWidth * 3);
    $$.strokeStyle = 'rgba(15, 91, 214, 1)';

    // O(n^2) pass over point pairs: link those within `max` pixels,
    // with line width proportional to their closeness.
    for (i = 0; i < l; ++i) {
        part = pts[i];
        for (j = i + 1; j < l; ++j) {
            part2 = pts[j];
            dx = part.x - part2.x;
            dy = part.y - part2.y;
            var dif = Math.sqrt(dx * dx + dy * dy);
            if (dif < max) {
                $$.lineWidth = (max - dif) * 0.05;
                $$.beginPath();
                $$.moveTo(offsetX + part.x * 2, offsetY + part.y * 2);
                $$.lineTo(offsetX + part2.x * 2, offsetY + part2.y * 2);
                $$.stroke();
            }
        }

        // move current obj
        part.x += part.incx;
        part.y += part.incy;

        var ptDist = Math.sqrt((part.x * part.x) + (part.y * part.y))
        // Past the live radius: mirror the point back toward the center
        // (scaled to ~100px out) and give it a fresh random velocity.
        if (ptDist > rad) {
            var mp = ( 1 / ptDist ) * 100;
            part.x = -part.x * mp;
            part.y = -part.y * mp;
            part.incx = (Math.random() - 0.5) * part.speed;
            part.incy = (Math.random() - 0.5) * part.speed;
        }
    }
    requestAnimFrame(draw);
}
101 |
// requestAnimationFrame shim across vendor prefixes, with a setTimeout
// fallback. NOTE(review): the fallback interval is 2000/60 ms (~30fps),
// double the conventional 1000/60 — possibly an intentional throttle.
window.requestAnimFrame = (function() {
    return window.requestAnimationFrame ||
        window.webkitRequestAnimationFrame ||
        window.mozRequestAnimationFrame ||
        window.oRequestAnimationFrame ||
        window.msRequestAnimationFrame ||
        function(callback, element) {
            window.setTimeout(callback, 2000 / 60);
        };
})();
--------------------------------------------------------------------------------
/web/searchit.pyhp:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Results:
6 |
7 |
8 |
9 | terms = args["terms"][0]
10 |
11 | import myrequests as requests
12 |
13 |
14 | import indexit
15 | import json
16 | lookup = indexit.generateBloomFilter(terms.split(" "))
17 | output = []
18 | with open("index.json","r") as fp:
19 | index = json.load(fp)
20 | for k in index.keys():
21 | if indexit.filterInFilter(int(k),lookup):
22 | output.append(index[k])
23 |
24 | if(len(output) > 0):
25 | for res in output:
26 | write("""%s """%(res,res))
27 | else:
28 | write("
Sorry, no hits ")
29 |
30 |
31 | ?>
32 |
33 |
34 |
35 |
36 |
37 |
--------------------------------------------------------------------------------