├── .gitignore ├── chardet ├── cli │ ├── __init__.py │ └── chardetect.py ├── version.py ├── compat.py ├── __init__.py ├── euctwprober.py ├── euckrprober.py ├── gb2312prober.py ├── big5prober.py ├── enums.py ├── cp949prober.py ├── mbcsgroupprober.py ├── utf8prober.py ├── mbcharsetprober.py ├── sbcsgroupprober.py ├── codingstatemachine.py ├── eucjpprober.py ├── sjisprober.py ├── charsetgroupprober.py ├── escprober.py ├── charsetprober.py ├── latin1prober.py ├── sbcharsetprober.py └── chardistribution.py ├── urllib3 ├── contrib │ ├── __init__.py │ ├── _securetransport │ │ └── __init__.py │ ├── _appengine_environ.py │ ├── ntlmpool.py │ └── socks.py ├── packages │ ├── backports │ │ ├── __init__.py │ │ └── makefile.py │ ├── __init__.py │ └── ssl_match_hostname │ │ ├── __init__.py │ │ └── _implementation.py ├── util │ ├── queue.py │ ├── __init__.py │ ├── response.py │ ├── request.py │ ├── connection.py │ ├── wait.py │ └── timeout.py ├── filepost.py ├── __init__.py ├── request.py ├── exceptions.py └── fields.py ├── idna ├── package_data.py ├── __init__.py ├── compat.py ├── intranges.py └── codec.py ├── fuzzywuzzy ├── __init__.py ├── string_processing.py ├── StringMatcher.py └── utils.py ├── certifi ├── __init__.py ├── __main__.py └── core.py ├── lambda_function.zip ├── Levenshtein ├── _levenshtein.so ├── __init__.py └── StringMatcher.py ├── scripts ├── prep_for_commit.sh └── download_lambda_function.sh └── requests ├── __version__.py ├── certs.py ├── packages.py ├── hooks.py ├── _internal_utils.py ├── compat.py ├── structures.py ├── exceptions.py ├── help.py ├── __init__.py ├── status_codes.py └── api.py /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | -------------------------------------------------------------------------------- /chardet/cli/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 
-------------------------------------------------------------------------------- /urllib3/contrib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /urllib3/packages/backports/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /urllib3/contrib/_securetransport/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /idna/package_data.py: -------------------------------------------------------------------------------- 1 | __version__ = '2.9' 2 | 3 | -------------------------------------------------------------------------------- /fuzzywuzzy/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | __version__ = '0.16.0' 3 | -------------------------------------------------------------------------------- /idna/__init__.py: -------------------------------------------------------------------------------- 1 | from .package_data import __version__ 2 | from .core import * 3 | -------------------------------------------------------------------------------- /certifi/__init__.py: -------------------------------------------------------------------------------- 1 | from .core import contents, where 2 | 3 | __version__ = "2020.04.05.1" 4 | -------------------------------------------------------------------------------- /lambda_function.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndg63276/alexa-sky-hd/HEAD/lambda_function.zip -------------------------------------------------------------------------------- /Levenshtein/_levenshtein.so: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndg63276/alexa-sky-hd/HEAD/Levenshtein/_levenshtein.so -------------------------------------------------------------------------------- /Levenshtein/__init__.py: -------------------------------------------------------------------------------- 1 | from Levenshtein import _levenshtein 2 | from Levenshtein._levenshtein import * 3 | 4 | __doc__ = _levenshtein.__doc__ 5 | -------------------------------------------------------------------------------- /urllib3/packages/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from . import ssl_match_hostname 4 | 5 | __all__ = ("ssl_match_hostname",) 6 | -------------------------------------------------------------------------------- /chardet/version.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module exists only to simplify retrieving the version number of chardet 3 | from within setup.py and from chardet subpackages. 
4 | 5 | :author: Dan Blanchard (dan.blanchard@gmail.com) 6 | """ 7 | 8 | __version__ = "3.0.4" 9 | VERSION = __version__.split('.') 10 | -------------------------------------------------------------------------------- /idna/compat.py: -------------------------------------------------------------------------------- 1 | from .core import * 2 | from .codec import * 3 | 4 | def ToASCII(label): 5 | return encode(label) 6 | 7 | def ToUnicode(label): 8 | return decode(label) 9 | 10 | def nameprep(s): 11 | raise NotImplementedError("IDNA 2008 does not utilise nameprep protocol") 12 | 13 | -------------------------------------------------------------------------------- /certifi/__main__.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from certifi import contents, where 4 | 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument("-c", "--contents", action="store_true") 7 | args = parser.parse_args() 8 | 9 | if args.contents: 10 | print(contents()) 11 | else: 12 | print(where()) 13 | -------------------------------------------------------------------------------- /scripts/prep_for_commit.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | gitroot=`git rev-parse --show-toplevel` 4 | rm $gitroot/lambda_function.zip 2>/dev/null 5 | find . -type f -name *~ -exec rm -rf {} \; 6 | find . -type d -name __pycache__ -exec rm -rf {} \; 7 | find . -type f -name *.py[cod] -exec rm -rf {} \; 8 | find . 
-type f -name *\$py.class -exec rm -rf {} \; 9 | 10 | 11 | GLOBIGNORE="*" 12 | command="cd $gitroot; zip -r lambda_function.zip * -x '.git*' -x 'scripts*'" 13 | echo $command 14 | -------------------------------------------------------------------------------- /scripts/download_lambda_function.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | gitroot=`git rev-parse --show-toplevel` 4 | 5 | if [[ $1 == "" ]]; then 6 | echo "Specify full arn" 7 | exit 8 | else 9 | arn="$1" 10 | region=`echo $arn | cut -d: -f4` 11 | fi 12 | 13 | url=$(aws lambda --region $region get-function --function-name $arn | grep Location | cut -d'"' -f4) 14 | wget -O /tmp/aws.zip "$url" 15 | unzip -o /tmp/aws.zip lambda_function.py -d $gitroot 16 | rm /tmp/aws.zip 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /requests/__version__.py: -------------------------------------------------------------------------------- 1 | # .-. .-. .-. . . .-. .-. .-. .-. 2 | # |( |- |.| | | |- `-. | `-. 3 | # ' ' `-' `-`.`-' `-' `-' ' `-' 4 | 5 | __title__ = 'requests' 6 | __description__ = 'Python HTTP for Humans.' 7 | __url__ = 'https://requests.readthedocs.io' 8 | __version__ = '2.23.0' 9 | __build__ = 0x022300 10 | __author__ = 'Kenneth Reitz' 11 | __author_email__ = 'me@kennethreitz.org' 12 | __license__ = 'Apache 2.0' 13 | __copyright__ = 'Copyright 2020 Kenneth Reitz' 14 | __cake__ = u'\u2728 \U0001f370 \u2728' 15 | -------------------------------------------------------------------------------- /requests/certs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | requests.certs 6 | ~~~~~~~~~~~~~~ 7 | 8 | This module returns the preferred default CA certificate bundle. There is 9 | only one — the one from the certifi package. 
10 | 11 | If you are packaging Requests, e.g., for a Linux distribution or a managed 12 | environment, you can change the definition of where() to return a separately 13 | packaged CA bundle. 14 | """ 15 | from certifi import where 16 | 17 | if __name__ == '__main__': 18 | print(where()) 19 | -------------------------------------------------------------------------------- /urllib3/util/queue.py: -------------------------------------------------------------------------------- 1 | import collections 2 | from ..packages import six 3 | from ..packages.six.moves import queue 4 | 5 | if six.PY2: 6 | # Queue is imported for side effects on MS Windows. See issue #229. 7 | import Queue as _unused_module_Queue # noqa: F401 8 | 9 | 10 | class LifoQueue(queue.Queue): 11 | def _init(self, _): 12 | self.queue = collections.deque() 13 | 14 | def _qsize(self, len=len): 15 | return len(self.queue) 16 | 17 | def _put(self, item): 18 | self.queue.append(item) 19 | 20 | def _get(self): 21 | return self.queue.pop() 22 | -------------------------------------------------------------------------------- /requests/packages.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | # This code exists for backwards compatibility reasons. 4 | # I don't like it either. Just look the other way. :) 5 | 6 | for package in ('urllib3', 'idna', 'chardet'): 7 | locals()[package] = __import__(package) 8 | # This traversal is apparently necessary such that the identities are 9 | # preserved (requests.packages.urllib3.* is urllib3.*) 10 | for mod in list(sys.modules): 11 | if mod == package or mod.startswith(package + '.'): 12 | sys.modules['requests.packages.' + mod] = sys.modules[mod] 13 | 14 | # Kinda cool, though, right? 
15 | -------------------------------------------------------------------------------- /urllib3/packages/ssl_match_hostname/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | try: 4 | # Our match_hostname function is the same as 3.5's, so we only want to 5 | # import the match_hostname function if it's at least that good. 6 | if sys.version_info < (3, 5): 7 | raise ImportError("Fallback to vendored code") 8 | 9 | from ssl import CertificateError, match_hostname 10 | except ImportError: 11 | try: 12 | # Backport of the function from a pypi module 13 | from backports.ssl_match_hostname import CertificateError, match_hostname 14 | except ImportError: 15 | # Our vendored copy 16 | from ._implementation import CertificateError, match_hostname 17 | 18 | # Not needed, but documenting what we provide. 19 | __all__ = ("CertificateError", "match_hostname") 20 | -------------------------------------------------------------------------------- /requests/hooks.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | requests.hooks 5 | ~~~~~~~~~~~~~~ 6 | 7 | This module provides the capabilities for the Requests hooks system. 8 | 9 | Available hooks: 10 | 11 | ``response``: 12 | The response generated from a Request. 
13 | """ 14 | HOOKS = ['response'] 15 | 16 | 17 | def default_hooks(): 18 | return {event: [] for event in HOOKS} 19 | 20 | # TODO: response is the only one 21 | 22 | 23 | def dispatch_hook(key, hooks, hook_data, **kwargs): 24 | """Dispatches a hook dictionary on a given piece of data.""" 25 | hooks = hooks or {} 26 | hooks = hooks.get(key) 27 | if hooks: 28 | if hasattr(hooks, '__call__'): 29 | hooks = [hooks] 30 | for hook in hooks: 31 | _hook_data = hook(hook_data, **kwargs) 32 | if _hook_data is not None: 33 | hook_data = _hook_data 34 | return hook_data 35 | -------------------------------------------------------------------------------- /certifi/core.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | certifi.py 5 | ~~~~~~~~~~ 6 | 7 | This module returns the installation location of cacert.pem or its contents. 8 | """ 9 | import os 10 | 11 | try: 12 | from importlib.resources import read_text 13 | except ImportError: 14 | # This fallback will work for Python versions prior to 3.7 that lack the 15 | # importlib.resources module but relies on the existing `where` function 16 | # so won't address issues with environments like PyOxidizer that don't set 17 | # __file__ on modules. 
18 | def read_text(_module, _path, encoding="ascii"): 19 | with open(where(), "r", encoding=encoding) as data: 20 | return data.read() 21 | 22 | 23 | def where(): 24 | f = os.path.dirname(__file__) 25 | 26 | return os.path.join(f, "cacert.pem") 27 | 28 | 29 | def contents(): 30 | return read_text("certifi", "cacert.pem", encoding="ascii") 31 | -------------------------------------------------------------------------------- /fuzzywuzzy/string_processing.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | import re 3 | import string 4 | import sys 5 | 6 | PY3 = sys.version_info[0] == 3 7 | if PY3: 8 | string = str 9 | 10 | 11 | class StringProcessor(object): 12 | """ 13 | This class defines method to process strings in the most 14 | efficient way. Ideally all the methods below use unicode strings 15 | for both input and output. 16 | """ 17 | 18 | regex = re.compile(r"(?ui)\W") 19 | 20 | @classmethod 21 | def replace_non_letters_non_numbers_with_whitespace(cls, a_string): 22 | """ 23 | This function replaces any sequence of non letters and non 24 | numbers with a single white space. 25 | """ 26 | return cls.regex.sub(" ", a_string) 27 | 28 | strip = staticmethod(string.strip) 29 | to_lower_case = staticmethod(string.lower) 30 | to_upper_case = staticmethod(string.upper) 31 | -------------------------------------------------------------------------------- /urllib3/contrib/_appengine_environ.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module provides means to detect the App Engine environment. 3 | """ 4 | 5 | import os 6 | 7 | 8 | def is_appengine(): 9 | return is_local_appengine() or is_prod_appengine() 10 | 11 | 12 | def is_appengine_sandbox(): 13 | """Reports if the app is running in the first generation sandbox. 
14 | 15 | The second generation runtimes are technically still in a sandbox, but it 16 | is much less restrictive, so generally you shouldn't need to check for it. 17 | see https://cloud.google.com/appengine/docs/standard/runtimes 18 | """ 19 | return is_appengine() and os.environ["APPENGINE_RUNTIME"] == "python27" 20 | 21 | 22 | def is_local_appengine(): 23 | return "APPENGINE_RUNTIME" in os.environ and os.environ.get( 24 | "SERVER_SOFTWARE", "" 25 | ).startswith("Development/") 26 | 27 | 28 | def is_prod_appengine(): 29 | return "APPENGINE_RUNTIME" in os.environ and os.environ.get( 30 | "SERVER_SOFTWARE", "" 31 | ).startswith("Google App Engine/") 32 | 33 | 34 | def is_prod_appengine_mvms(): 35 | """Deprecated.""" 36 | return False 37 | -------------------------------------------------------------------------------- /urllib3/util/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | # For backwards compatibility, provide imports that used to be here. 
4 | from .connection import is_connection_dropped 5 | from .request import make_headers 6 | from .response import is_fp_closed 7 | from .ssl_ import ( 8 | SSLContext, 9 | HAS_SNI, 10 | IS_PYOPENSSL, 11 | IS_SECURETRANSPORT, 12 | assert_fingerprint, 13 | resolve_cert_reqs, 14 | resolve_ssl_version, 15 | ssl_wrap_socket, 16 | PROTOCOL_TLS, 17 | ) 18 | from .timeout import current_time, Timeout 19 | 20 | from .retry import Retry 21 | from .url import get_host, parse_url, split_first, Url 22 | from .wait import wait_for_read, wait_for_write 23 | 24 | __all__ = ( 25 | "HAS_SNI", 26 | "IS_PYOPENSSL", 27 | "IS_SECURETRANSPORT", 28 | "SSLContext", 29 | "PROTOCOL_TLS", 30 | "Retry", 31 | "Timeout", 32 | "Url", 33 | "assert_fingerprint", 34 | "current_time", 35 | "is_connection_dropped", 36 | "is_fp_closed", 37 | "get_host", 38 | "parse_url", 39 | "make_headers", 40 | "resolve_cert_reqs", 41 | "resolve_ssl_version", 42 | "split_first", 43 | "ssl_wrap_socket", 44 | "wait_for_read", 45 | "wait_for_write", 46 | ) 47 | -------------------------------------------------------------------------------- /chardet/compat.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # Contributor(s): 3 | # Dan Blanchard 4 | # Ian Cordasco 5 | # 6 | # This library is free software; you can redistribute it and/or 7 | # modify it under the terms of the GNU Lesser General Public 8 | # License as published by the Free Software Foundation; either 9 | # version 2.1 of the License, or (at your option) any later version. 10 | # 11 | # This library is distributed in the hope that it will be useful, 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | # Lesser General Public License for more details. 
15 | # 16 | # You should have received a copy of the GNU Lesser General Public 17 | # License along with this library; if not, write to the Free Software 18 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 19 | # 02110-1301 USA 20 | ######################### END LICENSE BLOCK ######################### 21 | 22 | import sys 23 | 24 | 25 | if sys.version_info < (3, 0): 26 | PY2 = True 27 | PY3 = False 28 | base_str = (str, unicode) 29 | text_type = unicode 30 | else: 31 | PY2 = False 32 | PY3 = True 33 | base_str = (bytes, str) 34 | text_type = str 35 | -------------------------------------------------------------------------------- /requests/_internal_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | requests._internal_utils 5 | ~~~~~~~~~~~~~~ 6 | 7 | Provides utility functions that are consumed internally by Requests 8 | which depend on extremely few external helpers (such as compat) 9 | """ 10 | 11 | from .compat import is_py2, builtin_str, str 12 | 13 | 14 | def to_native_string(string, encoding='ascii'): 15 | """Given a string object, regardless of type, returns a representation of 16 | that string in the native string type, encoding and decoding where 17 | necessary. This assumes ASCII unless told otherwise. 18 | """ 19 | if isinstance(string, builtin_str): 20 | out = string 21 | else: 22 | if is_py2: 23 | out = string.encode(encoding) 24 | else: 25 | out = string.decode(encoding) 26 | 27 | return out 28 | 29 | 30 | def unicode_is_ascii(u_string): 31 | """Determine if unicode string only contains ASCII characters. 32 | 33 | :param str u_string: unicode string to check. Must be unicode 34 | and not Python 2 `str`. 
35 | :rtype: bool 36 | """ 37 | assert isinstance(u_string, str) 38 | try: 39 | u_string.encode('ascii') 40 | return True 41 | except UnicodeEncodeError: 42 | return False 43 | -------------------------------------------------------------------------------- /urllib3/packages/backports/makefile.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | backports.makefile 4 | ~~~~~~~~~~~~~~~~~~ 5 | 6 | Backports the Python 3 ``socket.makefile`` method for use with anything that 7 | wants to create a "fake" socket object. 8 | """ 9 | import io 10 | 11 | from socket import SocketIO 12 | 13 | 14 | def backport_makefile( 15 | self, mode="r", buffering=None, encoding=None, errors=None, newline=None 16 | ): 17 | """ 18 | Backport of ``socket.makefile`` from Python 3.5. 19 | """ 20 | if not set(mode) <= {"r", "w", "b"}: 21 | raise ValueError("invalid mode %r (only r, w, b allowed)" % (mode,)) 22 | writing = "w" in mode 23 | reading = "r" in mode or not writing 24 | assert reading or writing 25 | binary = "b" in mode 26 | rawmode = "" 27 | if reading: 28 | rawmode += "r" 29 | if writing: 30 | rawmode += "w" 31 | raw = SocketIO(self, rawmode) 32 | self._makefile_refs += 1 33 | if buffering is None: 34 | buffering = -1 35 | if buffering < 0: 36 | buffering = io.DEFAULT_BUFFER_SIZE 37 | if buffering == 0: 38 | if not binary: 39 | raise ValueError("unbuffered streams must be binary") 40 | return raw 41 | if reading and writing: 42 | buffer = io.BufferedRWPair(raw, raw, buffering) 43 | elif reading: 44 | buffer = io.BufferedReader(raw, buffering) 45 | else: 46 | assert writing 47 | buffer = io.BufferedWriter(raw, buffering) 48 | if binary: 49 | return buffer 50 | text = io.TextIOWrapper(buffer, encoding, errors, newline) 51 | text.mode = mode 52 | return text 53 | -------------------------------------------------------------------------------- /chardet/__init__.py: 
-------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # This library is free software; you can redistribute it and/or 3 | # modify it under the terms of the GNU Lesser General Public 4 | # License as published by the Free Software Foundation; either 5 | # version 2.1 of the License, or (at your option) any later version. 6 | # 7 | # This library is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 10 | # Lesser General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU Lesser General Public 13 | # License along with this library; if not, write to the Free Software 14 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 15 | # 02110-1301 USA 16 | ######################### END LICENSE BLOCK ######################### 17 | 18 | 19 | from .compat import PY2, PY3 20 | from .universaldetector import UniversalDetector 21 | from .version import __version__, VERSION 22 | 23 | 24 | def detect(byte_str): 25 | """ 26 | Detect the encoding of the given byte string. 27 | 28 | :param byte_str: The byte sequence to examine. 
29 | :type byte_str: ``bytes`` or ``bytearray`` 30 | """ 31 | if not isinstance(byte_str, bytearray): 32 | if not isinstance(byte_str, bytes): 33 | raise TypeError('Expected object of type bytes or bytearray, got: ' 34 | '{0}'.format(type(byte_str))) 35 | else: 36 | byte_str = bytearray(byte_str) 37 | detector = UniversalDetector() 38 | detector.feed(byte_str) 39 | return detector.close() 40 | -------------------------------------------------------------------------------- /chardet/euctwprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 
21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import EUCTWDistributionAnalysis 31 | from .mbcssm import EUCTW_SM_MODEL 32 | 33 | class EUCTWProber(MultiByteCharSetProber): 34 | def __init__(self): 35 | super(EUCTWProber, self).__init__() 36 | self.coding_sm = CodingStateMachine(EUCTW_SM_MODEL) 37 | self.distribution_analyzer = EUCTWDistributionAnalysis() 38 | self.reset() 39 | 40 | @property 41 | def charset_name(self): 42 | return "EUC-TW" 43 | 44 | @property 45 | def language(self): 46 | return "Taiwan" 47 | -------------------------------------------------------------------------------- /chardet/euckrprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 
16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import EUCKRDistributionAnalysis 31 | from .mbcssm import EUCKR_SM_MODEL 32 | 33 | 34 | class EUCKRProber(MultiByteCharSetProber): 35 | def __init__(self): 36 | super(EUCKRProber, self).__init__() 37 | self.coding_sm = CodingStateMachine(EUCKR_SM_MODEL) 38 | self.distribution_analyzer = EUCKRDistributionAnalysis() 39 | self.reset() 40 | 41 | @property 42 | def charset_name(self): 43 | return "EUC-KR" 44 | 45 | @property 46 | def language(self): 47 | return "Korean" 48 | -------------------------------------------------------------------------------- /chardet/gb2312prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 
8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import GB2312DistributionAnalysis 31 | from .mbcssm import GB2312_SM_MODEL 32 | 33 | class GB2312Prober(MultiByteCharSetProber): 34 | def __init__(self): 35 | super(GB2312Prober, self).__init__() 36 | self.coding_sm = CodingStateMachine(GB2312_SM_MODEL) 37 | self.distribution_analyzer = GB2312DistributionAnalysis() 38 | self.reset() 39 | 40 | @property 41 | def charset_name(self): 42 | return "GB2312" 43 | 44 | @property 45 | def language(self): 46 | return "Chinese" 47 | -------------------------------------------------------------------------------- /chardet/big5prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Communicator client code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 
6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import Big5DistributionAnalysis 31 | from .mbcssm import BIG5_SM_MODEL 32 | 33 | 34 | class Big5Prober(MultiByteCharSetProber): 35 | def __init__(self): 36 | super(Big5Prober, self).__init__() 37 | self.coding_sm = CodingStateMachine(BIG5_SM_MODEL) 38 | self.distribution_analyzer = Big5DistributionAnalysis() 39 | self.reset() 40 | 41 | @property 42 | def charset_name(self): 43 | return "Big5" 44 | 45 | @property 46 | def language(self): 47 | return "Chinese" 48 | -------------------------------------------------------------------------------- /idna/intranges.py: -------------------------------------------------------------------------------- 1 | """ 2 | Given a list of integers, made up of (hopefully) a small number of long runs 3 | of consecutive integers, compute a representation 
of the form 4 | ((start1, end1), (start2, end2) ...). Then answer the question "was x present 5 | in the original list?" in time O(log(# runs)). 6 | """ 7 | 8 | import bisect 9 | 10 | def intranges_from_list(list_): 11 | """Represent a list of integers as a sequence of ranges: 12 | ((start_0, end_0), (start_1, end_1), ...), such that the original 13 | integers are exactly those x such that start_i <= x < end_i for some i. 14 | 15 | Ranges are encoded as single integers (start << 32 | end), not as tuples. 16 | """ 17 | 18 | sorted_list = sorted(list_) 19 | ranges = [] 20 | last_write = -1 21 | for i in range(len(sorted_list)): 22 | if i+1 < len(sorted_list): 23 | if sorted_list[i] == sorted_list[i+1]-1: 24 | continue 25 | current_range = sorted_list[last_write+1:i+1] 26 | ranges.append(_encode_range(current_range[0], current_range[-1] + 1)) 27 | last_write = i 28 | 29 | return tuple(ranges) 30 | 31 | def _encode_range(start, end): 32 | return (start << 32) | end 33 | 34 | def _decode_range(r): 35 | return (r >> 32), (r & ((1 << 32) - 1)) 36 | 37 | 38 | def intranges_contain(int_, ranges): 39 | """Determine if `int_` falls into one of the ranges in `ranges`.""" 40 | tuple_ = _encode_range(int_, 0) 41 | pos = bisect.bisect_left(ranges, tuple_) 42 | # we could be immediately ahead of a tuple (start, end) 43 | # with start < int_ <= end 44 | if pos > 0: 45 | left, right = _decode_range(ranges[pos-1]) 46 | if left <= int_ < right: 47 | return True 48 | # or we could be immediately behind a tuple (int_, end) 49 | if pos < len(ranges): 50 | left, _ = _decode_range(ranges[pos]) 51 | if left == int_: 52 | return True 53 | return False 54 | -------------------------------------------------------------------------------- /chardet/enums.py: -------------------------------------------------------------------------------- 1 | """ 2 | All of the Enums that are used throughout the chardet package. 
class InputState(object):
    """
    This enum represents the different states a universal detector can be in.
    """
    PURE_ASCII = 0
    ESC_ASCII = 1
    HIGH_BYTE = 2


class LanguageFilter(object):
    """
    This enum represents the different language filters we can apply to a
    ``UniversalDetector``.
    """
    # Individual bit flags; combine them with ``|``.
    CHINESE_SIMPLIFIED = 0x01
    CHINESE_TRADITIONAL = 0x02
    JAPANESE = 0x04
    KOREAN = 0x08
    NON_CJK = 0x10
    ALL = 0x1F
    # Convenience unions of the flags above.
    CHINESE = CHINESE_SIMPLIFIED | CHINESE_TRADITIONAL
    CJK = CHINESE | JAPANESE | KOREAN


class ProbingState(object):
    """
    This enum represents the different states a prober can be in.
    """
    DETECTING = 0
    FOUND_IT = 1
    NOT_ME = 2


class MachineState(object):
    """
    This enum represents the different states a state machine can be in.
    """
    START = 0
    ERROR = 1
    ITS_ME = 2


class SequenceLikelihood(object):
    """
    This enum represents the likelihood of a character following the previous one.
    """
    NEGATIVE = 0
    UNLIKELY = 1
    LIKELY = 2
    POSITIVE = 3

    @classmethod
    def get_num_categories(cls):
        """:returns: The number of likelihood categories in the enum."""
        # POSITIVE is the highest value, so there are POSITIVE + 1 categories.
        return 4


class CharacterCategory(object):
    """
    This enum represents the different categories language models for
    ``SingleByteCharsetProber`` put characters into.

    Anything less than CONTROL is considered a letter.
    """
    UNDEFINED = 255
    LINE_BREAK = 254
    SYMBOL = 253
    DIGIT = 252
    CONTROL = 251
class CP949Prober(MultiByteCharSetProber):
    """Prober for CP949-encoded (Korean) byte streams.

    Uses the CP949 coding state machine combined with the EUC-KR
    distribution analyser.
    """

    def __init__(self):
        super(CP949Prober, self).__init__()
        # NOTE: CP949 is a superset of EUC-KR, so the distribution should be
        # not different.
        self.distribution_analyzer = EUCKRDistributionAnalysis()
        self.coding_sm = CodingStateMachine(CP949_SM_MODEL)
        self.reset()

    @property
    def charset_name(self):
        return "CP949"

    @property
    def language(self):
        return "Korean"
26 | is_py3 = (_ver[0] == 3) 27 | 28 | try: 29 | import simplejson as json 30 | except ImportError: 31 | import json 32 | 33 | # --------- 34 | # Specifics 35 | # --------- 36 | 37 | if is_py2: 38 | from urllib import ( 39 | quote, unquote, quote_plus, unquote_plus, urlencode, getproxies, 40 | proxy_bypass, proxy_bypass_environment, getproxies_environment) 41 | from urlparse import urlparse, urlunparse, urljoin, urlsplit, urldefrag 42 | from urllib2 import parse_http_list 43 | import cookielib 44 | from Cookie import Morsel 45 | from StringIO import StringIO 46 | # Keep OrderedDict for backwards compatibility. 47 | from collections import Callable, Mapping, MutableMapping, OrderedDict 48 | 49 | 50 | builtin_str = str 51 | bytes = str 52 | str = unicode 53 | basestring = basestring 54 | numeric_types = (int, long, float) 55 | integer_types = (int, long) 56 | 57 | elif is_py3: 58 | from urllib.parse import urlparse, urlunparse, urljoin, urlsplit, urlencode, quote, unquote, quote_plus, unquote_plus, urldefrag 59 | from urllib.request import parse_http_list, getproxies, proxy_bypass, proxy_bypass_environment, getproxies_environment 60 | from http import cookiejar as cookielib 61 | from http.cookies import Morsel 62 | from io import StringIO 63 | # Keep OrderedDict for backwards compatibility. 64 | from collections import OrderedDict 65 | from collections.abc import Callable, Mapping, MutableMapping 66 | 67 | builtin_str = str 68 | str = str 69 | bytes = bytes 70 | basestring = (str, bytes) 71 | numeric_types = (int, float) 72 | integer_types = (int,) 73 | -------------------------------------------------------------------------------- /chardet/mbcsgroupprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 
class MBCSGroupProber(CharSetGroupProber):
    """Group prober bundling every supported multi-byte charset prober.

    The group machinery in ``CharSetGroupProber`` feeds input to each child
    prober and keeps the most confident answer.
    """

    def __init__(self, lang_filter=None):
        super(MBCSGroupProber, self).__init__(lang_filter=lang_filter)
        # One instance of each multi-byte prober, examined together.
        self.probers = [
            UTF8Prober(),
            SJISProber(),
            EUCJPProber(),
            GB2312Prober(),
            EUCKRProber(),
            CP949Prober(),
            Big5Prober(),
            EUCTWProber(),
        ]
        self.reset()
class StringMatcher:
    """A SequenceMatcher-like class built on the top of Levenshtein.

    Mirrors the ``difflib.SequenceMatcher`` API (junk handling excluded);
    the expensive computations are delegated to the ``Levenshtein``
    extension functions.  Results are cached until a sequence changes.
    """

    def _reset_cache(self):
        # ``None`` marks "not computed yet".  Legitimate cached results can
        # be falsy (distance 0, ratio 0.0, empty opcode list), so the cache
        # checks below must use ``is None`` rather than truthiness.
        self._ratio = self._distance = None
        self._opcodes = self._editops = self._matching_blocks = None

    def __init__(self, isjunk=None, seq1='', seq2=''):
        if isjunk:
            warn("isjunk not NOT implemented, it will be ignored")
        self._str1, self._str2 = seq1, seq2
        self._reset_cache()

    def set_seqs(self, seq1, seq2):
        self._str1, self._str2 = seq1, seq2
        self._reset_cache()

    def set_seq1(self, seq1):
        self._str1 = seq1
        self._reset_cache()

    def set_seq2(self, seq2):
        self._str2 = seq2
        self._reset_cache()

    def get_opcodes(self):
        if self._opcodes is None:
            # Reuse cached editops when present instead of recomputing from
            # the raw strings.
            if self._editops:
                self._opcodes = opcodes(self._editops, self._str1, self._str2)
            else:
                self._opcodes = opcodes(self._str1, self._str2)
        return self._opcodes

    def get_editops(self):
        if self._editops is None:
            if self._opcodes:
                self._editops = editops(self._opcodes, self._str1, self._str2)
            else:
                self._editops = editops(self._str1, self._str2)
        return self._editops

    def get_matching_blocks(self):
        if self._matching_blocks is None:
            self._matching_blocks = matching_blocks(self.get_opcodes(),
                                                    self._str1, self._str2)
        return self._matching_blocks

    def ratio(self):
        if self._ratio is None:
            self._ratio = ratio(self._str1, self._str2)
        return self._ratio

    def quick_ratio(self):
        # This is usually quick enough :o)
        if self._ratio is None:
            self._ratio = ratio(self._str1, self._str2)
        return self._ratio

    def real_quick_ratio(self):
        len1, len2 = len(self._str1), len(self._str2)
        if not len1 and not len2:
            # BUGFIX: the original divided by (len1 + len2) unconditionally
            # and raised ZeroDivisionError for two empty sequences.
            # difflib.SequenceMatcher treats two empty sequences as a
            # perfect match (ratio 1.0), so we do the same.
            return 1.0
        return 2.0 * min(len1, len2) / (len1 + len2)

    def distance(self):
        if self._distance is None:
            self._distance = distance(self._str1, self._str2)
        return self._distance
# Entry 3 of the codec info tuple is the utf-8 StreamWriter class; it is used
# to write text through a bytes buffer.
writer = codecs.lookup("utf-8")[3]


def choose_boundary():
    """
    Our embarrassingly-simple replacement for mimetools.choose_boundary.
    """
    # 16 random bytes -> 32 hex characters; collision odds are negligible.
    token = binascii.hexlify(os.urandom(16))
    if not six.PY2:
        token = token.decode("ascii")
    return token


def iter_field_objects(fields):
    """
    Iterate over fields.

    Supports list of (k, v) tuples and dicts, and lists of
    :class:`~urllib3.fields.RequestField`.

    """
    if isinstance(fields, dict):
        source = six.iteritems(fields)
    else:
        source = iter(fields)

    for field in source:
        # Pass RequestField instances through untouched; wrap plain tuples.
        if isinstance(field, RequestField):
            yield field
        else:
            yield RequestField.from_tuples(*field)
def is_fp_closed(obj):
    """
    Checks whether a given file-like object is closed.

    :param obj:
        The file-like object to check.
    """
    # File-like objects do not share a single "am I closed?" API, so probe
    # the known variants in order and return the first answer available.
    probes = (
        # Check `isclosed()` first, in case Python3 doesn't set `closed`.
        # GH Issue #928
        lambda fp: fp.isclosed(),
        # The official file-like-object way.
        lambda fp: fp.closed,
        # Containers holding another file-like object that gets released on
        # exhaustion (e.g. HTTPResponse).
        lambda fp: fp.fp is None,
    )
    for probe in probes:
        try:
            return probe(obj)
        except AttributeError:
            continue
    raise ValueError("Unable to determine whether fp is closed.")


def assert_header_parsing(headers):
    """
    Asserts whether all headers have been successfully parsed.
    Extracts encountered errors from the result of parsing headers.

    Only works on Python 3.

    :param headers: Headers to verify.
    :type headers: `httplib.HTTPMessage`.

    :raises urllib3.exceptions.HeaderParsingError:
        If parsing errors are found.
    """
    # This will fail silently if we pass in the wrong kind of parameter.
    # To make debugging easier add an explicit check.
    if not isinstance(headers, httplib.HTTPMessage):
        raise TypeError("expected httplib.Message, got {0}.".format(type(headers)))

    defects = getattr(headers, "defects", None)
    get_payload = getattr(headers, "get_payload", None)

    unparsed_data = None
    # get_payload is actually email.message.Message.get_payload; we're only
    # interested in the result if it's not a multipart message.
    if get_payload and not headers.is_multipart():
        payload = get_payload()
        if isinstance(payload, (bytes, str)):
            unparsed_data = payload

    if defects or unparsed_data:
        raise HeaderParsingError(defects=defects, unparsed_data=unparsed_data)
83 | method = response._method 84 | if isinstance(method, int): # Platform-specific: Appengine 85 | return method == 3 86 | return method.upper() == "HEAD" 87 | -------------------------------------------------------------------------------- /fuzzywuzzy/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | import sys 3 | import functools 4 | 5 | from fuzzywuzzy.string_processing import StringProcessor 6 | 7 | 8 | PY3 = sys.version_info[0] == 3 9 | 10 | 11 | def validate_string(s): 12 | """ 13 | Check input has length and that length > 0 14 | 15 | :param s: 16 | :return: True if len(s) > 0 else False 17 | """ 18 | try: 19 | return len(s) > 0 20 | except TypeError: 21 | return False 22 | 23 | 24 | def check_for_none(func): 25 | @functools.wraps(func) 26 | def decorator(*args, **kwargs): 27 | if args[0] is None or args[1] is None: 28 | return 0 29 | return func(*args, **kwargs) 30 | return decorator 31 | 32 | 33 | def check_empty_string(func): 34 | @functools.wraps(func) 35 | def decorator(*args, **kwargs): 36 | if len(args[0]) == 0 or len(args[1]) == 0: 37 | return 0 38 | return func(*args, **kwargs) 39 | return decorator 40 | 41 | 42 | bad_chars = str("").join([chr(i) for i in range(128, 256)]) # ascii dammit! 
43 | if PY3: 44 | translation_table = dict((ord(c), None) for c in bad_chars) 45 | unicode = str 46 | 47 | 48 | def asciionly(s): 49 | if PY3: 50 | return s.translate(translation_table) 51 | else: 52 | return s.translate(None, bad_chars) 53 | 54 | 55 | def asciidammit(s): 56 | if type(s) is str: 57 | return asciionly(s) 58 | elif type(s) is unicode: 59 | return asciionly(s.encode('ascii', 'ignore')) 60 | else: 61 | return asciidammit(unicode(s)) 62 | 63 | 64 | def make_type_consistent(s1, s2): 65 | """If both objects aren't either both string or unicode instances force them to unicode""" 66 | if isinstance(s1, str) and isinstance(s2, str): 67 | return s1, s2 68 | 69 | elif isinstance(s1, unicode) and isinstance(s2, unicode): 70 | return s1, s2 71 | 72 | else: 73 | return unicode(s1), unicode(s2) 74 | 75 | 76 | def full_process(s, force_ascii=False): 77 | """Process string by 78 | -- removing all but letters and numbers 79 | -- trim whitespace 80 | -- force to lower case 81 | if force_ascii == True, force convert to ascii""" 82 | 83 | if s is None: 84 | return "" 85 | 86 | if force_ascii: 87 | s = asciidammit(s) 88 | # Keep only Letters and Numbers (see Unicode docs). 89 | string_out = StringProcessor.replace_non_letters_non_numbers_with_whitespace(s) 90 | # Force into lowercase. 91 | string_out = StringProcessor.to_lower_case(string_out) 92 | # Remove leading and trailing whitespaces. 93 | string_out = StringProcessor.strip(string_out) 94 | return string_out 95 | 96 | 97 | def intr(n): 98 | '''Returns a correctly rounded integer''' 99 | return int(round(n)) 100 | -------------------------------------------------------------------------------- /urllib3/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | urllib3 - Thread-safe connection pooling and re-using. 
3 | """ 4 | from __future__ import absolute_import 5 | import warnings 6 | 7 | from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, connection_from_url 8 | 9 | from . import exceptions 10 | from .filepost import encode_multipart_formdata 11 | from .poolmanager import PoolManager, ProxyManager, proxy_from_url 12 | from .response import HTTPResponse 13 | from .util.request import make_headers 14 | from .util.url import get_host 15 | from .util.timeout import Timeout 16 | from .util.retry import Retry 17 | 18 | 19 | # Set default logging handler to avoid "No handler found" warnings. 20 | import logging 21 | from logging import NullHandler 22 | 23 | __author__ = "Andrey Petrov (andrey.petrov@shazow.net)" 24 | __license__ = "MIT" 25 | __version__ = "1.25.9" 26 | 27 | __all__ = ( 28 | "HTTPConnectionPool", 29 | "HTTPSConnectionPool", 30 | "PoolManager", 31 | "ProxyManager", 32 | "HTTPResponse", 33 | "Retry", 34 | "Timeout", 35 | "add_stderr_logger", 36 | "connection_from_url", 37 | "disable_warnings", 38 | "encode_multipart_formdata", 39 | "get_host", 40 | "make_headers", 41 | "proxy_from_url", 42 | ) 43 | 44 | logging.getLogger(__name__).addHandler(NullHandler()) 45 | 46 | 47 | def add_stderr_logger(level=logging.DEBUG): 48 | """ 49 | Helper for quickly adding a StreamHandler to the logger. Useful for 50 | debugging. 51 | 52 | Returns the handler after adding it. 53 | """ 54 | # This method needs to be in this __init__.py to get the __name__ correct 55 | # even if urllib3 is vendored within another package. 56 | logger = logging.getLogger(__name__) 57 | handler = logging.StreamHandler() 58 | handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s")) 59 | logger.addHandler(handler) 60 | logger.setLevel(level) 61 | logger.debug("Added a stderr logging handler to logger: %s", __name__) 62 | return handler 63 | 64 | 65 | # ... Clean up. 
def description_of(lines, name='stdin'):
    """
    Return a string describing the probable encoding of a file or
    list of strings.

    :param lines: The lines to get the encoding of.
    :type lines: Iterable of bytes
    :param name: Name of file or collection of lines
    :type name: str
    """
    detector = UniversalDetector()
    for raw_line in lines:
        detector.feed(bytearray(raw_line))
        # shortcut out of the loop to save reading further - particularly
        # useful if we read a BOM.
        if detector.done:
            break
    detector.close()
    result = detector.result
    if PY2:
        name = name.decode(sys.getfilesystemencoding(), 'ignore')
    if result['encoding']:
        return '{0}: {1} with confidence {2}'.format(name, result['encoding'],
                                                     result['confidence'])
    return '{0}: no result'.format(name)


def main(argv=None):
    """
    Handles command line arguments and gets things started.

    :param argv: List of arguments, as if specified on the command-line.
                 If None, ``sys.argv[1:]`` is used instead.
    :type argv: list of str
    """
    # Get command line arguments
    parser = argparse.ArgumentParser(
        description="Takes one or more file paths and reports their detected \
                     encodings")
    parser.add_argument('input',
                        help='File whose encoding we would like to determine. \
                              (default: stdin)',
                        type=argparse.FileType('rb'), nargs='*',
                        default=[sys.stdin if PY2 else sys.stdin.buffer])
    parser.add_argument('--version', action='version',
                        version='%(prog)s {0}'.format(__version__))
    args = parser.parse_args(argv)

    for infile in args.input:
        if infile.isatty():
            print("You are running chardetect interactively. Press " +
                  "CTRL-D twice at the start of a blank line to signal the " +
                  "end of your input. If you want help, run chardetect " +
                  "--help\n", file=sys.stderr)
        print(description_of(infile, infile.name))


if __name__ == '__main__':
    main()
class UTF8Prober(CharSetProber):
    """Detects UTF-8 by running every byte through the UTF-8 state machine."""

    # Per-character weight used when folding multi-byte character counts
    # into the confidence estimate.
    ONE_CHAR_PROB = 0.5

    def __init__(self):
        super(UTF8Prober, self).__init__()
        self.coding_sm = CodingStateMachine(UTF8_SM_MODEL)
        self._num_mb_chars = None
        self.reset()

    def reset(self):
        super(UTF8Prober, self).reset()
        self.coding_sm.reset()
        self._num_mb_chars = 0

    @property
    def charset_name(self):
        return "utf-8"

    @property
    def language(self):
        # UTF-8 is not tied to any single language.
        return ""

    def feed(self, byte_str):
        for byte in byte_str:
            coding_state = self.coding_sm.next_state(byte)
            if coding_state == MachineState.ERROR:
                # Invalid UTF-8 sequence: rule this encoding out.
                self._state = ProbingState.NOT_ME
                break
            if coding_state == MachineState.ITS_ME:
                self._state = ProbingState.FOUND_IT
                break
            if coding_state == MachineState.START:
                # A full character was consumed; count it if multi-byte.
                if self.coding_sm.get_current_charlen() >= 2:
                    self._num_mb_chars += 1

        if self.state == ProbingState.DETECTING:
            if self.get_confidence() > self.SHORTCUT_THRESHOLD:
                self._state = ProbingState.FOUND_IT

        return self.state

    def get_confidence(self):
        unlike = 0.99
        if self._num_mb_chars < 6:
            # Each observed multi-byte char halves the "unlikeliness".
            unlike *= self.ONE_CHAR_PROB ** self._num_mb_chars
            return 1.0 - unlike
        return unlike
class CaseInsensitiveDict(MutableMapping):
    """A case-insensitive ``dict``-like object.

    Implements all methods and operations of
    ``MutableMapping`` as well as dict's ``copy``. Also
    provides ``lower_items``.

    All keys are expected to be strings. The structure remembers the
    case of the last key to be set, and ``iter(instance)``,
    ``keys()``, ``items()``, ``iterkeys()``, and ``iteritems()``
    will contain case-sensitive keys. However, querying and contains
    testing is case insensitive::

        cid = CaseInsensitiveDict()
        cid['Accept'] = 'application/json'
        cid['aCCEPT'] == 'application/json'  # True
        list(cid) == ['Accept']  # True

    For example, ``headers['content-encoding']`` will return the
    value of a ``'Content-Encoding'`` response header, regardless
    of how the header name was originally stored.

    If the constructor, ``.update``, or equality comparison
    operations are given keys that have equal ``.lower()``s, the
    behavior is undefined.
    """

    def __init__(self, data=None, **kwargs):
        self._store = OrderedDict()
        if data is None:
            data = {}
        self.update(data, **kwargs)

    def __setitem__(self, key, value):
        # Use the lowercased key for lookups, but store the actual
        # key alongside the value.
        self._store[key.lower()] = (key, value)

    def __getitem__(self, key):
        return self._store[key.lower()][1]

    def __delitem__(self, key):
        del self._store[key.lower()]

    def __iter__(self):
        # Yield the originally-cased keys, preserving insertion order.
        return (casedkey for casedkey, mappedvalue in self._store.values())

    def __len__(self):
        return len(self._store)

    def lower_items(self):
        """Like iteritems(), but with all lowercase keys."""
        return (
            (lowerkey, keyval[1])
            for (lowerkey, keyval)
            in self._store.items()
        )

    def __eq__(self, other):
        if isinstance(other, Mapping):
            other = CaseInsensitiveDict(other)
        else:
            return NotImplemented
        # Compare insensitively
        return dict(self.lower_items()) == dict(other.lower_items())

    # Copy is required
    def copy(self):
        return CaseInsensitiveDict(self._store.values())

    def __repr__(self):
        return str(dict(self.items()))


class LookupDict(dict):
    """Dictionary lookup object.

    Lookups go through the instance ``__dict__`` (attributes), not the
    underlying dict storage, and missing keys return ``None`` instead of
    raising ``KeyError``.
    """

    def __init__(self, name=None):
        self.name = name
        super(LookupDict, self).__init__()

    def __repr__(self):
        # BUGFIX: the repr format string had been garbled to '' (always
        # producing an empty repr); restore the upstream requests form.
        return '<lookup \'%s\'>' % (self.name)

    def __getitem__(self, key):
        # We allow fall-through here, so values default to None
        return self.__dict__.get(key, None)

    def get(self, key, default=None):
        return self.__dict__.get(key, default)
# All Rights Reserved.
#
# Contributor(s):
#   Mark Pilgrim - port to Python
#   Shy Shalom - original C code
#   Proofpoint, Inc.
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301  USA
######################### END LICENSE BLOCK #########################

from .charsetprober import CharSetProber
from .enums import ProbingState, MachineState


class MultiByteCharSetProber(CharSetProber):
    """
    MultiByteCharSetProber

    Base class for multi-byte encoding probers.  Subclasses install a
    concrete ``coding_sm`` (byte-sequence validity state machine) and
    ``distribution_analyzer`` (character frequency model) for one encoding.
    """

    def __init__(self, lang_filter=None):
        super(MultiByteCharSetProber, self).__init__(lang_filter=lang_filter)
        # Installed by subclasses before feed() is called.
        self.distribution_analyzer = None
        self.coding_sm = None
        # Two-byte window carried across feed() calls, so a character that
        # straddles two input buffers can still be analyzed.
        self._last_char = [0, 0]

    def reset(self):
        """Restart detection, resetting the state machine and analyzer."""
        super(MultiByteCharSetProber, self).reset()
        if self.coding_sm:
            self.coding_sm.reset()
        if self.distribution_analyzer:
            self.distribution_analyzer.reset()
        self._last_char = [0, 0]

    @property
    def charset_name(self):
        raise NotImplementedError

    @property
    def language(self):
        raise NotImplementedError

    def feed(self, byte_str):
        """Run *byte_str* through the state machine and frequency analyzer.

        Returns the resulting ProbingState.
        """
        for i in range(len(byte_str)):
            coding_state = self.coding_sm.next_state(byte_str[i])
            if coding_state == MachineState.ERROR:
                # Illegal byte sequence for this encoding: rule it out.
                self.logger.debug('%s %s prober hit error at byte %s',
                                  self.charset_name, self.language, i)
                self._state = ProbingState.NOT_ME
                break
            elif coding_state == MachineState.ITS_ME:
                self._state = ProbingState.FOUND_IT
                break
            elif coding_state == MachineState.START:
                # A complete character just ended; feed its final two bytes
                # to the distribution analyzer.
                char_len = self.coding_sm.get_current_charlen()
                if i == 0:
                    # Character may have begun in the previous buffer; use
                    # the saved last byte plus the first byte seen here.
                    self._last_char[1] = byte_str[0]
                    self.distribution_analyzer.feed(self._last_char, char_len)
                else:
                    self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
                                                    char_len)

        # Remember the final byte for the next feed() call.
        self._last_char[0] = byte_str[-1]

        if self.state == ProbingState.DETECTING:
            if (self.distribution_analyzer.got_enough_data() and
                    (self.get_confidence() > self.SHORTCUT_THRESHOLD)):
                self._state = ProbingState.FOUND_IT

        return self.state

    def get_confidence(self):
        """Delegate confidence to the character distribution analyzer."""
        return self.distribution_analyzer.get_confidence()
--------------------------------------------------------------------------------
/chardet/sbcsgroupprober.py:
--------------------------------------------------------------------------------
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Universal charset detector code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 2001
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
#   Mark Pilgrim - port to Python
#   Shy Shalom - original C code
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301  USA
######################### END LICENSE BLOCK #########################

from .charsetgroupprober import CharSetGroupProber
from .sbcharsetprober import SingleByteCharSetProber
from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel,
                                Latin5CyrillicModel, MacCyrillicModel,
                                Ibm866Model, Ibm855Model)
from .langgreekmodel import Latin7GreekModel, Win1253GreekModel
from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel
# from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel
from .langthaimodel import TIS620ThaiModel
from .langhebrewmodel import Win1255HebrewModel
from .hebrewprober import HebrewProber
from .langturkishmodel import Latin5TurkishModel


class SBCSGroupProber(CharSetGroupProber):
    """Group prober that races every single-byte charset model at once."""

    def __init__(self):
        super(SBCSGroupProber, self).__init__()
        self.probers = [
            SingleByteCharSetProber(Win1251CyrillicModel),
            SingleByteCharSetProber(Koi8rModel),
            SingleByteCharSetProber(Latin5CyrillicModel),
            SingleByteCharSetProber(MacCyrillicModel),
            SingleByteCharSetProber(Ibm866Model),
            SingleByteCharSetProber(Ibm855Model),
            SingleByteCharSetProber(Latin7GreekModel),
            SingleByteCharSetProber(Win1253GreekModel),
            SingleByteCharSetProber(Latin5BulgarianModel),
            SingleByteCharSetProber(Win1251BulgarianModel),
            # TODO: Restore Hungarian encodings (iso-8859-2 and windows-1250)
            #       after we retrain model.
            # SingleByteCharSetProber(Latin2HungarianModel),
            # SingleByteCharSetProber(Win1250HungarianModel),
            SingleByteCharSetProber(TIS620ThaiModel),
            SingleByteCharSetProber(Latin5TurkishModel),
        ]
        # Hebrew needs a dedicated disambiguator: logical vs. visual ordering
        # both use windows-1255, so one HebrewProber arbitrates between the
        # two SingleByteCharSetProber instances below.
        hebrew_prober = HebrewProber()
        logical_hebrew_prober = SingleByteCharSetProber(Win1255HebrewModel,
                                                        False, hebrew_prober)
        visual_hebrew_prober = SingleByteCharSetProber(Win1255HebrewModel, True,
                                                       hebrew_prober)
        hebrew_prober.set_model_probers(logical_hebrew_prober, visual_hebrew_prober)
        self.probers.extend([hebrew_prober, logical_hebrew_prober,
                             visual_hebrew_prober])

        self.reset()
--------------------------------------------------------------------------------
/requests/exceptions.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

"""
requests.exceptions
~~~~~~~~~~~~~~~~~~~

This module contains the set of Requests' exceptions.
"""
from urllib3.exceptions import HTTPError as BaseHTTPError


class RequestException(IOError):
    """There was an ambiguous exception that occurred while handling your
    request.
    """

    def __init__(self, *args, **kwargs):
        """Initialize RequestException with `request` and `response` objects."""
        response = kwargs.pop('response', None)
        self.response = response
        self.request = kwargs.pop('request', None)
        # If only a response was supplied, recover the request that produced
        # it so both ends of the exchange are reachable from the exception.
        if (response is not None and not self.request and
                hasattr(response, 'request')):
            self.request = self.response.request
        super(RequestException, self).__init__(*args, **kwargs)


class HTTPError(RequestException):
    """An HTTP error occurred."""


class ConnectionError(RequestException):
    """A Connection error occurred."""


class ProxyError(ConnectionError):
    """A proxy error occurred."""


class SSLError(ConnectionError):
    """An SSL error occurred."""


class Timeout(RequestException):
    """The request timed out.

    Catching this error will catch both
    :exc:`~requests.exceptions.ConnectTimeout` and
    :exc:`~requests.exceptions.ReadTimeout` errors.
    """


class ConnectTimeout(ConnectionError, Timeout):
    """The request timed out while trying to connect to the remote server.

    Requests that produced this error are safe to retry.
    """


class ReadTimeout(Timeout):
    """The server did not send any data in the allotted amount of time."""


class URLRequired(RequestException):
    """A valid URL is required to make a request."""


class TooManyRedirects(RequestException):
    """Too many redirects."""


class MissingSchema(RequestException, ValueError):
    """The URL schema (e.g. http or https) is missing."""


class InvalidSchema(RequestException, ValueError):
    """See defaults.py for valid schemas."""


class InvalidURL(RequestException, ValueError):
    """The URL provided was somehow invalid."""


class InvalidHeader(RequestException, ValueError):
    """The header value provided was somehow invalid."""


class InvalidProxyURL(InvalidURL):
    """The proxy URL provided is invalid."""


class ChunkedEncodingError(RequestException):
    """The server declared chunked encoding but sent an invalid chunk."""


class ContentDecodingError(RequestException, BaseHTTPError):
    """Failed to decode response content"""


class StreamConsumedError(RequestException, TypeError):
    """The content for this response was already consumed"""


class RetryError(RequestException):
    """Custom retries logic failed"""


class UnrewindableBodyError(RequestException):
    """Requests encountered an error when trying to rewind a body"""

# Warnings


class RequestsWarning(Warning):
    """Base warning for Requests."""
    pass


class FileModeWarning(RequestsWarning, DeprecationWarning):
    """A file was opened in text mode, but Requests determined its binary length."""
    pass


class RequestsDependencyWarning(RequestsWarning):
    """An imported dependency doesn't match the expected version range."""
    pass
--------------------------------------------------------------------------------
/idna/codec.py:
--------------------------------------------------------------------------------
from .core import encode, decode, alabel, ulabel, IDNAError
import codecs
import re

_unicode_dots_re = re.compile(u'[\u002e\u3002\uff0e\uff61]')

class Codec(codecs.Codec):

    def encode(self, data, errors='strict'):

        if
errors != 'strict':
            raise IDNAError("Unsupported error handling \"{0}\"".format(errors))

        if not data:
            return "", 0

        return encode(data), len(data)

    def decode(self, data, errors='strict'):

        if errors != 'strict':
            raise IDNAError("Unsupported error handling \"{0}\"".format(errors))

        if not data:
            return u"", 0

        return decode(data), len(data)

class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
    def _buffer_encode(self, data, errors, final):
        # Encode as many complete labels as possible; an unfinished trailing
        # label is held back until the next call unless *final* is true.
        if errors != 'strict':
            raise IDNAError("Unsupported error handling \"{0}\"".format(errors))

        if not data:
            return ("", 0)

        labels = _unicode_dots_re.split(data)
        trailing_dot = u''
        if labels:
            if not labels[-1]:
                trailing_dot = '.'
                del labels[-1]
            elif not final:
                # Keep potentially unfinished label until the next call
                del labels[-1]
                if labels:
                    trailing_dot = '.'

        result = []
        size = 0
        for label in labels:
            result.append(alabel(label))
            if size:
                size += 1
            # NOTE(review): size counts the pre-encoding label length, not
            # the A-label length — matches upstream idna 2.x behavior.
            size += len(label)

        # Join with U+002E
        result = ".".join(result) + trailing_dot
        size += len(trailing_dot)
        return (result, size)

class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    def _buffer_decode(self, data, errors, final):
        if errors != 'strict':
            raise IDNAError("Unsupported error handling \"{0}\"".format(errors))

        if not data:
            return (u"", 0)

        # IDNA allows decoding to operate on Unicode strings, too.
        # NOTE(review): `unicode` exists only on Python 2; on Python 3 this
        # branch raises NameError for byte input (fixed upstream in idna 3.x)
        # — confirm which interpreters this vendored copy must support.
        if isinstance(data, unicode):
            labels = _unicode_dots_re.split(data)
        else:
            # Must be ASCII string
            data = str(data)
            unicode(data, "ascii")
            labels = data.split(".")

        trailing_dot = u''
        if labels:
            if not labels[-1]:
                trailing_dot = u'.'
                del labels[-1]
            elif not final:
                # Keep potentially unfinished label until the next call
                del labels[-1]
                if labels:
                    trailing_dot = u'.'

        result = []
        size = 0
        for label in labels:
            result.append(ulabel(label))
            if size:
                size += 1
            size += len(label)

        result = u".".join(result) + trailing_dot
        size += len(trailing_dot)
        return (result, size)


class StreamWriter(Codec, codecs.StreamWriter):
    pass

class StreamReader(Codec, codecs.StreamReader):
    pass

def getregentry():
    # Registration hook for codecs.register: exposes the 'idna' codec.
    return codecs.CodecInfo(
        name='idna',
        encode=Codec().encode,
        decode=Codec().decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamwriter=StreamWriter,
        streamreader=StreamReader,
    )
--------------------------------------------------------------------------------
/chardet/codingstatemachine.py:
--------------------------------------------------------------------------------
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
#   Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301  USA
######################### END LICENSE BLOCK #########################

import logging

from .enums import MachineState


class CodingStateMachine(object):
    """
    A state machine to verify a byte sequence for a particular encoding. For
    each byte the detector receives, it will feed that byte to every active
    state machine available, one byte at a time. The state machine changes its
    state based on its previous state and the byte it receives. There are 3
    states in a state machine that are of interest to an auto-detector:

    START state: This is the state to start with, or a legal byte sequence
                 (i.e. a valid code point) for character has been identified.

    ME state:  This indicates that the state machine identified a byte sequence
               that is specific to the charset it is designed for and that
               there is no other possible encoding which can contain this byte
               sequence. This will to lead to an immediate positive answer for
               the detector.

    ERROR state: This indicates the state machine identified an illegal byte
                 sequence for that encoding. This will lead to an immediate
                 negative answer for this encoding. Detector will exclude this
                 encoding from consideration from here on.
    """
    def __init__(self, sm):
        # sm: a model dict with 'class_table', 'char_len_table',
        # 'class_factor', 'state_table', 'name' and 'language' keys.
        self._model = sm
        self._curr_byte_pos = 0
        self._curr_char_len = 0
        self._curr_state = None
        self.logger = logging.getLogger(__name__)
        self.reset()

    def reset(self):
        """Return the machine to its START state."""
        self._curr_state = MachineState.START

    def next_state(self, c):
        """Feed one byte *c* and return the machine's new state."""
        # for each byte we get its class
        # if it is first byte, we also get byte length
        byte_class = self._model['class_table'][c]
        if self._curr_state == MachineState.START:
            self._curr_byte_pos = 0
            self._curr_char_len = self._model['char_len_table'][byte_class]
        # from byte's class and state_table, we get its next state
        curr_state = (self._curr_state * self._model['class_factor']
                      + byte_class)
        self._curr_state = self._model['state_table'][curr_state]
        self._curr_byte_pos += 1
        return self._curr_state

    def get_current_charlen(self):
        # Byte length of the character currently being assembled.
        return self._curr_char_len

    def get_coding_state_machine(self):
        # Name of the encoding this machine models.
        return self._model['name']

    @property
    def language(self):
        return self._model['language']
--------------------------------------------------------------------------------
/chardet/eucjpprober.py:
--------------------------------------------------------------------------------
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
#   Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301  USA
######################### END LICENSE BLOCK #########################

from .enums import ProbingState, MachineState
from .mbcharsetprober import MultiByteCharSetProber
from .codingstatemachine import CodingStateMachine
from .chardistribution import EUCJPDistributionAnalysis
from .jpcntx import EUCJPContextAnalysis
from .mbcssm import EUCJP_SM_MODEL


class EUCJPProber(MultiByteCharSetProber):
    """EUC-JP prober: combines byte-sequence validity, character frequency,
    and Japanese context analysis."""

    def __init__(self):
        super(EUCJPProber, self).__init__()
        self.coding_sm = CodingStateMachine(EUCJP_SM_MODEL)
        self.distribution_analyzer = EUCJPDistributionAnalysis()
        self.context_analyzer = EUCJPContextAnalysis()
        self.reset()

    def reset(self):
        super(EUCJPProber, self).reset()
        self.context_analyzer.reset()

    @property
    def charset_name(self):
        return "EUC-JP"

    @property
    def language(self):
        return "Japanese"

    def feed(self, byte_str):
        """Feed bytes through the state machine, context and distribution
        analyzers; return the resulting ProbingState."""
        for i in range(len(byte_str)):
            # PY3K: byte_str is a byte array, so byte_str[i] is an int, not a byte
            coding_state = self.coding_sm.next_state(byte_str[i])
            if coding_state == MachineState.ERROR:
                self.logger.debug('%s %s prober hit error at byte %s',
                                  self.charset_name, self.language, i)
                self._state = ProbingState.NOT_ME
                break
            elif coding_state == MachineState.ITS_ME:
                self._state = ProbingState.FOUND_IT
                break
            elif coding_state == MachineState.START:
                # A character just completed; hand its last two bytes to
                # both analyzers (stitching across buffers when i == 0).
                char_len = self.coding_sm.get_current_charlen()
                if i == 0:
                    self._last_char[1] = byte_str[0]
                    self.context_analyzer.feed(self._last_char, char_len)
                    self.distribution_analyzer.feed(self._last_char, char_len)
                else:
                    self.context_analyzer.feed(byte_str[i - 1:i + 1],
                                               char_len)
                    self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
                                                    char_len)

        self._last_char[0] = byte_str[-1]

        if self.state == ProbingState.DETECTING:
            if (self.context_analyzer.got_enough_data() and
                    (self.get_confidence() > self.SHORTCUT_THRESHOLD)):
                self._state = ProbingState.FOUND_IT

        return self.state

    def get_confidence(self):
        # The stronger of the two signals wins.
        context_conf = self.context_analyzer.get_confidence()
        distrib_conf = self.distribution_analyzer.get_confidence()
        return max(context_conf, distrib_conf)
--------------------------------------------------------------------------------
/requests/help.py:
--------------------------------------------------------------------------------
"""Module containing bug report helper(s)."""
from __future__ import print_function

import json
import platform
import sys
import ssl

import idna
import urllib3
import chardet

from . import __version__ as requests_version

try:
    from urllib3.contrib import pyopenssl
except ImportError:
    pyopenssl = None
    OpenSSL = None
    cryptography = None
else:
    import OpenSSL
    import cryptography


def _implementation():
    """Return a dict with the Python implementation and version.

    Provide both the name and the version of the Python implementation
    currently running. For example, on CPython 2.7.5 it will return
    {'name': 'CPython', 'version': '2.7.5'}.

    This function works best on CPython and PyPy: in particular, it probably
    doesn't work for Jython or IronPython.
    Future investigation should be done
    to work out the correct shape of the code for those platforms.
    """
    implementation = platform.python_implementation()

    if implementation == 'CPython':
        implementation_version = platform.python_version()
    elif implementation == 'PyPy':
        # PyPy reports its own version separately from the Python level.
        implementation_version = '%s.%s.%s' % (sys.pypy_version_info.major,
                                               sys.pypy_version_info.minor,
                                               sys.pypy_version_info.micro)
        if sys.pypy_version_info.releaselevel != 'final':
            implementation_version = ''.join([
                implementation_version, sys.pypy_version_info.releaselevel
            ])
    elif implementation == 'Jython':
        implementation_version = platform.python_version()  # Complete Guess
    elif implementation == 'IronPython':
        implementation_version = platform.python_version()  # Complete Guess
    else:
        implementation_version = 'Unknown'

    return {'name': implementation, 'version': implementation_version}


def info():
    """Generate information for a bug report."""
    try:
        platform_info = {
            'system': platform.system(),
            'release': platform.release(),
        }
    except IOError:
        platform_info = {
            'system': 'Unknown',
            'release': 'Unknown',
        }

    implementation_info = _implementation()
    urllib3_info = {'version': urllib3.__version__}
    chardet_info = {'version': chardet.__version__}

    pyopenssl_info = {
        'version': None,
        'openssl_version': '',
    }
    if OpenSSL:
        pyopenssl_info = {
            'version': OpenSSL.__version__,
            'openssl_version': '%x' % OpenSSL.SSL.OPENSSL_VERSION_NUMBER,
        }
    cryptography_info = {
        'version': getattr(cryptography, '__version__', ''),
    }
    idna_info = {
        'version': getattr(idna, '__version__', ''),
    }

    system_ssl = ssl.OPENSSL_VERSION_NUMBER
    system_ssl_info = {
        'version': '%x' % system_ssl if system_ssl is not None else ''
    }

    return {
        'platform': platform_info,
        'implementation': implementation_info,
        'system_ssl': system_ssl_info,
        'using_pyopenssl': pyopenssl is not None,
        'pyOpenSSL': pyopenssl_info,
        'urllib3': urllib3_info,
        'chardet': chardet_info,
        'cryptography': cryptography_info,
        'idna': idna_info,
        'requests': {
            'version': requests_version,
        },
    }


def main():
    """Pretty-print the bug information as JSON."""
    print(json.dumps(info(), sort_keys=True, indent=2))


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/chardet/sjisprober.py:
--------------------------------------------------------------------------------
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
#   Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301  USA
######################### END LICENSE BLOCK #########################

from .mbcharsetprober import MultiByteCharSetProber
from .codingstatemachine import CodingStateMachine
from .chardistribution import SJISDistributionAnalysis
from .jpcntx import SJISContextAnalysis
from .mbcssm import SJIS_SM_MODEL
from .enums import ProbingState, MachineState


class SJISProber(MultiByteCharSetProber):
    """Shift_JIS prober: byte-sequence validity plus Japanese context and
    character-distribution analysis."""

    def __init__(self):
        super(SJISProber, self).__init__()
        self.coding_sm = CodingStateMachine(SJIS_SM_MODEL)
        self.distribution_analyzer = SJISDistributionAnalysis()
        self.context_analyzer = SJISContextAnalysis()
        self.reset()

    def reset(self):
        super(SJISProber, self).reset()
        self.context_analyzer.reset()

    @property
    def charset_name(self):
        # The context analyzer distinguishes SHIFT_JIS from CP932 variants.
        return self.context_analyzer.charset_name

    @property
    def language(self):
        return "Japanese"

    def feed(self, byte_str):
        """Feed bytes through the state machine and both analyzers."""
        for i in range(len(byte_str)):
            coding_state = self.coding_sm.next_state(byte_str[i])
            if coding_state == MachineState.ERROR:
                self.logger.debug('%s %s prober hit error at byte %s',
                                  self.charset_name, self.language, i)
                self._state = ProbingState.NOT_ME
                break
            elif coding_state == MachineState.ITS_ME:
                self._state = ProbingState.FOUND_IT
                break
            elif coding_state == MachineState.START:
                char_len = self.coding_sm.get_current_charlen()
                if i == 0:
                    # Character may straddle the buffer boundary: the context
                    # analyzer gets only the bytes belonging to this char.
                    self._last_char[1] = byte_str[0]
                    self.context_analyzer.feed(self._last_char[2 - char_len:],
                                               char_len)
                    self.distribution_analyzer.feed(self._last_char, char_len)
                else:
                    # Context analyzer wants the first byte(s) of the char;
                    # distribution analyzer wants its final two bytes.
                    self.context_analyzer.feed(byte_str[i + 1 - char_len:i + 3
                                                        - char_len], char_len)
                    self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
                                                    char_len)

        self._last_char[0] = byte_str[-1]

        if self.state == ProbingState.DETECTING:
            if (self.context_analyzer.got_enough_data() and
                    (self.get_confidence() > self.SHORTCUT_THRESHOLD)):
                self._state = ProbingState.FOUND_IT

        return self.state

    def get_confidence(self):
        # The stronger of the two signals wins.
        context_conf = self.context_analyzer.get_confidence()
        distrib_conf = self.distribution_analyzer.get_confidence()
        return max(context_conf, distrib_conf)
--------------------------------------------------------------------------------
/chardet/charsetgroupprober.py:
--------------------------------------------------------------------------------
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Communicator client code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
#   Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301  USA
######################### END LICENSE BLOCK #########################

from .enums import ProbingState
from .charsetprober import CharSetProber


class CharSetGroupProber(CharSetProber):
    """Prober that fans input out to a list of child probers and reports
    the best-scoring one."""

    def __init__(self, lang_filter=None):
        super(CharSetGroupProber, self).__init__(lang_filter=lang_filter)
        # Number of child probers still in the running.
        self._active_num = 0
        self.probers = []
        self._best_guess_prober = None

    def reset(self):
        """Reset every child prober and reactivate all of them."""
        super(CharSetGroupProber, self).reset()
        self._active_num = 0
        for prober in self.probers:
            if prober:
                prober.reset()
                prober.active = True
                self._active_num += 1
        self._best_guess_prober = None

    @property
    def charset_name(self):
        # get_confidence() populates _best_guess_prober as a side effect.
        if not self._best_guess_prober:
            self.get_confidence()
            if not self._best_guess_prober:
                return None
        return self._best_guess_prober.charset_name

    @property
    def language(self):
        if not self._best_guess_prober:
            self.get_confidence()
            if not self._best_guess_prober:
                return None
        return self._best_guess_prober.language

    def feed(self, byte_str):
        """Feed *byte_str* to every active child, deactivating losers and
        short-circuiting when one child is certain."""
        for prober in self.probers:
            if not prober:
                continue
            if not prober.active:
                continue
            state = prober.feed(byte_str)
            if not state:
                continue
            if state == ProbingState.FOUND_IT:
                self._best_guess_prober = prober
                return self.state
            elif state == ProbingState.NOT_ME:
                prober.active = False
                self._active_num -= 1
                # When every child has ruled itself out, the group is out too.
                if self._active_num <= 0:
                    self._state = ProbingState.NOT_ME
                    return self.state
        return self.state

    def get_confidence(self):
        """Return the best child's confidence, remembering which child won."""
        state = self.state
        if state == ProbingState.FOUND_IT:
            return 0.99
        elif state == ProbingState.NOT_ME:
            return 0.01
        best_conf = 0.0
        self._best_guess_prober = None
        for prober in self.probers:
            if not prober:
                continue
            if not prober.active:
                self.logger.debug('%s not active', prober.charset_name)
                continue
            conf = prober.get_confidence()
            self.logger.debug('%s %s confidence = %s', prober.charset_name, prober.language, conf)
            if best_conf < conf:
                best_conf = conf
                self._best_guess_prober = prober
        if not self._best_guess_prober:
            return 0.0
        return best_conf
--------------------------------------------------------------------------------
/chardet/escprober.py:
--------------------------------------------------------------------------------
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
#   Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
class EscCharSetProber(CharSetProber):
    """
    Detects encodings via their distinctive escape/shift sequences ("code
    scheme" approach): each candidate encoding gets a small state machine,
    and the first machine to recognize its signature wins outright.
    """

    def __init__(self, lang_filter=None):
        super(EscCharSetProber, self).__init__(lang_filter=lang_filter)
        self.coding_sm = []
        # Only instantiate the state machines the language filter asks for.
        if self.lang_filter & LanguageFilter.CHINESE_SIMPLIFIED:
            self.coding_sm.append(CodingStateMachine(HZ_SM_MODEL))
            self.coding_sm.append(CodingStateMachine(ISO2022CN_SM_MODEL))
        if self.lang_filter & LanguageFilter.JAPANESE:
            self.coding_sm.append(CodingStateMachine(ISO2022JP_SM_MODEL))
        if self.lang_filter & LanguageFilter.KOREAN:
            self.coding_sm.append(CodingStateMachine(ISO2022KR_SM_MODEL))
        self.active_sm_count = None
        self._detected_charset = None
        self._detected_language = None
        self._state = None
        self.reset()

    def reset(self):
        """Reactivate every state machine and forget any prior detection."""
        super(EscCharSetProber, self).reset()
        for machine in self.coding_sm:
            if not machine:
                continue
            machine.active = True
            machine.reset()
        self.active_sm_count = len(self.coding_sm)
        self._detected_charset = None
        self._detected_language = None

    @property
    def charset_name(self):
        return self._detected_charset

    @property
    def language(self):
        return self._detected_language

    def get_confidence(self):
        # Escape-sequence matches are essentially unambiguous, hence 0.99.
        return 0.99 if self._detected_charset else 0.00

    def feed(self, byte_str):
        """Run each byte through every active machine.

        A machine reaching ERROR is deactivated (NOT_ME once all die); a
        machine reaching ITS_ME decides the charset immediately.
        """
        for byte in byte_str:
            for machine in self.coding_sm:
                if not machine or not machine.active:
                    continue
                coding_state = machine.next_state(byte)
                if coding_state == MachineState.ERROR:
                    machine.active = False
                    self.active_sm_count -= 1
                    if self.active_sm_count <= 0:
                        self._state = ProbingState.NOT_ME
                        return self.state
                elif coding_state == MachineState.ITS_ME:
                    self._state = ProbingState.FOUND_IT
                    self._detected_charset = machine.get_coding_state_machine()
                    self._detected_language = machine.language
                    return self.state

        return self.state
def make_headers(
    keep_alive=None,
    accept_encoding=None,
    user_agent=None,
    basic_auth=None,
    proxy_basic_auth=None,
    disable_cache=None,
):
    """
    Shortcuts for generating request headers.

    :param keep_alive:
        If ``True``, adds 'connection: keep-alive' header.

    :param accept_encoding:
        Can be a boolean, list, or string.
        ``True`` translates to 'gzip,deflate'.
        List will get joined by comma.
        String will be used as provided.

    :param user_agent:
        String representing the user-agent you want, such as
        "python-urllib3/0.6"

    :param basic_auth:
        Colon-separated username:password string for 'authorization: basic ...'
        auth header.

    :param proxy_basic_auth:
        Colon-separated username:password string for 'proxy-authorization: basic ...'
        auth header.

    :param disable_cache:
        If ``True``, adds 'cache-control: no-cache' header.

    Example::

        >>> make_headers(keep_alive=True, user_agent="Batman/1.0")
        {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'}
        >>> make_headers(accept_encoding=True)
        {'accept-encoding': 'gzip,deflate'}
    """
    headers = {}

    if keep_alive:
        headers["connection"] = "keep-alive"

    if accept_encoding:
        # Normalize the three accepted forms (str / list / truthy flag).
        if isinstance(accept_encoding, list):
            encoding_value = ",".join(accept_encoding)
        elif isinstance(accept_encoding, str):
            encoding_value = accept_encoding
        else:
            encoding_value = ACCEPT_ENCODING
        headers["accept-encoding"] = encoding_value

    if user_agent:
        headers["user-agent"] = user_agent

    if basic_auth:
        headers["authorization"] = "Basic " + b64encode(b(basic_auth)).decode("utf-8")

    if proxy_basic_auth:
        headers["proxy-authorization"] = "Basic " + b64encode(
            b(proxy_basic_auth)
        ).decode("utf-8")

    if disable_cache:
        headers["cache-control"] = "no-cache"

    return headers
def set_file_position(body, pos):
    """
    If a position is provided, move file to that point.
    Otherwise, we'll attempt to record a position for future use.
    """
    if pos is not None:
        rewind_body(body, pos)
        return pos

    if getattr(body, "tell", None) is not None:
        try:
            return body.tell()
        except (IOError, OSError):
            # This differentiates from None, allowing us to catch
            # a failed `tell()` later when trying to rewind the body.
            return _FAILEDTELL

    # No position given and no way to record one.
    return pos


def rewind_body(body, body_pos):
    """
    Attempt to rewind body to a certain position.
    Primarily used for request redirects and retries.

    :param body:
        File-like object that supports seek.

    :param int pos:
        Position to seek to in file.
    """
    seek = getattr(body, "seek", None)
    if seek is not None and isinstance(body_pos, integer_types):
        try:
            seek(body_pos)
        except (IOError, OSError):
            raise UnrewindableBodyError(
                "An error occurred when rewinding request body for redirect/retry."
            )
        return

    if body_pos is _FAILEDTELL:
        raise UnrewindableBodyError(
            "Unable to record file position for rewinding "
            "request body during a redirect/retry."
        )

    raise ValueError(
        "body_pos must be of type integer, instead it was %s." % type(body_pos)
    )
def check_compatibility(urllib3_version, chardet_version):
    """Assert that the imported urllib3 and chardet versions are supported.

    Raises AssertionError (or ValueError on unparsable version strings);
    the module-level caller converts either into a
    ``RequestsDependencyWarning``.
    """
    urllib3_version = urllib3_version.split('.')
    assert urllib3_version != ['dev']  # Verify urllib3 isn't installed from git.

    # Sometimes, urllib3 only reports its version as 16.1.
    if len(urllib3_version) == 2:
        urllib3_version.append('0')

    # Check urllib3 for compatibility.
    major, minor, patch = urllib3_version  # noqa: F811
    major, minor, patch = int(major), int(minor), int(patch)
    # urllib3 >= 1.21.1, <= 1.25
    assert major == 1
    assert minor >= 21
    assert minor <= 25

    # Check chardet for compatibility.
    major, minor, patch = chardet_version.split('.')[:3]
    major, minor, patch = int(major), int(minor), int(patch)
    # chardet >= 3.0.2, < 3.1.0
    assert major == 3
    assert minor < 1
    assert patch >= 2


def _check_cryptography(cryptography_version):
    """Warn when an old (< 1.3.4) cryptography package is installed.

    Bug fix: the original rebound ``cryptography_version`` to the parsed
    list of ints before formatting the warning, so the user-facing message
    showed e.g. "[1, 2, 3]" instead of "1.2.3". Keep the original string
    for display and parse into a separate variable.
    """
    try:
        parsed_version = list(map(int, cryptography_version.split('.')))
    except ValueError:
        # Non-numeric component (e.g. a dev build): skip the check entirely.
        return

    if parsed_version < [1, 3, 4]:
        warning = 'Old version of cryptography ({}) may cause slowdown.'.format(cryptography_version)
        warnings.warn(warning, RequestsDependencyWarning)
# Check imported dependencies for compatibility; an unsupported combination
# produces a warning rather than a hard failure.
try:
    check_compatibility(urllib3.__version__, chardet.__version__)
except (AssertionError, ValueError):
    warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
                  "version!".format(urllib3.__version__, chardet.__version__),
                  RequestsDependencyWarning)

# Attempt to enable urllib3's SNI support, if possible.
# ImportError means pyopenssl/cryptography are absent, which is fine.
try:
    from urllib3.contrib import pyopenssl
    pyopenssl.inject_into_urllib3()

    # Check cryptography version
    from cryptography import __version__ as cryptography_version
    _check_cryptography(cryptography_version)
except ImportError:
    pass

# urllib3's DependencyWarnings should be silenced.
from urllib3.exceptions import DependencyWarning
warnings.simplefilter('ignore', DependencyWarning)

# Re-export package metadata and the public API surface.
from .__version__ import __title__, __description__, __url__, __version__
from .__version__ import __build__, __author__, __author_email__, __license__
from .__version__ import __copyright__, __cake__

from . import utils
from . import packages
from .models import Request, Response, PreparedRequest
from .api import request, get, head, post, patch, put, delete, options
from .sessions import session, Session
from .status_codes import codes
from .exceptions import (
    RequestException, Timeout, URLRequired,
    TooManyRedirects, HTTPError, ConnectionError,
    FileModeWarning, ConnectTimeout, ReadTimeout
)

# Set default logging handler to avoid "No handler found" warnings.
import logging
from logging import NullHandler

logging.getLogger(__name__).addHandler(NullHandler())

# FileModeWarnings go off per the default.
class NTLMConnectionPool(HTTPSConnectionPool):
    """
    Implements an NTLM authentication version of an urllib3 connection pool
    """

    scheme = "https"

    def __init__(self, user, pw, authurl, *args, **kwargs):
        """
        authurl is a random URL on the server that is protected by NTLM.
        user is the Windows user, probably in the DOMAIN\\username format.
        pw is the password for the user.
        """
        super(NTLMConnectionPool, self).__init__(*args, **kwargs)
        self.authurl = authurl
        self.rawuser = user
        # Split "DOMAIN\\user" once; domain is upper-cased per NTLM convention.
        user_parts = user.split("\\", 1)
        self.domain = user_parts[0].upper()
        self.user = user_parts[1]
        self.pw = pw

    def _new_conn(self):
        # Performs the NTLM handshake that secures the connection. The socket
        # must be kept open while requests are performed.
        # Three-step exchange: negotiate -> server challenge -> authenticate.
        self.num_connections += 1
        log.debug(
            "Starting NTLM HTTPS connection no. %d: https://%s%s",
            self.num_connections,
            self.host,
            self.authurl,
        )

        headers = {"Connection": "Keep-Alive"}
        req_header = "Authorization"
        # NOTE(review): assumes getheaders() yields this key in lowercase —
        # verify against the http client implementation in use.
        resp_header = "www-authenticate"

        conn = HTTPSConnection(host=self.host, port=self.port)

        # Send negotiation message
        headers[req_header] = "NTLM %s" % ntlm.create_NTLM_NEGOTIATE_MESSAGE(
            self.rawuser
        )
        log.debug("Request headers: %s", headers)
        conn.request("GET", self.authurl, None, headers)
        res = conn.getresponse()
        reshdr = dict(res.getheaders())
        log.debug("Response status: %s %s", res.status, res.reason)
        log.debug("Response headers: %s", reshdr)
        log.debug("Response data: %s [...]", res.read(100))

        # Remove the reference to the socket, so that it can not be closed by
        # the response object (we want to keep the socket open)
        res.fp = None

        # Server should respond with a challenge message
        auth_header_values = reshdr[resp_header].split(", ")
        auth_header_value = None
        # Last matching "NTLM ..." value wins if several are present.
        for s in auth_header_values:
            if s[:5] == "NTLM ":
                auth_header_value = s[5:]
        if auth_header_value is None:
            raise Exception(
                "Unexpected %s response header: %s" % (resp_header, reshdr[resp_header])
            )

        # Send authentication message
        ServerChallenge, NegotiateFlags = ntlm.parse_NTLM_CHALLENGE_MESSAGE(
            auth_header_value
        )
        auth_msg = ntlm.create_NTLM_AUTHENTICATE_MESSAGE(
            ServerChallenge, self.user, self.domain, self.pw, NegotiateFlags
        )
        headers[req_header] = "NTLM %s" % auth_msg
        log.debug("Request headers: %s", headers)
        conn.request("GET", self.authurl, None, headers)
        res = conn.getresponse()
        log.debug("Response status: %s %s", res.status, res.reason)
        log.debug("Response headers: %s", dict(res.getheaders()))
        log.debug("Response data: %s [...]", res.read()[:100])
        if res.status != 200:
            if res.status == 401:
                raise Exception("Server rejected request: wrong username or password")
            raise Exception("Wrong server response: %s %s" % (res.status, res.reason))

        # Again keep the underlying socket alive past this response object.
        res.fp = None
        log.debug("Connection established")
        return conn

    def urlopen(
        self,
        method,
        url,
        body=None,
        headers=None,
        retries=3,
        redirect=True,
        assert_same_host=True,
    ):
        # Force Keep-Alive on every request: the NTLM-authenticated state is
        # bound to the open socket, so the connection must not be recycled.
        if headers is None:
            headers = {}
        headers["Connection"] = "Keep-Alive"
        return super(NTLMConnectionPool, self).urlopen(
            method, url, body, headers, retries, redirect, assert_same_host
        )
# Mapping of HTTP status codes to their symbolic names; each code may carry
# several aliases (first alias is the canonical one used in docs).
from .structures import LookupDict

_codes = {

    # Informational.
    100: ('continue',),
    101: ('switching_protocols',),
    102: ('processing',),
    103: ('checkpoint',),
    122: ('uri_too_long', 'request_uri_too_long'),
    200: ('ok', 'okay', 'all_ok', 'all_okay', 'all_good', '\\o/', '✓'),
    201: ('created',),
    202: ('accepted',),
    203: ('non_authoritative_info', 'non_authoritative_information'),
    204: ('no_content',),
    205: ('reset_content', 'reset'),
    206: ('partial_content', 'partial'),
    207: ('multi_status', 'multiple_status', 'multi_stati', 'multiple_stati'),
    208: ('already_reported',),
    226: ('im_used',),

    # Redirection.
    300: ('multiple_choices',),
    301: ('moved_permanently', 'moved', '\\o-'),
    302: ('found',),
    303: ('see_other', 'other'),
    304: ('not_modified',),
    305: ('use_proxy',),
    306: ('switch_proxy',),
    307: ('temporary_redirect', 'temporary_moved', 'temporary'),
    308: ('permanent_redirect',
          'resume_incomplete', 'resume',),  # These 2 to be removed in 3.0

    # Client Error.
    400: ('bad_request', 'bad'),
    401: ('unauthorized',),
    402: ('payment_required', 'payment'),
    403: ('forbidden',),
    404: ('not_found', '-o-'),
    405: ('method_not_allowed', 'not_allowed'),
    406: ('not_acceptable',),
    407: ('proxy_authentication_required', 'proxy_auth', 'proxy_authentication'),
    408: ('request_timeout', 'timeout'),
    409: ('conflict',),
    410: ('gone',),
    411: ('length_required',),
    412: ('precondition_failed', 'precondition'),
    413: ('request_entity_too_large',),
    414: ('request_uri_too_large',),
    415: ('unsupported_media_type', 'unsupported_media', 'media_type'),
    416: ('requested_range_not_satisfiable', 'requested_range', 'range_not_satisfiable'),
    417: ('expectation_failed',),
    418: ('im_a_teapot', 'teapot', 'i_am_a_teapot'),
    421: ('misdirected_request',),
    422: ('unprocessable_entity', 'unprocessable'),
    423: ('locked',),
    424: ('failed_dependency', 'dependency'),
    425: ('unordered_collection', 'unordered'),
    426: ('upgrade_required', 'upgrade'),
    428: ('precondition_required', 'precondition'),
    429: ('too_many_requests', 'too_many'),
    431: ('header_fields_too_large', 'fields_too_large'),
    444: ('no_response', 'none'),
    449: ('retry_with', 'retry'),
    450: ('blocked_by_windows_parental_controls', 'parental_controls'),
    451: ('unavailable_for_legal_reasons', 'legal_reasons'),
    499: ('client_closed_request',),

    # Server Error.
    500: ('internal_server_error', 'server_error', '/o\\', '✗'),
    501: ('not_implemented',),
    502: ('bad_gateway',),
    503: ('service_unavailable', 'unavailable'),
    504: ('gateway_timeout',),
    505: ('http_version_not_supported', 'http_version'),
    506: ('variant_also_negotiates',),
    507: ('insufficient_storage',),
    509: ('bandwidth_limit_exceeded', 'bandwidth'),
    510: ('not_extended',),
    511: ('network_authentication_required', 'network_auth', 'network_authentication'),
}

codes = LookupDict(name='status_codes')


def _init():
    # Expose every alias (plus an upper-case twin, except for the
    # punctuation-art aliases) as an attribute on ``codes``.
    for code, aliases in _codes.items():
        for alias in aliases:
            setattr(codes, alias, code)
            if not alias.startswith(('\\', '/')):
                setattr(codes, alias.upper(), code)

    def doc(code):
        names = ', '.join('``%s``' % n for n in _codes[code])
        return '* %d: %s' % (code, names)

    # Append a generated code listing to the module docstring (unless the
    # interpreter stripped docstrings, i.e. __doc__ is None).
    global __doc__
    __doc__ = (__doc__ + '\n' +
               '\n'.join(doc(code) for code in sorted(_codes))
               if __doc__ is not None else None)


_init()
def is_connection_dropped(conn):  # Platform-specific
    """
    Returns True if the connection is dropped and should be closed.

    :param conn:
        :class:`httplib.HTTPConnection` object.

    Note: For platforms like AppEngine, this will always return ``False`` to
    let the platform handle connection recycling transparently for us.
    """
    sock = getattr(conn, "sock", False)
    if sock is False:  # Platform-specific: AppEngine
        return False
    if sock is None:  # Connection already closed (such as by httplib).
        return True
    try:
        # Readable with a zero timeout here means the peer hung up.
        return wait_for_read(sock, timeout=0.0)
    except NoWayToWaitForSocketError:  # Platform-specific: AppEngine
        return False


# This function is copied from socket.py in the Python 2.7 standard
# library test suite. Added to its signature is only `socket_options`.
# One additional modification is that we avoid binding to IPv6 servers
# discovered in DNS if the system doesn't have IPv6 functionality.
def create_connection(
    address,
    timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
    source_address=None,
    socket_options=None,
):
    """Connect to *address* and return the socket object.

    Tries every address returned by ``getaddrinfo`` in order, returning the
    first socket that connects; re-raises the last error if all fail.
    """
    host, port = address
    # Strip brackets from an IPv6 literal such as "[::1]".
    if host.startswith("["):
        host = host.strip("[]")
    last_error = None

    # allowed_gai_family() restricts the lookup to IPv4 when the system has
    # no usable IPv6 support.
    family = allowed_gai_family()

    for info in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
        af, socktype, proto, canonname, sa = info
        candidate = None
        try:
            candidate = socket.socket(af, socktype, proto)

            # If provided, set socket level options before connecting.
            _set_socket_options(candidate, socket_options)

            if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                candidate.settimeout(timeout)
            if source_address:
                candidate.bind(source_address)
            candidate.connect(sa)
            return candidate

        except socket.error as exc:
            last_error = exc
            if candidate is not None:
                candidate.close()
                candidate = None

    if last_error is not None:
        raise last_error

    raise socket.error("getaddrinfo returns an empty list")


def _set_socket_options(sock, options):
    """Apply each ``(level, optname, value)`` tuple via setsockopt."""
    if options is None:
        return
    for opt in options:
        sock.setsockopt(*opt)


def allowed_gai_family():
    """This function is designed to work in the context of
    getaddrinfo, where family=socket.AF_UNSPEC is the default and
    will perform a DNS search for both IPv6 and IPv4 records."""
    return socket.AF_UNSPEC if HAS_IPV6 else socket.AF_INET


def _has_ipv6(host):
    """ Returns True if the system can bind an IPv6 address. """
    probe = None
    result = False

    # App Engine doesn't support IPV6 sockets and actually has a quota on the
    # number of sockets that can be used, so just early out here instead of
    # creating a socket needlessly.
    # See https://github.com/urllib3/urllib3/issues/1446
    if _appengine_environ.is_appengine_sandbox():
        return False

    if socket.has_ipv6:
        # has_ipv6 returns true if cPython was compiled with IPv6 support.
        # It does not tell us if the system has IPv6 support enabled. To
        # determine that we must bind to an IPv6 address.
        # https://github.com/urllib3/urllib3/pull/611
        # https://bugs.python.org/issue658327
        try:
            probe = socket.socket(socket.AF_INET6)
            probe.bind((host, 0))
            result = True
        except Exception:
            pass

    if probe:
        probe.close()
    return result


HAS_IPV6 = _has_ipv6("::1")
class CharSetProber(object):
    """Abstract base for all charset probers.

    Concrete probers implement :meth:`feed` and :meth:`get_confidence`;
    this base also provides shared byte-filtering helpers.
    """

    # A prober whose confidence exceeds this may stop probing early.
    SHORTCUT_THRESHOLD = 0.95

    def __init__(self, lang_filter=None):
        self._state = None
        self.lang_filter = lang_filter
        self.logger = logging.getLogger(__name__)

    def reset(self):
        """Return the prober to its initial DETECTING state."""
        self._state = ProbingState.DETECTING

    @property
    def charset_name(self):
        return None

    def feed(self, buf):
        pass

    @property
    def state(self):
        return self._state

    def get_confidence(self):
        return 0.0

    @staticmethod
    def filter_high_byte_only(buf):
        """Collapse every run of ASCII bytes into a single space."""
        return re.sub(b'([\x00-\x7F])+', b' ', buf)

    @staticmethod
    def filter_international_words(buf):
        """
        We define three types of bytes:
        alphabet: english alphabets [a-zA-Z]
        international: international characters [\x80-\xFF]
        marker: everything else [^a-zA-Z\x80-\xFF]

        The input buffer can be thought to contain a series of words delimited
        by markers. This function works to filter all words that contain at
        least one international character. All contiguous sequences of markers
        are replaced by a single space ascii character.

        This filter applies to all scripts which do not use English characters.
        """
        filtered = bytearray()

        # Keep only words containing at least one international byte; each
        # may drag along a single trailing marker byte.
        for word in re.findall(
                b'[a-zA-Z]*[\x80-\xFF]+[a-zA-Z]*[^a-zA-Z\x80-\xFF]?', buf):
            filtered.extend(word[:-1])
            # Normalize a trailing marker to a space: markers behave alike
            # across languages and must not bias the analysis.
            tail = word[-1:]
            if not tail.isalpha() and tail < b'\x80':
                tail = b' '
            filtered.extend(tail)

        return filtered

    @staticmethod
    def filter_with_english_letters(buf):
        """
        Returns a copy of ``buf`` that retains only the sequences of English
        alphabet and high byte characters that are not between <> characters.
        Also retains English alphabet and high byte characters immediately
        before occurrences of >.

        This filter can be applied to all scripts which contain both English
        characters and extended ASCII characters, but is currently only used by
        ``Latin1Prober``.
        """
        filtered = bytearray()
        in_tag = False
        start = 0

        for idx in range(len(buf)):
            # Slice to get a bytes object (an int under Python 3 otherwise).
            char = buf[idx:idx + 1]
            # Track whether we are inside an HTML-like <...> tag.
            if char == b'>':
                in_tag = False
            elif char == b'<':
                in_tag = True

            # A plain-ASCII non-letter ends the current kept stretch.
            if char < b'\x80' and not char.isalpha():
                if idx > start and not in_tag:
                    # Keep the stretch and delimit it with a space.
                    filtered.extend(buf[start:idx])
                    filtered.extend(b' ')
                start = idx + 1

        # Keep any trailing stretch, unless it sits inside an open tag.
        if not in_tag:
            filtered.extend(buf[start:])

        return filtered
from .charsetprober import CharSetProber
from .enums import ProbingState

# Number of frequency buckets tracked for character-pair likelihoods.
FREQ_CAT_NUM = 4

# Character classes assigned to each Latin-1 byte value.
UDF = 0  # undefined
OTH = 1  # other
ASC = 2  # ascii capital letter
ASS = 3  # ascii small letter
ACV = 4  # accent capital vowel
ACO = 5  # accent capital other
ASV = 6  # accent small vowel
ASO = 7  # accent small other
CLASS_NUM = 8  # total classes

Latin1_CharToClass = (
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 00 - 07
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 08 - 0F
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 10 - 17
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 18 - 1F
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 20 - 27
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 28 - 2F
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 30 - 37
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 38 - 3F
    OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC,  # 40 - 47
    ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,  # 48 - 4F
    ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,  # 50 - 57
    ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH,  # 58 - 5F
    OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS,  # 60 - 67
    ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS,  # 68 - 6F
    ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS,  # 70 - 77
    ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH,  # 78 - 7F
    OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH,  # 80 - 87
    OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF,  # 88 - 8F
    UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 90 - 97
    OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO,  # 98 - 9F
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # A0 - A7
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # A8 - AF
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # B0 - B7
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # B8 - BF
    ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO,  # C0 - C7
    ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV,  # C8 - CF
    ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH,  # D0 - D7
    ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO,  # D8 - DF
    ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO,  # E0 - E7
    ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV,  # E8 - EF
    ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH,  # F0 - F7
    ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO,  # F8 - FF
)

# 0 : illegal
# 1 : very unlikely
# 2 : normal
# 3 : very likely
Latin1ClassModel = (
    # UDF OTH ASC ASS ACV ACO ASV ASO
    0, 0, 0, 0, 0, 0, 0, 0,  # UDF
    0, 3, 3, 3, 3, 3, 3, 3,  # OTH
    0, 3, 3, 3, 3, 3, 3, 3,  # ASC
    0, 3, 3, 3, 1, 1, 3, 3,  # ASS
    0, 3, 3, 3, 1, 2, 1, 2,  # ACV
    0, 3, 3, 3, 3, 3, 3, 3,  # ACO
    0, 3, 1, 3, 1, 1, 1, 3,  # ASV
    0, 3, 1, 3, 1, 1, 3, 3,  # ASO
)


class Latin1Prober(CharSetProber):
    """Scores how plausibly a byte stream is ISO-8859-1 (Latin-1) text."""

    def __init__(self):
        super(Latin1Prober, self).__init__()
        self._last_char_class = None
        self._freq_counter = None
        self.reset()

    def reset(self):
        # Start every pair lookup from the neutral "other" class.
        self._last_char_class = OTH
        self._freq_counter = [0] * FREQ_CAT_NUM
        CharSetProber.reset(self)

    @property
    def charset_name(self):
        return "ISO-8859-1"

    @property
    def language(self):
        return ""

    def feed(self, byte_str):
        """Score character pairs in ``byte_str``; return the probing state."""
        filtered = self.filter_with_english_letters(byte_str)
        for byte in filtered:
            current_class = Latin1_CharToClass[byte]
            likelihood = Latin1ClassModel[self._last_char_class * CLASS_NUM
                                          + current_class]
            if likelihood == 0:
                # An illegal character pair rules Latin-1 out entirely.
                self._state = ProbingState.NOT_ME
                break
            self._freq_counter[likelihood] += 1
            self._last_char_class = current_class

        return self.state

    def get_confidence(self):
        if self.state == ProbingState.NOT_ME:
            return 0.01

        total = sum(self._freq_counter)
        if total < 0.01:
            return 0.0
        confidence = max(
            (self._freq_counter[3] - self._freq_counter[1] * 20.0) / total,
            0.0,
        )
        # Lower the confidence of latin1 so that other, more accurate
        # detectors can take priority.
        return confidence * 0.73
if sys.version_info >= (3, 5):
    # Modern Python retries syscalls interrupted by signals itself
    # (PEP 475), so no extra wrapper logic is needed.
    def _retry_on_intr(fn, timeout):
        return fn(timeout)


else:
    # Old and broken Pythons: retry EINTR by hand while honoring the
    # caller's overall deadline.
    def _retry_on_intr(fn, timeout):
        if timeout is None:
            deadline = float("inf")
        else:
            deadline = monotonic() + timeout

        while True:
            try:
                return fn(timeout)
            # OSError for 3 <= pyver < 3.5, select.error for pyver <= 2.7
            except (OSError, select.error) as e:
                # 'e.args[0]' incantation works for both OSError and select.error
                if e.args[0] != errno.EINTR:
                    raise
                else:
                    timeout = deadline - monotonic()
                    if timeout < 0:
                        timeout = 0
                    if timeout == float("inf"):
                        timeout = None
                    continue


def select_wait_for_socket(sock, read=False, write=False, timeout=None):
    """Wait on ``sock`` using select(); return True if it became ready."""
    if not read and not write:
        raise RuntimeError("must specify at least one of read=True, write=True")
    rcheck = []
    wcheck = []
    if read:
        rcheck.append(sock)
    if write:
        wcheck.append(sock)
    # When doing a non-blocking connect, most systems signal success by
    # marking the socket writable. Windows, though, signals success by
    # marking it as "exceptional". We paper over the difference by checking
    # the write sockets for both conditions. (The stdlib selectors module
    # does the same thing.)
    fn = partial(select.select, rcheck, wcheck, wcheck)
    rready, wready, xready = _retry_on_intr(fn, timeout)
    return bool(rready or wready or xready)


def poll_wait_for_socket(sock, read=False, write=False, timeout=None):
    """Wait on ``sock`` using poll(); return True if it became ready."""
    if not read and not write:
        raise RuntimeError("must specify at least one of read=True, write=True")
    mask = 0
    if read:
        mask |= select.POLLIN
    if write:
        mask |= select.POLLOUT
    poll_obj = select.poll()
    poll_obj.register(sock, mask)

    # For some reason, poll() takes timeout in milliseconds
    def do_poll(t):
        if t is not None:
            t *= 1000
        return poll_obj.poll(t)

    return bool(_retry_on_intr(do_poll, timeout))


def null_wait_for_socket(*args, **kwargs):
    """Fallback used when no select-equivalent exists; always raises."""
    raise NoWayToWaitForSocketError("no select-equivalent available")


def _have_working_poll():
    # Apparently some systems have a select.poll that fails as soon as you try
    # to use it, either due to strange configuration or broken monkeypatching
    # from libraries like eventlet/greenlet.
    try:
        poll_obj = select.poll()
        _retry_on_intr(poll_obj.poll, 0)
    except (AttributeError, OSError):
        return False
    else:
        return True


def wait_for_socket(*args, **kwargs):
    # We delay choosing which implementation to use until the first time we're
    # called. We could do it at import time, but then we might make the wrong
    # decision if someone goes wild with monkeypatching select.poll after
    # we're imported.
    global wait_for_socket
    if _have_working_poll():
        wait_for_socket = poll_wait_for_socket
    elif hasattr(select, "select"):
        wait_for_socket = select_wait_for_socket
    else:  # Platform-specific: Appengine.
        wait_for_socket = null_wait_for_socket
    return wait_for_socket(*args, **kwargs)


def wait_for_read(sock, timeout=None):
    """ Waits for reading to be available on a given socket.
    Returns True if the socket is readable, or False if the timeout expired.
    """
    return wait_for_socket(sock, read=True, timeout=timeout)


def wait_for_write(sock, timeout=None):
    """ Waits for writing to be available on a given socket.
    Returns True if the socket is writable, or False if the timeout expired.
    """
    return wait_for_socket(sock, write=True, timeout=timeout)
class SingleByteCharSetProber(CharSetProber):
    """
    Statistical prober for single-byte encodings.

    Scores a byte stream against a language ``model`` dict containing a
    ``char_to_order_map`` (byte -> frequency rank), a ``precedence_matrix``
    of character-pair likelihoods, and bookkeeping metadata. The prober
    shortcuts to FOUND_IT or NOT_ME once enough character pairs have been
    observed.
    """

    # Only the SAMPLE_SIZE most frequent characters of the language take
    # part in the pair-likelihood statistics.
    SAMPLE_SIZE = 64
    SB_ENOUGH_REL_THRESHOLD = 1024  # 0.25 * SAMPLE_SIZE^2
    POSITIVE_SHORTCUT_THRESHOLD = 0.95
    NEGATIVE_SHORTCUT_THRESHOLD = 0.05

    def __init__(self, model, reversed=False, name_prober=None):
        super(SingleByteCharSetProber, self).__init__()
        self._model = model
        # TRUE if we need to reverse every pair in the model lookup
        self._reversed = reversed
        # Optional auxiliary prober for name decision
        self._name_prober = name_prober
        self._last_order = None
        self._seq_counters = None
        self._total_seqs = None
        self._total_char = None
        self._freq_char = None
        self.reset()

    def reset(self):
        """Reset all statistics so the prober can examine a new document."""
        super(SingleByteCharSetProber, self).reset()
        # char order of last character (255 = sentinel meaning "none yet")
        self._last_order = 255
        self._seq_counters = [0] * SequenceLikelihood.get_num_categories()
        self._total_seqs = 0
        self._total_char = 0
        # characters that fall in our sampling range
        self._freq_char = 0

    @property
    def charset_name(self):
        # Delegate naming to the auxiliary prober when one was supplied.
        if self._name_prober:
            return self._name_prober.charset_name
        else:
            return self._model['charset_name']

    @property
    def language(self):
        if self._name_prober:
            return self._name_prober.language
        else:
            return self._model.get('language')

    def feed(self, byte_str):
        """Accumulate pair statistics from ``byte_str``; return probing state."""
        if not self._model['keep_english_letter']:
            # Strip English letters so they do not skew the language model.
            byte_str = self.filter_international_words(byte_str)
        if not byte_str:
            return self.state
        char_to_order_map = self._model['char_to_order_map']
        for i, c in enumerate(byte_str):
            # XXX: Order is in range 1-64, so one would think we want 0-63 here,
            # but that leads to 27 more test failures than before.
            order = char_to_order_map[c]
            # XXX: This was SYMBOL_CAT_ORDER before, with a value of 250, but
            # CharacterCategory.SYMBOL is actually 253, so we use CONTROL
            # to make it closer to the original intent. The only difference
            # is whether or not we count digits and control characters for
            # _total_char purposes.
            if order < CharacterCategory.CONTROL:
                self._total_char += 1
            if order < self.SAMPLE_SIZE:
                self._freq_char += 1
                # Only score a pair when the previous character was also in
                # the sampling range.
                if self._last_order < self.SAMPLE_SIZE:
                    self._total_seqs += 1
                    if not self._reversed:
                        i = (self._last_order * self.SAMPLE_SIZE) + order
                        model = self._model['precedence_matrix'][i]
                    else:  # reverse the order of the letters in the lookup
                        i = (order * self.SAMPLE_SIZE) + self._last_order
                        model = self._model['precedence_matrix'][i]
                    self._seq_counters[model] += 1
            self._last_order = order

        charset_name = self._model['charset_name']
        if self.state == ProbingState.DETECTING:
            # Once enough pairs have been seen, shortcut to a definite
            # answer if the confidence is extreme in either direction.
            if self._total_seqs > self.SB_ENOUGH_REL_THRESHOLD:
                confidence = self.get_confidence()
                if confidence > self.POSITIVE_SHORTCUT_THRESHOLD:
                    self.logger.debug('%s confidence = %s, we have a winner',
                                      charset_name, confidence)
                    self._state = ProbingState.FOUND_IT
                elif confidence < self.NEGATIVE_SHORTCUT_THRESHOLD:
                    self.logger.debug('%s confidence = %s, below negative '
                                      'shortcut threshhold %s', charset_name,
                                      confidence,
                                      self.NEGATIVE_SHORTCUT_THRESHOLD)
                    self._state = ProbingState.NOT_ME

        return self.state

    def get_confidence(self):
        """Ratio of observed positive pairs vs. the model's typical ratio."""
        r = 0.01
        if self._total_seqs > 0:
            r = ((1.0 * self._seq_counters[SequenceLikelihood.POSITIVE]) /
                 self._total_seqs / self._model['typical_positive_ratio'])
            # Weight by the share of characters inside the sampling range.
            r = r * self._freq_char / self._total_char
            if r >= 1.0:
                r = 0.99
        return r
def _dnsname_match(dn, hostname, max_wildcards=1):
    """Matching according to RFC 6125, section 6.4.3

    http://tools.ietf.org/html/rfc6125#section-6.4.3
    """
    pats = []
    if not dn:
        return False

    # Ported from python3-syntax:
    # leftmost, *remainder = dn.split(r'.')
    parts = dn.split(r".")
    leftmost = parts[0]
    remainder = parts[1:]

    wildcards = leftmost.count("*")
    if wildcards > max_wildcards:
        # Issue #17980: avoid denials of service by refusing more
        # than one wildcard per fragment. A survey of established
        # policy among SSL implementations showed it to be a
        # reasonable choice.
        raise CertificateError(
            "too many wildcards in certificate DNS name: " + repr(dn)
        )

    # speed up common case w/o wildcards
    if not wildcards:
        return dn.lower() == hostname.lower()

    # RFC 6125, section 6.4.3, subitem 1.
    # The client SHOULD NOT attempt to match a presented identifier in which
    # the wildcard character comprises a label other than the left-most label.
    if leftmost == "*":
        # When '*' is a fragment by itself, it matches a non-empty dotless
        # fragment.
        pats.append("[^.]+")
    elif leftmost.startswith("xn--") or hostname.startswith("xn--"):
        # RFC 6125, section 6.4.3, subitem 3.
        # The client SHOULD NOT attempt to match a presented identifier
        # where the wildcard character is embedded within an A-label or
        # U-label of an internationalized domain name.
        pats.append(re.escape(leftmost))
    else:
        # Otherwise, '*' matches any dotless string, e.g. www*
        pats.append(re.escape(leftmost).replace(r"\*", "[^.]*"))

    # add the remaining fragments, ignore any wildcards
    for frag in remainder:
        pats.append(re.escape(frag))

    pat = re.compile(r"\A" + r"\.".join(pats) + r"\Z", re.IGNORECASE)
    return pat.match(hostname)


def _to_unicode(obj):
    # On Python 2, the ipaddress backport wants text, so promote ASCII byte
    # strings to unicode. On Python 3 this is a no-op.
    if isinstance(obj, str) and sys.version_info < (3,):
        obj = unicode(obj, encoding="ascii", errors="strict")
    return obj


def _ipaddress_match(ipname, host_ip):
    """Exact matching of IP addresses.

    RFC 6125 explicitly doesn't define an algorithm for this
    (section 1.7.2 - "Out of Scope").
    """
    # OpenSSL may add a trailing newline to a subjectAltName's IP address
    # Divergence from upstream: ipaddress can't handle byte str
    ip = ipaddress.ip_address(_to_unicode(ipname).rstrip())
    return ip == host_ip


def match_hostname(cert, hostname):
    """Verify that *cert* (in decoded format as returned by
    SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125
    rules are followed, but IP addresses are not accepted for *hostname*.

    CertificateError is raised on failure. On success, the function
    returns nothing.
    """
    if not cert:
        raise ValueError(
            "empty or no certificate, match_hostname needs a "
            "SSL socket or SSL context with either "
            "CERT_OPTIONAL or CERT_REQUIRED"
        )
    try:
        # Divergence from upstream: ipaddress can't handle byte str
        host_ip = ipaddress.ip_address(_to_unicode(hostname))
    except ValueError:
        # Not an IP address (common case)
        host_ip = None
    except UnicodeError:
        # Divergence from upstream: Have to deal with ipaddress not taking
        # byte strings. addresses should be all ascii, so we consider it not
        # an ipaddress in this case
        host_ip = None
    except AttributeError:
        # Divergence from upstream: Make ipaddress library optional
        if ipaddress is None:
            host_ip = None
        else:
            raise
    dnsnames = []
    san = cert.get("subjectAltName", ())
    # A single matching subjectAltName entry is success; every non-matching
    # entry is collected for the error message.
    for key, value in san:
        if key == "DNS":
            if host_ip is None and _dnsname_match(value, hostname):
                return
            dnsnames.append(value)
        elif key == "IP Address":
            if host_ip is not None and _ipaddress_match(value, host_ip):
                return
            dnsnames.append(value)
    if not dnsnames:
        # The subject is only checked when there is no dNSName entry
        # in subjectAltName
        for sub in cert.get("subject", ()):
            for key, value in sub:
                # XXX according to RFC 2818, the most specific Common Name
                # must be used.
                if key == "commonName":
                    if _dnsname_match(value, hostname):
                        return
                    dnsnames.append(value)
    if len(dnsnames) > 1:
        raise CertificateError(
            "hostname %r "
            "doesn't match either of %s" % (hostname, ", ".join(map(repr, dnsnames)))
        )
    elif len(dnsnames) == 1:
        raise CertificateError("hostname %r doesn't match %r" % (hostname, dnsnames[0]))
    else:
        raise CertificateError(
            "no appropriate commonName or subjectAltName fields were found"
        )
class RequestMethods(object):
    """
    Convenience mixin for classes that implement a :meth:`urlopen` method,
    such as :class:`~urllib3.connectionpool.HTTPConnectionPool` and
    :class:`~urllib3.poolmanager.PoolManager`.

    Provides helpers for the common HTTP verbs and picks the right field
    encoding for each:

    * :meth:`.request_encode_url` puts the fields in the URL query string
      (GET, HEAD, DELETE, OPTIONS).
    * :meth:`.request_encode_body` puts the fields in the request *body*,
      multipart or www-form-urlencoded (POST, PUT, PATCH).
    * :meth:`.request` dispatches to one of the two based on the method.

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.
    """

    # Verbs whose fields belong in the query string rather than the body.
    _encode_url_methods = {"DELETE", "GET", "HEAD", "OPTIONS"}

    def __init__(self, headers=None):
        self.headers = headers or {}

    def urlopen(
        self,
        method,
        url,
        body=None,
        headers=None,
        encode_multipart=True,
        multipart_boundary=None,
        **kw
    ):  # Abstract
        raise NotImplementedError(
            "Classes extending RequestMethods must implement "
            "their own ``urlopen`` method."
        )

    def request(self, method, url, fields=None, headers=None, **urlopen_kw):
        """
        Make a request via :meth:`urlopen`, encoding ``fields`` in whichever
        way is appropriate for ``method``. This is the high-level convenience
        entry point; drop down to :meth:`request_encode_url`,
        :meth:`request_encode_body`, or :meth:`urlopen` itself for finer
        control.
        """
        method = method.upper()

        urlopen_kw["request_url"] = url

        if method in self._encode_url_methods:
            encode = self.request_encode_url
        else:
            encode = self.request_encode_body
        return encode(method, url, fields=fields, headers=headers, **urlopen_kw)

    def request_encode_url(self, method, url, fields=None, headers=None, **urlopen_kw):
        """
        Make a request via :meth:`urlopen` with the ``fields`` encoded into
        the URL query string. Useful for request methods like GET, HEAD,
        DELETE, etc.
        """
        request_kw = {"headers": self.headers if headers is None else headers}
        request_kw.update(urlopen_kw)

        if fields:
            url += "?" + urlencode(fields)

        return self.urlopen(method, url, **request_kw)

    def request_encode_body(
        self,
        method,
        url,
        fields=None,
        headers=None,
        encode_multipart=True,
        multipart_boundary=None,
        **urlopen_kw
    ):
        """
        Make a request via :meth:`urlopen` with the ``fields`` encoded into
        the request body. Useful for request methods like POST, PUT, PATCH,
        etc.

        With ``encode_multipart=True`` (the default) the payload is encoded
        via :meth:`urllib3.filepost.encode_multipart_formdata`; otherwise it
        is urlencoded with the 'application/x-www-form-urlencoded' content
        type. Multipart encoding is required when posting files and is
        reasonably safe otherwise, but may break request signing schemes
        such as OAuth.

        ``fields`` may map names to plain strings or to file tuples:
        ``(filename, data[, MIME type])``, where the MIME type is optional.
        Providing a filename is optional but recommended to best mimic
        browser behavior.

        Note that a supplied 'Content-Type' header will be overwritten,
        because it must carry the dynamic random multipart boundary; the
        boundary can be pinned explicitly with ``multipart_boundary``.
        """
        if headers is None:
            headers = self.headers

        request_kw = {"headers": {}}

        if fields:
            if "body" in urlopen_kw:
                raise TypeError(
                    "request got values for both 'fields' and 'body', can only specify one."
                )

            if encode_multipart:
                body, content_type = encode_multipart_formdata(
                    fields, boundary=multipart_boundary
                )
            else:
                body = urlencode(fields)
                content_type = "application/x-www-form-urlencoded"

            request_kw["body"] = body
            request_kw["headers"] = {"Content-Type": content_type}

        # Explicit headers win over the generated Content-Type entry.
        request_kw["headers"].update(headers)
        request_kw.update(urlopen_kw)

        return self.urlopen(method, url, **request_kw)
import sessions 14 | 15 | 16 | def request(method, url, **kwargs): 17 | """Constructs and sends a :class:`Request `. 18 | 19 | :param method: method for the new :class:`Request` object: ``GET``, ``OPTIONS``, ``HEAD``, ``POST``, ``PUT``, ``PATCH``, or ``DELETE``. 20 | :param url: URL for the new :class:`Request` object. 21 | :param params: (optional) Dictionary, list of tuples or bytes to send 22 | in the query string for the :class:`Request`. 23 | :param data: (optional) Dictionary, list of tuples, bytes, or file-like 24 | object to send in the body of the :class:`Request`. 25 | :param json: (optional) A JSON serializable Python object to send in the body of the :class:`Request`. 26 | :param headers: (optional) Dictionary of HTTP Headers to send with the :class:`Request`. 27 | :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`. 28 | :param files: (optional) Dictionary of ``'name': file-like-objects`` (or ``{'name': file-tuple}``) for multipart encoding upload. 29 | ``file-tuple`` can be a 2-tuple ``('filename', fileobj)``, 3-tuple ``('filename', fileobj, 'content_type')`` 30 | or a 4-tuple ``('filename', fileobj, 'content_type', custom_headers)``, where ``'content-type'`` is a string 31 | defining the content type of the given file and ``custom_headers`` a dict-like object containing additional headers 32 | to add for the file. 33 | :param auth: (optional) Auth tuple to enable Basic/Digest/Custom HTTP Auth. 34 | :param timeout: (optional) How many seconds to wait for the server to send data 35 | before giving up, as a float, or a :ref:`(connect timeout, read 36 | timeout) ` tuple. 37 | :type timeout: float or tuple 38 | :param allow_redirects: (optional) Boolean. Enable/disable GET/OPTIONS/POST/PUT/PATCH/DELETE/HEAD redirection. Defaults to ``True``. 39 | :type allow_redirects: bool 40 | :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy. 
41 | :param verify: (optional) Either a boolean, in which case it controls whether we verify 42 | the server's TLS certificate, or a string, in which case it must be a path 43 | to a CA bundle to use. Defaults to ``True``. 44 | :param stream: (optional) if ``False``, the response content will be immediately downloaded. 45 | :param cert: (optional) if String, path to ssl client cert file (.pem). If Tuple, ('cert', 'key') pair. 46 | :return: :class:`Response ` object 47 | :rtype: requests.Response 48 | 49 | Usage:: 50 | 51 | >>> import requests 52 | >>> req = requests.request('GET', 'https://httpbin.org/get') 53 | >>> req 54 | 55 | """ 56 | 57 | # By using the 'with' statement we are sure the session is closed, thus we 58 | # avoid leaving sockets open which can trigger a ResourceWarning in some 59 | # cases, and look like a memory leak in others. 60 | with sessions.Session() as session: 61 | return session.request(method=method, url=url, **kwargs) 62 | 63 | 64 | def get(url, params=None, **kwargs): 65 | r"""Sends a GET request. 66 | 67 | :param url: URL for the new :class:`Request` object. 68 | :param params: (optional) Dictionary, list of tuples or bytes to send 69 | in the query string for the :class:`Request`. 70 | :param \*\*kwargs: Optional arguments that ``request`` takes. 71 | :return: :class:`Response ` object 72 | :rtype: requests.Response 73 | """ 74 | 75 | kwargs.setdefault('allow_redirects', True) 76 | return request('get', url, params=params, **kwargs) 77 | 78 | 79 | def options(url, **kwargs): 80 | r"""Sends an OPTIONS request. 81 | 82 | :param url: URL for the new :class:`Request` object. 83 | :param \*\*kwargs: Optional arguments that ``request`` takes. 84 | :return: :class:`Response ` object 85 | :rtype: requests.Response 86 | """ 87 | 88 | kwargs.setdefault('allow_redirects', True) 89 | return request('options', url, **kwargs) 90 | 91 | 92 | def head(url, **kwargs): 93 | r"""Sends a HEAD request. 
94 | 95 | :param url: URL for the new :class:`Request` object. 96 | :param \*\*kwargs: Optional arguments that ``request`` takes. If 97 | `allow_redirects` is not provided, it will be set to `False` (as 98 | opposed to the default :meth:`request` behavior). 99 | :return: :class:`Response ` object 100 | :rtype: requests.Response 101 | """ 102 | 103 | kwargs.setdefault('allow_redirects', False) 104 | return request('head', url, **kwargs) 105 | 106 | 107 | def post(url, data=None, json=None, **kwargs): 108 | r"""Sends a POST request. 109 | 110 | :param url: URL for the new :class:`Request` object. 111 | :param data: (optional) Dictionary, list of tuples, bytes, or file-like 112 | object to send in the body of the :class:`Request`. 113 | :param json: (optional) json data to send in the body of the :class:`Request`. 114 | :param \*\*kwargs: Optional arguments that ``request`` takes. 115 | :return: :class:`Response ` object 116 | :rtype: requests.Response 117 | """ 118 | 119 | return request('post', url, data=data, json=json, **kwargs) 120 | 121 | 122 | def put(url, data=None, **kwargs): 123 | r"""Sends a PUT request. 124 | 125 | :param url: URL for the new :class:`Request` object. 126 | :param data: (optional) Dictionary, list of tuples, bytes, or file-like 127 | object to send in the body of the :class:`Request`. 128 | :param json: (optional) json data to send in the body of the :class:`Request`. 129 | :param \*\*kwargs: Optional arguments that ``request`` takes. 130 | :return: :class:`Response ` object 131 | :rtype: requests.Response 132 | """ 133 | 134 | return request('put', url, data=data, **kwargs) 135 | 136 | 137 | def patch(url, data=None, **kwargs): 138 | r"""Sends a PATCH request. 139 | 140 | :param url: URL for the new :class:`Request` object. 141 | :param data: (optional) Dictionary, list of tuples, bytes, or file-like 142 | object to send in the body of the :class:`Request`. 
143 | :param json: (optional) json data to send in the body of the :class:`Request`. 144 | :param \*\*kwargs: Optional arguments that ``request`` takes. 145 | :return: :class:`Response ` object 146 | :rtype: requests.Response 147 | """ 148 | 149 | return request('patch', url, data=data, **kwargs) 150 | 151 | 152 | def delete(url, **kwargs): 153 | r"""Sends a DELETE request. 154 | 155 | :param url: URL for the new :class:`Request` object. 156 | :param \*\*kwargs: Optional arguments that ``request`` takes. 157 | :return: :class:`Response ` object 158 | :rtype: requests.Response 159 | """ 160 | 161 | return request('delete', url, **kwargs) 162 | -------------------------------------------------------------------------------- /urllib3/contrib/socks.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This module contains provisional support for SOCKS proxies from within 4 | urllib3. This module supports SOCKS4, SOCKS4A (an extension of SOCKS4), and 5 | SOCKS5. To enable its functionality, either install PySocks or install this 6 | module with the ``socks`` extra. 7 | 8 | The SOCKS implementation supports the full range of urllib3 features. It also 9 | supports the following SOCKS features: 10 | 11 | - SOCKS4A (``proxy_url='socks4a://...``) 12 | - SOCKS4 (``proxy_url='socks4://...``) 13 | - SOCKS5 with remote DNS (``proxy_url='socks5h://...``) 14 | - SOCKS5 with local DNS (``proxy_url='socks5://...``) 15 | - Usernames and passwords for the SOCKS proxy 16 | 17 | .. note:: 18 | It is recommended to use ``socks5h://`` or ``socks4a://`` schemes in 19 | your ``proxy_url`` to ensure that DNS resolution is done from the remote 20 | server instead of client-side when connecting to a domain name. 21 | 22 | SOCKS4 supports IPv4 and domain names with the SOCKS4A extension. SOCKS5 23 | supports IPv4, IPv6, and domain names. 
24 | 25 | When connecting to a SOCKS4 proxy the ``username`` portion of the ``proxy_url`` 26 | will be sent as the ``userid`` section of the SOCKS request:: 27 | 28 | proxy_url="socks4a://@proxy-host" 29 | 30 | When connecting to a SOCKS5 proxy the ``username`` and ``password`` portion 31 | of the ``proxy_url`` will be sent as the username/password to authenticate 32 | with the proxy:: 33 | 34 | proxy_url="socks5h://:@proxy-host" 35 | 36 | """ 37 | from __future__ import absolute_import 38 | 39 | try: 40 | import socks 41 | except ImportError: 42 | import warnings 43 | from ..exceptions import DependencyWarning 44 | 45 | warnings.warn( 46 | ( 47 | "SOCKS support in urllib3 requires the installation of optional " 48 | "dependencies: specifically, PySocks. For more information, see " 49 | "https://urllib3.readthedocs.io/en/latest/contrib.html#socks-proxies" 50 | ), 51 | DependencyWarning, 52 | ) 53 | raise 54 | 55 | from socket import error as SocketError, timeout as SocketTimeout 56 | 57 | from ..connection import HTTPConnection, HTTPSConnection 58 | from ..connectionpool import HTTPConnectionPool, HTTPSConnectionPool 59 | from ..exceptions import ConnectTimeoutError, NewConnectionError 60 | from ..poolmanager import PoolManager 61 | from ..util.url import parse_url 62 | 63 | try: 64 | import ssl 65 | except ImportError: 66 | ssl = None 67 | 68 | 69 | class SOCKSConnection(HTTPConnection): 70 | """ 71 | A plain-text HTTP connection that connects via a SOCKS proxy. 72 | """ 73 | 74 | def __init__(self, *args, **kwargs): 75 | self._socks_options = kwargs.pop("_socks_options") 76 | super(SOCKSConnection, self).__init__(*args, **kwargs) 77 | 78 | def _new_conn(self): 79 | """ 80 | Establish a new connection via the SOCKS proxy. 
81 | """ 82 | extra_kw = {} 83 | if self.source_address: 84 | extra_kw["source_address"] = self.source_address 85 | 86 | if self.socket_options: 87 | extra_kw["socket_options"] = self.socket_options 88 | 89 | try: 90 | conn = socks.create_connection( 91 | (self.host, self.port), 92 | proxy_type=self._socks_options["socks_version"], 93 | proxy_addr=self._socks_options["proxy_host"], 94 | proxy_port=self._socks_options["proxy_port"], 95 | proxy_username=self._socks_options["username"], 96 | proxy_password=self._socks_options["password"], 97 | proxy_rdns=self._socks_options["rdns"], 98 | timeout=self.timeout, 99 | **extra_kw 100 | ) 101 | 102 | except SocketTimeout: 103 | raise ConnectTimeoutError( 104 | self, 105 | "Connection to %s timed out. (connect timeout=%s)" 106 | % (self.host, self.timeout), 107 | ) 108 | 109 | except socks.ProxyError as e: 110 | # This is fragile as hell, but it seems to be the only way to raise 111 | # useful errors here. 112 | if e.socket_err: 113 | error = e.socket_err 114 | if isinstance(error, SocketTimeout): 115 | raise ConnectTimeoutError( 116 | self, 117 | "Connection to %s timed out. (connect timeout=%s)" 118 | % (self.host, self.timeout), 119 | ) 120 | else: 121 | raise NewConnectionError( 122 | self, "Failed to establish a new connection: %s" % error 123 | ) 124 | else: 125 | raise NewConnectionError( 126 | self, "Failed to establish a new connection: %s" % e 127 | ) 128 | 129 | except SocketError as e: # Defensive: PySocks should catch all these. 130 | raise NewConnectionError( 131 | self, "Failed to establish a new connection: %s" % e 132 | ) 133 | 134 | return conn 135 | 136 | 137 | # We don't need to duplicate the Verified/Unverified distinction from 138 | # urllib3/connection.py here because the HTTPSConnection will already have been 139 | # correctly set to either the Verified or Unverified form by that module. This 140 | # means the SOCKSHTTPSConnection will automatically be the correct type. 
141 | class SOCKSHTTPSConnection(SOCKSConnection, HTTPSConnection): 142 | pass 143 | 144 | 145 | class SOCKSHTTPConnectionPool(HTTPConnectionPool): 146 | ConnectionCls = SOCKSConnection 147 | 148 | 149 | class SOCKSHTTPSConnectionPool(HTTPSConnectionPool): 150 | ConnectionCls = SOCKSHTTPSConnection 151 | 152 | 153 | class SOCKSProxyManager(PoolManager): 154 | """ 155 | A version of the urllib3 ProxyManager that routes connections via the 156 | defined SOCKS proxy. 157 | """ 158 | 159 | pool_classes_by_scheme = { 160 | "http": SOCKSHTTPConnectionPool, 161 | "https": SOCKSHTTPSConnectionPool, 162 | } 163 | 164 | def __init__( 165 | self, 166 | proxy_url, 167 | username=None, 168 | password=None, 169 | num_pools=10, 170 | headers=None, 171 | **connection_pool_kw 172 | ): 173 | parsed = parse_url(proxy_url) 174 | 175 | if username is None and password is None and parsed.auth is not None: 176 | split = parsed.auth.split(":") 177 | if len(split) == 2: 178 | username, password = split 179 | if parsed.scheme == "socks5": 180 | socks_version = socks.PROXY_TYPE_SOCKS5 181 | rdns = False 182 | elif parsed.scheme == "socks5h": 183 | socks_version = socks.PROXY_TYPE_SOCKS5 184 | rdns = True 185 | elif parsed.scheme == "socks4": 186 | socks_version = socks.PROXY_TYPE_SOCKS4 187 | rdns = False 188 | elif parsed.scheme == "socks4a": 189 | socks_version = socks.PROXY_TYPE_SOCKS4 190 | rdns = True 191 | else: 192 | raise ValueError("Unable to determine SOCKS version from %s" % proxy_url) 193 | 194 | self.proxy_url = proxy_url 195 | 196 | socks_options = { 197 | "socks_version": socks_version, 198 | "proxy_host": parsed.host, 199 | "proxy_port": parsed.port, 200 | "username": username, 201 | "password": password, 202 | "rdns": rdns, 203 | } 204 | connection_pool_kw["_socks_options"] = socks_options 205 | 206 | super(SOCKSProxyManager, self).__init__( 207 | num_pools, headers, **connection_pool_kw 208 | ) 209 | 210 | self.pool_classes_by_scheme = 
SOCKSProxyManager.pool_classes_by_scheme 211 | -------------------------------------------------------------------------------- /urllib3/exceptions.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .packages.six.moves.http_client import IncompleteRead as httplib_IncompleteRead 3 | 4 | # Base Exceptions 5 | 6 | 7 | class HTTPError(Exception): 8 | "Base exception used by this module." 9 | pass 10 | 11 | 12 | class HTTPWarning(Warning): 13 | "Base warning used by this module." 14 | pass 15 | 16 | 17 | class PoolError(HTTPError): 18 | "Base exception for errors caused within a pool." 19 | 20 | def __init__(self, pool, message): 21 | self.pool = pool 22 | HTTPError.__init__(self, "%s: %s" % (pool, message)) 23 | 24 | def __reduce__(self): 25 | # For pickling purposes. 26 | return self.__class__, (None, None) 27 | 28 | 29 | class RequestError(PoolError): 30 | "Base exception for PoolErrors that have associated URLs." 31 | 32 | def __init__(self, pool, url, message): 33 | self.url = url 34 | PoolError.__init__(self, pool, message) 35 | 36 | def __reduce__(self): 37 | # For pickling purposes. 38 | return self.__class__, (None, self.url, None) 39 | 40 | 41 | class SSLError(HTTPError): 42 | "Raised when SSL certificate fails in an HTTPS connection." 43 | pass 44 | 45 | 46 | class ProxyError(HTTPError): 47 | "Raised when the connection to a proxy fails." 48 | 49 | def __init__(self, message, error, *args): 50 | super(ProxyError, self).__init__(message, error, *args) 51 | self.original_error = error 52 | 53 | 54 | class DecodeError(HTTPError): 55 | "Raised when automatic decoding based on Content-Type fails." 56 | pass 57 | 58 | 59 | class ProtocolError(HTTPError): 60 | "Raised when something unexpected happens mid-request/response." 61 | pass 62 | 63 | 64 | #: Renamed to ProtocolError but aliased for backwards compatibility. 
65 | ConnectionError = ProtocolError 66 | 67 | 68 | # Leaf Exceptions 69 | 70 | 71 | class MaxRetryError(RequestError): 72 | """Raised when the maximum number of retries is exceeded. 73 | 74 | :param pool: The connection pool 75 | :type pool: :class:`~urllib3.connectionpool.HTTPConnectionPool` 76 | :param string url: The requested Url 77 | :param exceptions.Exception reason: The underlying error 78 | 79 | """ 80 | 81 | def __init__(self, pool, url, reason=None): 82 | self.reason = reason 83 | 84 | message = "Max retries exceeded with url: %s (Caused by %r)" % (url, reason) 85 | 86 | RequestError.__init__(self, pool, url, message) 87 | 88 | 89 | class HostChangedError(RequestError): 90 | "Raised when an existing pool gets a request for a foreign host." 91 | 92 | def __init__(self, pool, url, retries=3): 93 | message = "Tried to open a foreign host with url: %s" % url 94 | RequestError.__init__(self, pool, url, message) 95 | self.retries = retries 96 | 97 | 98 | class TimeoutStateError(HTTPError): 99 | """ Raised when passing an invalid state to a timeout """ 100 | 101 | pass 102 | 103 | 104 | class TimeoutError(HTTPError): 105 | """ Raised when a socket timeout error occurs. 106 | 107 | Catching this error will catch both :exc:`ReadTimeoutErrors 108 | ` and :exc:`ConnectTimeoutErrors `. 109 | """ 110 | 111 | pass 112 | 113 | 114 | class ReadTimeoutError(TimeoutError, RequestError): 115 | "Raised when a socket timeout occurs while receiving data from a server" 116 | pass 117 | 118 | 119 | # This timeout error does not have a URL attached and needs to inherit from the 120 | # base HTTPError 121 | class ConnectTimeoutError(TimeoutError): 122 | "Raised when a socket timeout occurs while connecting to a server" 123 | pass 124 | 125 | 126 | class NewConnectionError(ConnectTimeoutError, PoolError): 127 | "Raised when we fail to establish a new connection. Usually ECONNREFUSED." 
128 | pass 129 | 130 | 131 | class EmptyPoolError(PoolError): 132 | "Raised when a pool runs out of connections and no more are allowed." 133 | pass 134 | 135 | 136 | class ClosedPoolError(PoolError): 137 | "Raised when a request enters a pool after the pool has been closed." 138 | pass 139 | 140 | 141 | class LocationValueError(ValueError, HTTPError): 142 | "Raised when there is something wrong with a given URL input." 143 | pass 144 | 145 | 146 | class LocationParseError(LocationValueError): 147 | "Raised when get_host or similar fails to parse the URL input." 148 | 149 | def __init__(self, location): 150 | message = "Failed to parse: %s" % location 151 | HTTPError.__init__(self, message) 152 | 153 | self.location = location 154 | 155 | 156 | class ResponseError(HTTPError): 157 | "Used as a container for an error reason supplied in a MaxRetryError." 158 | GENERIC_ERROR = "too many error responses" 159 | SPECIFIC_ERROR = "too many {status_code} error responses" 160 | 161 | 162 | class SecurityWarning(HTTPWarning): 163 | "Warned when performing security reducing actions" 164 | pass 165 | 166 | 167 | class SubjectAltNameWarning(SecurityWarning): 168 | "Warned when connecting to a host with a certificate missing a SAN." 169 | pass 170 | 171 | 172 | class InsecureRequestWarning(SecurityWarning): 173 | "Warned when making an unverified HTTPS request." 174 | pass 175 | 176 | 177 | class SystemTimeWarning(SecurityWarning): 178 | "Warned when system time is suspected to be wrong" 179 | pass 180 | 181 | 182 | class InsecurePlatformWarning(SecurityWarning): 183 | "Warned when certain SSL configuration is not available on a platform." 184 | pass 185 | 186 | 187 | class SNIMissingWarning(HTTPWarning): 188 | "Warned when making a HTTPS request without SNI available." 189 | pass 190 | 191 | 192 | class DependencyWarning(HTTPWarning): 193 | """ 194 | Warned when an attempt is made to import a module with missing optional 195 | dependencies. 
196 | """ 197 | 198 | pass 199 | 200 | 201 | class InvalidProxyConfigurationWarning(HTTPWarning): 202 | """ 203 | Warned when using an HTTPS proxy and an HTTPS URL. Currently 204 | urllib3 doesn't support HTTPS proxies and the proxy will be 205 | contacted via HTTP instead. This warning can be fixed by 206 | changing your HTTPS proxy URL into an HTTP proxy URL. 207 | 208 | If you encounter this warning read this: 209 | https://github.com/urllib3/urllib3/issues/1850 210 | """ 211 | 212 | pass 213 | 214 | 215 | class ResponseNotChunked(ProtocolError, ValueError): 216 | "Response needs to be chunked in order to read it as chunks." 217 | pass 218 | 219 | 220 | class BodyNotHttplibCompatible(HTTPError): 221 | """ 222 | Body should be httplib.HTTPResponse like (have an fp attribute which 223 | returns raw chunks) for read_chunked(). 224 | """ 225 | 226 | pass 227 | 228 | 229 | class IncompleteRead(HTTPError, httplib_IncompleteRead): 230 | """ 231 | Response length doesn't match expected Content-Length 232 | 233 | Subclass of http_client.IncompleteRead to allow int value 234 | for `partial` to avoid creating large objects on streamed 235 | reads. 236 | """ 237 | 238 | def __init__(self, partial, expected): 239 | super(IncompleteRead, self).__init__(partial, expected) 240 | 241 | def __repr__(self): 242 | return "IncompleteRead(%i bytes read, %i more expected)" % ( 243 | self.partial, 244 | self.expected, 245 | ) 246 | 247 | 248 | class InvalidHeader(HTTPError): 249 | "The header provided was somehow invalid." 250 | pass 251 | 252 | 253 | class ProxySchemeUnknown(AssertionError, ValueError): 254 | "ProxyManager does not support the supplied scheme" 255 | # TODO(t-8ch): Stop inheriting from AssertionError in v2.0. 
256 | 257 | def __init__(self, scheme): 258 | message = "Not supported proxy scheme %s" % scheme 259 | super(ProxySchemeUnknown, self).__init__(message) 260 | 261 | 262 | class HeaderParsingError(HTTPError): 263 | "Raised by assert_header_parsing, but we convert it to a log.warning statement." 264 | 265 | def __init__(self, defects, unparsed_data): 266 | message = "%s, unparsed data: %r" % (defects or "Unknown", unparsed_data) 267 | super(HeaderParsingError, self).__init__(message) 268 | 269 | 270 | class UnrewindableBodyError(HTTPError): 271 | "urllib3 encountered an error when trying to rewind a body" 272 | pass 273 | -------------------------------------------------------------------------------- /urllib3/fields.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import email.utils 3 | import mimetypes 4 | import re 5 | 6 | from .packages import six 7 | 8 | 9 | def guess_content_type(filename, default="application/octet-stream"): 10 | """ 11 | Guess the "Content-Type" of a file. 12 | 13 | :param filename: 14 | The filename to guess the "Content-Type" of using :mod:`mimetypes`. 15 | :param default: 16 | If no "Content-Type" can be guessed, default to `default`. 17 | """ 18 | if filename: 19 | return mimetypes.guess_type(filename)[0] or default 20 | return default 21 | 22 | 23 | def format_header_param_rfc2231(name, value): 24 | """ 25 | Helper function to format and quote a single header parameter using the 26 | strategy defined in RFC 2231. 27 | 28 | Particularly useful for header parameters which might contain 29 | non-ASCII values, like file names. This follows RFC 2388 Section 4.4. 30 | 31 | :param name: 32 | The name of the parameter, a string expected to be ASCII only. 33 | :param value: 34 | The value of the parameter, provided as ``bytes`` or `str``. 35 | :ret: 36 | An RFC-2231-formatted unicode string. 
37 | """ 38 | if isinstance(value, six.binary_type): 39 | value = value.decode("utf-8") 40 | 41 | if not any(ch in value for ch in '"\\\r\n'): 42 | result = u'%s="%s"' % (name, value) 43 | try: 44 | result.encode("ascii") 45 | except (UnicodeEncodeError, UnicodeDecodeError): 46 | pass 47 | else: 48 | return result 49 | 50 | if six.PY2: # Python 2: 51 | value = value.encode("utf-8") 52 | 53 | # encode_rfc2231 accepts an encoded string and returns an ascii-encoded 54 | # string in Python 2 but accepts and returns unicode strings in Python 3 55 | value = email.utils.encode_rfc2231(value, "utf-8") 56 | value = "%s*=%s" % (name, value) 57 | 58 | if six.PY2: # Python 2: 59 | value = value.decode("utf-8") 60 | 61 | return value 62 | 63 | 64 | _HTML5_REPLACEMENTS = { 65 | u"\u0022": u"%22", 66 | # Replace "\" with "\\". 67 | u"\u005C": u"\u005C\u005C", 68 | u"\u005C": u"\u005C\u005C", 69 | } 70 | 71 | # All control characters from 0x00 to 0x1F *except* 0x1B. 72 | _HTML5_REPLACEMENTS.update( 73 | { 74 | six.unichr(cc): u"%{:02X}".format(cc) 75 | for cc in range(0x00, 0x1F + 1) 76 | if cc not in (0x1B,) 77 | } 78 | ) 79 | 80 | 81 | def _replace_multiple(value, needles_and_replacements): 82 | def replacer(match): 83 | return needles_and_replacements[match.group(0)] 84 | 85 | pattern = re.compile( 86 | r"|".join([re.escape(needle) for needle in needles_and_replacements.keys()]) 87 | ) 88 | 89 | result = pattern.sub(replacer, value) 90 | 91 | return result 92 | 93 | 94 | def format_header_param_html5(name, value): 95 | """ 96 | Helper function to format and quote a single header parameter using the 97 | HTML5 strategy. 98 | 99 | Particularly useful for header parameters which might contain 100 | non-ASCII values, like file names. This follows the `HTML5 Working Draft 101 | Section 4.10.22.7`_ and matches the behavior of curl and modern browsers. 102 | 103 | .. 
_HTML5 Working Draft Section 4.10.22.7: 104 | https://w3c.github.io/html/sec-forms.html#multipart-form-data 105 | 106 | :param name: 107 | The name of the parameter, a string expected to be ASCII only. 108 | :param value: 109 | The value of the parameter, provided as ``bytes`` or `str``. 110 | :ret: 111 | A unicode string, stripped of troublesome characters. 112 | """ 113 | if isinstance(value, six.binary_type): 114 | value = value.decode("utf-8") 115 | 116 | value = _replace_multiple(value, _HTML5_REPLACEMENTS) 117 | 118 | return u'%s="%s"' % (name, value) 119 | 120 | 121 | # For backwards-compatibility. 122 | format_header_param = format_header_param_html5 123 | 124 | 125 | class RequestField(object): 126 | """ 127 | A data container for request body parameters. 128 | 129 | :param name: 130 | The name of this request field. Must be unicode. 131 | :param data: 132 | The data/value body. 133 | :param filename: 134 | An optional filename of the request field. Must be unicode. 135 | :param headers: 136 | An optional dict-like object of headers to initially use for the field. 137 | :param header_formatter: 138 | An optional callable that is used to encode and format the headers. By 139 | default, this is :func:`format_header_param_html5`. 140 | """ 141 | 142 | def __init__( 143 | self, 144 | name, 145 | data, 146 | filename=None, 147 | headers=None, 148 | header_formatter=format_header_param_html5, 149 | ): 150 | self._name = name 151 | self._filename = filename 152 | self.data = data 153 | self.headers = {} 154 | if headers: 155 | self.headers = dict(headers) 156 | self.header_formatter = header_formatter 157 | 158 | @classmethod 159 | def from_tuples(cls, fieldname, value, header_formatter=format_header_param_html5): 160 | """ 161 | A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters. 162 | 163 | Supports constructing :class:`~urllib3.fields.RequestField` from 164 | parameter of key/value strings AND key/filetuple. 
A filetuple is a 165 | (filename, data, MIME type) tuple where the MIME type is optional. 166 | For example:: 167 | 168 | 'foo': 'bar', 169 | 'fakefile': ('foofile.txt', 'contents of foofile'), 170 | 'realfile': ('barfile.txt', open('realfile').read()), 171 | 'typedfile': ('bazfile.bin', open('bazfile').read(), 'image/jpeg'), 172 | 'nonamefile': 'contents of nonamefile field', 173 | 174 | Field names and filenames must be unicode. 175 | """ 176 | if isinstance(value, tuple): 177 | if len(value) == 3: 178 | filename, data, content_type = value 179 | else: 180 | filename, data = value 181 | content_type = guess_content_type(filename) 182 | else: 183 | filename = None 184 | content_type = None 185 | data = value 186 | 187 | request_param = cls( 188 | fieldname, data, filename=filename, header_formatter=header_formatter 189 | ) 190 | request_param.make_multipart(content_type=content_type) 191 | 192 | return request_param 193 | 194 | def _render_part(self, name, value): 195 | """ 196 | Overridable helper function to format a single header parameter. By 197 | default, this calls ``self.header_formatter``. 198 | 199 | :param name: 200 | The name of the parameter, a string expected to be ASCII only. 201 | :param value: 202 | The value of the parameter, provided as a unicode string. 203 | """ 204 | 205 | return self.header_formatter(name, value) 206 | 207 | def _render_parts(self, header_parts): 208 | """ 209 | Helper function to format and quote a single header. 210 | 211 | Useful for single headers that are composed of multiple items. E.g., 212 | 'Content-Disposition' fields. 213 | 214 | :param header_parts: 215 | A sequence of (k, v) tuples or a :class:`dict` of (k, v) to format 216 | as `k1="v1"; k2="v2"; ...`. 
217 | """ 218 | parts = [] 219 | iterable = header_parts 220 | if isinstance(header_parts, dict): 221 | iterable = header_parts.items() 222 | 223 | for name, value in iterable: 224 | if value is not None: 225 | parts.append(self._render_part(name, value)) 226 | 227 | return u"; ".join(parts) 228 | 229 | def render_headers(self): 230 | """ 231 | Renders the headers for this request field. 232 | """ 233 | lines = [] 234 | 235 | sort_keys = ["Content-Disposition", "Content-Type", "Content-Location"] 236 | for sort_key in sort_keys: 237 | if self.headers.get(sort_key, False): 238 | lines.append(u"%s: %s" % (sort_key, self.headers[sort_key])) 239 | 240 | for header_name, header_value in self.headers.items(): 241 | if header_name not in sort_keys: 242 | if header_value: 243 | lines.append(u"%s: %s" % (header_name, header_value)) 244 | 245 | lines.append(u"\r\n") 246 | return u"\r\n".join(lines) 247 | 248 | def make_multipart( 249 | self, content_disposition=None, content_type=None, content_location=None 250 | ): 251 | """ 252 | Makes this request field into a multipart request field. 253 | 254 | This method overrides "Content-Disposition", "Content-Type" and 255 | "Content-Location" headers to the request parameter. 256 | 257 | :param content_type: 258 | The 'Content-Type' of the request body. 259 | :param content_location: 260 | The 'Content-Location' of the request body. 
261 | 262 | """ 263 | self.headers["Content-Disposition"] = content_disposition or u"form-data" 264 | self.headers["Content-Disposition"] += u"; ".join( 265 | [ 266 | u"", 267 | self._render_parts( 268 | ((u"name", self._name), (u"filename", self._filename)) 269 | ), 270 | ] 271 | ) 272 | self.headers["Content-Type"] = content_type 273 | self.headers["Content-Location"] = content_location 274 | -------------------------------------------------------------------------------- /chardet/chardistribution.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Communicator client code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 
21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .euctwfreq import (EUCTW_CHAR_TO_FREQ_ORDER, EUCTW_TABLE_SIZE, 29 | EUCTW_TYPICAL_DISTRIBUTION_RATIO) 30 | from .euckrfreq import (EUCKR_CHAR_TO_FREQ_ORDER, EUCKR_TABLE_SIZE, 31 | EUCKR_TYPICAL_DISTRIBUTION_RATIO) 32 | from .gb2312freq import (GB2312_CHAR_TO_FREQ_ORDER, GB2312_TABLE_SIZE, 33 | GB2312_TYPICAL_DISTRIBUTION_RATIO) 34 | from .big5freq import (BIG5_CHAR_TO_FREQ_ORDER, BIG5_TABLE_SIZE, 35 | BIG5_TYPICAL_DISTRIBUTION_RATIO) 36 | from .jisfreq import (JIS_CHAR_TO_FREQ_ORDER, JIS_TABLE_SIZE, 37 | JIS_TYPICAL_DISTRIBUTION_RATIO) 38 | 39 | 40 | class CharDistributionAnalysis(object): 41 | ENOUGH_DATA_THRESHOLD = 1024 42 | SURE_YES = 0.99 43 | SURE_NO = 0.01 44 | MINIMUM_DATA_THRESHOLD = 3 45 | 46 | def __init__(self): 47 | # Mapping table to get frequency order from char order (get from 48 | # GetOrder()) 49 | self._char_to_freq_order = None 50 | self._table_size = None # Size of above table 51 | # This is a constant value which varies from language to language, 52 | # used in calculating confidence. See 53 | # http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html 54 | # for further detail. 
55 | self.typical_distribution_ratio = None 56 | self._done = None 57 | self._total_chars = None 58 | self._freq_chars = None 59 | self.reset() 60 | 61 | def reset(self): 62 | """reset analyser, clear any state""" 63 | # If this flag is set to True, detection is done and conclusion has 64 | # been made 65 | self._done = False 66 | self._total_chars = 0 # Total characters encountered 67 | # The number of characters whose frequency order is less than 512 68 | self._freq_chars = 0 69 | 70 | def feed(self, char, char_len): 71 | """feed a character with known length""" 72 | if char_len == 2: 73 | # we only care about 2-bytes character in our distribution analysis 74 | order = self.get_order(char) 75 | else: 76 | order = -1 77 | if order >= 0: 78 | self._total_chars += 1 79 | # order is valid 80 | if order < self._table_size: 81 | if 512 > self._char_to_freq_order[order]: 82 | self._freq_chars += 1 83 | 84 | def get_confidence(self): 85 | """return confidence based on existing data""" 86 | # if we didn't receive any character in our consideration range, 87 | # return negative answer 88 | if self._total_chars <= 0 or self._freq_chars <= self.MINIMUM_DATA_THRESHOLD: 89 | return self.SURE_NO 90 | 91 | if self._total_chars != self._freq_chars: 92 | r = (self._freq_chars / ((self._total_chars - self._freq_chars) 93 | * self.typical_distribution_ratio)) 94 | if r < self.SURE_YES: 95 | return r 96 | 97 | # normalize confidence (we don't want to be 100% sure) 98 | return self.SURE_YES 99 | 100 | def got_enough_data(self): 101 | # It is not necessary to receive all data to draw conclusion. 102 | # For charset detection, certain amount of data is enough 103 | return self._total_chars > self.ENOUGH_DATA_THRESHOLD 104 | 105 | def get_order(self, byte_str): 106 | # We do not handle characters based on the original encoding string, 107 | # but convert this encoding string to a number, here called order. 
108 | # This allows multiple encodings of a language to share one frequency 109 | # table. 110 | return -1 111 | 112 | 113 | class EUCTWDistributionAnalysis(CharDistributionAnalysis): 114 | def __init__(self): 115 | super(EUCTWDistributionAnalysis, self).__init__() 116 | self._char_to_freq_order = EUCTW_CHAR_TO_FREQ_ORDER 117 | self._table_size = EUCTW_TABLE_SIZE 118 | self.typical_distribution_ratio = EUCTW_TYPICAL_DISTRIBUTION_RATIO 119 | 120 | def get_order(self, byte_str): 121 | # for euc-TW encoding, we are interested 122 | # first byte range: 0xc4 -- 0xfe 123 | # second byte range: 0xa1 -- 0xfe 124 | # no validation needed here. State machine has done that 125 | first_char = byte_str[0] 126 | if first_char >= 0xC4: 127 | return 94 * (first_char - 0xC4) + byte_str[1] - 0xA1 128 | else: 129 | return -1 130 | 131 | 132 | class EUCKRDistributionAnalysis(CharDistributionAnalysis): 133 | def __init__(self): 134 | super(EUCKRDistributionAnalysis, self).__init__() 135 | self._char_to_freq_order = EUCKR_CHAR_TO_FREQ_ORDER 136 | self._table_size = EUCKR_TABLE_SIZE 137 | self.typical_distribution_ratio = EUCKR_TYPICAL_DISTRIBUTION_RATIO 138 | 139 | def get_order(self, byte_str): 140 | # for euc-KR encoding, we are interested 141 | # first byte range: 0xb0 -- 0xfe 142 | # second byte range: 0xa1 -- 0xfe 143 | # no validation needed here. 
State machine has done that 144 | first_char = byte_str[0] 145 | if first_char >= 0xB0: 146 | return 94 * (first_char - 0xB0) + byte_str[1] - 0xA1 147 | else: 148 | return -1 149 | 150 | 151 | class GB2312DistributionAnalysis(CharDistributionAnalysis): 152 | def __init__(self): 153 | super(GB2312DistributionAnalysis, self).__init__() 154 | self._char_to_freq_order = GB2312_CHAR_TO_FREQ_ORDER 155 | self._table_size = GB2312_TABLE_SIZE 156 | self.typical_distribution_ratio = GB2312_TYPICAL_DISTRIBUTION_RATIO 157 | 158 | def get_order(self, byte_str): 159 | # for GB2312 encoding, we are interested 160 | # first byte range: 0xb0 -- 0xfe 161 | # second byte range: 0xa1 -- 0xfe 162 | # no validation needed here. State machine has done that 163 | first_char, second_char = byte_str[0], byte_str[1] 164 | if (first_char >= 0xB0) and (second_char >= 0xA1): 165 | return 94 * (first_char - 0xB0) + second_char - 0xA1 166 | else: 167 | return -1 168 | 169 | 170 | class Big5DistributionAnalysis(CharDistributionAnalysis): 171 | def __init__(self): 172 | super(Big5DistributionAnalysis, self).__init__() 173 | self._char_to_freq_order = BIG5_CHAR_TO_FREQ_ORDER 174 | self._table_size = BIG5_TABLE_SIZE 175 | self.typical_distribution_ratio = BIG5_TYPICAL_DISTRIBUTION_RATIO 176 | 177 | def get_order(self, byte_str): 178 | # for big5 encoding, we are interested 179 | # first byte range: 0xa4 -- 0xfe 180 | # second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe 181 | # no validation needed here. 
State machine has done that 182 | first_char, second_char = byte_str[0], byte_str[1] 183 | if first_char >= 0xA4: 184 | if second_char >= 0xA1: 185 | return 157 * (first_char - 0xA4) + second_char - 0xA1 + 63 186 | else: 187 | return 157 * (first_char - 0xA4) + second_char - 0x40 188 | else: 189 | return -1 190 | 191 | 192 | class SJISDistributionAnalysis(CharDistributionAnalysis): 193 | def __init__(self): 194 | super(SJISDistributionAnalysis, self).__init__() 195 | self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER 196 | self._table_size = JIS_TABLE_SIZE 197 | self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO 198 | 199 | def get_order(self, byte_str): 200 | # for sjis encoding, we are interested 201 | # first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe 202 | # second byte range: 0x40 -- 0x7e, 0x81 -- oxfe 203 | # no validation needed here. State machine has done that 204 | first_char, second_char = byte_str[0], byte_str[1] 205 | if (first_char >= 0x81) and (first_char <= 0x9F): 206 | order = 188 * (first_char - 0x81) 207 | elif (first_char >= 0xE0) and (first_char <= 0xEF): 208 | order = 188 * (first_char - 0xE0 + 31) 209 | else: 210 | return -1 211 | order = order + second_char - 0x40 212 | if second_char > 0x7F: 213 | order = -1 214 | return order 215 | 216 | 217 | class EUCJPDistributionAnalysis(CharDistributionAnalysis): 218 | def __init__(self): 219 | super(EUCJPDistributionAnalysis, self).__init__() 220 | self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER 221 | self._table_size = JIS_TABLE_SIZE 222 | self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO 223 | 224 | def get_order(self, byte_str): 225 | # for euc-JP encoding, we are interested 226 | # first byte range: 0xa0 -- 0xfe 227 | # second byte range: 0xa1 -- 0xfe 228 | # no validation needed here. 
State machine has done that 229 | char = byte_str[0] 230 | if char >= 0xA0: 231 | return 94 * (char - 0xA1) + byte_str[1] - 0xa1 232 | else: 233 | return -1 234 | -------------------------------------------------------------------------------- /urllib3/util/timeout.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | # The default socket timeout, used by httplib to indicate that no timeout was 4 | # specified by the user 5 | from socket import _GLOBAL_DEFAULT_TIMEOUT 6 | import time 7 | 8 | from ..exceptions import TimeoutStateError 9 | 10 | # A sentinel value to indicate that no timeout was specified by the user in 11 | # urllib3 12 | _Default = object() 13 | 14 | 15 | # Use time.monotonic if available. 16 | current_time = getattr(time, "monotonic", time.time) 17 | 18 | 19 | class Timeout(object): 20 | """ Timeout configuration. 21 | 22 | Timeouts can be defined as a default for a pool:: 23 | 24 | timeout = Timeout(connect=2.0, read=7.0) 25 | http = PoolManager(timeout=timeout) 26 | response = http.request('GET', 'http://example.com/') 27 | 28 | Or per-request (which overrides the default for the pool):: 29 | 30 | response = http.request('GET', 'http://example.com/', timeout=Timeout(10)) 31 | 32 | Timeouts can be disabled by setting all the parameters to ``None``:: 33 | 34 | no_timeout = Timeout(connect=None, read=None) 35 | response = http.request('GET', 'http://example.com/, timeout=no_timeout) 36 | 37 | 38 | :param total: 39 | This combines the connect and read timeouts into one; the read timeout 40 | will be set to the time leftover from the connect attempt. In the 41 | event that both a connect timeout and a total are specified, or a read 42 | timeout and a total are specified, the shorter timeout will be applied. 43 | 44 | Defaults to None. 
45 | 46 | :type total: integer, float, or None 47 | 48 | :param connect: 49 | The maximum amount of time (in seconds) to wait for a connection 50 | attempt to a server to succeed. Omitting the parameter will default the 51 | connect timeout to the system default, probably `the global default 52 | timeout in socket.py 53 | `_. 54 | None will set an infinite timeout for connection attempts. 55 | 56 | :type connect: integer, float, or None 57 | 58 | :param read: 59 | The maximum amount of time (in seconds) to wait between consecutive 60 | read operations for a response from the server. Omitting the parameter 61 | will default the read timeout to the system default, probably `the 62 | global default timeout in socket.py 63 | `_. 64 | None will set an infinite timeout. 65 | 66 | :type read: integer, float, or None 67 | 68 | .. note:: 69 | 70 | Many factors can affect the total amount of time for urllib3 to return 71 | an HTTP response. 72 | 73 | For example, Python's DNS resolver does not obey the timeout specified 74 | on the socket. Other factors that can affect total request time include 75 | high CPU load, high swap, the program running at a low priority level, 76 | or other behaviors. 77 | 78 | In addition, the read and total timeouts only measure the time between 79 | read operations on the socket connecting the client and the server, 80 | not the total amount of time for the request to return a complete 81 | response. For most requests, the timeout is raised because the server 82 | has not sent the first byte in the specified time. This is not always 83 | the case; if a server streams one byte every fifteen seconds, a timeout 84 | of 20 seconds will not trigger, even though the request will take 85 | several minutes to complete. 86 | 87 | If your goal is to cut off any request after a set amount of wall clock 88 | time, consider having a second "watcher" thread to cut off a slow 89 | request. 
90 | """ 91 | 92 | #: A sentinel object representing the default timeout value 93 | DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT 94 | 95 | def __init__(self, total=None, connect=_Default, read=_Default): 96 | self._connect = self._validate_timeout(connect, "connect") 97 | self._read = self._validate_timeout(read, "read") 98 | self.total = self._validate_timeout(total, "total") 99 | self._start_connect = None 100 | 101 | def __repr__(self): 102 | return "%s(connect=%r, read=%r, total=%r)" % ( 103 | type(self).__name__, 104 | self._connect, 105 | self._read, 106 | self.total, 107 | ) 108 | 109 | # __str__ provided for backwards compatibility 110 | __str__ = __repr__ 111 | 112 | @classmethod 113 | def _validate_timeout(cls, value, name): 114 | """ Check that a timeout attribute is valid. 115 | 116 | :param value: The timeout value to validate 117 | :param name: The name of the timeout attribute to validate. This is 118 | used to specify in error messages. 119 | :return: The validated and casted version of the given value. 120 | :raises ValueError: If it is a numeric value less than or equal to 121 | zero, or the type is not an integer, float, or None. 122 | """ 123 | if value is _Default: 124 | return cls.DEFAULT_TIMEOUT 125 | 126 | if value is None or value is cls.DEFAULT_TIMEOUT: 127 | return value 128 | 129 | if isinstance(value, bool): 130 | raise ValueError( 131 | "Timeout cannot be a boolean value. It must " 132 | "be an int, float or None." 133 | ) 134 | try: 135 | float(value) 136 | except (TypeError, ValueError): 137 | raise ValueError( 138 | "Timeout value %s was %s, but it must be an " 139 | "int, float or None." % (name, value) 140 | ) 141 | 142 | try: 143 | if value <= 0: 144 | raise ValueError( 145 | "Attempted to set %s timeout to %s, but the " 146 | "timeout cannot be set to a value less " 147 | "than or equal to 0." 
% (name, value) 148 | ) 149 | except TypeError: 150 | # Python 3 151 | raise ValueError( 152 | "Timeout value %s was %s, but it must be an " 153 | "int, float or None." % (name, value) 154 | ) 155 | 156 | return value 157 | 158 | @classmethod 159 | def from_float(cls, timeout): 160 | """ Create a new Timeout from a legacy timeout value. 161 | 162 | The timeout value used by httplib.py sets the same timeout on the 163 | connect(), and recv() socket requests. This creates a :class:`Timeout` 164 | object that sets the individual timeouts to the ``timeout`` value 165 | passed to this function. 166 | 167 | :param timeout: The legacy timeout value. 168 | :type timeout: integer, float, sentinel default object, or None 169 | :return: Timeout object 170 | :rtype: :class:`Timeout` 171 | """ 172 | return Timeout(read=timeout, connect=timeout) 173 | 174 | def clone(self): 175 | """ Create a copy of the timeout object 176 | 177 | Timeout properties are stored per-pool but each request needs a fresh 178 | Timeout object to ensure each one has its own start/stop configured. 179 | 180 | :return: a copy of the timeout object 181 | :rtype: :class:`Timeout` 182 | """ 183 | # We can't use copy.deepcopy because that will also create a new object 184 | # for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to 185 | # detect the user default. 186 | return Timeout(connect=self._connect, read=self._read, total=self.total) 187 | 188 | def start_connect(self): 189 | """ Start the timeout clock, used during a connect() attempt 190 | 191 | :raises urllib3.exceptions.TimeoutStateError: if you attempt 192 | to start a timer that has been started already. 193 | """ 194 | if self._start_connect is not None: 195 | raise TimeoutStateError("Timeout timer has already been started.") 196 | self._start_connect = current_time() 197 | return self._start_connect 198 | 199 | def get_connect_duration(self): 200 | """ Gets the time elapsed since the call to :meth:`start_connect`. 
201 | 202 | :return: Elapsed time in seconds. 203 | :rtype: float 204 | :raises urllib3.exceptions.TimeoutStateError: if you attempt 205 | to get duration for a timer that hasn't been started. 206 | """ 207 | if self._start_connect is None: 208 | raise TimeoutStateError( 209 | "Can't get connect duration for timer that has not started." 210 | ) 211 | return current_time() - self._start_connect 212 | 213 | @property 214 | def connect_timeout(self): 215 | """ Get the value to use when setting a connection timeout. 216 | 217 | This will be a positive float or integer, the value None 218 | (never timeout), or the default system timeout. 219 | 220 | :return: Connect timeout. 221 | :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None 222 | """ 223 | if self.total is None: 224 | return self._connect 225 | 226 | if self._connect is None or self._connect is self.DEFAULT_TIMEOUT: 227 | return self.total 228 | 229 | return min(self._connect, self.total) 230 | 231 | @property 232 | def read_timeout(self): 233 | """ Get the value for the read timeout. 234 | 235 | This assumes some time has elapsed in the connection timeout and 236 | computes the read timeout appropriately. 237 | 238 | If self.total is set, the read timeout is dependent on the amount of 239 | time taken by the connect timeout. If the connection time has not been 240 | established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be 241 | raised. 242 | 243 | :return: Value to use for the read timeout. 244 | :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None 245 | :raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect` 246 | has not yet been called on this object. 247 | """ 248 | if ( 249 | self.total is not None 250 | and self.total is not self.DEFAULT_TIMEOUT 251 | and self._read is not None 252 | and self._read is not self.DEFAULT_TIMEOUT 253 | ): 254 | # In case the connect timeout has not yet been established. 
255 | if self._start_connect is None: 256 | return self._read 257 | return max(0, min(self.total - self.get_connect_duration(), self._read)) 258 | elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT: 259 | return max(0, self.total - self.get_connect_duration()) 260 | else: 261 | return self._read 262 | --------------------------------------------------------------------------------