├── .gitignore ├── chardet ├── cli │ ├── __init__.py │ └── chardetect.py ├── version.py ├── compat.py ├── __init__.py ├── euctwprober.py ├── euckrprober.py ├── gb2312prober.py ├── big5prober.py ├── enums.py ├── cp949prober.py ├── mbcsgroupprober.py ├── utf8prober.py ├── mbcharsetprober.py ├── sbcsgroupprober.py ├── codingstatemachine.py ├── eucjpprober.py ├── sjisprober.py ├── charsetgroupprober.py ├── escprober.py ├── charsetprober.py ├── latin1prober.py ├── sbcharsetprober.py └── chardistribution.py ├── urllib3 ├── contrib │ ├── __init__.py │ ├── _securetransport │ │ └── __init__.py │ ├── _appengine_environ.py │ ├── ntlmpool.py │ └── socks.py ├── packages │ ├── backports │ │ ├── __init__.py │ │ └── makefile.py │ ├── __init__.py │ └── ssl_match_hostname │ │ ├── __init__.py │ │ └── _implementation.py ├── util │ ├── queue.py │ ├── __init__.py │ ├── response.py │ ├── request.py │ ├── connection.py │ ├── wait.py │ └── timeout.py ├── filepost.py ├── __init__.py ├── request.py ├── exceptions.py └── fields.py ├── idna ├── package_data.py ├── __init__.py ├── compat.py ├── intranges.py └── codec.py ├── fuzzywuzzy ├── __init__.py ├── string_processing.py ├── StringMatcher.py └── utils.py ├── certifi ├── __init__.py ├── __main__.py └── core.py ├── lambda_function.zip ├── Levenshtein ├── _levenshtein.so ├── __init__.py └── StringMatcher.py ├── scripts ├── prep_for_commit.sh └── download_lambda_function.sh └── requests ├── __version__.py ├── certs.py ├── packages.py ├── hooks.py ├── _internal_utils.py ├── compat.py ├── structures.py ├── exceptions.py ├── help.py ├── __init__.py ├── status_codes.py └── api.py /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | -------------------------------------------------------------------------------- /chardet/cli/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 
-------------------------------------------------------------------------------- /urllib3/contrib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /urllib3/packages/backports/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /urllib3/contrib/_securetransport/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /idna/package_data.py: -------------------------------------------------------------------------------- 1 | __version__ = '2.9' 2 | 3 | -------------------------------------------------------------------------------- /fuzzywuzzy/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | __version__ = '0.16.0' 3 | -------------------------------------------------------------------------------- /idna/__init__.py: -------------------------------------------------------------------------------- 1 | from .package_data import __version__ 2 | from .core import * 3 | -------------------------------------------------------------------------------- /certifi/__init__.py: -------------------------------------------------------------------------------- 1 | from .core import contents, where 2 | 3 | __version__ = "2020.04.05.1" 4 | -------------------------------------------------------------------------------- /lambda_function.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndg63276/alexa-sky-hd/HEAD/lambda_function.zip -------------------------------------------------------------------------------- /Levenshtein/_levenshtein.so: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndg63276/alexa-sky-hd/HEAD/Levenshtein/_levenshtein.so -------------------------------------------------------------------------------- /Levenshtein/__init__.py: -------------------------------------------------------------------------------- 1 | from Levenshtein import _levenshtein 2 | from Levenshtein._levenshtein import * 3 | 4 | __doc__ = _levenshtein.__doc__ 5 | -------------------------------------------------------------------------------- /urllib3/packages/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from . import ssl_match_hostname 4 | 5 | __all__ = ("ssl_match_hostname",) 6 | -------------------------------------------------------------------------------- /chardet/version.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module exists only to simplify retrieving the version number of chardet 3 | from within setup.py and from chardet subpackages. 
4 | 5 | :author: Dan Blanchard (dan.blanchard@gmail.com) 6 | """ 7 | 8 | __version__ = "3.0.4" 9 | VERSION = __version__.split('.') 10 | -------------------------------------------------------------------------------- /idna/compat.py: -------------------------------------------------------------------------------- 1 | from .core import * 2 | from .codec import * 3 | 4 | def ToASCII(label): 5 | return encode(label) 6 | 7 | def ToUnicode(label): 8 | return decode(label) 9 | 10 | def nameprep(s): 11 | raise NotImplementedError("IDNA 2008 does not utilise nameprep protocol") 12 | 13 | -------------------------------------------------------------------------------- /certifi/__main__.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from certifi import contents, where 4 | 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument("-c", "--contents", action="store_true") 7 | args = parser.parse_args() 8 | 9 | if args.contents: 10 | print(contents()) 11 | else: 12 | print(where()) 13 | -------------------------------------------------------------------------------- /scripts/prep_for_commit.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | gitroot=`git rev-parse --show-toplevel` 4 | rm $gitroot/lambda_function.zip 2>/dev/null 5 | find . -type f -name *~ -exec rm -rf {} \; 6 | find . -type d -name __pycache__ -exec rm -rf {} \; 7 | find . -type f -name *.py[cod] -exec rm -rf {} \; 8 | find . 
-type f -name *\$py.class -exec rm -rf {} \; 9 | 10 | 11 | GLOBIGNORE="*" 12 | command="cd $gitroot; zip -r lambda_function.zip * -x '.git*' -x 'scripts*'" 13 | echo $command 14 | -------------------------------------------------------------------------------- /scripts/download_lambda_function.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | gitroot=`git rev-parse --show-toplevel` 4 | 5 | if [[ $1 == "" ]]; then 6 | echo "Specify full arn" 7 | exit 8 | else 9 | arn="$1" 10 | region=`echo $arn | cut -d: -f4` 11 | fi 12 | 13 | url=$(aws lambda --region $region get-function --function-name $arn | grep Location | cut -d'"' -f4) 14 | wget -O /tmp/aws.zip "$url" 15 | unzip -o /tmp/aws.zip lambda_function.py -d $gitroot 16 | rm /tmp/aws.zip 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /requests/__version__.py: -------------------------------------------------------------------------------- 1 | # .-. .-. .-. . . .-. .-. .-. .-. 2 | # |( |- |.| | | |- `-. | `-. 3 | # ' ' `-' `-`.`-' `-' `-' ' `-' 4 | 5 | __title__ = 'requests' 6 | __description__ = 'Python HTTP for Humans.' 7 | __url__ = 'https://requests.readthedocs.io' 8 | __version__ = '2.23.0' 9 | __build__ = 0x022300 10 | __author__ = 'Kenneth Reitz' 11 | __author_email__ = 'me@kennethreitz.org' 12 | __license__ = 'Apache 2.0' 13 | __copyright__ = 'Copyright 2020 Kenneth Reitz' 14 | __cake__ = u'\u2728 \U0001f370 \u2728' 15 | -------------------------------------------------------------------------------- /requests/certs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | requests.certs 6 | ~~~~~~~~~~~~~~ 7 | 8 | This module returns the preferred default CA certificate bundle. There is 9 | only one — the one from the certifi package. 
10 | 11 | If you are packaging Requests, e.g., for a Linux distribution or a managed 12 | environment, you can change the definition of where() to return a separately 13 | packaged CA bundle. 14 | """ 15 | from certifi import where 16 | 17 | if __name__ == '__main__': 18 | print(where()) 19 | -------------------------------------------------------------------------------- /urllib3/util/queue.py: -------------------------------------------------------------------------------- 1 | import collections 2 | from ..packages import six 3 | from ..packages.six.moves import queue 4 | 5 | if six.PY2: 6 | # Queue is imported for side effects on MS Windows. See issue #229. 7 | import Queue as _unused_module_Queue # noqa: F401 8 | 9 | 10 | class LifoQueue(queue.Queue): 11 | def _init(self, _): 12 | self.queue = collections.deque() 13 | 14 | def _qsize(self, len=len): 15 | return len(self.queue) 16 | 17 | def _put(self, item): 18 | self.queue.append(item) 19 | 20 | def _get(self): 21 | return self.queue.pop() 22 | -------------------------------------------------------------------------------- /requests/packages.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | # This code exists for backwards compatibility reasons. 4 | # I don't like it either. Just look the other way. :) 5 | 6 | for package in ('urllib3', 'idna', 'chardet'): 7 | locals()[package] = __import__(package) 8 | # This traversal is apparently necessary such that the identities are 9 | # preserved (requests.packages.urllib3.* is urllib3.*) 10 | for mod in list(sys.modules): 11 | if mod == package or mod.startswith(package + '.'): 12 | sys.modules['requests.packages.' + mod] = sys.modules[mod] 13 | 14 | # Kinda cool, though, right? 
15 | -------------------------------------------------------------------------------- /urllib3/packages/ssl_match_hostname/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | try: 4 | # Our match_hostname function is the same as 3.5's, so we only want to 5 | # import the match_hostname function if it's at least that good. 6 | if sys.version_info < (3, 5): 7 | raise ImportError("Fallback to vendored code") 8 | 9 | from ssl import CertificateError, match_hostname 10 | except ImportError: 11 | try: 12 | # Backport of the function from a pypi module 13 | from backports.ssl_match_hostname import CertificateError, match_hostname 14 | except ImportError: 15 | # Our vendored copy 16 | from ._implementation import CertificateError, match_hostname 17 | 18 | # Not needed, but documenting what we provide. 19 | __all__ = ("CertificateError", "match_hostname") 20 | -------------------------------------------------------------------------------- /requests/hooks.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | requests.hooks 5 | ~~~~~~~~~~~~~~ 6 | 7 | This module provides the capabilities for the Requests hooks system. 8 | 9 | Available hooks: 10 | 11 | ``response``: 12 | The response generated from a Request. 
13 | """ 14 | HOOKS = ['response'] 15 | 16 | 17 | def default_hooks(): 18 | return {event: [] for event in HOOKS} 19 | 20 | # TODO: response is the only one 21 | 22 | 23 | def dispatch_hook(key, hooks, hook_data, **kwargs): 24 | """Dispatches a hook dictionary on a given piece of data.""" 25 | hooks = hooks or {} 26 | hooks = hooks.get(key) 27 | if hooks: 28 | if hasattr(hooks, '__call__'): 29 | hooks = [hooks] 30 | for hook in hooks: 31 | _hook_data = hook(hook_data, **kwargs) 32 | if _hook_data is not None: 33 | hook_data = _hook_data 34 | return hook_data 35 | -------------------------------------------------------------------------------- /certifi/core.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | certifi.py 5 | ~~~~~~~~~~ 6 | 7 | This module returns the installation location of cacert.pem or its contents. 8 | """ 9 | import os 10 | 11 | try: 12 | from importlib.resources import read_text 13 | except ImportError: 14 | # This fallback will work for Python versions prior to 3.7 that lack the 15 | # importlib.resources module but relies on the existing `where` function 16 | # so won't address issues with environments like PyOxidizer that don't set 17 | # __file__ on modules. 
18 | def read_text(_module, _path, encoding="ascii"): 19 | with open(where(), "r", encoding=encoding) as data: 20 | return data.read() 21 | 22 | 23 | def where(): 24 | f = os.path.dirname(__file__) 25 | 26 | return os.path.join(f, "cacert.pem") 27 | 28 | 29 | def contents(): 30 | return read_text("certifi", "cacert.pem", encoding="ascii") 31 | -------------------------------------------------------------------------------- /fuzzywuzzy/string_processing.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | import re 3 | import string 4 | import sys 5 | 6 | PY3 = sys.version_info[0] == 3 7 | if PY3: 8 | string = str 9 | 10 | 11 | class StringProcessor(object): 12 | """ 13 | This class defines method to process strings in the most 14 | efficient way. Ideally all the methods below use unicode strings 15 | for both input and output. 16 | """ 17 | 18 | regex = re.compile(r"(?ui)\W") 19 | 20 | @classmethod 21 | def replace_non_letters_non_numbers_with_whitespace(cls, a_string): 22 | """ 23 | This function replaces any sequence of non letters and non 24 | numbers with a single white space. 25 | """ 26 | return cls.regex.sub(" ", a_string) 27 | 28 | strip = staticmethod(string.strip) 29 | to_lower_case = staticmethod(string.lower) 30 | to_upper_case = staticmethod(string.upper) 31 | -------------------------------------------------------------------------------- /urllib3/contrib/_appengine_environ.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module provides means to detect the App Engine environment. 3 | """ 4 | 5 | import os 6 | 7 | 8 | def is_appengine(): 9 | return is_local_appengine() or is_prod_appengine() 10 | 11 | 12 | def is_appengine_sandbox(): 13 | """Reports if the app is running in the first generation sandbox. 
14 | 15 | The second generation runtimes are technically still in a sandbox, but it 16 | is much less restrictive, so generally you shouldn't need to check for it. 17 | see https://cloud.google.com/appengine/docs/standard/runtimes 18 | """ 19 | return is_appengine() and os.environ["APPENGINE_RUNTIME"] == "python27" 20 | 21 | 22 | def is_local_appengine(): 23 | return "APPENGINE_RUNTIME" in os.environ and os.environ.get( 24 | "SERVER_SOFTWARE", "" 25 | ).startswith("Development/") 26 | 27 | 28 | def is_prod_appengine(): 29 | return "APPENGINE_RUNTIME" in os.environ and os.environ.get( 30 | "SERVER_SOFTWARE", "" 31 | ).startswith("Google App Engine/") 32 | 33 | 34 | def is_prod_appengine_mvms(): 35 | """Deprecated.""" 36 | return False 37 | -------------------------------------------------------------------------------- /urllib3/util/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | # For backwards compatibility, provide imports that used to be here. 
4 | from .connection import is_connection_dropped 5 | from .request import make_headers 6 | from .response import is_fp_closed 7 | from .ssl_ import ( 8 | SSLContext, 9 | HAS_SNI, 10 | IS_PYOPENSSL, 11 | IS_SECURETRANSPORT, 12 | assert_fingerprint, 13 | resolve_cert_reqs, 14 | resolve_ssl_version, 15 | ssl_wrap_socket, 16 | PROTOCOL_TLS, 17 | ) 18 | from .timeout import current_time, Timeout 19 | 20 | from .retry import Retry 21 | from .url import get_host, parse_url, split_first, Url 22 | from .wait import wait_for_read, wait_for_write 23 | 24 | __all__ = ( 25 | "HAS_SNI", 26 | "IS_PYOPENSSL", 27 | "IS_SECURETRANSPORT", 28 | "SSLContext", 29 | "PROTOCOL_TLS", 30 | "Retry", 31 | "Timeout", 32 | "Url", 33 | "assert_fingerprint", 34 | "current_time", 35 | "is_connection_dropped", 36 | "is_fp_closed", 37 | "get_host", 38 | "parse_url", 39 | "make_headers", 40 | "resolve_cert_reqs", 41 | "resolve_ssl_version", 42 | "split_first", 43 | "ssl_wrap_socket", 44 | "wait_for_read", 45 | "wait_for_write", 46 | ) 47 | -------------------------------------------------------------------------------- /chardet/compat.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # Contributor(s): 3 | # Dan Blanchard 4 | # Ian Cordasco 5 | # 6 | # This library is free software; you can redistribute it and/or 7 | # modify it under the terms of the GNU Lesser General Public 8 | # License as published by the Free Software Foundation; either 9 | # version 2.1 of the License, or (at your option) any later version. 10 | # 11 | # This library is distributed in the hope that it will be useful, 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | # Lesser General Public License for more details. 
15 | # 16 | # You should have received a copy of the GNU Lesser General Public 17 | # License along with this library; if not, write to the Free Software 18 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 19 | # 02110-1301 USA 20 | ######################### END LICENSE BLOCK ######################### 21 | 22 | import sys 23 | 24 | 25 | if sys.version_info < (3, 0): 26 | PY2 = True 27 | PY3 = False 28 | base_str = (str, unicode) 29 | text_type = unicode 30 | else: 31 | PY2 = False 32 | PY3 = True 33 | base_str = (bytes, str) 34 | text_type = str 35 | -------------------------------------------------------------------------------- /requests/_internal_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | requests._internal_utils 5 | ~~~~~~~~~~~~~~ 6 | 7 | Provides utility functions that are consumed internally by Requests 8 | which depend on extremely few external helpers (such as compat) 9 | """ 10 | 11 | from .compat import is_py2, builtin_str, str 12 | 13 | 14 | def to_native_string(string, encoding='ascii'): 15 | """Given a string object, regardless of type, returns a representation of 16 | that string in the native string type, encoding and decoding where 17 | necessary. This assumes ASCII unless told otherwise. 18 | """ 19 | if isinstance(string, builtin_str): 20 | out = string 21 | else: 22 | if is_py2: 23 | out = string.encode(encoding) 24 | else: 25 | out = string.decode(encoding) 26 | 27 | return out 28 | 29 | 30 | def unicode_is_ascii(u_string): 31 | """Determine if unicode string only contains ASCII characters. 32 | 33 | :param str u_string: unicode string to check. Must be unicode 34 | and not Python 2 `str`. 
35 | :rtype: bool 36 | """ 37 | assert isinstance(u_string, str) 38 | try: 39 | u_string.encode('ascii') 40 | return True 41 | except UnicodeEncodeError: 42 | return False 43 | -------------------------------------------------------------------------------- /urllib3/packages/backports/makefile.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | backports.makefile 4 | ~~~~~~~~~~~~~~~~~~ 5 | 6 | Backports the Python 3 ``socket.makefile`` method for use with anything that 7 | wants to create a "fake" socket object. 8 | """ 9 | import io 10 | 11 | from socket import SocketIO 12 | 13 | 14 | def backport_makefile( 15 | self, mode="r", buffering=None, encoding=None, errors=None, newline=None 16 | ): 17 | """ 18 | Backport of ``socket.makefile`` from Python 3.5. 19 | """ 20 | if not set(mode) <= {"r", "w", "b"}: 21 | raise ValueError("invalid mode %r (only r, w, b allowed)" % (mode,)) 22 | writing = "w" in mode 23 | reading = "r" in mode or not writing 24 | assert reading or writing 25 | binary = "b" in mode 26 | rawmode = "" 27 | if reading: 28 | rawmode += "r" 29 | if writing: 30 | rawmode += "w" 31 | raw = SocketIO(self, rawmode) 32 | self._makefile_refs += 1 33 | if buffering is None: 34 | buffering = -1 35 | if buffering < 0: 36 | buffering = io.DEFAULT_BUFFER_SIZE 37 | if buffering == 0: 38 | if not binary: 39 | raise ValueError("unbuffered streams must be binary") 40 | return raw 41 | if reading and writing: 42 | buffer = io.BufferedRWPair(raw, raw, buffering) 43 | elif reading: 44 | buffer = io.BufferedReader(raw, buffering) 45 | else: 46 | assert writing 47 | buffer = io.BufferedWriter(raw, buffering) 48 | if binary: 49 | return buffer 50 | text = io.TextIOWrapper(buffer, encoding, errors, newline) 51 | text.mode = mode 52 | return text 53 | -------------------------------------------------------------------------------- /chardet/__init__.py: 
-------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # This library is free software; you can redistribute it and/or 3 | # modify it under the terms of the GNU Lesser General Public 4 | # License as published by the Free Software Foundation; either 5 | # version 2.1 of the License, or (at your option) any later version. 6 | # 7 | # This library is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 10 | # Lesser General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU Lesser General Public 13 | # License along with this library; if not, write to the Free Software 14 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 15 | # 02110-1301 USA 16 | ######################### END LICENSE BLOCK ######################### 17 | 18 | 19 | from .compat import PY2, PY3 20 | from .universaldetector import UniversalDetector 21 | from .version import __version__, VERSION 22 | 23 | 24 | def detect(byte_str): 25 | """ 26 | Detect the encoding of the given byte string. 27 | 28 | :param byte_str: The byte sequence to examine. 
29 | :type byte_str: ``bytes`` or ``bytearray`` 30 | """ 31 | if not isinstance(byte_str, bytearray): 32 | if not isinstance(byte_str, bytes): 33 | raise TypeError('Expected object of type bytes or bytearray, got: ' 34 | '{0}'.format(type(byte_str))) 35 | else: 36 | byte_str = bytearray(byte_str) 37 | detector = UniversalDetector() 38 | detector.feed(byte_str) 39 | return detector.close() 40 | -------------------------------------------------------------------------------- /chardet/euctwprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 
21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import EUCTWDistributionAnalysis 31 | from .mbcssm import EUCTW_SM_MODEL 32 | 33 | class EUCTWProber(MultiByteCharSetProber): 34 | def __init__(self): 35 | super(EUCTWProber, self).__init__() 36 | self.coding_sm = CodingStateMachine(EUCTW_SM_MODEL) 37 | self.distribution_analyzer = EUCTWDistributionAnalysis() 38 | self.reset() 39 | 40 | @property 41 | def charset_name(self): 42 | return "EUC-TW" 43 | 44 | @property 45 | def language(self): 46 | return "Taiwan" 47 | -------------------------------------------------------------------------------- /chardet/euckrprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 
16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import EUCKRDistributionAnalysis 31 | from .mbcssm import EUCKR_SM_MODEL 32 | 33 | 34 | class EUCKRProber(MultiByteCharSetProber): 35 | def __init__(self): 36 | super(EUCKRProber, self).__init__() 37 | self.coding_sm = CodingStateMachine(EUCKR_SM_MODEL) 38 | self.distribution_analyzer = EUCKRDistributionAnalysis() 39 | self.reset() 40 | 41 | @property 42 | def charset_name(self): 43 | return "EUC-KR" 44 | 45 | @property 46 | def language(self): 47 | return "Korean" 48 | -------------------------------------------------------------------------------- /chardet/gb2312prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 
8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import GB2312DistributionAnalysis 31 | from .mbcssm import GB2312_SM_MODEL 32 | 33 | class GB2312Prober(MultiByteCharSetProber): 34 | def __init__(self): 35 | super(GB2312Prober, self).__init__() 36 | self.coding_sm = CodingStateMachine(GB2312_SM_MODEL) 37 | self.distribution_analyzer = GB2312DistributionAnalysis() 38 | self.reset() 39 | 40 | @property 41 | def charset_name(self): 42 | return "GB2312" 43 | 44 | @property 45 | def language(self): 46 | return "Chinese" 47 | -------------------------------------------------------------------------------- /chardet/big5prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Communicator client code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 
6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import Big5DistributionAnalysis 31 | from .mbcssm import BIG5_SM_MODEL 32 | 33 | 34 | class Big5Prober(MultiByteCharSetProber): 35 | def __init__(self): 36 | super(Big5Prober, self).__init__() 37 | self.coding_sm = CodingStateMachine(BIG5_SM_MODEL) 38 | self.distribution_analyzer = Big5DistributionAnalysis() 39 | self.reset() 40 | 41 | @property 42 | def charset_name(self): 43 | return "Big5" 44 | 45 | @property 46 | def language(self): 47 | return "Chinese" 48 | -------------------------------------------------------------------------------- /idna/intranges.py: -------------------------------------------------------------------------------- 1 | """ 2 | Given a list of integers, made up of (hopefully) a small number of long runs 3 | of consecutive integers, compute a representation 
of the form 4 | ((start1, end1), (start2, end2) ...). Then answer the question "was x present 5 | in the original list?" in time O(log(# runs)). 6 | """ 7 | 8 | import bisect 9 | 10 | def intranges_from_list(list_): 11 | """Represent a list of integers as a sequence of ranges: 12 | ((start_0, end_0), (start_1, end_1), ...), such that the original 13 | integers are exactly those x such that start_i <= x < end_i for some i. 14 | 15 | Ranges are encoded as single integers (start << 32 | end), not as tuples. 16 | """ 17 | 18 | sorted_list = sorted(list_) 19 | ranges = [] 20 | last_write = -1 21 | for i in range(len(sorted_list)): 22 | if i+1 < len(sorted_list): 23 | if sorted_list[i] == sorted_list[i+1]-1: 24 | continue 25 | current_range = sorted_list[last_write+1:i+1] 26 | ranges.append(_encode_range(current_range[0], current_range[-1] + 1)) 27 | last_write = i 28 | 29 | return tuple(ranges) 30 | 31 | def _encode_range(start, end): 32 | return (start << 32) | end 33 | 34 | def _decode_range(r): 35 | return (r >> 32), (r & ((1 << 32) - 1)) 36 | 37 | 38 | def intranges_contain(int_, ranges): 39 | """Determine if `int_` falls into one of the ranges in `ranges`.""" 40 | tuple_ = _encode_range(int_, 0) 41 | pos = bisect.bisect_left(ranges, tuple_) 42 | # we could be immediately ahead of a tuple (start, end) 43 | # with start < int_ <= end 44 | if pos > 0: 45 | left, right = _decode_range(ranges[pos-1]) 46 | if left <= int_ < right: 47 | return True 48 | # or we could be immediately behind a tuple (int_, end) 49 | if pos < len(ranges): 50 | left, _ = _decode_range(ranges[pos]) 51 | if left == int_: 52 | return True 53 | return False 54 | -------------------------------------------------------------------------------- /chardet/enums.py: -------------------------------------------------------------------------------- 1 | """ 2 | All of the Enums that are used throughout the chardet package. 
class InputState(object):
    """
    This enum represents the different states a universal detector can be in.
    """
    PURE_ASCII = 0
    ESC_ASCII = 1
    HIGH_BYTE = 2


class LanguageFilter(object):
    """
    This enum represents the different language filters we can apply to a
    ``UniversalDetector``.
    """
    # Individual bit flags; combine them with ``|``.
    CHINESE_SIMPLIFIED = 0x01
    CHINESE_TRADITIONAL = 0x02
    JAPANESE = 0x04
    KOREAN = 0x08
    NON_CJK = 0x10
    ALL = 0x1F
    # Convenience unions of the flags above.
    CHINESE = CHINESE_SIMPLIFIED | CHINESE_TRADITIONAL
    CJK = CHINESE | JAPANESE | KOREAN


class ProbingState(object):
    """
    This enum represents the different states a prober can be in.
    """
    DETECTING = 0
    FOUND_IT = 1
    NOT_ME = 2


class MachineState(object):
    """
    This enum represents the different states a state machine can be in.
    """
    START = 0
    ERROR = 1
    ITS_ME = 2


class SequenceLikelihood(object):
    """
    This enum represents the likelihood of a character following the previous one.
    """
    NEGATIVE = 0
    UNLIKELY = 1
    LIKELY = 2
    POSITIVE = 3

    @classmethod
    def get_num_categories(cls):
        """:returns: The number of likelihood categories in the enum."""
        # POSITIVE is the highest value, so there are POSITIVE + 1 categories.
        return 4


class CharacterCategory(object):
    """
    This enum represents the different categories language models for
    ``SingleByteCharsetProber`` put characters into.

    Anything less than CONTROL is considered a letter.
    """
    UNDEFINED = 255
    LINE_BREAK = 254
    SYMBOL = 253
    DIGIT = 252
    CONTROL = 251
class CP949Prober(MultiByteCharSetProber):
    """Prober for CP949-encoded (Korean) byte streams.

    Uses the CP949 coding state machine combined with the EUC-KR
    distribution analyser.
    """

    def __init__(self):
        super(CP949Prober, self).__init__()
        # NOTE: CP949 is a superset of EUC-KR, so the distribution should be
        # not different.
        self.distribution_analyzer = EUCKRDistributionAnalysis()
        self.coding_sm = CodingStateMachine(CP949_SM_MODEL)
        self.reset()

    @property
    def charset_name(self):
        return "CP949"

    @property
    def language(self):
        return "Korean"
26 | is_py3 = (_ver[0] == 3) 27 | 28 | try: 29 | import simplejson as json 30 | except ImportError: 31 | import json 32 | 33 | # --------- 34 | # Specifics 35 | # --------- 36 | 37 | if is_py2: 38 | from urllib import ( 39 | quote, unquote, quote_plus, unquote_plus, urlencode, getproxies, 40 | proxy_bypass, proxy_bypass_environment, getproxies_environment) 41 | from urlparse import urlparse, urlunparse, urljoin, urlsplit, urldefrag 42 | from urllib2 import parse_http_list 43 | import cookielib 44 | from Cookie import Morsel 45 | from StringIO import StringIO 46 | # Keep OrderedDict for backwards compatibility. 47 | from collections import Callable, Mapping, MutableMapping, OrderedDict 48 | 49 | 50 | builtin_str = str 51 | bytes = str 52 | str = unicode 53 | basestring = basestring 54 | numeric_types = (int, long, float) 55 | integer_types = (int, long) 56 | 57 | elif is_py3: 58 | from urllib.parse import urlparse, urlunparse, urljoin, urlsplit, urlencode, quote, unquote, quote_plus, unquote_plus, urldefrag 59 | from urllib.request import parse_http_list, getproxies, proxy_bypass, proxy_bypass_environment, getproxies_environment 60 | from http import cookiejar as cookielib 61 | from http.cookies import Morsel 62 | from io import StringIO 63 | # Keep OrderedDict for backwards compatibility. 64 | from collections import OrderedDict 65 | from collections.abc import Callable, Mapping, MutableMapping 66 | 67 | builtin_str = str 68 | str = str 69 | bytes = bytes 70 | basestring = (str, bytes) 71 | numeric_types = (int, float) 72 | integer_types = (int,) 73 | -------------------------------------------------------------------------------- /chardet/mbcsgroupprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 
class MBCSGroupProber(CharSetGroupProber):
    """Group prober bundling every supported multi-byte charset prober.

    The group machinery in ``CharSetGroupProber`` feeds input to each child
    prober and keeps the most confident answer.
    """

    def __init__(self, lang_filter=None):
        super(MBCSGroupProber, self).__init__(lang_filter=lang_filter)
        # One instance of each multi-byte prober, examined together.
        self.probers = [
            UTF8Prober(),
            SJISProber(),
            EUCJPProber(),
            GB2312Prober(),
            EUCKRProber(),
            CP949Prober(),
            Big5Prober(),
            EUCTWProber(),
        ]
        self.reset()
class StringMatcher:
    """A SequenceMatcher-like class built on the top of Levenshtein.

    Mirrors the ``difflib.SequenceMatcher`` API (junk handling excluded);
    the expensive computations are delegated to the ``Levenshtein``
    extension functions.  Results are cached until a sequence changes.
    """

    def _reset_cache(self):
        # ``None`` marks "not computed yet".  Legitimate cached results can
        # be falsy (distance 0, ratio 0.0, empty opcode list), so the cache
        # checks below must use ``is None`` rather than truthiness.
        self._ratio = self._distance = None
        self._opcodes = self._editops = self._matching_blocks = None

    def __init__(self, isjunk=None, seq1='', seq2=''):
        if isjunk:
            warn("isjunk not NOT implemented, it will be ignored")
        self._str1, self._str2 = seq1, seq2
        self._reset_cache()

    def set_seqs(self, seq1, seq2):
        self._str1, self._str2 = seq1, seq2
        self._reset_cache()

    def set_seq1(self, seq1):
        self._str1 = seq1
        self._reset_cache()

    def set_seq2(self, seq2):
        self._str2 = seq2
        self._reset_cache()

    def get_opcodes(self):
        if self._opcodes is None:
            # Reuse cached editops when present instead of recomputing from
            # the raw strings.
            if self._editops:
                self._opcodes = opcodes(self._editops, self._str1, self._str2)
            else:
                self._opcodes = opcodes(self._str1, self._str2)
        return self._opcodes

    def get_editops(self):
        if self._editops is None:
            if self._opcodes:
                self._editops = editops(self._opcodes, self._str1, self._str2)
            else:
                self._editops = editops(self._str1, self._str2)
        return self._editops

    def get_matching_blocks(self):
        if self._matching_blocks is None:
            self._matching_blocks = matching_blocks(self.get_opcodes(),
                                                    self._str1, self._str2)
        return self._matching_blocks

    def ratio(self):
        if self._ratio is None:
            self._ratio = ratio(self._str1, self._str2)
        return self._ratio

    def quick_ratio(self):
        # This is usually quick enough :o)
        if self._ratio is None:
            self._ratio = ratio(self._str1, self._str2)
        return self._ratio

    def real_quick_ratio(self):
        len1, len2 = len(self._str1), len(self._str2)
        if not len1 and not len2:
            # BUGFIX: the original divided by (len1 + len2) unconditionally
            # and raised ZeroDivisionError for two empty sequences.
            # difflib.SequenceMatcher treats two empty sequences as a
            # perfect match (ratio 1.0), so we do the same.
            return 1.0
        return 2.0 * min(len1, len2) / (len1 + len2)

    def distance(self):
        if self._distance is None:
            self._distance = distance(self._str1, self._str2)
        return self._distance
# Entry 3 of the codec info tuple is the utf-8 StreamWriter class; it is used
# to write text through a bytes buffer.
writer = codecs.lookup("utf-8")[3]


def choose_boundary():
    """
    Our embarrassingly-simple replacement for mimetools.choose_boundary.
    """
    # 16 random bytes -> 32 hex characters; collision odds are negligible.
    token = binascii.hexlify(os.urandom(16))
    if not six.PY2:
        token = token.decode("ascii")
    return token


def iter_field_objects(fields):
    """
    Iterate over fields.

    Supports list of (k, v) tuples and dicts, and lists of
    :class:`~urllib3.fields.RequestField`.

    """
    if isinstance(fields, dict):
        source = six.iteritems(fields)
    else:
        source = iter(fields)

    for field in source:
        # Pass RequestField instances through untouched; wrap plain tuples.
        if isinstance(field, RequestField):
            yield field
        else:
            yield RequestField.from_tuples(*field)
def is_fp_closed(obj):
    """
    Checks whether a given file-like object is closed.

    :param obj:
        The file-like object to check.
    """
    # File-like objects do not share a single "am I closed?" API, so probe
    # the known variants in order and return the first answer available.
    probes = (
        # Check `isclosed()` first, in case Python3 doesn't set `closed`.
        # GH Issue #928
        lambda fp: fp.isclosed(),
        # The official file-like-object way.
        lambda fp: fp.closed,
        # Containers holding another file-like object that gets released on
        # exhaustion (e.g. HTTPResponse).
        lambda fp: fp.fp is None,
    )
    for probe in probes:
        try:
            return probe(obj)
        except AttributeError:
            continue
    raise ValueError("Unable to determine whether fp is closed.")


def assert_header_parsing(headers):
    """
    Asserts whether all headers have been successfully parsed.
    Extracts encountered errors from the result of parsing headers.

    Only works on Python 3.

    :param headers: Headers to verify.
    :type headers: `httplib.HTTPMessage`.

    :raises urllib3.exceptions.HeaderParsingError:
        If parsing errors are found.
    """
    # This will fail silently if we pass in the wrong kind of parameter.
    # To make debugging easier add an explicit check.
    if not isinstance(headers, httplib.HTTPMessage):
        raise TypeError("expected httplib.Message, got {0}.".format(type(headers)))

    defects = getattr(headers, "defects", None)
    get_payload = getattr(headers, "get_payload", None)

    unparsed_data = None
    # get_payload is actually email.message.Message.get_payload; we're only
    # interested in the result if it's not a multipart message.
    if get_payload and not headers.is_multipart():
        payload = get_payload()
        if isinstance(payload, (bytes, str)):
            unparsed_data = payload

    if defects or unparsed_data:
        raise HeaderParsingError(defects=defects, unparsed_data=unparsed_data)
83 | method = response._method 84 | if isinstance(method, int): # Platform-specific: Appengine 85 | return method == 3 86 | return method.upper() == "HEAD" 87 | -------------------------------------------------------------------------------- /fuzzywuzzy/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | import sys 3 | import functools 4 | 5 | from fuzzywuzzy.string_processing import StringProcessor 6 | 7 | 8 | PY3 = sys.version_info[0] == 3 9 | 10 | 11 | def validate_string(s): 12 | """ 13 | Check input has length and that length > 0 14 | 15 | :param s: 16 | :return: True if len(s) > 0 else False 17 | """ 18 | try: 19 | return len(s) > 0 20 | except TypeError: 21 | return False 22 | 23 | 24 | def check_for_none(func): 25 | @functools.wraps(func) 26 | def decorator(*args, **kwargs): 27 | if args[0] is None or args[1] is None: 28 | return 0 29 | return func(*args, **kwargs) 30 | return decorator 31 | 32 | 33 | def check_empty_string(func): 34 | @functools.wraps(func) 35 | def decorator(*args, **kwargs): 36 | if len(args[0]) == 0 or len(args[1]) == 0: 37 | return 0 38 | return func(*args, **kwargs) 39 | return decorator 40 | 41 | 42 | bad_chars = str("").join([chr(i) for i in range(128, 256)]) # ascii dammit! 
43 | if PY3: 44 | translation_table = dict((ord(c), None) for c in bad_chars) 45 | unicode = str 46 | 47 | 48 | def asciionly(s): 49 | if PY3: 50 | return s.translate(translation_table) 51 | else: 52 | return s.translate(None, bad_chars) 53 | 54 | 55 | def asciidammit(s): 56 | if type(s) is str: 57 | return asciionly(s) 58 | elif type(s) is unicode: 59 | return asciionly(s.encode('ascii', 'ignore')) 60 | else: 61 | return asciidammit(unicode(s)) 62 | 63 | 64 | def make_type_consistent(s1, s2): 65 | """If both objects aren't either both string or unicode instances force them to unicode""" 66 | if isinstance(s1, str) and isinstance(s2, str): 67 | return s1, s2 68 | 69 | elif isinstance(s1, unicode) and isinstance(s2, unicode): 70 | return s1, s2 71 | 72 | else: 73 | return unicode(s1), unicode(s2) 74 | 75 | 76 | def full_process(s, force_ascii=False): 77 | """Process string by 78 | -- removing all but letters and numbers 79 | -- trim whitespace 80 | -- force to lower case 81 | if force_ascii == True, force convert to ascii""" 82 | 83 | if s is None: 84 | return "" 85 | 86 | if force_ascii: 87 | s = asciidammit(s) 88 | # Keep only Letters and Numbers (see Unicode docs). 89 | string_out = StringProcessor.replace_non_letters_non_numbers_with_whitespace(s) 90 | # Force into lowercase. 91 | string_out = StringProcessor.to_lower_case(string_out) 92 | # Remove leading and trailing whitespaces. 93 | string_out = StringProcessor.strip(string_out) 94 | return string_out 95 | 96 | 97 | def intr(n): 98 | '''Returns a correctly rounded integer''' 99 | return int(round(n)) 100 | -------------------------------------------------------------------------------- /urllib3/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | urllib3 - Thread-safe connection pooling and re-using. 
3 | """ 4 | from __future__ import absolute_import 5 | import warnings 6 | 7 | from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, connection_from_url 8 | 9 | from . import exceptions 10 | from .filepost import encode_multipart_formdata 11 | from .poolmanager import PoolManager, ProxyManager, proxy_from_url 12 | from .response import HTTPResponse 13 | from .util.request import make_headers 14 | from .util.url import get_host 15 | from .util.timeout import Timeout 16 | from .util.retry import Retry 17 | 18 | 19 | # Set default logging handler to avoid "No handler found" warnings. 20 | import logging 21 | from logging import NullHandler 22 | 23 | __author__ = "Andrey Petrov (andrey.petrov@shazow.net)" 24 | __license__ = "MIT" 25 | __version__ = "1.25.9" 26 | 27 | __all__ = ( 28 | "HTTPConnectionPool", 29 | "HTTPSConnectionPool", 30 | "PoolManager", 31 | "ProxyManager", 32 | "HTTPResponse", 33 | "Retry", 34 | "Timeout", 35 | "add_stderr_logger", 36 | "connection_from_url", 37 | "disable_warnings", 38 | "encode_multipart_formdata", 39 | "get_host", 40 | "make_headers", 41 | "proxy_from_url", 42 | ) 43 | 44 | logging.getLogger(__name__).addHandler(NullHandler()) 45 | 46 | 47 | def add_stderr_logger(level=logging.DEBUG): 48 | """ 49 | Helper for quickly adding a StreamHandler to the logger. Useful for 50 | debugging. 51 | 52 | Returns the handler after adding it. 53 | """ 54 | # This method needs to be in this __init__.py to get the __name__ correct 55 | # even if urllib3 is vendored within another package. 56 | logger = logging.getLogger(__name__) 57 | handler = logging.StreamHandler() 58 | handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s")) 59 | logger.addHandler(handler) 60 | logger.setLevel(level) 61 | logger.debug("Added a stderr logging handler to logger: %s", __name__) 62 | return handler 63 | 64 | 65 | # ... Clean up. 
def description_of(lines, name='stdin'):
    """
    Return a string describing the probable encoding of a file or
    list of strings.

    :param lines: The lines to get the encoding of.
    :type lines: Iterable of bytes
    :param name: Name of file or collection of lines
    :type name: str
    """
    detector = UniversalDetector()
    for raw_line in lines:
        detector.feed(bytearray(raw_line))
        # shortcut out of the loop to save reading further - particularly
        # useful if we read a BOM.
        if detector.done:
            break
    detector.close()
    result = detector.result
    if PY2:
        name = name.decode(sys.getfilesystemencoding(), 'ignore')
    if result['encoding']:
        return '{0}: {1} with confidence {2}'.format(name, result['encoding'],
                                                     result['confidence'])
    return '{0}: no result'.format(name)


def main(argv=None):
    """
    Handles command line arguments and gets things started.

    :param argv: List of arguments, as if specified on the command-line.
                 If None, ``sys.argv[1:]`` is used instead.
    :type argv: list of str
    """
    # Get command line arguments
    parser = argparse.ArgumentParser(
        description="Takes one or more file paths and reports their detected \
                     encodings")
    parser.add_argument('input',
                        help='File whose encoding we would like to determine. \
                              (default: stdin)',
                        type=argparse.FileType('rb'), nargs='*',
                        default=[sys.stdin if PY2 else sys.stdin.buffer])
    parser.add_argument('--version', action='version',
                        version='%(prog)s {0}'.format(__version__))
    args = parser.parse_args(argv)

    for infile in args.input:
        if infile.isatty():
            print("You are running chardetect interactively. Press " +
                  "CTRL-D twice at the start of a blank line to signal the " +
                  "end of your input. If you want help, run chardetect " +
                  "--help\n", file=sys.stderr)
        print(description_of(infile, infile.name))


if __name__ == '__main__':
    main()
class UTF8Prober(CharSetProber):
    """Detects UTF-8 by running every byte through the UTF-8 state machine."""

    # Per-character weight used when folding multi-byte character counts
    # into the confidence estimate.
    ONE_CHAR_PROB = 0.5

    def __init__(self):
        super(UTF8Prober, self).__init__()
        self.coding_sm = CodingStateMachine(UTF8_SM_MODEL)
        self._num_mb_chars = None
        self.reset()

    def reset(self):
        super(UTF8Prober, self).reset()
        self.coding_sm.reset()
        self._num_mb_chars = 0

    @property
    def charset_name(self):
        return "utf-8"

    @property
    def language(self):
        # UTF-8 is not tied to any single language.
        return ""

    def feed(self, byte_str):
        for byte in byte_str:
            coding_state = self.coding_sm.next_state(byte)
            if coding_state == MachineState.ERROR:
                # Invalid UTF-8 sequence: rule this encoding out.
                self._state = ProbingState.NOT_ME
                break
            if coding_state == MachineState.ITS_ME:
                self._state = ProbingState.FOUND_IT
                break
            if coding_state == MachineState.START:
                # A full character was consumed; count it if multi-byte.
                if self.coding_sm.get_current_charlen() >= 2:
                    self._num_mb_chars += 1

        if self.state == ProbingState.DETECTING:
            if self.get_confidence() > self.SHORTCUT_THRESHOLD:
                self._state = ProbingState.FOUND_IT

        return self.state

    def get_confidence(self):
        unlike = 0.99
        if self._num_mb_chars < 6:
            # Each observed multi-byte char halves the "unlikeliness".
            unlike *= self.ONE_CHAR_PROB ** self._num_mb_chars
            return 1.0 - unlike
        return unlike
class CaseInsensitiveDict(MutableMapping):
    """A case-insensitive ``dict``-like object.

    Implements all methods and operations of
    ``MutableMapping`` as well as dict's ``copy``. Also
    provides ``lower_items``.

    All keys are expected to be strings. The structure remembers the
    case of the last key to be set, and ``iter(instance)``,
    ``keys()``, ``items()``, ``iterkeys()``, and ``iteritems()``
    will contain case-sensitive keys. However, querying and contains
    testing is case insensitive::

        cid = CaseInsensitiveDict()
        cid['Accept'] = 'application/json'
        cid['aCCEPT'] == 'application/json'  # True
        list(cid) == ['Accept']  # True

    For example, ``headers['content-encoding']`` will return the
    value of a ``'Content-Encoding'`` response header, regardless
    of how the header name was originally stored.

    If the constructor, ``.update``, or equality comparison
    operations are given keys that have equal ``.lower()``s, the
    behavior is undefined.
    """

    def __init__(self, data=None, **kwargs):
        self._store = OrderedDict()
        if data is None:
            data = {}
        self.update(data, **kwargs)

    def __setitem__(self, key, value):
        # Use the lowercased key for lookups, but store the actual
        # key alongside the value.
        self._store[key.lower()] = (key, value)

    def __getitem__(self, key):
        return self._store[key.lower()][1]

    def __delitem__(self, key):
        del self._store[key.lower()]

    def __iter__(self):
        # Yield the originally-cased keys, preserving insertion order.
        return (casedkey for casedkey, mappedvalue in self._store.values())

    def __len__(self):
        return len(self._store)

    def lower_items(self):
        """Like iteritems(), but with all lowercase keys."""
        return (
            (lowerkey, keyval[1])
            for (lowerkey, keyval)
            in self._store.items()
        )

    def __eq__(self, other):
        if isinstance(other, Mapping):
            other = CaseInsensitiveDict(other)
        else:
            return NotImplemented
        # Compare insensitively
        return dict(self.lower_items()) == dict(other.lower_items())

    # Copy is required
    def copy(self):
        return CaseInsensitiveDict(self._store.values())

    def __repr__(self):
        return str(dict(self.items()))


class LookupDict(dict):
    """Dictionary lookup object.

    Lookups go through the instance ``__dict__`` (attributes), not the
    underlying dict storage, and missing keys return ``None`` instead of
    raising ``KeyError``.
    """

    def __init__(self, name=None):
        self.name = name
        super(LookupDict, self).__init__()

    def __repr__(self):
        # BUGFIX: the repr format string had been garbled to '' (always
        # producing an empty repr); restore the upstream requests form.
        return '<lookup \'%s\'>' % (self.name)

    def __getitem__(self, key):
        # We allow fall-through here, so values default to None
        return self.__dict__.get(key, None)

    def get(self, key, default=None):
        return self.__dict__.get(key, default)
# All Rights Reserved.
#
# Contributor(s):
#   Mark Pilgrim - port to Python
#   Shy Shalom - original C code
#   Proofpoint, Inc.
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301  USA
######################### END LICENSE BLOCK #########################

from .charsetprober import CharSetProber
from .enums import ProbingState, MachineState


class MultiByteCharSetProber(CharSetProber):
    """
    MultiByteCharSetProber

    Base class for multi-byte encoding probers.  Subclasses install a
    concrete ``coding_sm`` (byte-sequence validity state machine) and
    ``distribution_analyzer`` (character frequency model) for one encoding.
    """

    def __init__(self, lang_filter=None):
        super(MultiByteCharSetProber, self).__init__(lang_filter=lang_filter)
        # Installed by subclasses before feed() is called.
        self.distribution_analyzer = None
        self.coding_sm = None
        # Two-byte window carried across feed() calls, so a character that
        # straddles two input buffers can still be analyzed.
        self._last_char = [0, 0]

    def reset(self):
        """Restart detection, resetting the state machine and analyzer."""
        super(MultiByteCharSetProber, self).reset()
        if self.coding_sm:
            self.coding_sm.reset()
        if self.distribution_analyzer:
            self.distribution_analyzer.reset()
        self._last_char = [0, 0]

    @property
    def charset_name(self):
        raise NotImplementedError

    @property
    def language(self):
        raise NotImplementedError

    def feed(self, byte_str):
        """Run *byte_str* through the state machine and frequency analyzer.

        Returns the resulting ProbingState.
        """
        for i in range(len(byte_str)):
            coding_state = self.coding_sm.next_state(byte_str[i])
            if coding_state == MachineState.ERROR:
                # Illegal byte sequence for this encoding: rule it out.
                self.logger.debug('%s %s prober hit error at byte %s',
                                  self.charset_name, self.language, i)
                self._state = ProbingState.NOT_ME
                break
            elif coding_state == MachineState.ITS_ME:
                self._state = ProbingState.FOUND_IT
                break
            elif coding_state == MachineState.START:
                # A complete character just ended; feed its final two bytes
                # to the distribution analyzer.
                char_len = self.coding_sm.get_current_charlen()
                if i == 0:
                    # Character may have begun in the previous buffer; use
                    # the saved last byte plus the first byte seen here.
                    self._last_char[1] = byte_str[0]
                    self.distribution_analyzer.feed(self._last_char, char_len)
                else:
                    self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
                                                    char_len)

        # Remember the final byte for the next feed() call.
        self._last_char[0] = byte_str[-1]

        if self.state == ProbingState.DETECTING:
            if (self.distribution_analyzer.got_enough_data() and
                    (self.get_confidence() > self.SHORTCUT_THRESHOLD)):
                self._state = ProbingState.FOUND_IT

        return self.state

    def get_confidence(self):
        """Delegate confidence to the character distribution analyzer."""
        return self.distribution_analyzer.get_confidence()
--------------------------------------------------------------------------------
/chardet/sbcsgroupprober.py:
--------------------------------------------------------------------------------
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Universal charset detector code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 2001
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
#   Mark Pilgrim - port to Python
#   Shy Shalom - original C code
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301  USA
######################### END LICENSE BLOCK #########################

from .charsetgroupprober import CharSetGroupProber
from .sbcharsetprober import SingleByteCharSetProber
from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel,
                                Latin5CyrillicModel, MacCyrillicModel,
                                Ibm866Model, Ibm855Model)
from .langgreekmodel import Latin7GreekModel, Win1253GreekModel
from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel
# from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel
from .langthaimodel import TIS620ThaiModel
from .langhebrewmodel import Win1255HebrewModel
from .hebrewprober import HebrewProber
from .langturkishmodel import Latin5TurkishModel


class SBCSGroupProber(CharSetGroupProber):
    """Group prober that races every single-byte charset model at once."""

    def __init__(self):
        super(SBCSGroupProber, self).__init__()
        self.probers = [
            SingleByteCharSetProber(Win1251CyrillicModel),
            SingleByteCharSetProber(Koi8rModel),
            SingleByteCharSetProber(Latin5CyrillicModel),
            SingleByteCharSetProber(MacCyrillicModel),
            SingleByteCharSetProber(Ibm866Model),
            SingleByteCharSetProber(Ibm855Model),
            SingleByteCharSetProber(Latin7GreekModel),
            SingleByteCharSetProber(Win1253GreekModel),
            SingleByteCharSetProber(Latin5BulgarianModel),
            SingleByteCharSetProber(Win1251BulgarianModel),
            # TODO: Restore Hungarian encodings (iso-8859-2 and windows-1250)
            #       after we retrain model.
            # SingleByteCharSetProber(Latin2HungarianModel),
            # SingleByteCharSetProber(Win1250HungarianModel),
            SingleByteCharSetProber(TIS620ThaiModel),
            SingleByteCharSetProber(Latin5TurkishModel),
        ]
        # Hebrew needs a dedicated disambiguator: logical vs. visual ordering
        # both use windows-1255, so one HebrewProber arbitrates between the
        # two SingleByteCharSetProber instances below.
        hebrew_prober = HebrewProber()
        logical_hebrew_prober = SingleByteCharSetProber(Win1255HebrewModel,
                                                        False, hebrew_prober)
        visual_hebrew_prober = SingleByteCharSetProber(Win1255HebrewModel, True,
                                                       hebrew_prober)
        hebrew_prober.set_model_probers(logical_hebrew_prober, visual_hebrew_prober)
        self.probers.extend([hebrew_prober, logical_hebrew_prober,
                             visual_hebrew_prober])

        self.reset()
--------------------------------------------------------------------------------
/requests/exceptions.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

"""
requests.exceptions
~~~~~~~~~~~~~~~~~~~

This module contains the set of Requests' exceptions.
"""
from urllib3.exceptions import HTTPError as BaseHTTPError


class RequestException(IOError):
    """There was an ambiguous exception that occurred while handling your
    request.
    """

    def __init__(self, *args, **kwargs):
        """Initialize RequestException with `request` and `response` objects."""
        response = kwargs.pop('response', None)
        self.response = response
        self.request = kwargs.pop('request', None)
        # If only a response was supplied, recover the request that produced
        # it so both ends of the exchange are reachable from the exception.
        if (response is not None and not self.request and
                hasattr(response, 'request')):
            self.request = self.response.request
        super(RequestException, self).__init__(*args, **kwargs)


class HTTPError(RequestException):
    """An HTTP error occurred."""


class ConnectionError(RequestException):
    """A Connection error occurred."""


class ProxyError(ConnectionError):
    """A proxy error occurred."""


class SSLError(ConnectionError):
    """An SSL error occurred."""


class Timeout(RequestException):
    """The request timed out.

    Catching this error will catch both
    :exc:`~requests.exceptions.ConnectTimeout` and
    :exc:`~requests.exceptions.ReadTimeout` errors.
    """


class ConnectTimeout(ConnectionError, Timeout):
    """The request timed out while trying to connect to the remote server.

    Requests that produced this error are safe to retry.
    """


class ReadTimeout(Timeout):
    """The server did not send any data in the allotted amount of time."""


class URLRequired(RequestException):
    """A valid URL is required to make a request."""


class TooManyRedirects(RequestException):
    """Too many redirects."""


class MissingSchema(RequestException, ValueError):
    """The URL schema (e.g. http or https) is missing."""


class InvalidSchema(RequestException, ValueError):
    """See defaults.py for valid schemas."""


class InvalidURL(RequestException, ValueError):
    """The URL provided was somehow invalid."""


class InvalidHeader(RequestException, ValueError):
    """The header value provided was somehow invalid."""


class InvalidProxyURL(InvalidURL):
    """The proxy URL provided is invalid."""


class ChunkedEncodingError(RequestException):
    """The server declared chunked encoding but sent an invalid chunk."""


class ContentDecodingError(RequestException, BaseHTTPError):
    """Failed to decode response content"""


class StreamConsumedError(RequestException, TypeError):
    """The content for this response was already consumed"""


class RetryError(RequestException):
    """Custom retries logic failed"""


class UnrewindableBodyError(RequestException):
    """Requests encountered an error when trying to rewind a body"""

# Warnings


class RequestsWarning(Warning):
    """Base warning for Requests."""
    pass


class FileModeWarning(RequestsWarning, DeprecationWarning):
    """A file was opened in text mode, but Requests determined its binary length."""
    pass


class RequestsDependencyWarning(RequestsWarning):
    """An imported dependency doesn't match the expected version range."""
    pass
--------------------------------------------------------------------------------
/idna/codec.py:
--------------------------------------------------------------------------------
from .core import encode, decode, alabel, ulabel, IDNAError
import codecs
import re

_unicode_dots_re = re.compile(u'[\u002e\u3002\uff0e\uff61]')

class Codec(codecs.Codec):

    def encode(self, data, errors='strict'):

        if
errors != 'strict':
            raise IDNAError("Unsupported error handling \"{0}\"".format(errors))

        if not data:
            return "", 0

        return encode(data), len(data)

    def decode(self, data, errors='strict'):

        if errors != 'strict':
            raise IDNAError("Unsupported error handling \"{0}\"".format(errors))

        if not data:
            return u"", 0

        return decode(data), len(data)

class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
    def _buffer_encode(self, data, errors, final):
        # Encode as many complete labels as possible; an unfinished trailing
        # label is held back until the next call unless *final* is true.
        if errors != 'strict':
            raise IDNAError("Unsupported error handling \"{0}\"".format(errors))

        if not data:
            return ("", 0)

        labels = _unicode_dots_re.split(data)
        trailing_dot = u''
        if labels:
            if not labels[-1]:
                trailing_dot = '.'
                del labels[-1]
            elif not final:
                # Keep potentially unfinished label until the next call
                del labels[-1]
                if labels:
                    trailing_dot = '.'

        result = []
        size = 0
        for label in labels:
            result.append(alabel(label))
            if size:
                size += 1
            # NOTE(review): size counts the pre-encoding label length, not
            # the A-label length — matches upstream idna 2.x behavior.
            size += len(label)

        # Join with U+002E
        result = ".".join(result) + trailing_dot
        size += len(trailing_dot)
        return (result, size)

class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    def _buffer_decode(self, data, errors, final):
        if errors != 'strict':
            raise IDNAError("Unsupported error handling \"{0}\"".format(errors))

        if not data:
            return (u"", 0)

        # IDNA allows decoding to operate on Unicode strings, too.
        # NOTE(review): `unicode` exists only on Python 2; on Python 3 this
        # branch raises NameError for byte input (fixed upstream in idna 3.x)
        # — confirm which interpreters this vendored copy must support.
        if isinstance(data, unicode):
            labels = _unicode_dots_re.split(data)
        else:
            # Must be ASCII string
            data = str(data)
            unicode(data, "ascii")
            labels = data.split(".")

        trailing_dot = u''
        if labels:
            if not labels[-1]:
                trailing_dot = u'.'
                del labels[-1]
            elif not final:
                # Keep potentially unfinished label until the next call
                del labels[-1]
                if labels:
                    trailing_dot = u'.'

        result = []
        size = 0
        for label in labels:
            result.append(ulabel(label))
            if size:
                size += 1
            size += len(label)

        result = u".".join(result) + trailing_dot
        size += len(trailing_dot)
        return (result, size)


class StreamWriter(Codec, codecs.StreamWriter):
    pass

class StreamReader(Codec, codecs.StreamReader):
    pass

def getregentry():
    # Registration hook for codecs.register: exposes the 'idna' codec.
    return codecs.CodecInfo(
        name='idna',
        encode=Codec().encode,
        decode=Codec().decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamwriter=StreamWriter,
        streamreader=StreamReader,
    )
--------------------------------------------------------------------------------
/chardet/codingstatemachine.py:
--------------------------------------------------------------------------------
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
#   Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301  USA
######################### END LICENSE BLOCK #########################

import logging

from .enums import MachineState


class CodingStateMachine(object):
    """
    A state machine to verify a byte sequence for a particular encoding. For
    each byte the detector receives, it will feed that byte to every active
    state machine available, one byte at a time. The state machine changes its
    state based on its previous state and the byte it receives. There are 3
    states in a state machine that are of interest to an auto-detector:

    START state: This is the state to start with, or a legal byte sequence
                 (i.e. a valid code point) for character has been identified.

    ME state:  This indicates that the state machine identified a byte sequence
               that is specific to the charset it is designed for and that
               there is no other possible encoding which can contain this byte
               sequence. This will to lead to an immediate positive answer for
               the detector.

    ERROR state: This indicates the state machine identified an illegal byte
                 sequence for that encoding. This will lead to an immediate
                 negative answer for this encoding. Detector will exclude this
                 encoding from consideration from here on.
    """
    def __init__(self, sm):
        # sm: a model dict with 'class_table', 'char_len_table',
        # 'class_factor', 'state_table', 'name' and 'language' keys.
        self._model = sm
        self._curr_byte_pos = 0
        self._curr_char_len = 0
        self._curr_state = None
        self.logger = logging.getLogger(__name__)
        self.reset()

    def reset(self):
        """Return the machine to its START state."""
        self._curr_state = MachineState.START

    def next_state(self, c):
        """Feed one byte *c* and return the machine's new state."""
        # for each byte we get its class
        # if it is first byte, we also get byte length
        byte_class = self._model['class_table'][c]
        if self._curr_state == MachineState.START:
            self._curr_byte_pos = 0
            self._curr_char_len = self._model['char_len_table'][byte_class]
        # from byte's class and state_table, we get its next state
        curr_state = (self._curr_state * self._model['class_factor']
                      + byte_class)
        self._curr_state = self._model['state_table'][curr_state]
        self._curr_byte_pos += 1
        return self._curr_state

    def get_current_charlen(self):
        # Byte length of the character currently being assembled.
        return self._curr_char_len

    def get_coding_state_machine(self):
        # Name of the encoding this machine models.
        return self._model['name']

    @property
    def language(self):
        return self._model['language']
--------------------------------------------------------------------------------
/chardet/eucjpprober.py:
--------------------------------------------------------------------------------
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
#   Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301  USA
######################### END LICENSE BLOCK #########################

from .enums import ProbingState, MachineState
from .mbcharsetprober import MultiByteCharSetProber
from .codingstatemachine import CodingStateMachine
from .chardistribution import EUCJPDistributionAnalysis
from .jpcntx import EUCJPContextAnalysis
from .mbcssm import EUCJP_SM_MODEL


class EUCJPProber(MultiByteCharSetProber):
    """EUC-JP prober: combines byte-sequence validity, character frequency,
    and Japanese context analysis."""

    def __init__(self):
        super(EUCJPProber, self).__init__()
        self.coding_sm = CodingStateMachine(EUCJP_SM_MODEL)
        self.distribution_analyzer = EUCJPDistributionAnalysis()
        self.context_analyzer = EUCJPContextAnalysis()
        self.reset()

    def reset(self):
        super(EUCJPProber, self).reset()
        self.context_analyzer.reset()

    @property
    def charset_name(self):
        return "EUC-JP"

    @property
    def language(self):
        return "Japanese"

    def feed(self, byte_str):
        """Feed bytes through the state machine, context and distribution
        analyzers; return the resulting ProbingState."""
        for i in range(len(byte_str)):
            # PY3K: byte_str is a byte array, so byte_str[i] is an int, not a byte
            coding_state = self.coding_sm.next_state(byte_str[i])
            if coding_state == MachineState.ERROR:
                self.logger.debug('%s %s prober hit error at byte %s',
                                  self.charset_name, self.language, i)
                self._state = ProbingState.NOT_ME
                break
            elif coding_state == MachineState.ITS_ME:
                self._state = ProbingState.FOUND_IT
                break
            elif coding_state == MachineState.START:
                # A character just completed; hand its last two bytes to
                # both analyzers (stitching across buffers when i == 0).
                char_len = self.coding_sm.get_current_charlen()
                if i == 0:
                    self._last_char[1] = byte_str[0]
                    self.context_analyzer.feed(self._last_char, char_len)
                    self.distribution_analyzer.feed(self._last_char, char_len)
                else:
                    self.context_analyzer.feed(byte_str[i - 1:i + 1],
                                               char_len)
                    self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
                                                    char_len)

        self._last_char[0] = byte_str[-1]

        if self.state == ProbingState.DETECTING:
            if (self.context_analyzer.got_enough_data() and
                    (self.get_confidence() > self.SHORTCUT_THRESHOLD)):
                self._state = ProbingState.FOUND_IT

        return self.state

    def get_confidence(self):
        # The stronger of the two signals wins.
        context_conf = self.context_analyzer.get_confidence()
        distrib_conf = self.distribution_analyzer.get_confidence()
        return max(context_conf, distrib_conf)
--------------------------------------------------------------------------------
/requests/help.py:
--------------------------------------------------------------------------------
"""Module containing bug report helper(s)."""
from __future__ import print_function

import json
import platform
import sys
import ssl

import idna
import urllib3
import chardet

from . import __version__ as requests_version

try:
    from urllib3.contrib import pyopenssl
except ImportError:
    pyopenssl = None
    OpenSSL = None
    cryptography = None
else:
    import OpenSSL
    import cryptography


def _implementation():
    """Return a dict with the Python implementation and version.

    Provide both the name and the version of the Python implementation
    currently running. For example, on CPython 2.7.5 it will return
    {'name': 'CPython', 'version': '2.7.5'}.

    This function works best on CPython and PyPy: in particular, it probably
    doesn't work for Jython or IronPython.
    Future investigation should be done
    to work out the correct shape of the code for those platforms.
    """
    implementation = platform.python_implementation()

    if implementation == 'CPython':
        implementation_version = platform.python_version()
    elif implementation == 'PyPy':
        # PyPy reports its own version separately from the Python level.
        implementation_version = '%s.%s.%s' % (sys.pypy_version_info.major,
                                               sys.pypy_version_info.minor,
                                               sys.pypy_version_info.micro)
        if sys.pypy_version_info.releaselevel != 'final':
            implementation_version = ''.join([
                implementation_version, sys.pypy_version_info.releaselevel
            ])
    elif implementation == 'Jython':
        implementation_version = platform.python_version()  # Complete Guess
    elif implementation == 'IronPython':
        implementation_version = platform.python_version()  # Complete Guess
    else:
        implementation_version = 'Unknown'

    return {'name': implementation, 'version': implementation_version}


def info():
    """Generate information for a bug report."""
    try:
        platform_info = {
            'system': platform.system(),
            'release': platform.release(),
        }
    except IOError:
        platform_info = {
            'system': 'Unknown',
            'release': 'Unknown',
        }

    implementation_info = _implementation()
    urllib3_info = {'version': urllib3.__version__}
    chardet_info = {'version': chardet.__version__}

    pyopenssl_info = {
        'version': None,
        'openssl_version': '',
    }
    if OpenSSL:
        pyopenssl_info = {
            'version': OpenSSL.__version__,
            'openssl_version': '%x' % OpenSSL.SSL.OPENSSL_VERSION_NUMBER,
        }
    cryptography_info = {
        'version': getattr(cryptography, '__version__', ''),
    }
    idna_info = {
        'version': getattr(idna, '__version__', ''),
    }

    system_ssl = ssl.OPENSSL_VERSION_NUMBER
    system_ssl_info = {
        'version': '%x' % system_ssl if system_ssl is not None else ''
    }

    return {
        'platform': platform_info,
        'implementation': implementation_info,
        'system_ssl': system_ssl_info,
        'using_pyopenssl': pyopenssl is not None,
        'pyOpenSSL': pyopenssl_info,
        'urllib3': urllib3_info,
        'chardet': chardet_info,
        'cryptography': cryptography_info,
        'idna': idna_info,
        'requests': {
            'version': requests_version,
        },
    }


def main():
    """Pretty-print the bug information as JSON."""
    print(json.dumps(info(), sort_keys=True, indent=2))


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/chardet/sjisprober.py:
--------------------------------------------------------------------------------
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
#   Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301  USA
######################### END LICENSE BLOCK #########################

from .mbcharsetprober import MultiByteCharSetProber
from .codingstatemachine import CodingStateMachine
from .chardistribution import SJISDistributionAnalysis
from .jpcntx import SJISContextAnalysis
from .mbcssm import SJIS_SM_MODEL
from .enums import ProbingState, MachineState


class SJISProber(MultiByteCharSetProber):
    """Shift_JIS prober: byte-sequence validity plus Japanese context and
    character-distribution analysis."""

    def __init__(self):
        super(SJISProber, self).__init__()
        self.coding_sm = CodingStateMachine(SJIS_SM_MODEL)
        self.distribution_analyzer = SJISDistributionAnalysis()
        self.context_analyzer = SJISContextAnalysis()
        self.reset()

    def reset(self):
        super(SJISProber, self).reset()
        self.context_analyzer.reset()

    @property
    def charset_name(self):
        # The context analyzer distinguishes SHIFT_JIS from CP932 variants.
        return self.context_analyzer.charset_name

    @property
    def language(self):
        return "Japanese"

    def feed(self, byte_str):
        """Feed bytes through the state machine and both analyzers."""
        for i in range(len(byte_str)):
            coding_state = self.coding_sm.next_state(byte_str[i])
            if coding_state == MachineState.ERROR:
                self.logger.debug('%s %s prober hit error at byte %s',
                                  self.charset_name, self.language, i)
                self._state = ProbingState.NOT_ME
                break
            elif coding_state == MachineState.ITS_ME:
                self._state = ProbingState.FOUND_IT
                break
            elif coding_state == MachineState.START:
                char_len = self.coding_sm.get_current_charlen()
                if i == 0:
                    # Character may straddle the buffer boundary: the context
                    # analyzer gets only the bytes belonging to this char.
                    self._last_char[1] = byte_str[0]
                    self.context_analyzer.feed(self._last_char[2 - char_len:],
                                               char_len)
                    self.distribution_analyzer.feed(self._last_char, char_len)
                else:
                    # Context analyzer wants the first byte(s) of the char;
                    # distribution analyzer wants its final two bytes.
                    self.context_analyzer.feed(byte_str[i + 1 - char_len:i + 3
                                                        - char_len], char_len)
                    self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
                                                    char_len)

        self._last_char[0] = byte_str[-1]

        if self.state == ProbingState.DETECTING:
            if (self.context_analyzer.got_enough_data() and
                    (self.get_confidence() > self.SHORTCUT_THRESHOLD)):
                self._state = ProbingState.FOUND_IT

        return self.state

    def get_confidence(self):
        # The stronger of the two signals wins.
        context_conf = self.context_analyzer.get_confidence()
        distrib_conf = self.distribution_analyzer.get_confidence()
        return max(context_conf, distrib_conf)
--------------------------------------------------------------------------------
/chardet/charsetgroupprober.py:
--------------------------------------------------------------------------------
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Communicator client code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
#   Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301  USA
######################### END LICENSE BLOCK #########################

from .enums import ProbingState
from .charsetprober import CharSetProber


class CharSetGroupProber(CharSetProber):
    """Prober that fans input out to a list of child probers and reports
    the best-scoring one."""

    def __init__(self, lang_filter=None):
        super(CharSetGroupProber, self).__init__(lang_filter=lang_filter)
        # Number of child probers still in the running.
        self._active_num = 0
        self.probers = []
        self._best_guess_prober = None

    def reset(self):
        """Reset every child prober and reactivate all of them."""
        super(CharSetGroupProber, self).reset()
        self._active_num = 0
        for prober in self.probers:
            if prober:
                prober.reset()
                prober.active = True
                self._active_num += 1
        self._best_guess_prober = None

    @property
    def charset_name(self):
        # get_confidence() populates _best_guess_prober as a side effect.
        if not self._best_guess_prober:
            self.get_confidence()
            if not self._best_guess_prober:
                return None
        return self._best_guess_prober.charset_name

    @property
    def language(self):
        if not self._best_guess_prober:
            self.get_confidence()
            if not self._best_guess_prober:
                return None
        return self._best_guess_prober.language

    def feed(self, byte_str):
        """Feed *byte_str* to every active child, deactivating losers and
        short-circuiting when one child is certain."""
        for prober in self.probers:
            if not prober:
                continue
            if not prober.active:
                continue
            state = prober.feed(byte_str)
            if not state:
                continue
            if state == ProbingState.FOUND_IT:
                self._best_guess_prober = prober
                return self.state
            elif state == ProbingState.NOT_ME:
                prober.active = False
                self._active_num -= 1
                # When every child has ruled itself out, the group is out too.
                if self._active_num <= 0:
                    self._state = ProbingState.NOT_ME
                    return self.state
        return self.state

    def get_confidence(self):
        """Return the best child's confidence, remembering which child won."""
        state = self.state
        if state == ProbingState.FOUND_IT:
            return 0.99
        elif state == ProbingState.NOT_ME:
            return 0.01
        best_conf = 0.0
        self._best_guess_prober = None
        for prober in self.probers:
            if not prober:
                continue
            if not prober.active:
                self.logger.debug('%s not active', prober.charset_name)
                continue
            conf = prober.get_confidence()
            self.logger.debug('%s %s confidence = %s', prober.charset_name, prober.language, conf)
            if best_conf < conf:
                best_conf = conf
                self._best_guess_prober = prober
        if not self._best_guess_prober:
            return 0.0
        return best_conf
--------------------------------------------------------------------------------
/chardet/escprober.py:
--------------------------------------------------------------------------------
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
#   Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
class EscCharSetProber(CharSetProber):
    """
    Detects encodings via their distinctive escape/shift sequences ("code
    scheme" approach): each candidate encoding gets a small state machine,
    and the first machine to recognize its signature wins outright.
    """

    def __init__(self, lang_filter=None):
        super(EscCharSetProber, self).__init__(lang_filter=lang_filter)
        self.coding_sm = []
        # Only instantiate the state machines the language filter asks for.
        if self.lang_filter & LanguageFilter.CHINESE_SIMPLIFIED:
            self.coding_sm.append(CodingStateMachine(HZ_SM_MODEL))
            self.coding_sm.append(CodingStateMachine(ISO2022CN_SM_MODEL))
        if self.lang_filter & LanguageFilter.JAPANESE:
            self.coding_sm.append(CodingStateMachine(ISO2022JP_SM_MODEL))
        if self.lang_filter & LanguageFilter.KOREAN:
            self.coding_sm.append(CodingStateMachine(ISO2022KR_SM_MODEL))
        self.active_sm_count = None
        self._detected_charset = None
        self._detected_language = None
        self._state = None
        self.reset()

    def reset(self):
        """Reactivate every state machine and forget any prior detection."""
        super(EscCharSetProber, self).reset()
        for machine in self.coding_sm:
            if not machine:
                continue
            machine.active = True
            machine.reset()
        self.active_sm_count = len(self.coding_sm)
        self._detected_charset = None
        self._detected_language = None

    @property
    def charset_name(self):
        return self._detected_charset

    @property
    def language(self):
        return self._detected_language

    def get_confidence(self):
        # Escape-sequence matches are essentially unambiguous, hence 0.99.
        return 0.99 if self._detected_charset else 0.00

    def feed(self, byte_str):
        """Run each byte through every active machine.

        A machine reaching ERROR is deactivated (NOT_ME once all die); a
        machine reaching ITS_ME decides the charset immediately.
        """
        for byte in byte_str:
            for machine in self.coding_sm:
                if not machine or not machine.active:
                    continue
                coding_state = machine.next_state(byte)
                if coding_state == MachineState.ERROR:
                    machine.active = False
                    self.active_sm_count -= 1
                    if self.active_sm_count <= 0:
                        self._state = ProbingState.NOT_ME
                        return self.state
                elif coding_state == MachineState.ITS_ME:
                    self._state = ProbingState.FOUND_IT
                    self._detected_charset = machine.get_coding_state_machine()
                    self._detected_language = machine.language
                    return self.state

        return self.state
def make_headers(
    keep_alive=None,
    accept_encoding=None,
    user_agent=None,
    basic_auth=None,
    proxy_basic_auth=None,
    disable_cache=None,
):
    """
    Shortcuts for generating request headers.

    :param keep_alive:
        If ``True``, adds 'connection: keep-alive' header.

    :param accept_encoding:
        Can be a boolean, list, or string.
        ``True`` translates to 'gzip,deflate'.
        List will get joined by comma.
        String will be used as provided.

    :param user_agent:
        String representing the user-agent you want, such as
        "python-urllib3/0.6"

    :param basic_auth:
        Colon-separated username:password string for 'authorization: basic ...'
        auth header.

    :param proxy_basic_auth:
        Colon-separated username:password string for 'proxy-authorization: basic ...'
        auth header.

    :param disable_cache:
        If ``True``, adds 'cache-control: no-cache' header.

    Example::

        >>> make_headers(keep_alive=True, user_agent="Batman/1.0")
        {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'}
        >>> make_headers(accept_encoding=True)
        {'accept-encoding': 'gzip,deflate'}
    """
    headers = {}

    if keep_alive:
        headers["connection"] = "keep-alive"

    if accept_encoding:
        # Normalize the three accepted forms (str / list / truthy flag).
        if isinstance(accept_encoding, list):
            encoding_value = ",".join(accept_encoding)
        elif isinstance(accept_encoding, str):
            encoding_value = accept_encoding
        else:
            encoding_value = ACCEPT_ENCODING
        headers["accept-encoding"] = encoding_value

    if user_agent:
        headers["user-agent"] = user_agent

    if basic_auth:
        headers["authorization"] = "Basic " + b64encode(b(basic_auth)).decode("utf-8")

    if proxy_basic_auth:
        headers["proxy-authorization"] = "Basic " + b64encode(
            b(proxy_basic_auth)
        ).decode("utf-8")

    if disable_cache:
        headers["cache-control"] = "no-cache"

    return headers
def set_file_position(body, pos):
    """
    If a position is provided, move file to that point.
    Otherwise, we'll attempt to record a position for future use.
    """
    if pos is not None:
        rewind_body(body, pos)
        return pos

    if getattr(body, "tell", None) is not None:
        try:
            return body.tell()
        except (IOError, OSError):
            # This differentiates from None, allowing us to catch
            # a failed `tell()` later when trying to rewind the body.
            return _FAILEDTELL

    # No position given and no way to record one.
    return pos


def rewind_body(body, body_pos):
    """
    Attempt to rewind body to a certain position.
    Primarily used for request redirects and retries.

    :param body:
        File-like object that supports seek.

    :param int pos:
        Position to seek to in file.
    """
    seek = getattr(body, "seek", None)
    if seek is not None and isinstance(body_pos, integer_types):
        try:
            seek(body_pos)
        except (IOError, OSError):
            raise UnrewindableBodyError(
                "An error occurred when rewinding request body for redirect/retry."
            )
        return

    if body_pos is _FAILEDTELL:
        raise UnrewindableBodyError(
            "Unable to record file position for rewinding "
            "request body during a redirect/retry."
        )

    raise ValueError(
        "body_pos must be of type integer, instead it was %s." % type(body_pos)
    )
def check_compatibility(urllib3_version, chardet_version):
    """Assert that the imported urllib3 and chardet versions are supported.

    Raises AssertionError (or ValueError on unparsable version strings);
    the module-level caller converts either into a
    ``RequestsDependencyWarning``.
    """
    urllib3_version = urllib3_version.split('.')
    assert urllib3_version != ['dev']  # Verify urllib3 isn't installed from git.

    # Sometimes, urllib3 only reports its version as 16.1.
    if len(urllib3_version) == 2:
        urllib3_version.append('0')

    # Check urllib3 for compatibility.
    major, minor, patch = urllib3_version  # noqa: F811
    major, minor, patch = int(major), int(minor), int(patch)
    # urllib3 >= 1.21.1, <= 1.25
    assert major == 1
    assert minor >= 21
    assert minor <= 25

    # Check chardet for compatibility.
    major, minor, patch = chardet_version.split('.')[:3]
    major, minor, patch = int(major), int(minor), int(patch)
    # chardet >= 3.0.2, < 3.1.0
    assert major == 3
    assert minor < 1
    assert patch >= 2


def _check_cryptography(cryptography_version):
    """Warn when an old (< 1.3.4) cryptography package is installed.

    Bug fix: the original rebound ``cryptography_version`` to the parsed
    list of ints before formatting the warning, so the user-facing message
    showed e.g. "[1, 2, 3]" instead of "1.2.3". Keep the original string
    for display and parse into a separate variable.
    """
    try:
        parsed_version = list(map(int, cryptography_version.split('.')))
    except ValueError:
        # Non-numeric component (e.g. a dev build): skip the check entirely.
        return

    if parsed_version < [1, 3, 4]:
        warning = 'Old version of cryptography ({}) may cause slowdown.'.format(cryptography_version)
        warnings.warn(warning, RequestsDependencyWarning)
# Check imported dependencies for compatibility; an unsupported combination
# produces a warning rather than a hard failure.
try:
    check_compatibility(urllib3.__version__, chardet.__version__)
except (AssertionError, ValueError):
    warnings.warn("urllib3 ({}) or chardet ({}) doesn't match a supported "
                  "version!".format(urllib3.__version__, chardet.__version__),
                  RequestsDependencyWarning)

# Attempt to enable urllib3's SNI support, if possible.
# ImportError means pyopenssl/cryptography are absent, which is fine.
try:
    from urllib3.contrib import pyopenssl
    pyopenssl.inject_into_urllib3()

    # Check cryptography version
    from cryptography import __version__ as cryptography_version
    _check_cryptography(cryptography_version)
except ImportError:
    pass

# urllib3's DependencyWarnings should be silenced.
from urllib3.exceptions import DependencyWarning
warnings.simplefilter('ignore', DependencyWarning)

# Re-export package metadata and the public API surface.
from .__version__ import __title__, __description__, __url__, __version__
from .__version__ import __build__, __author__, __author_email__, __license__
from .__version__ import __copyright__, __cake__

from . import utils
from . import packages
from .models import Request, Response, PreparedRequest
from .api import request, get, head, post, patch, put, delete, options
from .sessions import session, Session
from .status_codes import codes
from .exceptions import (
    RequestException, Timeout, URLRequired,
    TooManyRedirects, HTTPError, ConnectionError,
    FileModeWarning, ConnectTimeout, ReadTimeout
)

# Set default logging handler to avoid "No handler found" warnings.
import logging
from logging import NullHandler

logging.getLogger(__name__).addHandler(NullHandler())

# FileModeWarnings go off per the default.
class NTLMConnectionPool(HTTPSConnectionPool):
    """
    Implements an NTLM authentication version of an urllib3 connection pool
    """

    scheme = "https"

    def __init__(self, user, pw, authurl, *args, **kwargs):
        """
        authurl is a random URL on the server that is protected by NTLM.
        user is the Windows user, probably in the DOMAIN\\username format.
        pw is the password for the user.
        """
        super(NTLMConnectionPool, self).__init__(*args, **kwargs)
        self.authurl = authurl
        self.rawuser = user
        # Split "DOMAIN\\user" once; domain is upper-cased per NTLM convention.
        user_parts = user.split("\\", 1)
        self.domain = user_parts[0].upper()
        self.user = user_parts[1]
        self.pw = pw

    def _new_conn(self):
        # Performs the NTLM handshake that secures the connection. The socket
        # must be kept open while requests are performed.
        # Three-step exchange: negotiate -> server challenge -> authenticate.
        self.num_connections += 1
        log.debug(
            "Starting NTLM HTTPS connection no. %d: https://%s%s",
            self.num_connections,
            self.host,
            self.authurl,
        )

        headers = {"Connection": "Keep-Alive"}
        req_header = "Authorization"
        # NOTE(review): assumes getheaders() yields this key in lowercase —
        # verify against the http client implementation in use.
        resp_header = "www-authenticate"

        conn = HTTPSConnection(host=self.host, port=self.port)

        # Send negotiation message
        headers[req_header] = "NTLM %s" % ntlm.create_NTLM_NEGOTIATE_MESSAGE(
            self.rawuser
        )
        log.debug("Request headers: %s", headers)
        conn.request("GET", self.authurl, None, headers)
        res = conn.getresponse()
        reshdr = dict(res.getheaders())
        log.debug("Response status: %s %s", res.status, res.reason)
        log.debug("Response headers: %s", reshdr)
        log.debug("Response data: %s [...]", res.read(100))

        # Remove the reference to the socket, so that it can not be closed by
        # the response object (we want to keep the socket open)
        res.fp = None

        # Server should respond with a challenge message
        auth_header_values = reshdr[resp_header].split(", ")
        auth_header_value = None
        # Last matching "NTLM ..." value wins if several are present.
        for s in auth_header_values:
            if s[:5] == "NTLM ":
                auth_header_value = s[5:]
        if auth_header_value is None:
            raise Exception(
                "Unexpected %s response header: %s" % (resp_header, reshdr[resp_header])
            )

        # Send authentication message
        ServerChallenge, NegotiateFlags = ntlm.parse_NTLM_CHALLENGE_MESSAGE(
            auth_header_value
        )
        auth_msg = ntlm.create_NTLM_AUTHENTICATE_MESSAGE(
            ServerChallenge, self.user, self.domain, self.pw, NegotiateFlags
        )
        headers[req_header] = "NTLM %s" % auth_msg
        log.debug("Request headers: %s", headers)
        conn.request("GET", self.authurl, None, headers)
        res = conn.getresponse()
        log.debug("Response status: %s %s", res.status, res.reason)
        log.debug("Response headers: %s", dict(res.getheaders()))
        log.debug("Response data: %s [...]", res.read()[:100])
        if res.status != 200:
            if res.status == 401:
                raise Exception("Server rejected request: wrong username or password")
            raise Exception("Wrong server response: %s %s" % (res.status, res.reason))

        # Again keep the underlying socket alive past this response object.
        res.fp = None
        log.debug("Connection established")
        return conn

    def urlopen(
        self,
        method,
        url,
        body=None,
        headers=None,
        retries=3,
        redirect=True,
        assert_same_host=True,
    ):
        # Force Keep-Alive on every request: the NTLM-authenticated state is
        # bound to the open socket, so the connection must not be recycled.
        if headers is None:
            headers = {}
        headers["Connection"] = "Keep-Alive"
        return super(NTLMConnectionPool, self).urlopen(
            method, url, body, headers, retries, redirect, assert_same_host
        )
# Mapping of HTTP status codes to their symbolic names; each code may carry
# several aliases (first alias is the canonical one used in docs).
from .structures import LookupDict

_codes = {

    # Informational.
    100: ('continue',),
    101: ('switching_protocols',),
    102: ('processing',),
    103: ('checkpoint',),
    122: ('uri_too_long', 'request_uri_too_long'),
    200: ('ok', 'okay', 'all_ok', 'all_okay', 'all_good', '\\o/', '✓'),
    201: ('created',),
    202: ('accepted',),
    203: ('non_authoritative_info', 'non_authoritative_information'),
    204: ('no_content',),
    205: ('reset_content', 'reset'),
    206: ('partial_content', 'partial'),
    207: ('multi_status', 'multiple_status', 'multi_stati', 'multiple_stati'),
    208: ('already_reported',),
    226: ('im_used',),

    # Redirection.
    300: ('multiple_choices',),
    301: ('moved_permanently', 'moved', '\\o-'),
    302: ('found',),
    303: ('see_other', 'other'),
    304: ('not_modified',),
    305: ('use_proxy',),
    306: ('switch_proxy',),
    307: ('temporary_redirect', 'temporary_moved', 'temporary'),
    308: ('permanent_redirect',
          'resume_incomplete', 'resume',),  # These 2 to be removed in 3.0

    # Client Error.
    400: ('bad_request', 'bad'),
    401: ('unauthorized',),
    402: ('payment_required', 'payment'),
    403: ('forbidden',),
    404: ('not_found', '-o-'),
    405: ('method_not_allowed', 'not_allowed'),
    406: ('not_acceptable',),
    407: ('proxy_authentication_required', 'proxy_auth', 'proxy_authentication'),
    408: ('request_timeout', 'timeout'),
    409: ('conflict',),
    410: ('gone',),
    411: ('length_required',),
    412: ('precondition_failed', 'precondition'),
    413: ('request_entity_too_large',),
    414: ('request_uri_too_large',),
    415: ('unsupported_media_type', 'unsupported_media', 'media_type'),
    416: ('requested_range_not_satisfiable', 'requested_range', 'range_not_satisfiable'),
    417: ('expectation_failed',),
    418: ('im_a_teapot', 'teapot', 'i_am_a_teapot'),
    421: ('misdirected_request',),
    422: ('unprocessable_entity', 'unprocessable'),
    423: ('locked',),
    424: ('failed_dependency', 'dependency'),
    425: ('unordered_collection', 'unordered'),
    426: ('upgrade_required', 'upgrade'),
    428: ('precondition_required', 'precondition'),
    429: ('too_many_requests', 'too_many'),
    431: ('header_fields_too_large', 'fields_too_large'),
    444: ('no_response', 'none'),
    449: ('retry_with', 'retry'),
    450: ('blocked_by_windows_parental_controls', 'parental_controls'),
    451: ('unavailable_for_legal_reasons', 'legal_reasons'),
    499: ('client_closed_request',),

    # Server Error.
    500: ('internal_server_error', 'server_error', '/o\\', '✗'),
    501: ('not_implemented',),
    502: ('bad_gateway',),
    503: ('service_unavailable', 'unavailable'),
    504: ('gateway_timeout',),
    505: ('http_version_not_supported', 'http_version'),
    506: ('variant_also_negotiates',),
    507: ('insufficient_storage',),
    509: ('bandwidth_limit_exceeded', 'bandwidth'),
    510: ('not_extended',),
    511: ('network_authentication_required', 'network_auth', 'network_authentication'),
}

codes = LookupDict(name='status_codes')


def _init():
    # Expose every alias (plus an upper-case twin, except for the
    # punctuation-art aliases) as an attribute on ``codes``.
    for code, aliases in _codes.items():
        for alias in aliases:
            setattr(codes, alias, code)
            if not alias.startswith(('\\', '/')):
                setattr(codes, alias.upper(), code)

    def doc(code):
        names = ', '.join('``%s``' % n for n in _codes[code])
        return '* %d: %s' % (code, names)

    # Append a generated code listing to the module docstring (unless the
    # interpreter stripped docstrings, i.e. __doc__ is None).
    global __doc__
    __doc__ = (__doc__ + '\n' +
               '\n'.join(doc(code) for code in sorted(_codes))
               if __doc__ is not None else None)


_init()
def is_connection_dropped(conn):  # Platform-specific
    """
    Returns True if the connection is dropped and should be closed.

    :param conn:
        :class:`httplib.HTTPConnection` object.

    Note: For platforms like AppEngine, this will always return ``False`` to
    let the platform handle connection recycling transparently for us.
    """
    sock = getattr(conn, "sock", False)
    if sock is False:  # Platform-specific: AppEngine
        return False
    if sock is None:  # Connection already closed (such as by httplib).
        return True
    try:
        # Readable with a zero timeout here means the peer hung up.
        return wait_for_read(sock, timeout=0.0)
    except NoWayToWaitForSocketError:  # Platform-specific: AppEngine
        return False


# This function is copied from socket.py in the Python 2.7 standard
# library test suite. Added to its signature is only `socket_options`.
# One additional modification is that we avoid binding to IPv6 servers
# discovered in DNS if the system doesn't have IPv6 functionality.
def create_connection(
    address,
    timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
    source_address=None,
    socket_options=None,
):
    """Connect to *address* and return the socket object.

    Tries every address returned by ``getaddrinfo`` in order, returning the
    first socket that connects; re-raises the last error if all fail.
    """
    host, port = address
    # Strip brackets from an IPv6 literal such as "[::1]".
    if host.startswith("["):
        host = host.strip("[]")
    last_error = None

    # allowed_gai_family() restricts the lookup to IPv4 when the system has
    # no usable IPv6 support.
    family = allowed_gai_family()

    for info in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
        af, socktype, proto, canonname, sa = info
        candidate = None
        try:
            candidate = socket.socket(af, socktype, proto)

            # If provided, set socket level options before connecting.
            _set_socket_options(candidate, socket_options)

            if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                candidate.settimeout(timeout)
            if source_address:
                candidate.bind(source_address)
            candidate.connect(sa)
            return candidate

        except socket.error as exc:
            last_error = exc
            if candidate is not None:
                candidate.close()
                candidate = None

    if last_error is not None:
        raise last_error

    raise socket.error("getaddrinfo returns an empty list")


def _set_socket_options(sock, options):
    """Apply each ``(level, optname, value)`` tuple via setsockopt."""
    if options is None:
        return
    for opt in options:
        sock.setsockopt(*opt)


def allowed_gai_family():
    """This function is designed to work in the context of
    getaddrinfo, where family=socket.AF_UNSPEC is the default and
    will perform a DNS search for both IPv6 and IPv4 records."""
    return socket.AF_UNSPEC if HAS_IPV6 else socket.AF_INET


def _has_ipv6(host):
    """ Returns True if the system can bind an IPv6 address. """
    probe = None
    result = False

    # App Engine doesn't support IPV6 sockets and actually has a quota on the
    # number of sockets that can be used, so just early out here instead of
    # creating a socket needlessly.
    # See https://github.com/urllib3/urllib3/issues/1446
    if _appengine_environ.is_appengine_sandbox():
        return False

    if socket.has_ipv6:
        # has_ipv6 returns true if cPython was compiled with IPv6 support.
        # It does not tell us if the system has IPv6 support enabled. To
        # determine that we must bind to an IPv6 address.
        # https://github.com/urllib3/urllib3/pull/611
        # https://bugs.python.org/issue658327
        try:
            probe = socket.socket(socket.AF_INET6)
            probe.bind((host, 0))
            result = True
        except Exception:
            pass

    if probe:
        probe.close()
    return result


HAS_IPV6 = _has_ipv6("::1")
class CharSetProber(object):
    """Abstract base for all charset probers.

    Concrete probers implement :meth:`feed` and :meth:`get_confidence`;
    this base also provides shared byte-filtering helpers.
    """

    # A prober whose confidence exceeds this may stop probing early.
    SHORTCUT_THRESHOLD = 0.95

    def __init__(self, lang_filter=None):
        self._state = None
        self.lang_filter = lang_filter
        self.logger = logging.getLogger(__name__)

    def reset(self):
        """Return the prober to its initial DETECTING state."""
        self._state = ProbingState.DETECTING

    @property
    def charset_name(self):
        return None

    def feed(self, buf):
        pass

    @property
    def state(self):
        return self._state

    def get_confidence(self):
        return 0.0

    @staticmethod
    def filter_high_byte_only(buf):
        """Collapse every run of ASCII bytes into a single space."""
        return re.sub(b'([\x00-\x7F])+', b' ', buf)

    @staticmethod
    def filter_international_words(buf):
        """
        We define three types of bytes:
        alphabet: english alphabets [a-zA-Z]
        international: international characters [\x80-\xFF]
        marker: everything else [^a-zA-Z\x80-\xFF]

        The input buffer can be thought to contain a series of words delimited
        by markers. This function works to filter all words that contain at
        least one international character. All contiguous sequences of markers
        are replaced by a single space ascii character.

        This filter applies to all scripts which do not use English characters.
        """
        filtered = bytearray()

        # Keep only words containing at least one international byte; each
        # may drag along a single trailing marker byte.
        for word in re.findall(
                b'[a-zA-Z]*[\x80-\xFF]+[a-zA-Z]*[^a-zA-Z\x80-\xFF]?', buf):
            filtered.extend(word[:-1])
            # Normalize a trailing marker to a space: markers behave alike
            # across languages and must not bias the analysis.
            tail = word[-1:]
            if not tail.isalpha() and tail < b'\x80':
                tail = b' '
            filtered.extend(tail)

        return filtered

    @staticmethod
    def filter_with_english_letters(buf):
        """
        Returns a copy of ``buf`` that retains only the sequences of English
        alphabet and high byte characters that are not between <> characters.
        Also retains English alphabet and high byte characters immediately
        before occurrences of >.

        This filter can be applied to all scripts which contain both English
        characters and extended ASCII characters, but is currently only used by
        ``Latin1Prober``.
        """
        filtered = bytearray()
        in_tag = False
        start = 0

        for idx in range(len(buf)):
            # Slice to get a bytes object (an int under Python 3 otherwise).
            char = buf[idx:idx + 1]
            # Track whether we are inside an HTML-like <...> tag.
            if char == b'>':
                in_tag = False
            elif char == b'<':
                in_tag = True

            # A plain-ASCII non-letter ends the current kept stretch.
            if char < b'\x80' and not char.isalpha():
                if idx > start and not in_tag:
                    # Keep the stretch and delimit it with a space.
                    filtered.extend(buf[start:idx])
                    filtered.extend(b' ')
                start = idx + 1

        # Keep any trailing stretch, unless it sits inside an open tag.
        if not in_tag:
            filtered.extend(buf[start:])

        return filtered
from .charsetprober import CharSetProber
from .enums import ProbingState

# Number of frequency buckets tracked for character-pair likelihoods.
FREQ_CAT_NUM = 4

# Character classes assigned to each Latin-1 byte value.
UDF = 0  # undefined
OTH = 1  # other
ASC = 2  # ascii capital letter
ASS = 3  # ascii small letter
ACV = 4  # accent capital vowel
ACO = 5  # accent capital other
ASV = 6  # accent small vowel
ASO = 7  # accent small other
CLASS_NUM = 8  # total classes

Latin1_CharToClass = (
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 00 - 07
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 08 - 0F
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 10 - 17
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 18 - 1F
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 20 - 27
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 28 - 2F
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 30 - 37
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 38 - 3F
    OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC,  # 40 - 47
    ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,  # 48 - 4F
    ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,  # 50 - 57
    ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH,  # 58 - 5F
    OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS,  # 60 - 67
    ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS,  # 68 - 6F
    ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS,  # 70 - 77
    ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH,  # 78 - 7F
    OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH,  # 80 - 87
    OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF,  # 88 - 8F
    UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 90 - 97
    OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO,  # 98 - 9F
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # A0 - A7
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # A8 - AF
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # B0 - B7
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # B8 - BF
    ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO,  # C0 - C7
    ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV,  # C8 - CF
    ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH,  # D0 - D7
    ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO,  # D8 - DF
    ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO,  # E0 - E7
    ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV,  # E8 - EF
    ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH,  # F0 - F7
    ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO,  # F8 - FF
)

# 0 : illegal
# 1 : very unlikely
# 2 : normal
# 3 : very likely
Latin1ClassModel = (
    # UDF OTH ASC ASS ACV ACO ASV ASO
    0, 0, 0, 0, 0, 0, 0, 0,  # UDF
    0, 3, 3, 3, 3, 3, 3, 3,  # OTH
    0, 3, 3, 3, 3, 3, 3, 3,  # ASC
    0, 3, 3, 3, 1, 1, 3, 3,  # ASS
    0, 3, 3, 3, 1, 2, 1, 2,  # ACV
    0, 3, 3, 3, 3, 3, 3, 3,  # ACO
    0, 3, 1, 3, 1, 1, 1, 3,  # ASV
    0, 3, 1, 3, 1, 1, 3, 3,  # ASO
)


class Latin1Prober(CharSetProber):
    """Scores how plausibly a byte stream is ISO-8859-1 (Latin-1) text."""

    def __init__(self):
        super(Latin1Prober, self).__init__()
        self._last_char_class = None
        self._freq_counter = None
        self.reset()

    def reset(self):
        # Start every pair lookup from the neutral "other" class.
        self._last_char_class = OTH
        self._freq_counter = [0] * FREQ_CAT_NUM
        CharSetProber.reset(self)

    @property
    def charset_name(self):
        return "ISO-8859-1"

    @property
    def language(self):
        return ""

    def feed(self, byte_str):
        """Score character pairs in ``byte_str``; return the probing state."""
        filtered = self.filter_with_english_letters(byte_str)
        for byte in filtered:
            current_class = Latin1_CharToClass[byte]
            likelihood = Latin1ClassModel[self._last_char_class * CLASS_NUM
                                          + current_class]
            if likelihood == 0:
                # An illegal character pair rules Latin-1 out entirely.
                self._state = ProbingState.NOT_ME
                break
            self._freq_counter[likelihood] += 1
            self._last_char_class = current_class

        return self.state

    def get_confidence(self):
        if self.state == ProbingState.NOT_ME:
            return 0.01

        total = sum(self._freq_counter)
        if total < 0.01:
            return 0.0
        confidence = max(
            (self._freq_counter[3] - self._freq_counter[1] * 20.0) / total,
            0.0,
        )
        # Lower the confidence of latin1 so that other, more accurate
        # detectors can take priority.
        return confidence * 0.73
if sys.version_info >= (3, 5):
    # Modern Python retries syscalls interrupted by signals itself
    # (PEP 475), so no extra wrapper logic is needed.
    def _retry_on_intr(fn, timeout):
        return fn(timeout)


else:
    # Old and broken Pythons: retry EINTR by hand while honoring the
    # caller's overall deadline.
    def _retry_on_intr(fn, timeout):
        if timeout is None:
            deadline = float("inf")
        else:
            deadline = monotonic() + timeout

        while True:
            try:
                return fn(timeout)
            # OSError for 3 <= pyver < 3.5, select.error for pyver <= 2.7
            except (OSError, select.error) as e:
                # 'e.args[0]' incantation works for both OSError and select.error
                if e.args[0] != errno.EINTR:
                    raise
                else:
                    timeout = deadline - monotonic()
                    if timeout < 0:
                        timeout = 0
                    if timeout == float("inf"):
                        timeout = None
                    continue


def select_wait_for_socket(sock, read=False, write=False, timeout=None):
    """Wait on ``sock`` using select(); return True if it became ready."""
    if not read and not write:
        raise RuntimeError("must specify at least one of read=True, write=True")
    rcheck = []
    wcheck = []
    if read:
        rcheck.append(sock)
    if write:
        wcheck.append(sock)
    # When doing a non-blocking connect, most systems signal success by
    # marking the socket writable. Windows, though, signals success by
    # marking it as "exceptional". We paper over the difference by checking
    # the write sockets for both conditions. (The stdlib selectors module
    # does the same thing.)
    fn = partial(select.select, rcheck, wcheck, wcheck)
    rready, wready, xready = _retry_on_intr(fn, timeout)
    return bool(rready or wready or xready)


def poll_wait_for_socket(sock, read=False, write=False, timeout=None):
    """Wait on ``sock`` using poll(); return True if it became ready."""
    if not read and not write:
        raise RuntimeError("must specify at least one of read=True, write=True")
    mask = 0
    if read:
        mask |= select.POLLIN
    if write:
        mask |= select.POLLOUT
    poll_obj = select.poll()
    poll_obj.register(sock, mask)

    # For some reason, poll() takes timeout in milliseconds
    def do_poll(t):
        if t is not None:
            t *= 1000
        return poll_obj.poll(t)

    return bool(_retry_on_intr(do_poll, timeout))


def null_wait_for_socket(*args, **kwargs):
    """Fallback used when no select-equivalent exists; always raises."""
    raise NoWayToWaitForSocketError("no select-equivalent available")


def _have_working_poll():
    # Apparently some systems have a select.poll that fails as soon as you try
    # to use it, either due to strange configuration or broken monkeypatching
    # from libraries like eventlet/greenlet.
    try:
        poll_obj = select.poll()
        _retry_on_intr(poll_obj.poll, 0)
    except (AttributeError, OSError):
        return False
    else:
        return True


def wait_for_socket(*args, **kwargs):
    # We delay choosing which implementation to use until the first time we're
    # called. We could do it at import time, but then we might make the wrong
    # decision if someone goes wild with monkeypatching select.poll after
    # we're imported.
    global wait_for_socket
    if _have_working_poll():
        wait_for_socket = poll_wait_for_socket
    elif hasattr(select, "select"):
        wait_for_socket = select_wait_for_socket
    else:  # Platform-specific: Appengine.
        wait_for_socket = null_wait_for_socket
    return wait_for_socket(*args, **kwargs)


def wait_for_read(sock, timeout=None):
    """ Waits for reading to be available on a given socket.
    Returns True if the socket is readable, or False if the timeout expired.
    """
    return wait_for_socket(sock, read=True, timeout=timeout)


def wait_for_write(sock, timeout=None):
    """ Waits for writing to be available on a given socket.
    Returns True if the socket is writable, or False if the timeout expired.
    """
    return wait_for_socket(sock, write=True, timeout=timeout)
class SingleByteCharSetProber(CharSetProber):
    """
    Statistical prober for single-byte encodings.

    Scores a byte stream against a language ``model`` dict containing a
    ``char_to_order_map`` (byte -> frequency rank), a ``precedence_matrix``
    of character-pair likelihoods, and bookkeeping metadata. The prober
    shortcuts to FOUND_IT or NOT_ME once enough character pairs have been
    observed.
    """

    # Only the SAMPLE_SIZE most frequent characters of the language take
    # part in the pair-likelihood statistics.
    SAMPLE_SIZE = 64
    SB_ENOUGH_REL_THRESHOLD = 1024  # 0.25 * SAMPLE_SIZE^2
    POSITIVE_SHORTCUT_THRESHOLD = 0.95
    NEGATIVE_SHORTCUT_THRESHOLD = 0.05

    def __init__(self, model, reversed=False, name_prober=None):
        super(SingleByteCharSetProber, self).__init__()
        self._model = model
        # TRUE if we need to reverse every pair in the model lookup
        self._reversed = reversed
        # Optional auxiliary prober for name decision
        self._name_prober = name_prober
        self._last_order = None
        self._seq_counters = None
        self._total_seqs = None
        self._total_char = None
        self._freq_char = None
        self.reset()

    def reset(self):
        """Reset all statistics so the prober can examine a new document."""
        super(SingleByteCharSetProber, self).reset()
        # char order of last character (255 = sentinel meaning "none yet")
        self._last_order = 255
        self._seq_counters = [0] * SequenceLikelihood.get_num_categories()
        self._total_seqs = 0
        self._total_char = 0
        # characters that fall in our sampling range
        self._freq_char = 0

    @property
    def charset_name(self):
        # Delegate naming to the auxiliary prober when one was supplied.
        if self._name_prober:
            return self._name_prober.charset_name
        else:
            return self._model['charset_name']

    @property
    def language(self):
        if self._name_prober:
            return self._name_prober.language
        else:
            return self._model.get('language')

    def feed(self, byte_str):
        """Accumulate pair statistics from ``byte_str``; return probing state."""
        if not self._model['keep_english_letter']:
            # Strip English letters so they do not skew the language model.
            byte_str = self.filter_international_words(byte_str)
        if not byte_str:
            return self.state
        char_to_order_map = self._model['char_to_order_map']
        for i, c in enumerate(byte_str):
            # XXX: Order is in range 1-64, so one would think we want 0-63 here,
            # but that leads to 27 more test failures than before.
            order = char_to_order_map[c]
            # XXX: This was SYMBOL_CAT_ORDER before, with a value of 250, but
            # CharacterCategory.SYMBOL is actually 253, so we use CONTROL
            # to make it closer to the original intent. The only difference
            # is whether or not we count digits and control characters for
            # _total_char purposes.
            if order < CharacterCategory.CONTROL:
                self._total_char += 1
            if order < self.SAMPLE_SIZE:
                self._freq_char += 1
                # Only score a pair when the previous character was also in
                # the sampling range.
                if self._last_order < self.SAMPLE_SIZE:
                    self._total_seqs += 1
                    if not self._reversed:
                        i = (self._last_order * self.SAMPLE_SIZE) + order
                        model = self._model['precedence_matrix'][i]
                    else:  # reverse the order of the letters in the lookup
                        i = (order * self.SAMPLE_SIZE) + self._last_order
                        model = self._model['precedence_matrix'][i]
                    self._seq_counters[model] += 1
            self._last_order = order

        charset_name = self._model['charset_name']
        if self.state == ProbingState.DETECTING:
            # Once enough pairs have been seen, shortcut to a definite
            # answer if the confidence is extreme in either direction.
            if self._total_seqs > self.SB_ENOUGH_REL_THRESHOLD:
                confidence = self.get_confidence()
                if confidence > self.POSITIVE_SHORTCUT_THRESHOLD:
                    self.logger.debug('%s confidence = %s, we have a winner',
                                      charset_name, confidence)
                    self._state = ProbingState.FOUND_IT
                elif confidence < self.NEGATIVE_SHORTCUT_THRESHOLD:
                    self.logger.debug('%s confidence = %s, below negative '
                                      'shortcut threshhold %s', charset_name,
                                      confidence,
                                      self.NEGATIVE_SHORTCUT_THRESHOLD)
                    self._state = ProbingState.NOT_ME

        return self.state

    def get_confidence(self):
        """Ratio of observed positive pairs vs. the model's typical ratio."""
        r = 0.01
        if self._total_seqs > 0:
            r = ((1.0 * self._seq_counters[SequenceLikelihood.POSITIVE]) /
                 self._total_seqs / self._model['typical_positive_ratio'])
            # Weight by the share of characters inside the sampling range.
            r = r * self._freq_char / self._total_char
            if r >= 1.0:
                r = 0.99
        return r
def _dnsname_match(dn, hostname, max_wildcards=1):
    """Matching according to RFC 6125, section 6.4.3

    http://tools.ietf.org/html/rfc6125#section-6.4.3
    """
    pats = []
    if not dn:
        return False

    # Ported from python3-syntax:
    # leftmost, *remainder = dn.split(r'.')
    parts = dn.split(r".")
    leftmost = parts[0]
    remainder = parts[1:]

    wildcards = leftmost.count("*")
    if wildcards > max_wildcards:
        # Issue #17980: avoid denials of service by refusing more
        # than one wildcard per fragment. A survey of established
        # policy among SSL implementations showed it to be a
        # reasonable choice.
        raise CertificateError(
            "too many wildcards in certificate DNS name: " + repr(dn)
        )

    # speed up common case w/o wildcards
    if not wildcards:
        return dn.lower() == hostname.lower()

    # RFC 6125, section 6.4.3, subitem 1.
    # The client SHOULD NOT attempt to match a presented identifier in which
    # the wildcard character comprises a label other than the left-most label.
    if leftmost == "*":
        # When '*' is a fragment by itself, it matches a non-empty dotless
        # fragment.
        pats.append("[^.]+")
    elif leftmost.startswith("xn--") or hostname.startswith("xn--"):
        # RFC 6125, section 6.4.3, subitem 3.
        # The client SHOULD NOT attempt to match a presented identifier
        # where the wildcard character is embedded within an A-label or
        # U-label of an internationalized domain name.
        pats.append(re.escape(leftmost))
    else:
        # Otherwise, '*' matches any dotless string, e.g. www*
        pats.append(re.escape(leftmost).replace(r"\*", "[^.]*"))

    # add the remaining fragments, ignore any wildcards
    for frag in remainder:
        pats.append(re.escape(frag))

    pat = re.compile(r"\A" + r"\.".join(pats) + r"\Z", re.IGNORECASE)
    return pat.match(hostname)


def _to_unicode(obj):
    # On Python 2, the ipaddress backport wants text, so promote ASCII byte
    # strings to unicode. On Python 3 this is a no-op.
    if isinstance(obj, str) and sys.version_info < (3,):
        obj = unicode(obj, encoding="ascii", errors="strict")
    return obj


def _ipaddress_match(ipname, host_ip):
    """Exact matching of IP addresses.

    RFC 6125 explicitly doesn't define an algorithm for this
    (section 1.7.2 - "Out of Scope").
    """
    # OpenSSL may add a trailing newline to a subjectAltName's IP address
    # Divergence from upstream: ipaddress can't handle byte str
    ip = ipaddress.ip_address(_to_unicode(ipname).rstrip())
    return ip == host_ip


def match_hostname(cert, hostname):
    """Verify that *cert* (in decoded format as returned by
    SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125
    rules are followed, but IP addresses are not accepted for *hostname*.

    CertificateError is raised on failure. On success, the function
    returns nothing.
    """
    if not cert:
        raise ValueError(
            "empty or no certificate, match_hostname needs a "
            "SSL socket or SSL context with either "
            "CERT_OPTIONAL or CERT_REQUIRED"
        )
    try:
        # Divergence from upstream: ipaddress can't handle byte str
        host_ip = ipaddress.ip_address(_to_unicode(hostname))
    except ValueError:
        # Not an IP address (common case)
        host_ip = None
    except UnicodeError:
        # Divergence from upstream: Have to deal with ipaddress not taking
        # byte strings. addresses should be all ascii, so we consider it not
        # an ipaddress in this case
        host_ip = None
    except AttributeError:
        # Divergence from upstream: Make ipaddress library optional
        if ipaddress is None:
            host_ip = None
        else:
            raise
    dnsnames = []
    san = cert.get("subjectAltName", ())
    # A single matching subjectAltName entry is success; every non-matching
    # entry is collected for the error message.
    for key, value in san:
        if key == "DNS":
            if host_ip is None and _dnsname_match(value, hostname):
                return
            dnsnames.append(value)
        elif key == "IP Address":
            if host_ip is not None and _ipaddress_match(value, host_ip):
                return
            dnsnames.append(value)
    if not dnsnames:
        # The subject is only checked when there is no dNSName entry
        # in subjectAltName
        for sub in cert.get("subject", ()):
            for key, value in sub:
                # XXX according to RFC 2818, the most specific Common Name
                # must be used.
                if key == "commonName":
                    if _dnsname_match(value, hostname):
                        return
                    dnsnames.append(value)
    if len(dnsnames) > 1:
        raise CertificateError(
            "hostname %r "
            "doesn't match either of %s" % (hostname, ", ".join(map(repr, dnsnames)))
        )
    elif len(dnsnames) == 1:
        raise CertificateError("hostname %r doesn't match %r" % (hostname, dnsnames[0]))
    else:
        raise CertificateError(
            "no appropriate commonName or subjectAltName fields were found"
        )
class RequestMethods(object):
    """
    Convenience mixin for classes that implement a :meth:`urlopen` method,
    such as :class:`~urllib3.connectionpool.HTTPConnectionPool` and
    :class:`~urllib3.poolmanager.PoolManager`.

    Provides helpers for the common HTTP verbs and picks the right field
    encoding for each:

    * :meth:`.request_encode_url` puts the fields in the URL query string
      (GET, HEAD, DELETE, OPTIONS).
    * :meth:`.request_encode_body` puts the fields in the request *body*,
      multipart or www-form-urlencoded (POST, PUT, PATCH).
    * :meth:`.request` dispatches to one of the two based on the method.

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.
    """

    # Verbs whose fields belong in the query string rather than the body.
    _encode_url_methods = {"DELETE", "GET", "HEAD", "OPTIONS"}

    def __init__(self, headers=None):
        self.headers = headers or {}

    def urlopen(
        self,
        method,
        url,
        body=None,
        headers=None,
        encode_multipart=True,
        multipart_boundary=None,
        **kw
    ):  # Abstract
        raise NotImplementedError(
            "Classes extending RequestMethods must implement "
            "their own ``urlopen`` method."
        )

    def request(self, method, url, fields=None, headers=None, **urlopen_kw):
        """
        Make a request via :meth:`urlopen`, encoding ``fields`` in whichever
        way is appropriate for ``method``. This is the high-level convenience
        entry point; drop down to :meth:`request_encode_url`,
        :meth:`request_encode_body`, or :meth:`urlopen` itself for finer
        control.
        """
        method = method.upper()

        urlopen_kw["request_url"] = url

        if method in self._encode_url_methods:
            encode = self.request_encode_url
        else:
            encode = self.request_encode_body
        return encode(method, url, fields=fields, headers=headers, **urlopen_kw)

    def request_encode_url(self, method, url, fields=None, headers=None, **urlopen_kw):
        """
        Make a request via :meth:`urlopen` with the ``fields`` encoded into
        the URL query string. Useful for request methods like GET, HEAD,
        DELETE, etc.
        """
        request_kw = {"headers": self.headers if headers is None else headers}
        request_kw.update(urlopen_kw)

        if fields:
            url += "?" + urlencode(fields)

        return self.urlopen(method, url, **request_kw)

    def request_encode_body(
        self,
        method,
        url,
        fields=None,
        headers=None,
        encode_multipart=True,
        multipart_boundary=None,
        **urlopen_kw
    ):
        """
        Make a request via :meth:`urlopen` with the ``fields`` encoded into
        the request body. Useful for request methods like POST, PUT, PATCH,
        etc.

        With ``encode_multipart=True`` (the default) the payload is encoded
        via :meth:`urllib3.filepost.encode_multipart_formdata`; otherwise it
        is urlencoded with the 'application/x-www-form-urlencoded' content
        type. Multipart encoding is required when posting files and is
        reasonably safe otherwise, but may break request signing schemes
        such as OAuth.

        ``fields`` may map names to plain strings or to file tuples:
        ``(filename, data[, MIME type])``, where the MIME type is optional.
        Providing a filename is optional but recommended to best mimic
        browser behavior.

        Note that a supplied 'Content-Type' header will be overwritten,
        because it must carry the dynamic random multipart boundary; the
        boundary can be pinned explicitly with ``multipart_boundary``.
        """
        if headers is None:
            headers = self.headers

        request_kw = {"headers": {}}

        if fields:
            if "body" in urlopen_kw:
                raise TypeError(
                    "request got values for both 'fields' and 'body', can only specify one."
                )

            if encode_multipart:
                body, content_type = encode_multipart_formdata(
                    fields, boundary=multipart_boundary
                )
            else:
                body = urlencode(fields)
                content_type = "application/x-www-form-urlencoded"

            request_kw["body"] = body
            request_kw["headers"] = {"Content-Type": content_type}

        # Explicit headers win over the generated Content-Type entry.
        request_kw["headers"].update(headers)
        request_kw.update(urlopen_kw)

        return self.urlopen(method, url, **request_kw)
import sessions 14 | 15 | 16 | def request(method, url, **kwargs): 17 | """Constructs and sends a :class:`Request `. 18 | 19 | :param method: method for the new :class:`Request` object: ``GET``, ``OPTIONS``, ``HEAD``, ``POST``, ``PUT``, ``PATCH``, or ``DELETE``. 20 | :param url: URL for the new :class:`Request` object. 21 | :param params: (optional) Dictionary, list of tuples or bytes to send 22 | in the query string for the :class:`Request`. 23 | :param data: (optional) Dictionary, list of tuples, bytes, or file-like 24 | object to send in the body of the :class:`Request`. 25 | :param json: (optional) A JSON serializable Python object to send in the body of the :class:`Request`. 26 | :param headers: (optional) Dictionary of HTTP Headers to send with the :class:`Request`. 27 | :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`. 28 | :param files: (optional) Dictionary of ``'name': file-like-objects`` (or ``{'name': file-tuple}``) for multipart encoding upload. 29 | ``file-tuple`` can be a 2-tuple ``('filename', fileobj)``, 3-tuple ``('filename', fileobj, 'content_type')`` 30 | or a 4-tuple ``('filename', fileobj, 'content_type', custom_headers)``, where ``'content-type'`` is a string 31 | defining the content type of the given file and ``custom_headers`` a dict-like object containing additional headers 32 | to add for the file. 33 | :param auth: (optional) Auth tuple to enable Basic/Digest/Custom HTTP Auth. 34 | :param timeout: (optional) How many seconds to wait for the server to send data 35 | before giving up, as a float, or a :ref:`(connect timeout, read 36 | timeout) ` tuple. 37 | :type timeout: float or tuple 38 | :param allow_redirects: (optional) Boolean. Enable/disable GET/OPTIONS/POST/PUT/PATCH/DELETE/HEAD redirection. Defaults to ``True``. 39 | :type allow_redirects: bool 40 | :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy. 
41 | :param verify: (optional) Either a boolean, in which case it controls whether we verify 42 | the server's TLS certificate, or a string, in which case it must be a path 43 | to a CA bundle to use. Defaults to ``True``. 44 | :param stream: (optional) if ``False``, the response content will be immediately downloaded. 45 | :param cert: (optional) if String, path to ssl client cert file (.pem). If Tuple, ('cert', 'key') pair. 46 | :return: :class:`Response ` object 47 | :rtype: requests.Response 48 | 49 | Usage:: 50 | 51 | >>> import requests 52 | >>> req = requests.request('GET', 'https://httpbin.org/get') 53 | >>> req 54 | 55 | """ 56 | 57 | # By using the 'with' statement we are sure the session is closed, thus we 58 | # avoid leaving sockets open which can trigger a ResourceWarning in some 59 | # cases, and look like a memory leak in others. 60 | with sessions.Session() as session: 61 | return session.request(method=method, url=url, **kwargs) 62 | 63 | 64 | def get(url, params=None, **kwargs): 65 | r"""Sends a GET request. 66 | 67 | :param url: URL for the new :class:`Request` object. 68 | :param params: (optional) Dictionary, list of tuples or bytes to send 69 | in the query string for the :class:`Request`. 70 | :param \*\*kwargs: Optional arguments that ``request`` takes. 71 | :return: :class:`Response ` object 72 | :rtype: requests.Response 73 | """ 74 | 75 | kwargs.setdefault('allow_redirects', True) 76 | return request('get', url, params=params, **kwargs) 77 | 78 | 79 | def options(url, **kwargs): 80 | r"""Sends an OPTIONS request. 81 | 82 | :param url: URL for the new :class:`Request` object. 83 | :param \*\*kwargs: Optional arguments that ``request`` takes. 84 | :return: :class:`Response ` object 85 | :rtype: requests.Response 86 | """ 87 | 88 | kwargs.setdefault('allow_redirects', True) 89 | return request('options', url, **kwargs) 90 | 91 | 92 | def head(url, **kwargs): 93 | r"""Sends a HEAD request. 
94 | 95 | :param url: URL for the new :class:`Request` object. 96 | :param \*\*kwargs: Optional arguments that ``request`` takes. If 97 | `allow_redirects` is not provided, it will be set to `False` (as 98 | opposed to the default :meth:`request` behavior). 99 | :return: :class:`Response ` object 100 | :rtype: requests.Response 101 | """ 102 | 103 | kwargs.setdefault('allow_redirects', False) 104 | return request('head', url, **kwargs) 105 | 106 | 107 | def post(url, data=None, json=None, **kwargs): 108 | r"""Sends a POST request. 109 | 110 | :param url: URL for the new :class:`Request` object. 111 | :param data: (optional) Dictionary, list of tuples, bytes, or file-like 112 | object to send in the body of the :class:`Request`. 113 | :param json: (optional) json data to send in the body of the :class:`Request`. 114 | :param \*\*kwargs: Optional arguments that ``request`` takes. 115 | :return: :class:`Response ` object 116 | :rtype: requests.Response 117 | """ 118 | 119 | return request('post', url, data=data, json=json, **kwargs) 120 | 121 | 122 | def put(url, data=None, **kwargs): 123 | r"""Sends a PUT request. 124 | 125 | :param url: URL for the new :class:`Request` object. 126 | :param data: (optional) Dictionary, list of tuples, bytes, or file-like 127 | object to send in the body of the :class:`Request`. 128 | :param json: (optional) json data to send in the body of the :class:`Request`. 129 | :param \*\*kwargs: Optional arguments that ``request`` takes. 130 | :return: :class:`Response ` object 131 | :rtype: requests.Response 132 | """ 133 | 134 | return request('put', url, data=data, **kwargs) 135 | 136 | 137 | def patch(url, data=None, **kwargs): 138 | r"""Sends a PATCH request. 139 | 140 | :param url: URL for the new :class:`Request` object. 141 | :param data: (optional) Dictionary, list of tuples, bytes, or file-like 142 | object to send in the body of the :class:`Request`. 
143 | :param json: (optional) json data to send in the body of the :class:`Request`. 144 | :param \*\*kwargs: Optional arguments that ``request`` takes. 145 | :return: :class:`Response ` object 146 | :rtype: requests.Response 147 | """ 148 | 149 | return request('patch', url, data=data, **kwargs) 150 | 151 | 152 | def delete(url, **kwargs): 153 | r"""Sends a DELETE request. 154 | 155 | :param url: URL for the new :class:`Request` object. 156 | :param \*\*kwargs: Optional arguments that ``request`` takes. 157 | :return: :class:`Response ` object 158 | :rtype: requests.Response 159 | """ 160 | 161 | return request('delete', url, **kwargs) 162 | -------------------------------------------------------------------------------- /urllib3/contrib/socks.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This module contains provisional support for SOCKS proxies from within 4 | urllib3. This module supports SOCKS4, SOCKS4A (an extension of SOCKS4), and 5 | SOCKS5. To enable its functionality, either install PySocks or install this 6 | module with the ``socks`` extra. 7 | 8 | The SOCKS implementation supports the full range of urllib3 features. It also 9 | supports the following SOCKS features: 10 | 11 | - SOCKS4A (``proxy_url='socks4a://...``) 12 | - SOCKS4 (``proxy_url='socks4://...``) 13 | - SOCKS5 with remote DNS (``proxy_url='socks5h://...``) 14 | - SOCKS5 with local DNS (``proxy_url='socks5://...``) 15 | - Usernames and passwords for the SOCKS proxy 16 | 17 | .. note:: 18 | It is recommended to use ``socks5h://`` or ``socks4a://`` schemes in 19 | your ``proxy_url`` to ensure that DNS resolution is done from the remote 20 | server instead of client-side when connecting to a domain name. 21 | 22 | SOCKS4 supports IPv4 and domain names with the SOCKS4A extension. SOCKS5 23 | supports IPv4, IPv6, and domain names. 
24 | 25 | When connecting to a SOCKS4 proxy the ``username`` portion of the ``proxy_url`` 26 | will be sent as the ``userid`` section of the SOCKS request:: 27 | 28 | proxy_url="socks4a://@proxy-host" 29 | 30 | When connecting to a SOCKS5 proxy the ``username`` and ``password`` portion 31 | of the ``proxy_url`` will be sent as the username/password to authenticate 32 | with the proxy:: 33 | 34 | proxy_url="socks5h://:@proxy-host" 35 | 36 | """ 37 | from __future__ import absolute_import 38 | 39 | try: 40 | import socks 41 | except ImportError: 42 | import warnings 43 | from ..exceptions import DependencyWarning 44 | 45 | warnings.warn( 46 | ( 47 | "SOCKS support in urllib3 requires the installation of optional " 48 | "dependencies: specifically, PySocks. For more information, see " 49 | "https://urllib3.readthedocs.io/en/latest/contrib.html#socks-proxies" 50 | ), 51 | DependencyWarning, 52 | ) 53 | raise 54 | 55 | from socket import error as SocketError, timeout as SocketTimeout 56 | 57 | from ..connection import HTTPConnection, HTTPSConnection 58 | from ..connectionpool import HTTPConnectionPool, HTTPSConnectionPool 59 | from ..exceptions import ConnectTimeoutError, NewConnectionError 60 | from ..poolmanager import PoolManager 61 | from ..util.url import parse_url 62 | 63 | try: 64 | import ssl 65 | except ImportError: 66 | ssl = None 67 | 68 | 69 | class SOCKSConnection(HTTPConnection): 70 | """ 71 | A plain-text HTTP connection that connects via a SOCKS proxy. 72 | """ 73 | 74 | def __init__(self, *args, **kwargs): 75 | self._socks_options = kwargs.pop("_socks_options") 76 | super(SOCKSConnection, self).__init__(*args, **kwargs) 77 | 78 | def _new_conn(self): 79 | """ 80 | Establish a new connection via the SOCKS proxy. 
81 | """ 82 | extra_kw = {} 83 | if self.source_address: 84 | extra_kw["source_address"] = self.source_address 85 | 86 | if self.socket_options: 87 | extra_kw["socket_options"] = self.socket_options 88 | 89 | try: 90 | conn = socks.create_connection( 91 | (self.host, self.port), 92 | proxy_type=self._socks_options["socks_version"], 93 | proxy_addr=self._socks_options["proxy_host"], 94 | proxy_port=self._socks_options["proxy_port"], 95 | proxy_username=self._socks_options["username"], 96 | proxy_password=self._socks_options["password"], 97 | proxy_rdns=self._socks_options["rdns"], 98 | timeout=self.timeout, 99 | **extra_kw 100 | ) 101 | 102 | except SocketTimeout: 103 | raise ConnectTimeoutError( 104 | self, 105 | "Connection to %s timed out. (connect timeout=%s)" 106 | % (self.host, self.timeout), 107 | ) 108 | 109 | except socks.ProxyError as e: 110 | # This is fragile as hell, but it seems to be the only way to raise 111 | # useful errors here. 112 | if e.socket_err: 113 | error = e.socket_err 114 | if isinstance(error, SocketTimeout): 115 | raise ConnectTimeoutError( 116 | self, 117 | "Connection to %s timed out. (connect timeout=%s)" 118 | % (self.host, self.timeout), 119 | ) 120 | else: 121 | raise NewConnectionError( 122 | self, "Failed to establish a new connection: %s" % error 123 | ) 124 | else: 125 | raise NewConnectionError( 126 | self, "Failed to establish a new connection: %s" % e 127 | ) 128 | 129 | except SocketError as e: # Defensive: PySocks should catch all these. 130 | raise NewConnectionError( 131 | self, "Failed to establish a new connection: %s" % e 132 | ) 133 | 134 | return conn 135 | 136 | 137 | # We don't need to duplicate the Verified/Unverified distinction from 138 | # urllib3/connection.py here because the HTTPSConnection will already have been 139 | # correctly set to either the Verified or Unverified form by that module. This 140 | # means the SOCKSHTTPSConnection will automatically be the correct type. 
141 | class SOCKSHTTPSConnection(SOCKSConnection, HTTPSConnection): 142 | pass 143 | 144 | 145 | class SOCKSHTTPConnectionPool(HTTPConnectionPool): 146 | ConnectionCls = SOCKSConnection 147 | 148 | 149 | class SOCKSHTTPSConnectionPool(HTTPSConnectionPool): 150 | ConnectionCls = SOCKSHTTPSConnection 151 | 152 | 153 | class SOCKSProxyManager(PoolManager): 154 | """ 155 | A version of the urllib3 ProxyManager that routes connections via the 156 | defined SOCKS proxy. 157 | """ 158 | 159 | pool_classes_by_scheme = { 160 | "http": SOCKSHTTPConnectionPool, 161 | "https": SOCKSHTTPSConnectionPool, 162 | } 163 | 164 | def __init__( 165 | self, 166 | proxy_url, 167 | username=None, 168 | password=None, 169 | num_pools=10, 170 | headers=None, 171 | **connection_pool_kw 172 | ): 173 | parsed = parse_url(proxy_url) 174 | 175 | if username is None and password is None and parsed.auth is not None: 176 | split = parsed.auth.split(":") 177 | if len(split) == 2: 178 | username, password = split 179 | if parsed.scheme == "socks5": 180 | socks_version = socks.PROXY_TYPE_SOCKS5 181 | rdns = False 182 | elif parsed.scheme == "socks5h": 183 | socks_version = socks.PROXY_TYPE_SOCKS5 184 | rdns = True 185 | elif parsed.scheme == "socks4": 186 | socks_version = socks.PROXY_TYPE_SOCKS4 187 | rdns = False 188 | elif parsed.scheme == "socks4a": 189 | socks_version = socks.PROXY_TYPE_SOCKS4 190 | rdns = True 191 | else: 192 | raise ValueError("Unable to determine SOCKS version from %s" % proxy_url) 193 | 194 | self.proxy_url = proxy_url 195 | 196 | socks_options = { 197 | "socks_version": socks_version, 198 | "proxy_host": parsed.host, 199 | "proxy_port": parsed.port, 200 | "username": username, 201 | "password": password, 202 | "rdns": rdns, 203 | } 204 | connection_pool_kw["_socks_options"] = socks_options 205 | 206 | super(SOCKSProxyManager, self).__init__( 207 | num_pools, headers, **connection_pool_kw 208 | ) 209 | 210 | self.pool_classes_by_scheme = 
SOCKSProxyManager.pool_classes_by_scheme 211 | -------------------------------------------------------------------------------- /urllib3/exceptions.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .packages.six.moves.http_client import IncompleteRead as httplib_IncompleteRead 3 | 4 | # Base Exceptions 5 | 6 | 7 | class HTTPError(Exception): 8 | "Base exception used by this module." 9 | pass 10 | 11 | 12 | class HTTPWarning(Warning): 13 | "Base warning used by this module." 14 | pass 15 | 16 | 17 | class PoolError(HTTPError): 18 | "Base exception for errors caused within a pool." 19 | 20 | def __init__(self, pool, message): 21 | self.pool = pool 22 | HTTPError.__init__(self, "%s: %s" % (pool, message)) 23 | 24 | def __reduce__(self): 25 | # For pickling purposes. 26 | return self.__class__, (None, None) 27 | 28 | 29 | class RequestError(PoolError): 30 | "Base exception for PoolErrors that have associated URLs." 31 | 32 | def __init__(self, pool, url, message): 33 | self.url = url 34 | PoolError.__init__(self, pool, message) 35 | 36 | def __reduce__(self): 37 | # For pickling purposes. 38 | return self.__class__, (None, self.url, None) 39 | 40 | 41 | class SSLError(HTTPError): 42 | "Raised when SSL certificate fails in an HTTPS connection." 43 | pass 44 | 45 | 46 | class ProxyError(HTTPError): 47 | "Raised when the connection to a proxy fails." 48 | 49 | def __init__(self, message, error, *args): 50 | super(ProxyError, self).__init__(message, error, *args) 51 | self.original_error = error 52 | 53 | 54 | class DecodeError(HTTPError): 55 | "Raised when automatic decoding based on Content-Type fails." 56 | pass 57 | 58 | 59 | class ProtocolError(HTTPError): 60 | "Raised when something unexpected happens mid-request/response." 61 | pass 62 | 63 | 64 | #: Renamed to ProtocolError but aliased for backwards compatibility. 
65 | ConnectionError = ProtocolError 66 | 67 | 68 | # Leaf Exceptions 69 | 70 | 71 | class MaxRetryError(RequestError): 72 | """Raised when the maximum number of retries is exceeded. 73 | 74 | :param pool: The connection pool 75 | :type pool: :class:`~urllib3.connectionpool.HTTPConnectionPool` 76 | :param string url: The requested Url 77 | :param exceptions.Exception reason: The underlying error 78 | 79 | """ 80 | 81 | def __init__(self, pool, url, reason=None): 82 | self.reason = reason 83 | 84 | message = "Max retries exceeded with url: %s (Caused by %r)" % (url, reason) 85 | 86 | RequestError.__init__(self, pool, url, message) 87 | 88 | 89 | class HostChangedError(RequestError): 90 | "Raised when an existing pool gets a request for a foreign host." 91 | 92 | def __init__(self, pool, url, retries=3): 93 | message = "Tried to open a foreign host with url: %s" % url 94 | RequestError.__init__(self, pool, url, message) 95 | self.retries = retries 96 | 97 | 98 | class TimeoutStateError(HTTPError): 99 | """ Raised when passing an invalid state to a timeout """ 100 | 101 | pass 102 | 103 | 104 | class TimeoutError(HTTPError): 105 | """ Raised when a socket timeout error occurs. 106 | 107 | Catching this error will catch both :exc:`ReadTimeoutErrors 108 | ` and :exc:`ConnectTimeoutErrors `. 109 | """ 110 | 111 | pass 112 | 113 | 114 | class ReadTimeoutError(TimeoutError, RequestError): 115 | "Raised when a socket timeout occurs while receiving data from a server" 116 | pass 117 | 118 | 119 | # This timeout error does not have a URL attached and needs to inherit from the 120 | # base HTTPError 121 | class ConnectTimeoutError(TimeoutError): 122 | "Raised when a socket timeout occurs while connecting to a server" 123 | pass 124 | 125 | 126 | class NewConnectionError(ConnectTimeoutError, PoolError): 127 | "Raised when we fail to establish a new connection. Usually ECONNREFUSED." 
128 | pass 129 | 130 | 131 | class EmptyPoolError(PoolError): 132 | "Raised when a pool runs out of connections and no more are allowed." 133 | pass 134 | 135 | 136 | class ClosedPoolError(PoolError): 137 | "Raised when a request enters a pool after the pool has been closed." 138 | pass 139 | 140 | 141 | class LocationValueError(ValueError, HTTPError): 142 | "Raised when there is something wrong with a given URL input." 143 | pass 144 | 145 | 146 | class LocationParseError(LocationValueError): 147 | "Raised when get_host or similar fails to parse the URL input." 148 | 149 | def __init__(self, location): 150 | message = "Failed to parse: %s" % location 151 | HTTPError.__init__(self, message) 152 | 153 | self.location = location 154 | 155 | 156 | class ResponseError(HTTPError): 157 | "Used as a container for an error reason supplied in a MaxRetryError." 158 | GENERIC_ERROR = "too many error responses" 159 | SPECIFIC_ERROR = "too many {status_code} error responses" 160 | 161 | 162 | class SecurityWarning(HTTPWarning): 163 | "Warned when performing security reducing actions" 164 | pass 165 | 166 | 167 | class SubjectAltNameWarning(SecurityWarning): 168 | "Warned when connecting to a host with a certificate missing a SAN." 169 | pass 170 | 171 | 172 | class InsecureRequestWarning(SecurityWarning): 173 | "Warned when making an unverified HTTPS request." 174 | pass 175 | 176 | 177 | class SystemTimeWarning(SecurityWarning): 178 | "Warned when system time is suspected to be wrong" 179 | pass 180 | 181 | 182 | class InsecurePlatformWarning(SecurityWarning): 183 | "Warned when certain SSL configuration is not available on a platform." 184 | pass 185 | 186 | 187 | class SNIMissingWarning(HTTPWarning): 188 | "Warned when making a HTTPS request without SNI available." 189 | pass 190 | 191 | 192 | class DependencyWarning(HTTPWarning): 193 | """ 194 | Warned when an attempt is made to import a module with missing optional 195 | dependencies. 
196 | """ 197 | 198 | pass 199 | 200 | 201 | class InvalidProxyConfigurationWarning(HTTPWarning): 202 | """ 203 | Warned when using an HTTPS proxy and an HTTPS URL. Currently 204 | urllib3 doesn't support HTTPS proxies and the proxy will be 205 | contacted via HTTP instead. This warning can be fixed by 206 | changing your HTTPS proxy URL into an HTTP proxy URL. 207 | 208 | If you encounter this warning read this: 209 | https://github.com/urllib3/urllib3/issues/1850 210 | """ 211 | 212 | pass 213 | 214 | 215 | class ResponseNotChunked(ProtocolError, ValueError): 216 | "Response needs to be chunked in order to read it as chunks." 217 | pass 218 | 219 | 220 | class BodyNotHttplibCompatible(HTTPError): 221 | """ 222 | Body should be httplib.HTTPResponse like (have an fp attribute which 223 | returns raw chunks) for read_chunked(). 224 | """ 225 | 226 | pass 227 | 228 | 229 | class IncompleteRead(HTTPError, httplib_IncompleteRead): 230 | """ 231 | Response length doesn't match expected Content-Length 232 | 233 | Subclass of http_client.IncompleteRead to allow int value 234 | for `partial` to avoid creating large objects on streamed 235 | reads. 236 | """ 237 | 238 | def __init__(self, partial, expected): 239 | super(IncompleteRead, self).__init__(partial, expected) 240 | 241 | def __repr__(self): 242 | return "IncompleteRead(%i bytes read, %i more expected)" % ( 243 | self.partial, 244 | self.expected, 245 | ) 246 | 247 | 248 | class InvalidHeader(HTTPError): 249 | "The header provided was somehow invalid." 250 | pass 251 | 252 | 253 | class ProxySchemeUnknown(AssertionError, ValueError): 254 | "ProxyManager does not support the supplied scheme" 255 | # TODO(t-8ch): Stop inheriting from AssertionError in v2.0. 
256 | 257 | def __init__(self, scheme): 258 | message = "Not supported proxy scheme %s" % scheme 259 | super(ProxySchemeUnknown, self).__init__(message) 260 | 261 | 262 | class HeaderParsingError(HTTPError): 263 | "Raised by assert_header_parsing, but we convert it to a log.warning statement." 264 | 265 | def __init__(self, defects, unparsed_data): 266 | message = "%s, unparsed data: %r" % (defects or "Unknown", unparsed_data) 267 | super(HeaderParsingError, self).__init__(message) 268 | 269 | 270 | class UnrewindableBodyError(HTTPError): 271 | "urllib3 encountered an error when trying to rewind a body" 272 | pass 273 | -------------------------------------------------------------------------------- /urllib3/fields.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import email.utils 3 | import mimetypes 4 | import re 5 | 6 | from .packages import six 7 | 8 | 9 | def guess_content_type(filename, default="application/octet-stream"): 10 | """ 11 | Guess the "Content-Type" of a file. 12 | 13 | :param filename: 14 | The filename to guess the "Content-Type" of using :mod:`mimetypes`. 15 | :param default: 16 | If no "Content-Type" can be guessed, default to `default`. 17 | """ 18 | if filename: 19 | return mimetypes.guess_type(filename)[0] or default 20 | return default 21 | 22 | 23 | def format_header_param_rfc2231(name, value): 24 | """ 25 | Helper function to format and quote a single header parameter using the 26 | strategy defined in RFC 2231. 27 | 28 | Particularly useful for header parameters which might contain 29 | non-ASCII values, like file names. This follows RFC 2388 Section 4.4. 30 | 31 | :param name: 32 | The name of the parameter, a string expected to be ASCII only. 33 | :param value: 34 | The value of the parameter, provided as ``bytes`` or `str``. 35 | :ret: 36 | An RFC-2231-formatted unicode string. 
37 | """ 38 | if isinstance(value, six.binary_type): 39 | value = value.decode("utf-8") 40 | 41 | if not any(ch in value for ch in '"\\\r\n'): 42 | result = u'%s="%s"' % (name, value) 43 | try: 44 | result.encode("ascii") 45 | except (UnicodeEncodeError, UnicodeDecodeError): 46 | pass 47 | else: 48 | return result 49 | 50 | if six.PY2: # Python 2: 51 | value = value.encode("utf-8") 52 | 53 | # encode_rfc2231 accepts an encoded string and returns an ascii-encoded 54 | # string in Python 2 but accepts and returns unicode strings in Python 3 55 | value = email.utils.encode_rfc2231(value, "utf-8") 56 | value = "%s*=%s" % (name, value) 57 | 58 | if six.PY2: # Python 2: 59 | value = value.decode("utf-8") 60 | 61 | return value 62 | 63 | 64 | _HTML5_REPLACEMENTS = { 65 | u"\u0022": u"%22", 66 | # Replace "\" with "\\". 67 | u"\u005C": u"\u005C\u005C", 68 | u"\u005C": u"\u005C\u005C", 69 | } 70 | 71 | # All control characters from 0x00 to 0x1F *except* 0x1B. 72 | _HTML5_REPLACEMENTS.update( 73 | { 74 | six.unichr(cc): u"%{:02X}".format(cc) 75 | for cc in range(0x00, 0x1F + 1) 76 | if cc not in (0x1B,) 77 | } 78 | ) 79 | 80 | 81 | def _replace_multiple(value, needles_and_replacements): 82 | def replacer(match): 83 | return needles_and_replacements[match.group(0)] 84 | 85 | pattern = re.compile( 86 | r"|".join([re.escape(needle) for needle in needles_and_replacements.keys()]) 87 | ) 88 | 89 | result = pattern.sub(replacer, value) 90 | 91 | return result 92 | 93 | 94 | def format_header_param_html5(name, value): 95 | """ 96 | Helper function to format and quote a single header parameter using the 97 | HTML5 strategy. 98 | 99 | Particularly useful for header parameters which might contain 100 | non-ASCII values, like file names. This follows the `HTML5 Working Draft 101 | Section 4.10.22.7`_ and matches the behavior of curl and modern browsers. 102 | 103 | .. 
_HTML5 Working Draft Section 4.10.22.7: 104 | https://w3c.github.io/html/sec-forms.html#multipart-form-data 105 | 106 | :param name: 107 | The name of the parameter, a string expected to be ASCII only. 108 | :param value: 109 | The value of the parameter, provided as ``bytes`` or `str``. 110 | :ret: 111 | A unicode string, stripped of troublesome characters. 112 | """ 113 | if isinstance(value, six.binary_type): 114 | value = value.decode("utf-8") 115 | 116 | value = _replace_multiple(value, _HTML5_REPLACEMENTS) 117 | 118 | return u'%s="%s"' % (name, value) 119 | 120 | 121 | # For backwards-compatibility. 122 | format_header_param = format_header_param_html5 123 | 124 | 125 | class RequestField(object): 126 | """ 127 | A data container for request body parameters. 128 | 129 | :param name: 130 | The name of this request field. Must be unicode. 131 | :param data: 132 | The data/value body. 133 | :param filename: 134 | An optional filename of the request field. Must be unicode. 135 | :param headers: 136 | An optional dict-like object of headers to initially use for the field. 137 | :param header_formatter: 138 | An optional callable that is used to encode and format the headers. By 139 | default, this is :func:`format_header_param_html5`. 140 | """ 141 | 142 | def __init__( 143 | self, 144 | name, 145 | data, 146 | filename=None, 147 | headers=None, 148 | header_formatter=format_header_param_html5, 149 | ): 150 | self._name = name 151 | self._filename = filename 152 | self.data = data 153 | self.headers = {} 154 | if headers: 155 | self.headers = dict(headers) 156 | self.header_formatter = header_formatter 157 | 158 | @classmethod 159 | def from_tuples(cls, fieldname, value, header_formatter=format_header_param_html5): 160 | """ 161 | A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters. 162 | 163 | Supports constructing :class:`~urllib3.fields.RequestField` from 164 | parameter of key/value strings AND key/filetuple. 
A filetuple is a 165 | (filename, data, MIME type) tuple where the MIME type is optional. 166 | For example:: 167 | 168 | 'foo': 'bar', 169 | 'fakefile': ('foofile.txt', 'contents of foofile'), 170 | 'realfile': ('barfile.txt', open('realfile').read()), 171 | 'typedfile': ('bazfile.bin', open('bazfile').read(), 'image/jpeg'), 172 | 'nonamefile': 'contents of nonamefile field', 173 | 174 | Field names and filenames must be unicode. 175 | """ 176 | if isinstance(value, tuple): 177 | if len(value) == 3: 178 | filename, data, content_type = value 179 | else: 180 | filename, data = value 181 | content_type = guess_content_type(filename) 182 | else: 183 | filename = None 184 | content_type = None 185 | data = value 186 | 187 | request_param = cls( 188 | fieldname, data, filename=filename, header_formatter=header_formatter 189 | ) 190 | request_param.make_multipart(content_type=content_type) 191 | 192 | return request_param 193 | 194 | def _render_part(self, name, value): 195 | """ 196 | Overridable helper function to format a single header parameter. By 197 | default, this calls ``self.header_formatter``. 198 | 199 | :param name: 200 | The name of the parameter, a string expected to be ASCII only. 201 | :param value: 202 | The value of the parameter, provided as a unicode string. 203 | """ 204 | 205 | return self.header_formatter(name, value) 206 | 207 | def _render_parts(self, header_parts): 208 | """ 209 | Helper function to format and quote a single header. 210 | 211 | Useful for single headers that are composed of multiple items. E.g., 212 | 'Content-Disposition' fields. 213 | 214 | :param header_parts: 215 | A sequence of (k, v) tuples or a :class:`dict` of (k, v) to format 216 | as `k1="v1"; k2="v2"; ...`. 
217 | """ 218 | parts = [] 219 | iterable = header_parts 220 | if isinstance(header_parts, dict): 221 | iterable = header_parts.items() 222 | 223 | for name, value in iterable: 224 | if value is not None: 225 | parts.append(self._render_part(name, value)) 226 | 227 | return u"; ".join(parts) 228 | 229 | def render_headers(self): 230 | """ 231 | Renders the headers for this request field. 232 | """ 233 | lines = [] 234 | 235 | sort_keys = ["Content-Disposition", "Content-Type", "Content-Location"] 236 | for sort_key in sort_keys: 237 | if self.headers.get(sort_key, False): 238 | lines.append(u"%s: %s" % (sort_key, self.headers[sort_key])) 239 | 240 | for header_name, header_value in self.headers.items(): 241 | if header_name not in sort_keys: 242 | if header_value: 243 | lines.append(u"%s: %s" % (header_name, header_value)) 244 | 245 | lines.append(u"\r\n") 246 | return u"\r\n".join(lines) 247 | 248 | def make_multipart( 249 | self, content_disposition=None, content_type=None, content_location=None 250 | ): 251 | """ 252 | Makes this request field into a multipart request field. 253 | 254 | This method overrides "Content-Disposition", "Content-Type" and 255 | "Content-Location" headers to the request parameter. 256 | 257 | :param content_type: 258 | The 'Content-Type' of the request body. 259 | :param content_location: 260 | The 'Content-Location' of the request body. 
261 | 262 | """ 263 | self.headers["Content-Disposition"] = content_disposition or u"form-data" 264 | self.headers["Content-Disposition"] += u"; ".join( 265 | [ 266 | u"", 267 | self._render_parts( 268 | ((u"name", self._name), (u"filename", self._filename)) 269 | ), 270 | ] 271 | ) 272 | self.headers["Content-Type"] = content_type 273 | self.headers["Content-Location"] = content_location 274 | -------------------------------------------------------------------------------- /chardet/chardistribution.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Communicator client code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 
21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .euctwfreq import (EUCTW_CHAR_TO_FREQ_ORDER, EUCTW_TABLE_SIZE, 29 | EUCTW_TYPICAL_DISTRIBUTION_RATIO) 30 | from .euckrfreq import (EUCKR_CHAR_TO_FREQ_ORDER, EUCKR_TABLE_SIZE, 31 | EUCKR_TYPICAL_DISTRIBUTION_RATIO) 32 | from .gb2312freq import (GB2312_CHAR_TO_FREQ_ORDER, GB2312_TABLE_SIZE, 33 | GB2312_TYPICAL_DISTRIBUTION_RATIO) 34 | from .big5freq import (BIG5_CHAR_TO_FREQ_ORDER, BIG5_TABLE_SIZE, 35 | BIG5_TYPICAL_DISTRIBUTION_RATIO) 36 | from .jisfreq import (JIS_CHAR_TO_FREQ_ORDER, JIS_TABLE_SIZE, 37 | JIS_TYPICAL_DISTRIBUTION_RATIO) 38 | 39 | 40 | class CharDistributionAnalysis(object): 41 | ENOUGH_DATA_THRESHOLD = 1024 42 | SURE_YES = 0.99 43 | SURE_NO = 0.01 44 | MINIMUM_DATA_THRESHOLD = 3 45 | 46 | def __init__(self): 47 | # Mapping table to get frequency order from char order (get from 48 | # GetOrder()) 49 | self._char_to_freq_order = None 50 | self._table_size = None # Size of above table 51 | # This is a constant value which varies from language to language, 52 | # used in calculating confidence. See 53 | # http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html 54 | # for further detail. 
55 | self.typical_distribution_ratio = None 56 | self._done = None 57 | self._total_chars = None 58 | self._freq_chars = None 59 | self.reset() 60 | 61 | def reset(self): 62 | """reset analyser, clear any state""" 63 | # If this flag is set to True, detection is done and conclusion has 64 | # been made 65 | self._done = False 66 | self._total_chars = 0 # Total characters encountered 67 | # The number of characters whose frequency order is less than 512 68 | self._freq_chars = 0 69 | 70 | def feed(self, char, char_len): 71 | """feed a character with known length""" 72 | if char_len == 2: 73 | # we only care about 2-bytes character in our distribution analysis 74 | order = self.get_order(char) 75 | else: 76 | order = -1 77 | if order >= 0: 78 | self._total_chars += 1 79 | # order is valid 80 | if order < self._table_size: 81 | if 512 > self._char_to_freq_order[order]: 82 | self._freq_chars += 1 83 | 84 | def get_confidence(self): 85 | """return confidence based on existing data""" 86 | # if we didn't receive any character in our consideration range, 87 | # return negative answer 88 | if self._total_chars <= 0 or self._freq_chars <= self.MINIMUM_DATA_THRESHOLD: 89 | return self.SURE_NO 90 | 91 | if self._total_chars != self._freq_chars: 92 | r = (self._freq_chars / ((self._total_chars - self._freq_chars) 93 | * self.typical_distribution_ratio)) 94 | if r < self.SURE_YES: 95 | return r 96 | 97 | # normalize confidence (we don't want to be 100% sure) 98 | return self.SURE_YES 99 | 100 | def got_enough_data(self): 101 | # It is not necessary to receive all data to draw conclusion. 102 | # For charset detection, certain amount of data is enough 103 | return self._total_chars > self.ENOUGH_DATA_THRESHOLD 104 | 105 | def get_order(self, byte_str): 106 | # We do not handle characters based on the original encoding string, 107 | # but convert this encoding string to a number, here called order. 
108 | # This allows multiple encodings of a language to share one frequency 109 | # table. 110 | return -1 111 | 112 | 113 | class EUCTWDistributionAnalysis(CharDistributionAnalysis): 114 | def __init__(self): 115 | super(EUCTWDistributionAnalysis, self).__init__() 116 | self._char_to_freq_order = EUCTW_CHAR_TO_FREQ_ORDER 117 | self._table_size = EUCTW_TABLE_SIZE 118 | self.typical_distribution_ratio = EUCTW_TYPICAL_DISTRIBUTION_RATIO 119 | 120 | def get_order(self, byte_str): 121 | # for euc-TW encoding, we are interested 122 | # first byte range: 0xc4 -- 0xfe 123 | # second byte range: 0xa1 -- 0xfe 124 | # no validation needed here. State machine has done that 125 | first_char = byte_str[0] 126 | if first_char >= 0xC4: 127 | return 94 * (first_char - 0xC4) + byte_str[1] - 0xA1 128 | else: 129 | return -1 130 | 131 | 132 | class EUCKRDistributionAnalysis(CharDistributionAnalysis): 133 | def __init__(self): 134 | super(EUCKRDistributionAnalysis, self).__init__() 135 | self._char_to_freq_order = EUCKR_CHAR_TO_FREQ_ORDER 136 | self._table_size = EUCKR_TABLE_SIZE 137 | self.typical_distribution_ratio = EUCKR_TYPICAL_DISTRIBUTION_RATIO 138 | 139 | def get_order(self, byte_str): 140 | # for euc-KR encoding, we are interested 141 | # first byte range: 0xb0 -- 0xfe 142 | # second byte range: 0xa1 -- 0xfe 143 | # no validation needed here. 
State machine has done that 144 | first_char = byte_str[0] 145 | if first_char >= 0xB0: 146 | return 94 * (first_char - 0xB0) + byte_str[1] - 0xA1 147 | else: 148 | return -1 149 | 150 | 151 | class GB2312DistributionAnalysis(CharDistributionAnalysis): 152 | def __init__(self): 153 | super(GB2312DistributionAnalysis, self).__init__() 154 | self._char_to_freq_order = GB2312_CHAR_TO_FREQ_ORDER 155 | self._table_size = GB2312_TABLE_SIZE 156 | self.typical_distribution_ratio = GB2312_TYPICAL_DISTRIBUTION_RATIO 157 | 158 | def get_order(self, byte_str): 159 | # for GB2312 encoding, we are interested 160 | # first byte range: 0xb0 -- 0xfe 161 | # second byte range: 0xa1 -- 0xfe 162 | # no validation needed here. State machine has done that 163 | first_char, second_char = byte_str[0], byte_str[1] 164 | if (first_char >= 0xB0) and (second_char >= 0xA1): 165 | return 94 * (first_char - 0xB0) + second_char - 0xA1 166 | else: 167 | return -1 168 | 169 | 170 | class Big5DistributionAnalysis(CharDistributionAnalysis): 171 | def __init__(self): 172 | super(Big5DistributionAnalysis, self).__init__() 173 | self._char_to_freq_order = BIG5_CHAR_TO_FREQ_ORDER 174 | self._table_size = BIG5_TABLE_SIZE 175 | self.typical_distribution_ratio = BIG5_TYPICAL_DISTRIBUTION_RATIO 176 | 177 | def get_order(self, byte_str): 178 | # for big5 encoding, we are interested 179 | # first byte range: 0xa4 -- 0xfe 180 | # second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe 181 | # no validation needed here. 
State machine has done that 182 | first_char, second_char = byte_str[0], byte_str[1] 183 | if first_char >= 0xA4: 184 | if second_char >= 0xA1: 185 | return 157 * (first_char - 0xA4) + second_char - 0xA1 + 63 186 | else: 187 | return 157 * (first_char - 0xA4) + second_char - 0x40 188 | else: 189 | return -1 190 | 191 | 192 | class SJISDistributionAnalysis(CharDistributionAnalysis): 193 | def __init__(self): 194 | super(SJISDistributionAnalysis, self).__init__() 195 | self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER 196 | self._table_size = JIS_TABLE_SIZE 197 | self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO 198 | 199 | def get_order(self, byte_str): 200 | # for sjis encoding, we are interested 201 | # first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe 202 | # second byte range: 0x40 -- 0x7e, 0x81 -- oxfe 203 | # no validation needed here. State machine has done that 204 | first_char, second_char = byte_str[0], byte_str[1] 205 | if (first_char >= 0x81) and (first_char <= 0x9F): 206 | order = 188 * (first_char - 0x81) 207 | elif (first_char >= 0xE0) and (first_char <= 0xEF): 208 | order = 188 * (first_char - 0xE0 + 31) 209 | else: 210 | return -1 211 | order = order + second_char - 0x40 212 | if second_char > 0x7F: 213 | order = -1 214 | return order 215 | 216 | 217 | class EUCJPDistributionAnalysis(CharDistributionAnalysis): 218 | def __init__(self): 219 | super(EUCJPDistributionAnalysis, self).__init__() 220 | self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER 221 | self._table_size = JIS_TABLE_SIZE 222 | self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO 223 | 224 | def get_order(self, byte_str): 225 | # for euc-JP encoding, we are interested 226 | # first byte range: 0xa0 -- 0xfe 227 | # second byte range: 0xa1 -- 0xfe 228 | # no validation needed here. 
State machine has done that 229 | char = byte_str[0] 230 | if char >= 0xA0: 231 | return 94 * (char - 0xA1) + byte_str[1] - 0xa1 232 | else: 233 | return -1 234 | -------------------------------------------------------------------------------- /urllib3/util/timeout.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | # The default socket timeout, used by httplib to indicate that no timeout was 4 | # specified by the user 5 | from socket import _GLOBAL_DEFAULT_TIMEOUT 6 | import time 7 | 8 | from ..exceptions import TimeoutStateError 9 | 10 | # A sentinel value to indicate that no timeout was specified by the user in 11 | # urllib3 12 | _Default = object() 13 | 14 | 15 | # Use time.monotonic if available. 16 | current_time = getattr(time, "monotonic", time.time) 17 | 18 | 19 | class Timeout(object): 20 | """ Timeout configuration. 21 | 22 | Timeouts can be defined as a default for a pool:: 23 | 24 | timeout = Timeout(connect=2.0, read=7.0) 25 | http = PoolManager(timeout=timeout) 26 | response = http.request('GET', 'http://example.com/') 27 | 28 | Or per-request (which overrides the default for the pool):: 29 | 30 | response = http.request('GET', 'http://example.com/', timeout=Timeout(10)) 31 | 32 | Timeouts can be disabled by setting all the parameters to ``None``:: 33 | 34 | no_timeout = Timeout(connect=None, read=None) 35 | response = http.request('GET', 'http://example.com/, timeout=no_timeout) 36 | 37 | 38 | :param total: 39 | This combines the connect and read timeouts into one; the read timeout 40 | will be set to the time leftover from the connect attempt. In the 41 | event that both a connect timeout and a total are specified, or a read 42 | timeout and a total are specified, the shorter timeout will be applied. 43 | 44 | Defaults to None. 
45 | 46 | :type total: integer, float, or None 47 | 48 | :param connect: 49 | The maximum amount of time (in seconds) to wait for a connection 50 | attempt to a server to succeed. Omitting the parameter will default the 51 | connect timeout to the system default, probably `the global default 52 | timeout in socket.py 53 | `_. 54 | None will set an infinite timeout for connection attempts. 55 | 56 | :type connect: integer, float, or None 57 | 58 | :param read: 59 | The maximum amount of time (in seconds) to wait between consecutive 60 | read operations for a response from the server. Omitting the parameter 61 | will default the read timeout to the system default, probably `the 62 | global default timeout in socket.py 63 | `_. 64 | None will set an infinite timeout. 65 | 66 | :type read: integer, float, or None 67 | 68 | .. note:: 69 | 70 | Many factors can affect the total amount of time for urllib3 to return 71 | an HTTP response. 72 | 73 | For example, Python's DNS resolver does not obey the timeout specified 74 | on the socket. Other factors that can affect total request time include 75 | high CPU load, high swap, the program running at a low priority level, 76 | or other behaviors. 77 | 78 | In addition, the read and total timeouts only measure the time between 79 | read operations on the socket connecting the client and the server, 80 | not the total amount of time for the request to return a complete 81 | response. For most requests, the timeout is raised because the server 82 | has not sent the first byte in the specified time. This is not always 83 | the case; if a server streams one byte every fifteen seconds, a timeout 84 | of 20 seconds will not trigger, even though the request will take 85 | several minutes to complete. 86 | 87 | If your goal is to cut off any request after a set amount of wall clock 88 | time, consider having a second "watcher" thread to cut off a slow 89 | request. 
90 | """ 91 | 92 | #: A sentinel object representing the default timeout value 93 | DEFAULT_TIMEOUT = _GLOBAL_DEFAULT_TIMEOUT 94 | 95 | def __init__(self, total=None, connect=_Default, read=_Default): 96 | self._connect = self._validate_timeout(connect, "connect") 97 | self._read = self._validate_timeout(read, "read") 98 | self.total = self._validate_timeout(total, "total") 99 | self._start_connect = None 100 | 101 | def __repr__(self): 102 | return "%s(connect=%r, read=%r, total=%r)" % ( 103 | type(self).__name__, 104 | self._connect, 105 | self._read, 106 | self.total, 107 | ) 108 | 109 | # __str__ provided for backwards compatibility 110 | __str__ = __repr__ 111 | 112 | @classmethod 113 | def _validate_timeout(cls, value, name): 114 | """ Check that a timeout attribute is valid. 115 | 116 | :param value: The timeout value to validate 117 | :param name: The name of the timeout attribute to validate. This is 118 | used to specify in error messages. 119 | :return: The validated and casted version of the given value. 120 | :raises ValueError: If it is a numeric value less than or equal to 121 | zero, or the type is not an integer, float, or None. 122 | """ 123 | if value is _Default: 124 | return cls.DEFAULT_TIMEOUT 125 | 126 | if value is None or value is cls.DEFAULT_TIMEOUT: 127 | return value 128 | 129 | if isinstance(value, bool): 130 | raise ValueError( 131 | "Timeout cannot be a boolean value. It must " 132 | "be an int, float or None." 133 | ) 134 | try: 135 | float(value) 136 | except (TypeError, ValueError): 137 | raise ValueError( 138 | "Timeout value %s was %s, but it must be an " 139 | "int, float or None." % (name, value) 140 | ) 141 | 142 | try: 143 | if value <= 0: 144 | raise ValueError( 145 | "Attempted to set %s timeout to %s, but the " 146 | "timeout cannot be set to a value less " 147 | "than or equal to 0." 
% (name, value) 148 | ) 149 | except TypeError: 150 | # Python 3 151 | raise ValueError( 152 | "Timeout value %s was %s, but it must be an " 153 | "int, float or None." % (name, value) 154 | ) 155 | 156 | return value 157 | 158 | @classmethod 159 | def from_float(cls, timeout): 160 | """ Create a new Timeout from a legacy timeout value. 161 | 162 | The timeout value used by httplib.py sets the same timeout on the 163 | connect(), and recv() socket requests. This creates a :class:`Timeout` 164 | object that sets the individual timeouts to the ``timeout`` value 165 | passed to this function. 166 | 167 | :param timeout: The legacy timeout value. 168 | :type timeout: integer, float, sentinel default object, or None 169 | :return: Timeout object 170 | :rtype: :class:`Timeout` 171 | """ 172 | return Timeout(read=timeout, connect=timeout) 173 | 174 | def clone(self): 175 | """ Create a copy of the timeout object 176 | 177 | Timeout properties are stored per-pool but each request needs a fresh 178 | Timeout object to ensure each one has its own start/stop configured. 179 | 180 | :return: a copy of the timeout object 181 | :rtype: :class:`Timeout` 182 | """ 183 | # We can't use copy.deepcopy because that will also create a new object 184 | # for _GLOBAL_DEFAULT_TIMEOUT, which socket.py uses as a sentinel to 185 | # detect the user default. 186 | return Timeout(connect=self._connect, read=self._read, total=self.total) 187 | 188 | def start_connect(self): 189 | """ Start the timeout clock, used during a connect() attempt 190 | 191 | :raises urllib3.exceptions.TimeoutStateError: if you attempt 192 | to start a timer that has been started already. 193 | """ 194 | if self._start_connect is not None: 195 | raise TimeoutStateError("Timeout timer has already been started.") 196 | self._start_connect = current_time() 197 | return self._start_connect 198 | 199 | def get_connect_duration(self): 200 | """ Gets the time elapsed since the call to :meth:`start_connect`. 
201 | 202 | :return: Elapsed time in seconds. 203 | :rtype: float 204 | :raises urllib3.exceptions.TimeoutStateError: if you attempt 205 | to get duration for a timer that hasn't been started. 206 | """ 207 | if self._start_connect is None: 208 | raise TimeoutStateError( 209 | "Can't get connect duration for timer that has not started." 210 | ) 211 | return current_time() - self._start_connect 212 | 213 | @property 214 | def connect_timeout(self): 215 | """ Get the value to use when setting a connection timeout. 216 | 217 | This will be a positive float or integer, the value None 218 | (never timeout), or the default system timeout. 219 | 220 | :return: Connect timeout. 221 | :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None 222 | """ 223 | if self.total is None: 224 | return self._connect 225 | 226 | if self._connect is None or self._connect is self.DEFAULT_TIMEOUT: 227 | return self.total 228 | 229 | return min(self._connect, self.total) 230 | 231 | @property 232 | def read_timeout(self): 233 | """ Get the value for the read timeout. 234 | 235 | This assumes some time has elapsed in the connection timeout and 236 | computes the read timeout appropriately. 237 | 238 | If self.total is set, the read timeout is dependent on the amount of 239 | time taken by the connect timeout. If the connection time has not been 240 | established, a :exc:`~urllib3.exceptions.TimeoutStateError` will be 241 | raised. 242 | 243 | :return: Value to use for the read timeout. 244 | :rtype: int, float, :attr:`Timeout.DEFAULT_TIMEOUT` or None 245 | :raises urllib3.exceptions.TimeoutStateError: If :meth:`start_connect` 246 | has not yet been called on this object. 247 | """ 248 | if ( 249 | self.total is not None 250 | and self.total is not self.DEFAULT_TIMEOUT 251 | and self._read is not None 252 | and self._read is not self.DEFAULT_TIMEOUT 253 | ): 254 | # In case the connect timeout has not yet been established. 
255 | if self._start_connect is None: 256 | return self._read 257 | return max(0, min(self.total - self.get_connect_duration(), self._read)) 258 | elif self.total is not None and self.total is not self.DEFAULT_TIMEOUT: 259 | return max(0, self.total - self.get_connect_duration()) 260 | else: 261 | return self._read 262 | --------------------------------------------------------------------------------