├── db ├── 500_blacklist.txt ├── 400_blacklist.txt └── 403_blacklist.txt ├── lib ├── __init__.py ├── controller │ ├── __init__.py │ └── banner.txt ├── output │ └── __init__.py ├── connection │ ├── __init__.py │ ├── RequestException.py │ └── Response.py ├── utils │ ├── __init__.py │ ├── RandomUtils.py │ ├── DefaultConfigParser.py │ ├── TerminalSize.py │ └── FileUtils.py ├── core │ ├── __init__.py │ ├── Path.py │ ├── ReportManager.py │ ├── Scanner.py │ └── Dictionary.py └── reports │ ├── __init__.py │ ├── SimpleReport.py │ ├── PlainTextReport.py │ ├── JSONReport.py │ └── BaseReport.py ├── logs └── DO_NOT_DELETE_THIS_FOLDER.txt ├── reports └── DO_NOT_DELETE_THIS_FOLDER.txt ├── thirdparty ├── chardet │ ├── cli │ │ ├── __init__.py │ │ └── chardetect.py │ ├── version.py │ ├── compat.py │ ├── __init__.py │ ├── euctwprober.py │ ├── euckrprober.py │ ├── gb2312prober.py │ ├── big5prober.py │ ├── enums.py │ ├── cp949prober.py │ ├── mbcsgroupprober.py │ ├── utf8prober.py │ ├── mbcharsetprober.py │ ├── sbcsgroupprober.py │ ├── codingstatemachine.py │ ├── eucjpprober.py │ ├── sjisprober.py │ ├── charsetgroupprober.py │ ├── escprober.py │ └── charsetprober.py ├── requests │ ├── packages │ │ ├── urllib3 │ │ │ ├── contrib │ │ │ │ ├── __init__.py │ │ │ │ └── ntlmpool.py │ │ │ ├── packages │ │ │ │ ├── __init__.py │ │ │ │ └── ssl_match_hostname │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── _implementation.py │ │ │ ├── util │ │ │ │ ├── __init__.py │ │ │ │ ├── response.py │ │ │ │ ├── request.py │ │ │ │ └── connection.py │ │ │ ├── __init__.py │ │ │ ├── filepost.py │ │ │ └── exceptions.py │ │ ├── __init__.py │ │ ├── README.rst │ │ └── chardet │ │ │ ├── compat.py │ │ │ ├── __init__.py │ │ │ ├── constants.py │ │ │ ├── euctwprober.py │ │ │ ├── euckrprober.py │ │ │ ├── gb2312prober.py │ │ │ ├── big5prober.py │ │ │ ├── cp949prober.py │ │ │ ├── charsetprober.py │ │ │ ├── mbcsgroupprober.py │ │ │ ├── codingstatemachine.py │ │ │ ├── chardetect.py │ │ │ ├── utf8prober.py │ │ │ ├── escprober.py │ │ │ ├── sbcsgroupprober.py │ │ │ ├── mbcharsetprober.py │ │ │ ├── eucjpprober.py │ │ │ ├── sjisprober.py │ │ │ ├── charsetgroupprober.py │ │ │ ├── sbcharsetprober.py │ │ │ └── latin1prober.py │ ├── certs.py │ ├── hooks.py │ ├── compat.py │ ├── __init__.py │ ├── exceptions.py │ ├── structures.py │ └── status_codes.py ├── __init__.py ├── sqlmap │ ├── __init__.py │ └── DynamicContentParser.py ├── oset │ ├── __init__.py │ ├── tests.py │ └── pyoset.py └── colorama │ ├── __init__.py │ ├── initialise.py │ └── ansi.py ├── .gitignore ├── default.conf ├── CHANGELOG.md ├── dirsearch.py └── README.md /db/500_blacklist.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- 1 | pass 2 | -------------------------------------------------------------------------------- /logs/DO_NOT_DELETE_THIS_FOLDER.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /reports/DO_NOT_DELETE_THIS_FOLDER.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /thirdparty/chardet/cli/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /lib/controller/__init__.py: -------------------------------------------------------------------------------- 1 | from .Controller import * -------------------------------------------------------------------------------- /thirdparty/requests/packages/urllib3/contrib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /db/400_blacklist.txt: -------------------------------------------------------------------------------- 1 | % 2 | Nueva Carpeta 3 | New Folder 4 | -------------------------------------------------------------------------------- /thirdparty/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | pass 4 | -------------------------------------------------------------------------------- /lib/output/__init__.py: -------------------------------------------------------------------------------- 1 | from .CLIOutput import * 2 | 3 | pass 4 | -------------------------------------------------------------------------------- /thirdparty/sqlmap/__init__.py: -------------------------------------------------------------------------------- 1 | from .DynamicContentParser import * -------------------------------------------------------------------------------- /thirdparty/oset/__init__.py: -------------------------------------------------------------------------------- 1 | """Main Ordered Set module """ 2 | 3 | from .pyoset import oset 4 | -------------------------------------------------------------------------------- /thirdparty/requests/packages/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from . import urllib3 4 | -------------------------------------------------------------------------------- /lib/connection/__init__.py: -------------------------------------------------------------------------------- 1 | from .RequestException import * 2 | from .Requester import * 3 | from .Response import * 4 | -------------------------------------------------------------------------------- /lib/controller/banner.txt: -------------------------------------------------------------------------------- 1 | 2 | _|. _ _ _ _ _ _|_ v{MAYOR_VERSION}.{MINOR_VERSION}.{REVISION} 3 | (_||| _) (/_(_|| (_| ) 4 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .FileUtils import * 2 | from .RandomUtils import * 3 | from .DefaultConfigParser import * 4 | pass 5 | -------------------------------------------------------------------------------- /thirdparty/requests/packages/urllib3/packages/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from . import ssl_match_hostname 4 | 5 | -------------------------------------------------------------------------------- /lib/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .ArgumentParser import * 2 | from .Dictionary import * 3 | from .Fuzzer import * 4 | from .Path import * 5 | from .ReportManager import * 6 | -------------------------------------------------------------------------------- /lib/reports/__init__.py: -------------------------------------------------------------------------------- 1 | from .BaseReport import * 2 | from .JSONReport import * 3 | from .PlainTextReport import * 4 | from .SimpleReport import * 5 | 6 | pass 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | reports/ 2 | logs/ 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | .idea 10 | 11 | db/test.txt -------------------------------------------------------------------------------- /db/403_blacklist.txt: -------------------------------------------------------------------------------- 1 | .htaccess 2 | .htaccess.bak 3 | .htaccess.inc 4 | .htaccess/ 5 | .htpasswd.bak 6 | .htpasswd 7 | .htpasswd.inc 8 | .htpa55wd 9 | .htpasswd/ 10 | .htpasswrd 11 | 12 | -------------------------------------------------------------------------------- /thirdparty/colorama/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. 2 | from .initialise import init, deinit, reinit, colorama_text 3 | from .ansi import Fore, Back, Style, Cursor 4 | from .ansitowin32 import AnsiToWin32 5 | 6 | __version__ = '0.3.3' 7 | 8 | -------------------------------------------------------------------------------- /thirdparty/chardet/version.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module exists only to simplify retrieving the version number of chardet 3 | from within setup.py and from chardet subpackages. 4 | 5 | :author: Dan Blanchard (dan.blanchard@gmail.com) 6 | """ 7 | 8 | __version__ = "3.0.4" 9 | VERSION = __version__.split('.') 10 | -------------------------------------------------------------------------------- /thirdparty/requests/packages/README.rst: -------------------------------------------------------------------------------- 1 | If you are planning to submit a pull request to requests with any changes in 2 | this library do not go any further. These are independent libraries which we 3 | vendor into requests. Any changes necessary to these libraries must be made in 4 | them and submitted as separate pull requests to those libraries. 5 | 6 | urllib3 pull requests go here: https://github.com/shazow/urllib3 7 | 8 | chardet pull requests go here: https://github.com/chardet/chardet 9 | -------------------------------------------------------------------------------- /default.conf: -------------------------------------------------------------------------------- 1 | [general] 2 | #threads = 10 3 | #follow-redirects = False 4 | #exclude-status = 200,301 5 | #recursive = False 6 | #scanner-fail-path = InvalidPath123123 7 | #save-logs-home = True 8 | 9 | [reports] 10 | autosave-report = True 11 | autosave-report-format = plain 12 | 13 | [dictionary] 14 | #wordlist = test.dicc 15 | #lowercase = False 16 | 17 | [connection] 18 | #useragent = MyUserAgent 19 | #timeout = 30 20 | #max-retries = 5 21 | #http-proxy = localhost:8080 22 | #random-user-agents = True 23 | -------------------------------------------------------------------------------- /thirdparty/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py: -------------------------------------------------------------------------------- 1 | try: 2 | # Python 3.2+ 3 | from ssl import CertificateError, match_hostname 4 | except ImportError: 5 | try: 6 | # Backport of the function from a pypi module 7 | from backports.ssl_match_hostname import CertificateError, match_hostname 8 | except ImportError: 9 | # Our vendored copy 10 | from ._implementation import CertificateError, match_hostname 11 | 12 | # Not needed, but documenting what we provide. 13 | __all__ = ('CertificateError', 'match_hostname') 14 | -------------------------------------------------------------------------------- /thirdparty/requests/packages/urllib3/util/__init__.py: -------------------------------------------------------------------------------- 1 | # For backwards compatibility, provide imports that used to be here. 2 | from .connection import is_connection_dropped 3 | from .request import make_headers 4 | from .response import is_fp_closed 5 | from .ssl_ import ( 6 | SSLContext, 7 | HAS_SNI, 8 | assert_fingerprint, 9 | resolve_cert_reqs, 10 | resolve_ssl_version, 11 | ssl_wrap_socket, 12 | ) 13 | from .timeout import ( 14 | current_time, 15 | Timeout, 16 | ) 17 | 18 | from .retry import Retry 19 | from .url import ( 20 | get_host, 21 | parse_url, 22 | split_first, 23 | Url, 24 | ) 25 | -------------------------------------------------------------------------------- /thirdparty/requests/packages/urllib3/util/response.py: -------------------------------------------------------------------------------- 1 | def is_fp_closed(obj): 2 | """ 3 | Checks whether a given file-like object is closed. 4 | 5 | :param obj: 6 | The file-like object to check. 7 | """ 8 | 9 | try: 10 | # Check via the official file-like-object way. 11 | return obj.closed 12 | except AttributeError: 13 | pass 14 | 15 | try: 16 | # Check if the object is a container for another file-like object that 17 | # gets released on exhaustion (e.g. HTTPResponse). 18 | return obj.fp is None 19 | except AttributeError: 20 | pass 21 | 22 | raise ValueError("Unable to determine whether fp is closed.") 23 | -------------------------------------------------------------------------------- /thirdparty/requests/certs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | certs.py 6 | ~~~~~~~~ 7 | 8 | This module returns the preferred default CA certificate bundle. 9 | 10 | If you are packaging Requests, e.g., for a Linux distribution or a managed 11 | environment, you can change the definition of where() to return a separately 12 | packaged CA bundle. 13 | """ 14 | import os.path 15 | 16 | try: 17 | from certifi import where 18 | except ImportError: 19 | def where(): 20 | """Return the preferred certificate bundle.""" 21 | # vendored bundle inside Requests 22 | return os.path.join(os.path.dirname(__file__), 'cacert.pem') 23 | 24 | if __name__ == '__main__': 25 | print(where()) 26 | -------------------------------------------------------------------------------- /lib/connection/RequestException.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This program is free software; you can redistribute it and/or modify 3 | # it under the terms of the GNU General Public License as published by 4 | # the Free Software Foundation; either version 2 of the License, or 5 | # (at your option) any later version. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU General Public License 13 | # along with this program; if not, write to the Free Software 14 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 15 | # MA 02110-1301, USA. 16 | # 17 | # Author: Mauro Soria 18 | 19 | 20 | class RequestException(Exception): 21 | pass 22 | -------------------------------------------------------------------------------- /thirdparty/requests/hooks.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | requests.hooks 5 | ~~~~~~~~~~~~~~ 6 | 7 | This module provides the capabilities for the Requests hooks system. 8 | 9 | Available hooks: 10 | 11 | ``response``: 12 | The response generated from a Request. 13 | 14 | """ 15 | HOOKS = ['response'] 16 | 17 | def default_hooks(): 18 | return dict((event, []) for event in HOOKS) 19 | 20 | # TODO: response is the only one 21 | 22 | 23 | def dispatch_hook(key, hooks, hook_data, **kwargs): 24 | """Dispatches a hook dictionary on a given piece of data.""" 25 | hooks = hooks or dict() 26 | hooks = hooks.get(key) 27 | if hooks: 28 | if hasattr(hooks, '__call__'): 29 | hooks = [hooks] 30 | for hook in hooks: 31 | _hook_data = hook(hook_data, **kwargs) 32 | if _hook_data is not None: 33 | hook_data = _hook_data 34 | return hook_data 35 | -------------------------------------------------------------------------------- /thirdparty/oset/tests.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- mode:python; tab-width: 2; coding: utf-8 -*- 3 | 4 | """Partially backported python ABC classes""" 5 | 6 | 7 | 8 | import doctest 9 | import unittest 10 | 11 | optionflags = doctest.NORMALIZE_WHITESPACE | \ 12 | doctest.ELLIPSIS | \ 13 | doctest.REPORT_ONLY_FIRST_FAILURE 14 | 15 | TESTFILES = [ 16 | 'pyoset.txt', 17 | ] 18 | 19 | 20 | def test_suite(): 21 | """Simple tes suite""" 22 | 23 | globs = {} 24 | try: 25 | from pprint import pprint 26 | globs['pprint'] = pprint 27 | except Exception: 28 | pass 29 | try: 30 | from interlude import interact 31 | globs['interact'] = interact 32 | except Exception: 33 | pass 34 | 35 | return unittest.TestSuite([ 36 | doctest.DocFileSuite( 37 | file, 38 | optionflags=optionflags, 39 | globs=globs, 40 | ) for file in TESTFILES 41 | ]) 42 | 43 | if __name__ == '__main__': 44 | unittest.main(defaultTest='test_suite') 45 | -------------------------------------------------------------------------------- /lib/core/Path.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This program is free software; you can redistribute it and/or modify 3 | # it under the terms of the GNU General Public License as published by 4 | # the Free Software Foundation; either version 2 of the License, or 5 | # (at your option) any later version. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU General Public License 13 | # along with this program; if not, write to the Free Software 14 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 15 | # MA 02110-1301, USA. 16 | # 17 | # Author: Mauro Soria 18 | 19 | 20 | class Path(object): 21 | def __init__(self, path=None, status=None, response=None): 22 | self.path = path 23 | self.status = status 24 | self.response = response 25 | 26 | def __str__(self): 27 | return self.path 28 | -------------------------------------------------------------------------------- /lib/utils/RandomUtils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This program is free software; you can redistribute it and/or modify 3 | # it under the terms of the GNU General Public License as published by 4 | # the Free Software Foundation; either version 2 of the License, or 5 | # (at your option) any later version. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU General Public License 13 | # along with this program; if not, write to the Free Software 14 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 15 | # MA 02110-1301, USA. 16 | # 17 | # Author: Mauro Soria 18 | 19 | import random 20 | import string 21 | 22 | 23 | class RandomUtils(object): 24 | @classmethod 25 | def randString(cls, n=12, omit=None): 26 | seq = string.ascii_lowercase + string.ascii_uppercase + string.digits 27 | if omit: 28 | seq = list(set(seq) - set(omit)) 29 | return ''.join(random.choice(seq) for _ in range(n)) -------------------------------------------------------------------------------- /lib/reports/SimpleReport.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This program is free software; you can redistribute it and/or modify 3 | # it under the terms of the GNU General Public License as published by 4 | # the Free Software Foundation; either version 2 of the License, or 5 | # (at your option) any later version. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU General Public License 13 | # along with this program; if not, write to the Free Software 14 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 15 | # MA 02110-1301, USA. 16 | # 17 | # Author: Mauro Soria 18 | 19 | from lib.reports import * 20 | 21 | 22 | class SimpleReport(BaseReport): 23 | def generate(self): 24 | result = '' 25 | 26 | for path, _, _ in self.pathList: 27 | result += '{0}://{1}:{2}/'.format(self.protocol, self.host, self.port) 28 | result += ('{0}\n'.format(path) if self.basePath is '' else '{0}/{1}\n'.format(self.basePath, path)) 29 | 30 | return result 31 | -------------------------------------------------------------------------------- /thirdparty/chardet/compat.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # Contributor(s): 3 | # Dan Blanchard 4 | # Ian Cordasco 5 | # 6 | # This library is free software; you can redistribute it and/or 7 | # modify it under the terms of the GNU Lesser General Public 8 | # License as published by the Free Software Foundation; either 9 | # version 2.1 of the License, or (at your option) any later version. 10 | # 11 | # This library is distributed in the hope that it will be useful, 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | # Lesser General Public License for more details. 15 | # 16 | # You should have received a copy of the GNU Lesser General Public 17 | # License along with this library; if not, write to the Free Software 18 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 19 | # 02110-1301 USA 20 | ######################### END LICENSE BLOCK ######################### 21 | 22 | import sys 23 | 24 | 25 | if sys.version_info < (3, 0): 26 | PY2 = True 27 | PY3 = False 28 | base_str = (str, unicode) 29 | text_type = unicode 30 | else: 31 | PY2 = False 32 | PY3 = True 33 | base_str = (bytes, str) 34 | text_type = str 35 | -------------------------------------------------------------------------------- /thirdparty/requests/packages/chardet/compat.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # Contributor(s): 3 | # Ian Cordasco - port to Python 4 | # 5 | # This library is free software; you can redistribute it and/or 6 | # modify it under the terms of the GNU Lesser General Public 7 | # License as published by the Free Software Foundation; either 8 | # version 2.1 of the License, or (at your option) any later version. 9 | # 10 | # This library is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 | # Lesser General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU Lesser General Public 16 | # License along with this library; if not, write to the Free Software 17 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 18 | # 02110-1301 USA 19 | ######################### END LICENSE BLOCK ######################### 20 | 21 | import sys 22 | 23 | 24 | if sys.version_info < (3, 0): 25 | base_str = (str, unicode) 26 | else: 27 | base_str = (bytes, str) 28 | 29 | 30 | def wrap_ord(a): 31 | if sys.version_info < (3, 0) and isinstance(a, base_str): 32 | return ord(a) 33 | else: 34 | return a 35 | -------------------------------------------------------------------------------- /thirdparty/requests/packages/chardet/__init__.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # This library is free software; you can redistribute it and/or 3 | # modify it under the terms of the GNU Lesser General Public 4 | # License as published by the Free Software Foundation; either 5 | # version 2.1 of the License, or (at your option) any later version. 6 | # 7 | # This library is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 10 | # Lesser General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU Lesser General Public 13 | # License along with this library; if not, write to the Free Software 14 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 15 | # 02110-1301 USA 16 | ######################### END LICENSE BLOCK ######################### 17 | 18 | __version__ = "2.3.0" 19 | from sys import version_info 20 | 21 | 22 | def detect(aBuf): 23 | if ((version_info < (3, 0) and isinstance(aBuf, unicode)) or 24 | (version_info >= (3, 0) and not isinstance(aBuf, bytes))): 25 | raise ValueError('Expected a bytes object, not a unicode object') 26 | 27 | from . import universaldetector 28 | u = universaldetector.UniversalDetector() 29 | u.reset() 30 | u.feed(aBuf) 31 | u.close() 32 | return u.result 33 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | Changelog 2 | --------- 3 | - 0.3.8 - 2017.07.25 Delay argument added. Request by hostname switch added. Suppress empty switch added. Added Force Extensions switch. Multiple fixes. 4 | - 0.3.7 - 2016.08.22 Force extensions switch added. 5 | - 0.3.6 - 2016.02.14 Bugfixes 6 | - 0.3.5 - 2016.01.29 Improved heuristic, replaced urllib3 for requests, error logs, batch reports, user agent randomization, bugfixes 7 | - 0.3.0 - 2015.02.05 Fixed issue3, fixed timeout exception, ported to Python3, other bugfixes 8 | - 0.2.7 - 2014.11.21 Added Url List feature (-L). Changed output. Minor Fixes 9 | - 0.2.6 - 2014.9.12 Fixed bug when dictionary size is greater than threads count. Fixed URL encoding bug (issue2). 10 | - 0.2.5 - 2014.9.2 Shows Content-Length in output and reports, added default.conf file (for setting defaults) and report auto save feature added. 11 | - 0.2.4 - 2014.7.17 Added Windows support, --scan-subdir|--scan-subdirs argument added, --exclude-subdir|--exclude-subdirs added, --header argument added, dirbuster dictionaries added, fixed some concurrency bugs, MVC refactoring 12 | - 0.2.3 - 2014.7.7 Fixed some bugs, minor refactorings, exclude status switch, "pause/next directory" feature, changed help structure, expaded default dictionary 13 | - 0.2.2 - 2014.7.2 Fixed some bugs, showing percentage of tested paths and added report generation feature 14 | - 0.2.1 - 2014.5.1 Fixed some bugs and added recursive option 15 | - 0.2.0 - 2014.1.31 Initial public release 16 | -------------------------------------------------------------------------------- /lib/reports/PlainTextReport.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This program is free software; you can redistribute it and/or modify 3 | # it under the terms of the GNU General Public License as published by 4 | # the Free Software Foundation; either version 2 of the License, or 5 | # (at your option) any later version. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU General Public License 13 | # along with this program; if not, write to the Free Software 14 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 15 | # MA 02110-1301, USA. 16 | # 17 | # Author: Mauro Soria 18 | 19 | from lib.reports import * 20 | from lib.utils.FileUtils import * 21 | 22 | 23 | class PlainTextReport(BaseReport): 24 | 25 | def generate(self): 26 | result = '' 27 | 28 | for path, status, contentLength in self.pathList: 29 | result += '{0} '.format(status) 30 | result += '{0} '.format(FileUtils.sizeHuman(contentLength).rjust(6, ' ')) 31 | result += '{0}://{1}:{2}/'.format(self.protocol, self.host, self.port) 32 | result += ('{0}\n'.format(path) if self.basePath is '' else '{0}/{1}\n'.format(self.basePath, path)) 33 | 34 | return result 35 | -------------------------------------------------------------------------------- /dirsearch.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # This program is free software; you can redistribute it and/or modify 4 | # it under the terms of the GNU General Public License as published by 5 | # the Free Software Foundation; either version 2 of the License, or 6 | # (at your option) any later version. 7 | # 8 | # This program is distributed in the hope that it will be useful, 9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | # GNU General Public License for more details. 12 | # 13 | # You should have received a copy of the GNU General Public License 14 | # along with this program; if not, write to the Free Software 15 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 16 | # MA 02110-1301, USA. 17 | # 18 | # Author: Mauro Soria 19 | 20 | import sys 21 | 22 | if sys.version_info < (3, 0): 23 | sys.stdout.write("Sorry, dirsearch requires Python 3.x\n") 24 | sys.exit(1) 25 | 26 | from lib.core import ArgumentParser 27 | from lib.controller import * 28 | from lib.output import * 29 | 30 | 31 | class Program(object): 32 | def __init__(self): 33 | self.script_path = (os.path.dirname(os.path.realpath(__file__))) 34 | self.arguments = ArgumentParser(self.script_path) 35 | self.output = CLIOutput() 36 | self.controller = Controller(self.script_path, self.arguments, self.output) 37 | 38 | 39 | if __name__ == '__main__': 40 | main = Program() 41 | -------------------------------------------------------------------------------- /thirdparty/requests/packages/chardet/constants.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # 13 | # This library is free software; you can redistribute it and/or 14 | # modify it under the terms of the GNU Lesser General Public 15 | # License as published by the Free Software Foundation; either 16 | # version 2.1 of the License, or (at your option) any later version. 17 | # 18 | # This library is distributed in the hope that it will be useful, 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 | # Lesser General Public License for more details. 22 | # 23 | # You should have received a copy of the GNU Lesser General Public 24 | # License along with this library; if not, write to the Free Software 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 26 | # 02110-1301 USA 27 | ######################### END LICENSE BLOCK ######################### 28 | 29 | _debug = 0 30 | 31 | eDetecting = 0 32 | eFoundIt = 1 33 | eNotMe = 2 34 | 35 | eStart = 0 36 | eError = 1 37 | eItsMe = 2 38 | 39 | SHORTCUT_THRESHOLD = 0.95 40 | -------------------------------------------------------------------------------- /lib/core/ReportManager.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This program is free software; you can redistribute it and/or modify 3 | # it under the terms of the GNU General Public License as published by 4 | # the Free Software Foundation; either version 2 of the License, or 5 | # (at your option) any later version. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU General Public License 13 | # along with this program; if not, write to the Free Software 14 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 15 | # MA 02110-1301, USA. 16 | # 17 | # Author: Mauro Soria 18 | 19 | import threading 20 | 21 | 22 | class ReportManager(object): 23 | 24 | def __init__(self): 25 | self.outputs = [] 26 | self.lock = threading.Lock() 27 | 28 | def addOutput(self, output): 29 | self.outputs.append(output) 30 | 31 | def addPath(self, path, status, response): 32 | with self.lock: 33 | for output in self.outputs: 34 | output.addPath(path, status, response) 35 | 36 | def save(self): 37 | with self.lock: 38 | for output in self.outputs: 39 | output.save() 40 | 41 | def close(self): 42 | for output in self.outputs: 43 | output.close() 44 | -------------------------------------------------------------------------------- /thirdparty/chardet/__init__.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # This library is free software; you can redistribute it and/or 3 | # modify it under the terms of the GNU Lesser General Public 4 | # License as published by the Free Software Foundation; either 5 | # version 2.1 of the License, or (at your option) any later version. 6 | # 7 | # This library is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 10 | # Lesser General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU Lesser General Public 13 | # License along with this library; if not, write to the Free Software 14 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 15 | # 02110-1301 USA 16 | ######################### END LICENSE BLOCK ######################### 17 | 18 | 19 | from .compat import PY2, PY3 20 | from .universaldetector import UniversalDetector 21 | from .version import __version__, VERSION 22 | 23 | 24 | def detect(byte_str): 25 | """ 26 | Detect the encoding of the given byte string. 27 | 28 | :param byte_str: The byte sequence to examine. 29 | :type byte_str: ``bytes`` or ``bytearray`` 30 | """ 31 | if not isinstance(byte_str, bytearray): 32 | if not isinstance(byte_str, bytes): 33 | raise TypeError('Expected object of type bytes or bytearray, got: ' 34 | '{0}'.format(type(byte_str))) 35 | else: 36 | byte_str = bytearray(byte_str) 37 | detector = UniversalDetector() 38 | detector.feed(byte_str) 39 | return detector.close() 40 | -------------------------------------------------------------------------------- /thirdparty/requests/compat.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | pythoncompat 5 | """ 6 | 7 | from .packages import chardet 8 | 9 | import sys 10 | 11 | # ------- 12 | # Pythons 13 | # ------- 14 | 15 | # Syntax sugar. 16 | _ver = sys.version_info 17 | 18 | #: Python 2.x? 19 | is_py2 = (_ver[0] == 2) 20 | 21 | #: Python 3.x? 22 | is_py3 = (_ver[0] == 3) 23 | 24 | try: 25 | import simplejson as json 26 | except (ImportError, SyntaxError): 27 | # simplejson does not support Python 3.2, it throws a SyntaxError 28 | # because of u'...' Unicode literals. 29 | import json 30 | 31 | # --------- 32 | # Specifics 33 | # --------- 34 | 35 | if is_py2: 36 | from urllib import quote, unquote, quote_plus, unquote_plus, urlencode, getproxies, proxy_bypass 37 | from urlparse import urlparse, urlunparse, urljoin, urlsplit, urldefrag 38 | from urllib2 import parse_http_list 39 | import cookielib 40 | from Cookie import Morsel 41 | from StringIO import StringIO 42 | from .packages.urllib3.packages.ordered_dict import OrderedDict 43 | 44 | builtin_str = str 45 | bytes = str 46 | str = unicode 47 | basestring = basestring 48 | numeric_types = (int, long, float) 49 | 50 | elif is_py3: 51 | from urllib.parse import urlparse, urlunparse, urljoin, urlsplit, urlencode, quote, unquote, quote_plus, unquote_plus, urldefrag 52 | from urllib.request import parse_http_list, getproxies, proxy_bypass 53 | from http import cookiejar as cookielib 54 | from http.cookies import Morsel 55 | from io import StringIO 56 | from collections import OrderedDict 57 | 58 | builtin_str = str 59 | str = str 60 | bytes = bytes 61 | basestring = (str, bytes) 62 | numeric_types = (int, float) 63 | -------------------------------------------------------------------------------- /lib/reports/JSONReport.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This program is free software; you can redistribute it and/or modify 3 | # it under the terms of the GNU General Public License as published by 4 | # the Free Software Foundation; either version 2 of the License, or 5 | # (at your option) any later version. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU General Public License 13 | # along with this program; if not, write to the Free Software 14 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 15 | # MA 02110-1301, USA. 16 | # 17 | # Author: Mauro Soria 18 | 19 | import json 20 | 21 | from lib.reports import * 22 | 23 | 24 | class JSONReport(BaseReport): 25 | 26 | def addPath(self, path, status, response): 27 | contentLength = None 28 | 29 | try: 30 | contentLength = int(response.headers['content-length']) 31 | 32 | except (KeyError, ValueError): 33 | contentLength = len(response.body) 34 | 35 | self.pathList.append((path, status, contentLength, response.redirect)) 36 | 37 | def generate(self): 38 | headerName = '{0}://{1}:{2}/{3}'.format(self.protocol, self.host, self.port, self.basePath) 39 | result = {headerName: []} 40 | 41 | for path, status, contentLength, redirect in self.pathList: 42 | entry = {'status': status, 'path': path, 'content-length': contentLength, 'redirect': redirect} 43 | result[headerName].append(entry) 44 | 45 | return json.dumps(result, sort_keys=True, indent=4) 46 | -------------------------------------------------------------------------------- /thirdparty/requests/packages/chardet/euctwprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import EUCTWDistributionAnalysis 31 | from .mbcssm import EUCTWSMModel 32 | 33 | class EUCTWProber(MultiByteCharSetProber): 34 | def __init__(self): 35 | MultiByteCharSetProber.__init__(self) 36 | self._mCodingSM = CodingStateMachine(EUCTWSMModel) 37 | self._mDistributionAnalyzer = EUCTWDistributionAnalysis() 38 | self.reset() 39 | 40 | def get_charset_name(self): 41 | return "EUC-TW" 42 | -------------------------------------------------------------------------------- /thirdparty/requests/packages/chardet/euckrprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import EUCKRDistributionAnalysis 31 | from .mbcssm import EUCKRSMModel 32 | 33 | 34 | class EUCKRProber(MultiByteCharSetProber): 35 | def __init__(self): 36 | MultiByteCharSetProber.__init__(self) 37 | self._mCodingSM = CodingStateMachine(EUCKRSMModel) 38 | self._mDistributionAnalyzer = EUCKRDistributionAnalysis() 39 | self.reset() 40 | 41 | def get_charset_name(self): 42 | return "EUC-KR" 43 | -------------------------------------------------------------------------------- /thirdparty/requests/packages/chardet/gb2312prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import GB2312DistributionAnalysis 31 | from .mbcssm import GB2312SMModel 32 | 33 | class GB2312Prober(MultiByteCharSetProber): 34 | def __init__(self): 35 | MultiByteCharSetProber.__init__(self) 36 | self._mCodingSM = CodingStateMachine(GB2312SMModel) 37 | self._mDistributionAnalyzer = GB2312DistributionAnalysis() 38 | self.reset() 39 | 40 | def get_charset_name(self): 41 | return "GB2312" 42 | -------------------------------------------------------------------------------- /thirdparty/requests/packages/chardet/big5prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Communicator client code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import Big5DistributionAnalysis 31 | from .mbcssm import Big5SMModel 32 | 33 | 34 | class Big5Prober(MultiByteCharSetProber): 35 | def __init__(self): 36 | MultiByteCharSetProber.__init__(self) 37 | self._mCodingSM = CodingStateMachine(Big5SMModel) 38 | self._mDistributionAnalyzer = Big5DistributionAnalysis() 39 | self.reset() 40 | 41 | def get_charset_name(self): 42 | return "Big5" 43 | -------------------------------------------------------------------------------- /thirdparty/chardet/euctwprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import EUCTWDistributionAnalysis 31 | from .mbcssm import EUCTW_SM_MODEL 32 | 33 | class EUCTWProber(MultiByteCharSetProber): 34 | def __init__(self): 35 | super(EUCTWProber, self).__init__() 36 | self.coding_sm = CodingStateMachine(EUCTW_SM_MODEL) 37 | self.distribution_analyzer = EUCTWDistributionAnalysis() 38 | self.reset() 39 | 40 | @property 41 | def charset_name(self): 42 | return "EUC-TW" 43 | 44 | @property 45 | def language(self): 46 | return "Taiwan" 47 | -------------------------------------------------------------------------------- /thirdparty/chardet/euckrprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import EUCKRDistributionAnalysis 31 | from .mbcssm import EUCKR_SM_MODEL 32 | 33 | 34 | class EUCKRProber(MultiByteCharSetProber): 35 | def __init__(self): 36 | super(EUCKRProber, self).__init__() 37 | self.coding_sm = CodingStateMachine(EUCKR_SM_MODEL) 38 | self.distribution_analyzer = EUCKRDistributionAnalysis() 39 | self.reset() 40 | 41 | @property 42 | def charset_name(self): 43 | return "EUC-KR" 44 | 45 | @property 46 | def language(self): 47 | return "Korean" 48 | -------------------------------------------------------------------------------- /thirdparty/chardet/gb2312prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import GB2312DistributionAnalysis 31 | from .mbcssm import GB2312_SM_MODEL 32 | 33 | class GB2312Prober(MultiByteCharSetProber): 34 | def __init__(self): 35 | super(GB2312Prober, self).__init__() 36 | self.coding_sm = CodingStateMachine(GB2312_SM_MODEL) 37 | self.distribution_analyzer = GB2312DistributionAnalysis() 38 | self.reset() 39 | 40 | @property 41 | def charset_name(self): 42 | return "GB2312" 43 | 44 | @property 45 | def language(self): 46 | return "Chinese" 47 | -------------------------------------------------------------------------------- /thirdparty/chardet/big5prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Communicator client code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import Big5DistributionAnalysis 31 | from .mbcssm import BIG5_SM_MODEL 32 | 33 | 34 | class Big5Prober(MultiByteCharSetProber): 35 | def __init__(self): 36 | super(Big5Prober, self).__init__() 37 | self.coding_sm = CodingStateMachine(BIG5_SM_MODEL) 38 | self.distribution_analyzer = Big5DistributionAnalysis() 39 | self.reset() 40 | 41 | @property 42 | def charset_name(self): 43 | return "Big5" 44 | 45 | @property 46 | def language(self): 47 | return "Chinese" 48 | -------------------------------------------------------------------------------- /thirdparty/requests/packages/chardet/cp949prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import EUCKRDistributionAnalysis 31 | from .mbcssm import CP949SMModel 32 | 33 | 34 | class CP949Prober(MultiByteCharSetProber): 35 | def __init__(self): 36 | MultiByteCharSetProber.__init__(self) 37 | self._mCodingSM = CodingStateMachine(CP949SMModel) 38 | # NOTE: CP949 is a superset of EUC-KR, so the distribution should be 39 | # not different. 40 | self._mDistributionAnalyzer = EUCKRDistributionAnalysis() 41 | self.reset() 42 | 43 | def get_charset_name(self): 44 | return "CP949" 45 | -------------------------------------------------------------------------------- /thirdparty/chardet/enums.py: -------------------------------------------------------------------------------- 1 | """ 2 | All of the Enums that are used throughout the chardet package. 3 | 4 | :author: Dan Blanchard (dan.blanchard@gmail.com) 5 | """ 6 | 7 | 8 | class InputState(object): 9 | """ 10 | This enum represents the different states a universal detector can be in. 11 | """ 12 | PURE_ASCII = 0 13 | ESC_ASCII = 1 14 | HIGH_BYTE = 2 15 | 16 | 17 | class LanguageFilter(object): 18 | """ 19 | This enum represents the different language filters we can apply to a 20 | ``UniversalDetector``. 21 | """ 22 | CHINESE_SIMPLIFIED = 0x01 23 | CHINESE_TRADITIONAL = 0x02 24 | JAPANESE = 0x04 25 | KOREAN = 0x08 26 | NON_CJK = 0x10 27 | ALL = 0x1F 28 | CHINESE = CHINESE_SIMPLIFIED | CHINESE_TRADITIONAL 29 | CJK = CHINESE | JAPANESE | KOREAN 30 | 31 | 32 | class ProbingState(object): 33 | """ 34 | This enum represents the different states a prober can be in. 35 | """ 36 | DETECTING = 0 37 | FOUND_IT = 1 38 | NOT_ME = 2 39 | 40 | 41 | class MachineState(object): 42 | """ 43 | This enum represents the different states a state machine can be in. 44 | """ 45 | START = 0 46 | ERROR = 1 47 | ITS_ME = 2 48 | 49 | 50 | class SequenceLikelihood(object): 51 | """ 52 | This enum represents the likelihood of a character following the previous one. 53 | """ 54 | NEGATIVE = 0 55 | UNLIKELY = 1 56 | LIKELY = 2 57 | POSITIVE = 3 58 | 59 | @classmethod 60 | def get_num_categories(cls): 61 | """:returns: The number of likelihood categories in the enum.""" 62 | return 4 63 | 64 | 65 | class CharacterCategory(object): 66 | """ 67 | This enum represents the different categories language models for 68 | ``SingleByteCharsetProber`` put characters into. 69 | 70 | Anything less than CONTROL is considered a letter. 71 | """ 72 | UNDEFINED = 255 73 | LINE_BREAK = 254 74 | SYMBOL = 253 75 | DIGIT = 252 76 | CONTROL = 251 77 | -------------------------------------------------------------------------------- /thirdparty/chardet/cp949prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .chardistribution import EUCKRDistributionAnalysis 29 | from .codingstatemachine import CodingStateMachine 30 | from .mbcharsetprober import MultiByteCharSetProber 31 | from .mbcssm import CP949_SM_MODEL 32 | 33 | 34 | class CP949Prober(MultiByteCharSetProber): 35 | def __init__(self): 36 | super(CP949Prober, self).__init__() 37 | self.coding_sm = CodingStateMachine(CP949_SM_MODEL) 38 | # NOTE: CP949 is a superset of EUC-KR, so the distribution should be 39 | # not different. 40 | self.distribution_analyzer = EUCKRDistributionAnalysis() 41 | self.reset() 42 | 43 | @property 44 | def charset_name(self): 45 | return "CP949" 46 | 47 | @property 48 | def language(self): 49 | return "Korean" 50 | -------------------------------------------------------------------------------- /thirdparty/requests/packages/chardet/charsetprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # 13 | # This library is free software; you can redistribute it and/or 14 | # modify it under the terms of the GNU Lesser General Public 15 | # License as published by the Free Software Foundation; either 16 | # version 2.1 of the License, or (at your option) any later version. 17 | # 18 | # This library is distributed in the hope that it will be useful, 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 | # Lesser General Public License for more details. 22 | # 23 | # You should have received a copy of the GNU Lesser General Public 24 | # License along with this library; if not, write to the Free Software 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 26 | # 02110-1301 USA 27 | ######################### END LICENSE BLOCK ######################### 28 | 29 | from . import constants 30 | import re 31 | 32 | 33 | class CharSetProber: 34 | def __init__(self): 35 | pass 36 | 37 | def reset(self): 38 | self._mState = constants.eDetecting 39 | 40 | def get_charset_name(self): 41 | return None 42 | 43 | def feed(self, aBuf): 44 | pass 45 | 46 | def get_state(self): 47 | return self._mState 48 | 49 | def get_confidence(self): 50 | return 0.0 51 | 52 | def filter_high_bit_only(self, aBuf): 53 | aBuf = re.sub(b'([\x00-\x7F])+', b' ', aBuf) 54 | return aBuf 55 | 56 | def filter_without_english_letters(self, aBuf): 57 | aBuf = re.sub(b'([A-Za-z])+', b' ', aBuf) 58 | return aBuf 59 | 60 | def filter_with_english_letters(self, aBuf): 61 | # TODO 62 | return aBuf 63 | -------------------------------------------------------------------------------- /thirdparty/colorama/initialise.py: -------------------------------------------------------------------------------- 1 | # Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. 2 | import atexit 3 | import contextlib 4 | import sys 5 | 6 | from .ansitowin32 import AnsiToWin32 7 | 8 | 9 | orig_stdout = None 10 | orig_stderr = None 11 | 12 | wrapped_stdout = None 13 | wrapped_stderr = None 14 | 15 | atexit_done = False 16 | 17 | 18 | def reset_all(): 19 | AnsiToWin32(orig_stdout).reset_all() 20 | 21 | 22 | def init(autoreset=False, convert=None, strip=None, wrap=True): 23 | 24 | if not wrap and any([autoreset, convert, strip]): 25 | raise ValueError('wrap=False conflicts with any other arg=True') 26 | 27 | global wrapped_stdout, wrapped_stderr 28 | global orig_stdout, orig_stderr 29 | 30 | orig_stdout = sys.stdout 31 | orig_stderr = sys.stderr 32 | 33 | if sys.stdout is None: 34 | wrapped_stdout = None 35 | else: 36 | sys.stdout = wrapped_stdout = \ 37 | wrap_stream(orig_stdout, convert, strip, autoreset, wrap) 38 | if sys.stderr is None: 39 | wrapped_stderr = None 40 | else: 41 | sys.stderr = wrapped_stderr = \ 42 | wrap_stream(orig_stderr, convert, strip, autoreset, wrap) 43 | 44 | global atexit_done 45 | if not atexit_done: 46 | atexit.register(reset_all) 47 | atexit_done = True 48 | 49 | 50 | def deinit(): 51 | if orig_stdout is not None: 52 | sys.stdout = orig_stdout 53 | if orig_stderr is not None: 54 | sys.stderr = orig_stderr 55 | 56 | 57 | @contextlib.contextmanager 58 | def colorama_text(*args, **kwargs): 59 | init(*args, **kwargs) 60 | try: 61 | yield 62 | finally: 63 | deinit() 64 | 65 | 66 | def reinit(): 67 | if wrapped_stdout is not None: 68 | sys.stdout = wrapped_stdout 69 | if wrapped_stderr is not None: 70 | sys.stderr = wrapped_stderr 71 | 72 | 73 | def wrap_stream(stream, convert, strip, autoreset, wrap): 74 | if wrap: 75 | wrapper = AnsiToWin32(stream, 76 | convert=convert, strip=strip, autoreset=autoreset) 77 | if wrapper.should_wrap(): 78 | stream = wrapper.stream 79 | return stream 80 | 81 | 82 | -------------------------------------------------------------------------------- /thirdparty/requests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # __ 4 | # /__) _ _ _ _ _/ _ 5 | # / ( (- (/ (/ (- _) / _) 6 | # / 7 | 8 | """ 9 | Requests HTTP library 10 | ~~~~~~~~~~~~~~~~~~~~~ 11 | 12 | Requests is an HTTP library, written in Python, for human beings. Basic GET 13 | usage: 14 | 15 | >>> import requests 16 | >>> r = requests.get('https://www.python.org') 17 | >>> r.status_code 18 | 200 19 | >>> 'Python is a programming language' in r.content 20 | True 21 | 22 | ... or POST: 23 | 24 | >>> payload = dict(key1='value1', key2='value2') 25 | >>> r = requests.post('http://httpbin.org/post', data=payload) 26 | >>> print(r.text) 27 | { 28 | ... 29 | "form": { 30 | "key2": "value2", 31 | "key1": "value1" 32 | }, 33 | ... 34 | } 35 | 36 | The other HTTP methods are supported - see `requests.api`. Full documentation 37 | is at . 38 | 39 | :copyright: (c) 2015 by Kenneth Reitz. 40 | :license: Apache 2.0, see LICENSE for more details. 41 | 42 | """ 43 | 44 | __title__ = 'requests' 45 | __version__ = '2.7.0' 46 | __build__ = 0x020700 47 | __author__ = 'Kenneth Reitz' 48 | __license__ = 'Apache 2.0' 49 | __copyright__ = 'Copyright 2015 Kenneth Reitz' 50 | 51 | # Attempt to enable urllib3's SNI support, if possible 52 | try: 53 | from .packages.urllib3.contrib import pyopenssl 54 | pyopenssl.inject_into_urllib3() 55 | except ImportError: 56 | pass 57 | 58 | from . import utils 59 | from .models import Request, Response, PreparedRequest 60 | from .api import request, get, head, post, patch, put, delete, options 61 | from .sessions import session, Session 62 | from .status_codes import codes 63 | from .exceptions import ( 64 | RequestException, Timeout, URLRequired, 65 | TooManyRedirects, HTTPError, ConnectionError 66 | ) 67 | 68 | # Set default logging handler to avoid "No handler found" warnings. 69 | import logging 70 | try: # Python 2.7+ 71 | from logging import NullHandler 72 | except ImportError: 73 | class NullHandler(logging.Handler): 74 | def emit(self, record): 75 | pass 76 | 77 | logging.getLogger(__name__).addHandler(NullHandler()) 78 | -------------------------------------------------------------------------------- /lib/connection/Response.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This program is free software; you can redistribute it and/or modify 3 | # it under the terms of the GNU General Public License as published by 4 | # the Free Software Foundation; either version 2 of the License, or 5 | # (at your option) any later version. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU General Public License 13 | # along with this program; if not, write to the Free Software 14 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 15 | # MA 02110-1301, USA. 16 | # 17 | # Author: Mauro Soria 18 | 19 | 20 | class Response(object): 21 | 22 | def __init__(self, status, reason, headers, body): 23 | self.status = status 24 | self.reason = reason 25 | self.headers = headers 26 | self.body = body 27 | 28 | def __str__(self): 29 | return self.body 30 | 31 | def __int__(self): 32 | return self.status 33 | 34 | def __eq__(self, other): 35 | return self.status == other.status and self.body == other.body 36 | 37 | def __cmp__(self, other): 38 | return (self.body > other) - (self.body < other) 39 | 40 | def __len__(self): 41 | return len(self.body) 42 | 43 | def __hash__(self): 44 | return hash(self.body) 45 | 46 | def __del__(self): 47 | del self.body 48 | del self.headers 49 | del self.status 50 | del self.reason 51 | 52 | @property 53 | def redirect(self): 54 | headers = dict((key.lower(), value) for key, value in self.headers.items()) 55 | return headers.get("location") 56 | 57 | @property 58 | def pretty(self): 59 | try: 60 | from BeautifulSoup import BeautifulSoup 61 | except ImportError: 62 | raise Exception('BeautifulSoup must be installed to get pretty HTML =(') 63 | html = BeautifulSoup(self.body) 64 | return html.prettify() 65 | -------------------------------------------------------------------------------- /thirdparty/requests/packages/chardet/mbcsgroupprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # Proofpoint, Inc. 13 | # 14 | # This library is free software; you can redistribute it and/or 15 | # modify it under the terms of the GNU Lesser General Public 16 | # License as published by the Free Software Foundation; either 17 | # version 2.1 of the License, or (at your option) any later version. 18 | # 19 | # This library is distributed in the hope that it will be useful, 20 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 21 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 | # Lesser General Public License for more details. 23 | # 24 | # You should have received a copy of the GNU Lesser General Public 25 | # License along with this library; if not, write to the Free Software 26 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 27 | # 02110-1301 USA 28 | ######################### END LICENSE BLOCK ######################### 29 | 30 | from .charsetgroupprober import CharSetGroupProber 31 | from .utf8prober import UTF8Prober 32 | from .sjisprober import SJISProber 33 | from .eucjpprober import EUCJPProber 34 | from .gb2312prober import GB2312Prober 35 | from .euckrprober import EUCKRProber 36 | from .cp949prober import CP949Prober 37 | from .big5prober import Big5Prober 38 | from .euctwprober import EUCTWProber 39 | 40 | 41 | class MBCSGroupProber(CharSetGroupProber): 42 | def __init__(self): 43 | CharSetGroupProber.__init__(self) 44 | self._mProbers = [ 45 | UTF8Prober(), 46 | SJISProber(), 47 | EUCJPProber(), 48 | GB2312Prober(), 49 | EUCKRProber(), 50 | CP949Prober(), 51 | Big5Prober(), 52 | EUCTWProber() 53 | ] 54 | self.reset() 55 | -------------------------------------------------------------------------------- /thirdparty/chardet/mbcsgroupprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # Proofpoint, Inc. 13 | # 14 | # This library is free software; you can redistribute it and/or 15 | # modify it under the terms of the GNU Lesser General Public 16 | # License as published by the Free Software Foundation; either 17 | # version 2.1 of the License, or (at your option) any later version. 18 | # 19 | # This library is distributed in the hope that it will be useful, 20 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 21 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 | # Lesser General Public License for more details. 23 | # 24 | # You should have received a copy of the GNU Lesser General Public 25 | # License along with this library; if not, write to the Free Software 26 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 27 | # 02110-1301 USA 28 | ######################### END LICENSE BLOCK ######################### 29 | 30 | from .charsetgroupprober import CharSetGroupProber 31 | from .utf8prober import UTF8Prober 32 | from .sjisprober import SJISProber 33 | from .eucjpprober import EUCJPProber 34 | from .gb2312prober import GB2312Prober 35 | from .euckrprober import EUCKRProber 36 | from .cp949prober import CP949Prober 37 | from .big5prober import Big5Prober 38 | from .euctwprober import EUCTWProber 39 | 40 | 41 | class MBCSGroupProber(CharSetGroupProber): 42 | def __init__(self, lang_filter=None): 43 | super(MBCSGroupProber, self).__init__(lang_filter=lang_filter) 44 | self.probers = [ 45 | UTF8Prober(), 46 | SJISProber(), 47 | EUCJPProber(), 48 | GB2312Prober(), 49 | EUCKRProber(), 50 | CP949Prober(), 51 | Big5Prober(), 52 | EUCTWProber() 53 | ] 54 | self.reset() 55 | -------------------------------------------------------------------------------- /thirdparty/requests/packages/urllib3/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | urllib3 - Thread-safe connection pooling and re-using. 3 | """ 4 | 5 | __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' 6 | __license__ = 'MIT' 7 | __version__ = '1.10.4' 8 | 9 | 10 | from .connectionpool import ( 11 | HTTPConnectionPool, 12 | HTTPSConnectionPool, 13 | connection_from_url 14 | ) 15 | 16 | from . import exceptions 17 | from .filepost import encode_multipart_formdata 18 | from .poolmanager import PoolManager, ProxyManager, proxy_from_url 19 | from .response import HTTPResponse 20 | from .util.request import make_headers 21 | from .util.url import get_host 22 | from .util.timeout import Timeout 23 | from .util.retry import Retry 24 | 25 | 26 | # Set default logging handler to avoid "No handler found" warnings. 27 | import logging 28 | try: # Python 2.7+ 29 | from logging import NullHandler 30 | except ImportError: 31 | class NullHandler(logging.Handler): 32 | def emit(self, record): 33 | pass 34 | 35 | logging.getLogger(__name__).addHandler(NullHandler()) 36 | 37 | def add_stderr_logger(level=logging.DEBUG): 38 | """ 39 | Helper for quickly adding a StreamHandler to the logger. Useful for 40 | debugging. 41 | 42 | Returns the handler after adding it. 43 | """ 44 | # This method needs to be in this __init__.py to get the __name__ correct 45 | # even if urllib3 is vendored within another package. 46 | logger = logging.getLogger(__name__) 47 | handler = logging.StreamHandler() 48 | handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s')) 49 | logger.addHandler(handler) 50 | logger.setLevel(level) 51 | logger.debug('Added a stderr logging handler to logger: %s' % __name__) 52 | return handler 53 | 54 | # ... Clean up. 55 | del NullHandler 56 | 57 | 58 | import warnings 59 | # SecurityWarning's always go off by default. 60 | warnings.simplefilter('always', exceptions.SecurityWarning, append=True) 61 | # InsecurePlatformWarning's don't vary between requests, so we keep it default. 62 | warnings.simplefilter('default', exceptions.InsecurePlatformWarning, 63 | append=True) 64 | 65 | def disable_warnings(category=exceptions.HTTPWarning): 66 | """ 67 | Helper for quickly disabling all urllib3 warnings. 68 | """ 69 | warnings.simplefilter('ignore', category) 70 | -------------------------------------------------------------------------------- /thirdparty/requests/packages/urllib3/util/request.py: -------------------------------------------------------------------------------- 1 | from base64 import b64encode 2 | 3 | from ..packages.six import b 4 | 5 | ACCEPT_ENCODING = 'gzip,deflate' 6 | 7 | 8 | def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, 9 | basic_auth=None, proxy_basic_auth=None, disable_cache=None): 10 | """ 11 | Shortcuts for generating request headers. 12 | 13 | :param keep_alive: 14 | If ``True``, adds 'connection: keep-alive' header. 15 | 16 | :param accept_encoding: 17 | Can be a boolean, list, or string. 18 | ``True`` translates to 'gzip,deflate'. 19 | List will get joined by comma. 20 | String will be used as provided. 21 | 22 | :param user_agent: 23 | String representing the user-agent you want, such as 24 | "python-urllib3/0.6" 25 | 26 | :param basic_auth: 27 | Colon-separated username:password string for 'authorization: basic ...' 28 | auth header. 29 | 30 | :param proxy_basic_auth: 31 | Colon-separated username:password string for 'proxy-authorization: basic ...' 32 | auth header. 33 | 34 | :param disable_cache: 35 | If ``True``, adds 'cache-control: no-cache' header. 36 | 37 | Example:: 38 | 39 | >>> make_headers(keep_alive=True, user_agent="Batman/1.0") 40 | {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} 41 | >>> make_headers(accept_encoding=True) 42 | {'accept-encoding': 'gzip,deflate'} 43 | """ 44 | headers = {} 45 | if accept_encoding: 46 | if isinstance(accept_encoding, str): 47 | pass 48 | elif isinstance(accept_encoding, list): 49 | accept_encoding = ','.join(accept_encoding) 50 | else: 51 | accept_encoding = ACCEPT_ENCODING 52 | headers['accept-encoding'] = accept_encoding 53 | 54 | if user_agent: 55 | headers['user-agent'] = user_agent 56 | 57 | if keep_alive: 58 | headers['connection'] = 'keep-alive' 59 | 60 | if basic_auth: 61 | headers['authorization'] = 'Basic ' + \ 62 | b64encode(b(basic_auth)).decode('utf-8') 63 | 64 | if proxy_basic_auth: 65 | headers['proxy-authorization'] = 'Basic ' + \ 66 | b64encode(b(proxy_basic_auth)).decode('utf-8') 67 | 68 | if disable_cache: 69 | headers['cache-control'] = 'no-cache' 70 | 71 | return headers 72 | -------------------------------------------------------------------------------- /thirdparty/oset/pyoset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- mode:python; tab-width: 2; coding: utf-8 -*- 3 | 4 | """Partially backported python ABC classes""" 5 | 6 | 7 | 8 | try: 9 | from collections import MutableSet 10 | except ImportError: 11 | # Running in Python <= 2.5 12 | from ._abc import MutableSet 13 | 14 | 15 | KEY, PREV, NEXT = list(range(3)) 16 | 17 | 18 | class OrderedSet(MutableSet): 19 | 20 | def __init__(self, iterable=None): 21 | self.end = end = [] 22 | end += [None, end, end] # sentinel node for doubly linked list 23 | self.map = {} # key --> [key, prev, next] 24 | if iterable is not None: 25 | self |= iterable 26 | 27 | def __len__(self): 28 | return len(self.map) 29 | 30 | def __contains__(self, key): 31 | return key in self.map 32 | 33 | def __getitem__(self, key): 34 | return list(self)[key] 35 | 36 | def add(self, key): 37 | if key not in self.map: 38 | end = self.end 39 | curr = end[PREV] 40 | curr[NEXT] = end[PREV] = self.map[key] = [key, curr, end] 41 | 42 | def discard(self, key): 43 | if key in self.map: 44 | key, prev, next = self.map.pop(key) 45 | prev[NEXT] = next 46 | next[PREV] = prev 47 | 48 | def __iter__(self): 49 | end = self.end 50 | curr = end[NEXT] 51 | while curr is not end: 52 | yield curr[KEY] 53 | curr = curr[NEXT] 54 | 55 | def __reversed__(self): 56 | end = self.end 57 | curr = end[PREV] 58 | while curr is not end: 59 | yield curr[KEY] 60 | curr = curr[PREV] 61 | 62 | def pop(self, last=True): 63 | if not self: 64 | raise KeyError('set is empty') 65 | key = next(reversed(self)) if last else next(iter(self)) 66 | self.discard(key) 67 | return key 68 | 69 | def __repr__(self): 70 | if not self: 71 | return '%s()' % (self.__class__.__name__,) 72 | return '%s(%r)' % (self.__class__.__name__, list(self)) 73 | 74 | def __eq__(self, other): 75 | if isinstance(other, OrderedSet): 76 | return len(self) == len(other) and list(self) == list(other) 77 | return set(self) == set(other) 78 | 79 | def __del__(self): 80 | self.clear() # remove circular references 81 | 82 | oset = OrderedSet 83 | -------------------------------------------------------------------------------- /lib/reports/BaseReport.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This program is free software; you can redistribute it and/or modify 3 | # it under the terms of the GNU General Public License as published by 4 | # the Free Software Foundation; either version 2 of the License, or 5 | # (at your option) any later version. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU General Public License 13 | # along with this program; if not, write to the Free Software 14 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 15 | # MA 02110-1301, USA. 16 | # 17 | # Author: Mauro Soria 18 | 19 | class BaseReport(object): 20 | 21 | def __init__(self, host, port, protocol, basePath, output): 22 | self.output = output 23 | self.port = port 24 | self.host = host 25 | self.protocol = protocol 26 | self.basePath = basePath 27 | 28 | if self.basePath.endswith('/'): 29 | self.basePath = self.basePath[:-1] 30 | 31 | if self.basePath.startswith('/'): 32 | self.basePath = self.basePath[1:] 33 | 34 | self.pathList = [] 35 | self.open() 36 | 37 | def addPath(self, path, status, response): 38 | contentLength = None 39 | 40 | try: 41 | contentLength = int(response.headers['content-length']) 42 | 43 | except (KeyError, ValueError): 44 | contentLength = len(response.body) 45 | 46 | self.pathList.append((path, status, contentLength)) 47 | 48 | def open(self): 49 | from os import name as os_name 50 | 51 | if os_name == "nt": 52 | from os.path import normpath, dirname 53 | from os import makedirs 54 | 55 | output = normpath(self.output) 56 | makedirs(dirname(output), exist_ok=True) 57 | 58 | self.output = output 59 | 60 | self.file = open(self.output, 'w+') 61 | 62 | def save(self): 63 | self.file.seek(0) 64 | self.file.truncate(0) 65 | self.file.flush() 66 | self.file.writelines(self.generate()) 67 | self.file.flush() 68 | 69 | def close(self): 70 | self.file.close() 71 | 72 | def generate(self): 73 | raise NotImplementedError 74 | -------------------------------------------------------------------------------- /lib/utils/DefaultConfigParser.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This program is free software; you can redistribute it and/or modify 3 | # it under the terms of the GNU General Public License as published by 4 | # the Free Software Foundation; either version 2 of the License, or 5 | # (at your option) any later version. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU General Public License 13 | # along with this program; if not, write to the Free Software 14 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 15 | # MA 02110-1301, USA. 16 | # 17 | # Author: Mauro Soria 18 | 19 | import configparser 20 | 21 | class DefaultConfigParser(configparser.ConfigParser): 22 | def __init__(self): 23 | configparser.ConfigParser.__init__(self) 24 | 25 | 26 | def safe_get(self, section, option, default, allowed=None): 27 | try: 28 | result = configparser.ConfigParser.get(self, section, option) 29 | if allowed is not None: 30 | return result if result in allowed else default 31 | else: 32 | return result 33 | except (configparser.NoSectionError, configparser.NoOptionError): 34 | return default 35 | 36 | def safe_getfloat(self, section, option, default, allowed=None): 37 | try: 38 | result = configparser.ConfigParser.getfloat(self, section, option) 39 | if allowed is not None: 40 | return result if result in allowed else default 41 | else: 42 | return result 43 | except (configparser.NoSectionError, configparser.NoOptionError): 44 | return default 45 | 46 | def safe_getboolean(self, section, option, default, allowed=None): 47 | try: 48 | result = configparser.ConfigParser.getboolean(self, section, option) 49 | if allowed is not None: 50 | return result if result in allowed else default 51 | else: 52 | return result 53 | except (configparser.NoSectionError, configparser.NoOptionError): 54 | return default 55 | 56 | def safe_getint(self, section, option, default, allowed=None): 57 | try: 58 | result = configparser.ConfigParser.getint(self, section, option) 59 | if allowed is not None: 60 | return result if result in allowed else default 61 | else: 62 | return result 63 | except (configparser.NoSectionError, configparser.NoOptionError): 64 | return default -------------------------------------------------------------------------------- /thirdparty/requests/packages/chardet/codingstatemachine.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .constants import eStart 29 | from .compat import wrap_ord 30 | 31 | 32 | class CodingStateMachine: 33 | def __init__(self, sm): 34 | self._mModel = sm 35 | self._mCurrentBytePos = 0 36 | self._mCurrentCharLen = 0 37 | self.reset() 38 | 39 | def reset(self): 40 | self._mCurrentState = eStart 41 | 42 | def next_state(self, c): 43 | # for each byte we get its class 44 | # if it is first byte, we also get byte length 45 | # PY3K: aBuf is a byte stream, so c is an int, not a byte 46 | byteCls = self._mModel['classTable'][wrap_ord(c)] 47 | if self._mCurrentState == eStart: 48 | self._mCurrentBytePos = 0 49 | self._mCurrentCharLen = self._mModel['charLenTable'][byteCls] 50 | # from byte's class and stateTable, we get its next state 51 | curr_state = (self._mCurrentState * self._mModel['classFactor'] 52 | + byteCls) 53 | self._mCurrentState = self._mModel['stateTable'][curr_state] 54 | self._mCurrentBytePos += 1 55 | return self._mCurrentState 56 | 57 | def get_current_charlen(self): 58 | return self._mCurrentCharLen 59 | 60 | def get_coding_state_machine(self): 61 | return self._mModel['name'] 62 | -------------------------------------------------------------------------------- /thirdparty/requests/packages/urllib3/filepost.py: -------------------------------------------------------------------------------- 1 | import codecs 2 | 3 | from uuid import uuid4 4 | from io import BytesIO 5 | 6 | from .packages import six 7 | from .packages.six import b 8 | from .fields import RequestField 9 | 10 | writer = codecs.lookup('utf-8')[3] 11 | 12 | 13 | def choose_boundary(): 14 | """ 15 | Our embarassingly-simple replacement for mimetools.choose_boundary. 16 | """ 17 | return uuid4().hex 18 | 19 | 20 | def iter_field_objects(fields): 21 | """ 22 | Iterate over fields. 23 | 24 | Supports list of (k, v) tuples and dicts, and lists of 25 | :class:`~urllib3.fields.RequestField`. 26 | 27 | """ 28 | if isinstance(fields, dict): 29 | i = six.iteritems(fields) 30 | else: 31 | i = iter(fields) 32 | 33 | for field in i: 34 | if isinstance(field, RequestField): 35 | yield field 36 | else: 37 | yield RequestField.from_tuples(*field) 38 | 39 | 40 | def iter_fields(fields): 41 | """ 42 | .. deprecated:: 1.6 43 | 44 | Iterate over fields. 45 | 46 | The addition of :class:`~urllib3.fields.RequestField` makes this function 47 | obsolete. Instead, use :func:`iter_field_objects`, which returns 48 | :class:`~urllib3.fields.RequestField` objects. 49 | 50 | Supports list of (k, v) tuples and dicts. 51 | """ 52 | if isinstance(fields, dict): 53 | return ((k, v) for k, v in six.iteritems(fields)) 54 | 55 | return ((k, v) for k, v in fields) 56 | 57 | 58 | def encode_multipart_formdata(fields, boundary=None): 59 | """ 60 | Encode a dictionary of ``fields`` using the multipart/form-data MIME format. 61 | 62 | :param fields: 63 | Dictionary of fields or list of (key, :class:`~urllib3.fields.RequestField`). 64 | 65 | :param boundary: 66 | If not specified, then a random boundary will be generated using 67 | :func:`mimetools.choose_boundary`. 68 | """ 69 | body = BytesIO() 70 | if boundary is None: 71 | boundary = choose_boundary() 72 | 73 | for field in iter_field_objects(fields): 74 | body.write(b('--%s\r\n' % (boundary))) 75 | 76 | writer(body).write(field.render_headers()) 77 | data = field.data 78 | 79 | if isinstance(data, int): 80 | data = str(data) # Backwards compatibility 81 | 82 | if isinstance(data, six.text_type): 83 | writer(body).write(data) 84 | else: 85 | body.write(data) 86 | 87 | body.write(b'\r\n') 88 | 89 | body.write(b('--%s--\r\n' % (boundary))) 90 | 91 | content_type = str('multipart/form-data; boundary=%s' % boundary) 92 | 93 | return body.getvalue(), content_type 94 | -------------------------------------------------------------------------------- /thirdparty/requests/packages/chardet/chardetect.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Script which takes one or more file paths and reports on their detected 4 | encodings 5 | 6 | Example:: 7 | 8 | % chardetect somefile someotherfile 9 | somefile: windows-1252 with confidence 0.5 10 | someotherfile: ascii with confidence 1.0 11 | 12 | If no paths are provided, it takes its input from stdin. 13 | 14 | """ 15 | 16 | from __future__ import absolute_import, print_function, unicode_literals 17 | 18 | import argparse 19 | import sys 20 | from io import open 21 | 22 | from chardet import __version__ 23 | from chardet.universaldetector import UniversalDetector 24 | 25 | 26 | def description_of(lines, name='stdin'): 27 | """ 28 | Return a string describing the probable encoding of a file or 29 | list of strings. 30 | 31 | :param lines: The lines to get the encoding of. 32 | :type lines: Iterable of bytes 33 | :param name: Name of file or collection of lines 34 | :type name: str 35 | """ 36 | u = UniversalDetector() 37 | for line in lines: 38 | u.feed(line) 39 | u.close() 40 | result = u.result 41 | if result['encoding']: 42 | return '{0}: {1} with confidence {2}'.format(name, result['encoding'], 43 | result['confidence']) 44 | else: 45 | return '{0}: no result'.format(name) 46 | 47 | 48 | def main(argv=None): 49 | ''' 50 | Handles command line arguments and gets things started. 51 | 52 | :param argv: List of arguments, as if specified on the command-line. 53 | If None, ``sys.argv[1:]`` is used instead. 54 | :type argv: list of str 55 | ''' 56 | # Get command line arguments 57 | parser = argparse.ArgumentParser( 58 | description="Takes one or more file paths and reports their detected \ 59 | encodings", 60 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, 61 | conflict_handler='resolve') 62 | parser.add_argument('input', 63 | help='File whose encoding we would like to determine.', 64 | type=argparse.FileType('rb'), nargs='*', 65 | default=[sys.stdin]) 66 | parser.add_argument('--version', action='version', 67 | version='%(prog)s {0}'.format(__version__)) 68 | args = parser.parse_args(argv) 69 | 70 | for f in args.input: 71 | if f.isatty(): 72 | print("You are running chardetect interactively. Press " + 73 | "CTRL-D twice at the start of a blank line to signal the " + 74 | "end of your input. If you want help, run chardetect " + 75 | "--help\n", file=sys.stderr) 76 | print(description_of(f, f.name)) 77 | 78 | 79 | if __name__ == '__main__': 80 | main() 81 | -------------------------------------------------------------------------------- /thirdparty/requests/packages/chardet/utf8prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from . import constants 29 | from .charsetprober import CharSetProber 30 | from .codingstatemachine import CodingStateMachine 31 | from .mbcssm import UTF8SMModel 32 | 33 | ONE_CHAR_PROB = 0.5 34 | 35 | 36 | class UTF8Prober(CharSetProber): 37 | def __init__(self): 38 | CharSetProber.__init__(self) 39 | self._mCodingSM = CodingStateMachine(UTF8SMModel) 40 | self.reset() 41 | 42 | def reset(self): 43 | CharSetProber.reset(self) 44 | self._mCodingSM.reset() 45 | self._mNumOfMBChar = 0 46 | 47 | def get_charset_name(self): 48 | return "utf-8" 49 | 50 | def feed(self, aBuf): 51 | for c in aBuf: 52 | codingState = self._mCodingSM.next_state(c) 53 | if codingState == constants.eError: 54 | self._mState = constants.eNotMe 55 | break 56 | elif codingState == constants.eItsMe: 57 | self._mState = constants.eFoundIt 58 | break 59 | elif codingState == constants.eStart: 60 | if self._mCodingSM.get_current_charlen() >= 2: 61 | self._mNumOfMBChar += 1 62 | 63 | if self.get_state() == constants.eDetecting: 64 | if self.get_confidence() > constants.SHORTCUT_THRESHOLD: 65 | self._mState = constants.eFoundIt 66 | 67 | return self.get_state() 68 | 69 | def get_confidence(self): 70 | unlike = 0.99 71 | if self._mNumOfMBChar < 6: 72 | for i in range(0, self._mNumOfMBChar): 73 | unlike = unlike * ONE_CHAR_PROB 74 | return 1.0 - unlike 75 | else: 76 | return unlike 77 | -------------------------------------------------------------------------------- /thirdparty/requests/exceptions.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | requests.exceptions 5 | ~~~~~~~~~~~~~~~~~~~ 6 | 7 | This module contains the set of Requests' exceptions. 8 | 9 | """ 10 | from .packages.urllib3.exceptions import HTTPError as BaseHTTPError 11 | 12 | 13 | class RequestException(IOError): 14 | """There was an ambiguous exception that occurred while handling your 15 | request.""" 16 | 17 | def __init__(self, *args, **kwargs): 18 | """ 19 | Initialize RequestException with `request` and `response` objects. 20 | """ 21 | response = kwargs.pop('response', None) 22 | self.response = response 23 | self.request = kwargs.pop('request', None) 24 | if (response is not None and not self.request and 25 | hasattr(response, 'request')): 26 | self.request = self.response.request 27 | super(RequestException, self).__init__(*args, **kwargs) 28 | 29 | 30 | class HTTPError(RequestException): 31 | """An HTTP error occurred.""" 32 | 33 | 34 | class ConnectionError(RequestException): 35 | """A Connection error occurred.""" 36 | 37 | 38 | class ProxyError(ConnectionError): 39 | """A proxy error occurred.""" 40 | 41 | 42 | class SSLError(ConnectionError): 43 | """An SSL error occurred.""" 44 | 45 | 46 | class Timeout(RequestException): 47 | """The request timed out. 48 | 49 | Catching this error will catch both 50 | :exc:`~requests.exceptions.ConnectTimeout` and 51 | :exc:`~requests.exceptions.ReadTimeout` errors. 52 | """ 53 | 54 | 55 | class ConnectTimeout(ConnectionError, Timeout): 56 | """The request timed out while trying to connect to the remote server. 57 | 58 | Requests that produced this error are safe to retry. 59 | """ 60 | 61 | 62 | class ReadTimeout(Timeout): 63 | """The server did not send any data in the allotted amount of time.""" 64 | 65 | 66 | class URLRequired(RequestException): 67 | """A valid URL is required to make a request.""" 68 | 69 | 70 | class TooManyRedirects(RequestException): 71 | """Too many redirects.""" 72 | 73 | 74 | class MissingSchema(RequestException, ValueError): 75 | """The URL schema (e.g. http or https) is missing.""" 76 | 77 | 78 | class InvalidSchema(RequestException, ValueError): 79 | """See defaults.py for valid schemas.""" 80 | 81 | 82 | class InvalidURL(RequestException, ValueError): 83 | """ The URL provided was somehow invalid. """ 84 | 85 | 86 | class ChunkedEncodingError(RequestException): 87 | """The server declared chunked encoding but sent an invalid chunk.""" 88 | 89 | 90 | class ContentDecodingError(RequestException, BaseHTTPError): 91 | """Failed to decode response content""" 92 | 93 | 94 | class StreamConsumedError(RequestException, TypeError): 95 | """The content for this response was already consumed""" 96 | 97 | 98 | class RetryError(RequestException): 99 | """Custom retries logic failed""" 100 | -------------------------------------------------------------------------------- /thirdparty/colorama/ansi.py: -------------------------------------------------------------------------------- 1 | # Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. 2 | ''' 3 | This module generates ANSI character codes to printing colors to terminals. 4 | See: http://en.wikipedia.org/wiki/ANSI_escape_code 5 | ''' 6 | 7 | CSI = '\033[' 8 | OSC = '\033]' 9 | BEL = '\007' 10 | 11 | 12 | def code_to_chars(code): 13 | return CSI + str(code) + 'm' 14 | 15 | def set_title(title): 16 | return OSC + '2;' + title + BEL 17 | 18 | def clear_screen(mode=2): 19 | return CSI + str(mode) + 'J' 20 | 21 | def clear_line(mode=2): 22 | return CSI + str(mode) + 'K' 23 | 24 | 25 | class AnsiCodes(object): 26 | def __init__(self): 27 | # the subclasses declare class attributes which are numbers. 28 | # Upon instantiation we define instance attributes, which are the same 29 | # as the class attributes but wrapped with the ANSI escape sequence 30 | for name in dir(self): 31 | if not name.startswith('_'): 32 | value = getattr(self, name) 33 | setattr(self, name, code_to_chars(value)) 34 | 35 | 36 | class AnsiCursor(object): 37 | def UP(self, n=1): 38 | return CSI + str(n) + 'A' 39 | def DOWN(self, n=1): 40 | return CSI + str(n) + 'B' 41 | def FORWARD(self, n=1): 42 | return CSI + str(n) + 'C' 43 | def BACK(self, n=1): 44 | return CSI + str(n) + 'D' 45 | def POS(self, x=1, y=1): 46 | return CSI + str(y) + ';' + str(x) + 'H' 47 | 48 | 49 | class AnsiFore(AnsiCodes): 50 | BLACK = 30 51 | RED = 31 52 | GREEN = 32 53 | YELLOW = 33 54 | BLUE = 34 55 | MAGENTA = 35 56 | CYAN = 36 57 | WHITE = 37 58 | RESET = 39 59 | 60 | # These are fairly well supported, but not part of the standard. 61 | LIGHTBLACK_EX = 90 62 | LIGHTRED_EX = 91 63 | LIGHTGREEN_EX = 92 64 | LIGHTYELLOW_EX = 93 65 | LIGHTBLUE_EX = 94 66 | LIGHTMAGENTA_EX = 95 67 | LIGHTCYAN_EX = 96 68 | LIGHTWHITE_EX = 97 69 | 70 | 71 | class AnsiBack(AnsiCodes): 72 | BLACK = 40 73 | RED = 41 74 | GREEN = 42 75 | YELLOW = 43 76 | BLUE = 44 77 | MAGENTA = 45 78 | CYAN = 46 79 | WHITE = 47 80 | RESET = 49 81 | 82 | # These are fairly well supported, but not part of the standard. 83 | LIGHTBLACK_EX = 100 84 | LIGHTRED_EX = 101 85 | LIGHTGREEN_EX = 102 86 | LIGHTYELLOW_EX = 103 87 | LIGHTBLUE_EX = 104 88 | LIGHTMAGENTA_EX = 105 89 | LIGHTCYAN_EX = 106 90 | LIGHTWHITE_EX = 107 91 | 92 | 93 | class AnsiStyle(AnsiCodes): 94 | BRIGHT = 1 95 | DIM = 2 96 | NORMAL = 22 97 | RESET_ALL = 0 98 | 99 | Fore = AnsiFore() 100 | Back = AnsiBack() 101 | Style = AnsiStyle() 102 | Cursor = AnsiCursor() 103 | -------------------------------------------------------------------------------- /thirdparty/chardet/cli/chardetect.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Script which takes one or more file paths and reports on their detected 4 | encodings 5 | 6 | Example:: 7 | 8 | % chardetect somefile someotherfile 9 | somefile: windows-1252 with confidence 0.5 10 | someotherfile: ascii with confidence 1.0 11 | 12 | If no paths are provided, it takes its input from stdin. 13 | 14 | """ 15 | 16 | from __future__ import absolute_import, print_function, unicode_literals 17 | 18 | import argparse 19 | import sys 20 | 21 | from chardet import __version__ 22 | from chardet.compat import PY2 23 | from chardet.universaldetector import UniversalDetector 24 | 25 | 26 | def description_of(lines, name='stdin'): 27 | """ 28 | Return a string describing the probable encoding of a file or 29 | list of strings. 30 | 31 | :param lines: The lines to get the encoding of. 32 | :type lines: Iterable of bytes 33 | :param name: Name of file or collection of lines 34 | :type name: str 35 | """ 36 | u = UniversalDetector() 37 | for line in lines: 38 | line = bytearray(line) 39 | u.feed(line) 40 | # shortcut out of the loop to save reading further - particularly useful if we read a BOM. 41 | if u.done: 42 | break 43 | u.close() 44 | result = u.result 45 | if PY2: 46 | name = name.decode(sys.getfilesystemencoding(), 'ignore') 47 | if result['encoding']: 48 | return '{0}: {1} with confidence {2}'.format(name, result['encoding'], 49 | result['confidence']) 50 | else: 51 | return '{0}: no result'.format(name) 52 | 53 | 54 | def main(argv=None): 55 | """ 56 | Handles command line arguments and gets things started. 57 | 58 | :param argv: List of arguments, as if specified on the command-line. 59 | If None, ``sys.argv[1:]`` is used instead. 60 | :type argv: list of str 61 | """ 62 | # Get command line arguments 63 | parser = argparse.ArgumentParser( 64 | description="Takes one or more file paths and reports their detected \ 65 | encodings") 66 | parser.add_argument('input', 67 | help='File whose encoding we would like to determine. \ 68 | (default: stdin)', 69 | type=argparse.FileType('rb'), nargs='*', 70 | default=[sys.stdin if PY2 else sys.stdin.buffer]) 71 | parser.add_argument('--version', action='version', 72 | version='%(prog)s {0}'.format(__version__)) 73 | args = parser.parse_args(argv) 74 | 75 | for f in args.input: 76 | if f.isatty(): 77 | print("You are running chardetect interactively. Press " + 78 | "CTRL-D twice at the start of a blank line to signal the " + 79 | "end of your input. If you want help, run chardetect " + 80 | "--help\n", file=sys.stderr) 81 | print(description_of(f, f.name)) 82 | 83 | 84 | if __name__ == '__main__': 85 | main() 86 | -------------------------------------------------------------------------------- /thirdparty/chardet/utf8prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .charsetprober import CharSetProber 29 | from .enums import ProbingState, MachineState 30 | from .codingstatemachine import CodingStateMachine 31 | from .mbcssm import UTF8_SM_MODEL 32 | 33 | 34 | 35 | class UTF8Prober(CharSetProber): 36 | ONE_CHAR_PROB = 0.5 37 | 38 | def __init__(self): 39 | super(UTF8Prober, self).__init__() 40 | self.coding_sm = CodingStateMachine(UTF8_SM_MODEL) 41 | self._num_mb_chars = None 42 | self.reset() 43 | 44 | def reset(self): 45 | super(UTF8Prober, self).reset() 46 | self.coding_sm.reset() 47 | self._num_mb_chars = 0 48 | 49 | @property 50 | def charset_name(self): 51 | return "utf-8" 52 | 53 | @property 54 | def language(self): 55 | return "" 56 | 57 | def feed(self, byte_str): 58 | for c in byte_str: 59 | coding_state = self.coding_sm.next_state(c) 60 | if coding_state == MachineState.ERROR: 61 | self._state = ProbingState.NOT_ME 62 | break 63 | elif coding_state == MachineState.ITS_ME: 64 | self._state = ProbingState.FOUND_IT 65 | break 66 | elif coding_state == MachineState.START: 67 | if self.coding_sm.get_current_charlen() >= 2: 68 | self._num_mb_chars += 1 69 | 70 | if self.state == ProbingState.DETECTING: 71 | if self.get_confidence() > self.SHORTCUT_THRESHOLD: 72 | self._state = ProbingState.FOUND_IT 73 | 74 | return self.state 75 | 76 | def get_confidence(self): 77 | unlike = 0.99 78 | if self._num_mb_chars < 6: 79 | unlike *= self.ONE_CHAR_PROB ** self._num_mb_chars 80 | return 1.0 - unlike 81 | else: 82 | return unlike 83 | -------------------------------------------------------------------------------- /thirdparty/requests/structures.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | requests.structures 5 | ~~~~~~~~~~~~~~~~~~~ 6 | 7 | Data structures that power Requests. 8 | 9 | """ 10 | 11 | import collections 12 | 13 | 14 | class CaseInsensitiveDict(collections.MutableMapping): 15 | """ 16 | A case-insensitive ``dict``-like object. 17 | 18 | Implements all methods and operations of 19 | ``collections.MutableMapping`` as well as dict's ``copy``. Also 20 | provides ``lower_items``. 21 | 22 | All keys are expected to be strings. The structure remembers the 23 | case of the last key to be set, and ``iter(instance)``, 24 | ``keys()``, ``items()``, ``iterkeys()``, and ``iteritems()`` 25 | will contain case-sensitive keys. However, querying and contains 26 | testing is case insensitive:: 27 | 28 | cid = CaseInsensitiveDict() 29 | cid['Accept'] = 'application/json' 30 | cid['aCCEPT'] == 'application/json' # True 31 | list(cid) == ['Accept'] # True 32 | 33 | For example, ``headers['content-encoding']`` will return the 34 | value of a ``'Content-Encoding'`` response header, regardless 35 | of how the header name was originally stored. 36 | 37 | If the constructor, ``.update``, or equality comparison 38 | operations are given keys that have equal ``.lower()``s, the 39 | behavior is undefined. 40 | 41 | """ 42 | def __init__(self, data=None, **kwargs): 43 | self._store = dict() 44 | if data is None: 45 | data = {} 46 | self.update(data, **kwargs) 47 | 48 | def __setitem__(self, key, value): 49 | # Use the lowercased key for lookups, but store the actual 50 | # key alongside the value. 51 | self._store[key.lower()] = (key, value) 52 | 53 | def __getitem__(self, key): 54 | return self._store[key.lower()][1] 55 | 56 | def __delitem__(self, key): 57 | del self._store[key.lower()] 58 | 59 | def __iter__(self): 60 | return (casedkey for casedkey, mappedvalue in self._store.values()) 61 | 62 | def __len__(self): 63 | return len(self._store) 64 | 65 | def lower_items(self): 66 | """Like iteritems(), but with all lowercase keys.""" 67 | return ( 68 | (lowerkey, keyval[1]) 69 | for (lowerkey, keyval) 70 | in self._store.items() 71 | ) 72 | 73 | def __eq__(self, other): 74 | if isinstance(other, collections.Mapping): 75 | other = CaseInsensitiveDict(other) 76 | else: 77 | return NotImplemented 78 | # Compare insensitively 79 | return dict(self.lower_items()) == dict(other.lower_items()) 80 | 81 | # Copy is required 82 | def copy(self): 83 | return CaseInsensitiveDict(self._store.values()) 84 | 85 | def __repr__(self): 86 | return str(dict(self.items())) 87 | 88 | class LookupDict(dict): 89 | """Dictionary lookup object.""" 90 | 91 | def __init__(self, name=None): 92 | self.name = name 93 | super(LookupDict, self).__init__() 94 | 95 | def __repr__(self): 96 | return '' % (self.name) 97 | 98 | def __getitem__(self, key): 99 | # We allow fall-through here, so values default to None 100 | 101 | return self.__dict__.get(key, None) 102 | 103 | def get(self, key, default=None): 104 | return self.__dict__.get(key, default) 105 | -------------------------------------------------------------------------------- /thirdparty/requests/packages/chardet/escprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from . import constants 29 | from .escsm import (HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel, 30 | ISO2022KRSMModel) 31 | from .charsetprober import CharSetProber 32 | from .codingstatemachine import CodingStateMachine 33 | from .compat import wrap_ord 34 | 35 | 36 | class EscCharSetProber(CharSetProber): 37 | def __init__(self): 38 | CharSetProber.__init__(self) 39 | self._mCodingSM = [ 40 | CodingStateMachine(HZSMModel), 41 | CodingStateMachine(ISO2022CNSMModel), 42 | CodingStateMachine(ISO2022JPSMModel), 43 | CodingStateMachine(ISO2022KRSMModel) 44 | ] 45 | self.reset() 46 | 47 | def reset(self): 48 | CharSetProber.reset(self) 49 | for codingSM in self._mCodingSM: 50 | if not codingSM: 51 | continue 52 | codingSM.active = True 53 | codingSM.reset() 54 | self._mActiveSM = len(self._mCodingSM) 55 | self._mDetectedCharset = None 56 | 57 | def get_charset_name(self): 58 | return self._mDetectedCharset 59 | 60 | def get_confidence(self): 61 | if self._mDetectedCharset: 62 | return 0.99 63 | else: 64 | return 0.00 65 | 66 | def feed(self, aBuf): 67 | for c in aBuf: 68 | # PY3K: aBuf is a byte array, so c is an int, not a byte 69 | for codingSM in self._mCodingSM: 70 | if not codingSM: 71 | continue 72 | if not codingSM.active: 73 | continue 74 | codingState = codingSM.next_state(wrap_ord(c)) 75 | if codingState == constants.eError: 76 | codingSM.active = False 77 | self._mActiveSM -= 1 78 | if self._mActiveSM <= 0: 79 | self._mState = constants.eNotMe 80 | return self.get_state() 81 | elif codingState == constants.eItsMe: 82 | self._mState = constants.eFoundIt 83 | self._mDetectedCharset = codingSM.get_coding_state_machine() # nopep8 84 | return self.get_state() 85 | 86 | return self.get_state() 87 | -------------------------------------------------------------------------------- /thirdparty/requests/status_codes.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .structures import LookupDict 4 | 5 | _codes = { 6 | 7 | # Informational. 8 | 100: ('continue',), 9 | 101: ('switching_protocols',), 10 | 102: ('processing',), 11 | 103: ('checkpoint',), 12 | 122: ('uri_too_long', 'request_uri_too_long'), 13 | 200: ('ok', 'okay', 'all_ok', 'all_okay', 'all_good', '\\o/', '✓'), 14 | 201: ('created',), 15 | 202: ('accepted',), 16 | 203: ('non_authoritative_info', 'non_authoritative_information'), 17 | 204: ('no_content',), 18 | 205: ('reset_content', 'reset'), 19 | 206: ('partial_content', 'partial'), 20 | 207: ('multi_status', 'multiple_status', 'multi_stati', 'multiple_stati'), 21 | 208: ('already_reported',), 22 | 226: ('im_used',), 23 | 24 | # Redirection. 25 | 300: ('multiple_choices',), 26 | 301: ('moved_permanently', 'moved', '\\o-'), 27 | 302: ('found',), 28 | 303: ('see_other', 'other'), 29 | 304: ('not_modified',), 30 | 305: ('use_proxy',), 31 | 306: ('switch_proxy',), 32 | 307: ('temporary_redirect', 'temporary_moved', 'temporary'), 33 | 308: ('permanent_redirect', 34 | 'resume_incomplete', 'resume',), # These 2 to be removed in 3.0 35 | 36 | # Client Error. 37 | 400: ('bad_request', 'bad'), 38 | 401: ('unauthorized',), 39 | 402: ('payment_required', 'payment'), 40 | 403: ('forbidden',), 41 | 404: ('not_found', '-o-'), 42 | 405: ('method_not_allowed', 'not_allowed'), 43 | 406: ('not_acceptable',), 44 | 407: ('proxy_authentication_required', 'proxy_auth', 'proxy_authentication'), 45 | 408: ('request_timeout', 'timeout'), 46 | 409: ('conflict',), 47 | 410: ('gone',), 48 | 411: ('length_required',), 49 | 412: ('precondition_failed', 'precondition'), 50 | 413: ('request_entity_too_large',), 51 | 414: ('request_uri_too_large',), 52 | 415: ('unsupported_media_type', 'unsupported_media', 'media_type'), 53 | 416: ('requested_range_not_satisfiable', 'requested_range', 'range_not_satisfiable'), 54 | 417: ('expectation_failed',), 55 | 418: ('im_a_teapot', 'teapot', 'i_am_a_teapot'), 56 | 422: ('unprocessable_entity', 'unprocessable'), 57 | 423: ('locked',), 58 | 424: ('failed_dependency', 'dependency'), 59 | 425: ('unordered_collection', 'unordered'), 60 | 426: ('upgrade_required', 'upgrade'), 61 | 428: ('precondition_required', 'precondition'), 62 | 429: ('too_many_requests', 'too_many'), 63 | 431: ('header_fields_too_large', 'fields_too_large'), 64 | 444: ('no_response', 'none'), 65 | 449: ('retry_with', 'retry'), 66 | 450: ('blocked_by_windows_parental_controls', 'parental_controls'), 67 | 451: ('unavailable_for_legal_reasons', 'legal_reasons'), 68 | 499: ('client_closed_request',), 69 | 70 | # Server Error. 71 | 500: ('internal_server_error', 'server_error', '/o\\', '✗'), 72 | 501: ('not_implemented',), 73 | 502: ('bad_gateway',), 74 | 503: ('service_unavailable', 'unavailable'), 75 | 504: ('gateway_timeout',), 76 | 505: ('http_version_not_supported', 'http_version'), 77 | 506: ('variant_also_negotiates',), 78 | 507: ('insufficient_storage',), 79 | 509: ('bandwidth_limit_exceeded', 'bandwidth'), 80 | 510: ('not_extended',), 81 | } 82 | 83 | codes = LookupDict(name='status_codes') 84 | 85 | for code, titles in _codes.items(): 86 | for title in titles: 87 | setattr(codes, title, code) 88 | if not title.startswith('\\'): 89 | setattr(codes, title.upper(), code) 90 | -------------------------------------------------------------------------------- /thirdparty/requests/packages/chardet/sbcsgroupprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # 13 | # This library is free software; you can redistribute it and/or 14 | # modify it under the terms of the GNU Lesser General Public 15 | # License as published by the Free Software Foundation; either 16 | # version 2.1 of the License, or (at your option) any later version. 17 | # 18 | # This library is distributed in the hope that it will be useful, 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 | # Lesser General Public License for more details. 22 | # 23 | # You should have received a copy of the GNU Lesser General Public 24 | # License along with this library; if not, write to the Free Software 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 26 | # 02110-1301 USA 27 | ######################### END LICENSE BLOCK ######################### 28 | 29 | from .charsetgroupprober import CharSetGroupProber 30 | from .sbcharsetprober import SingleByteCharSetProber 31 | from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel, 32 | Latin5CyrillicModel, MacCyrillicModel, 33 | Ibm866Model, Ibm855Model) 34 | from .langgreekmodel import Latin7GreekModel, Win1253GreekModel 35 | from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel 36 | from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel 37 | from .langthaimodel import TIS620ThaiModel 38 | from .langhebrewmodel import Win1255HebrewModel 39 | from .hebrewprober import HebrewProber 40 | 41 | 42 | class SBCSGroupProber(CharSetGroupProber): 43 | def __init__(self): 44 | CharSetGroupProber.__init__(self) 45 | self._mProbers = [ 46 | SingleByteCharSetProber(Win1251CyrillicModel), 47 | SingleByteCharSetProber(Koi8rModel), 48 | SingleByteCharSetProber(Latin5CyrillicModel), 49 | SingleByteCharSetProber(MacCyrillicModel), 50 | SingleByteCharSetProber(Ibm866Model), 51 | SingleByteCharSetProber(Ibm855Model), 52 | SingleByteCharSetProber(Latin7GreekModel), 53 | SingleByteCharSetProber(Win1253GreekModel), 54 | SingleByteCharSetProber(Latin5BulgarianModel), 55 | SingleByteCharSetProber(Win1251BulgarianModel), 56 | SingleByteCharSetProber(Latin2HungarianModel), 57 | SingleByteCharSetProber(Win1250HungarianModel), 58 | SingleByteCharSetProber(TIS620ThaiModel), 59 | ] 60 | hebrewProber = HebrewProber() 61 | logicalHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, 62 | False, hebrewProber) 63 | visualHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, True, 64 | hebrewProber) 65 | hebrewProber.set_model_probers(logicalHebrewProber, visualHebrewProber) 66 | self._mProbers.extend([hebrewProber, logicalHebrewProber, 67 | visualHebrewProber]) 68 | 69 | self.reset() 70 | -------------------------------------------------------------------------------- /thirdparty/requests/packages/chardet/mbcharsetprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # Proofpoint, Inc. 13 | # 14 | # This library is free software; you can redistribute it and/or 15 | # modify it under the terms of the GNU Lesser General Public 16 | # License as published by the Free Software Foundation; either 17 | # version 2.1 of the License, or (at your option) any later version. 18 | # 19 | # This library is distributed in the hope that it will be useful, 20 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 21 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 | # Lesser General Public License for more details. 23 | # 24 | # You should have received a copy of the GNU Lesser General Public 25 | # License along with this library; if not, write to the Free Software 26 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 27 | # 02110-1301 USA 28 | ######################### END LICENSE BLOCK ######################### 29 | 30 | import sys 31 | from . import constants 32 | from .charsetprober import CharSetProber 33 | 34 | 35 | class MultiByteCharSetProber(CharSetProber): 36 | def __init__(self): 37 | CharSetProber.__init__(self) 38 | self._mDistributionAnalyzer = None 39 | self._mCodingSM = None 40 | self._mLastChar = [0, 0] 41 | 42 | def reset(self): 43 | CharSetProber.reset(self) 44 | if self._mCodingSM: 45 | self._mCodingSM.reset() 46 | if self._mDistributionAnalyzer: 47 | self._mDistributionAnalyzer.reset() 48 | self._mLastChar = [0, 0] 49 | 50 | def get_charset_name(self): 51 | pass 52 | 53 | def feed(self, aBuf): 54 | aLen = len(aBuf) 55 | for i in range(0, aLen): 56 | codingState = self._mCodingSM.next_state(aBuf[i]) 57 | if codingState == constants.eError: 58 | if constants._debug: 59 | sys.stderr.write(self.get_charset_name() 60 | + ' prober hit error at byte ' + str(i) 61 | + '\n') 62 | self._mState = constants.eNotMe 63 | break 64 | elif codingState == constants.eItsMe: 65 | self._mState = constants.eFoundIt 66 | break 67 | elif codingState == constants.eStart: 68 | charLen = self._mCodingSM.get_current_charlen() 69 | if i == 0: 70 | self._mLastChar[1] = aBuf[0] 71 | self._mDistributionAnalyzer.feed(self._mLastChar, charLen) 72 | else: 73 | self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1], 74 | charLen) 75 | 76 | self._mLastChar[0] = aBuf[aLen - 1] 77 | 78 | if self.get_state() == constants.eDetecting: 79 | if (self._mDistributionAnalyzer.got_enough_data() and 80 | (self.get_confidence() > constants.SHORTCUT_THRESHOLD)): 81 | self._mState = constants.eFoundIt 82 | 83 | return self.get_state() 84 | 85 | def get_confidence(self): 86 | return self._mDistributionAnalyzer.get_confidence() 87 | -------------------------------------------------------------------------------- /thirdparty/requests/packages/urllib3/util/connection.py: -------------------------------------------------------------------------------- 1 | import socket 2 | try: 3 | from select import poll, POLLIN 4 | except ImportError: # `poll` doesn't exist on OSX and other platforms 5 | poll = False 6 | try: 7 | from select import select 8 | except ImportError: # `select` doesn't exist on AppEngine. 9 | select = False 10 | 11 | 12 | def is_connection_dropped(conn): # Platform-specific 13 | """ 14 | Returns True if the connection is dropped and should be closed. 15 | 16 | :param conn: 17 | :class:`httplib.HTTPConnection` object. 18 | 19 | Note: For platforms like AppEngine, this will always return ``False`` to 20 | let the platform handle connection recycling transparently for us. 21 | """ 22 | sock = getattr(conn, 'sock', False) 23 | if sock is False: # Platform-specific: AppEngine 24 | return False 25 | if sock is None: # Connection already closed (such as by httplib). 26 | return True 27 | 28 | if not poll: 29 | if not select: # Platform-specific: AppEngine 30 | return False 31 | 32 | try: 33 | return select([sock], [], [], 0.0)[0] 34 | except socket.error: 35 | return True 36 | 37 | # This version is better on platforms that support it. 38 | p = poll() 39 | p.register(sock, POLLIN) 40 | for (fno, ev) in p.poll(0.0): 41 | if fno == sock.fileno(): 42 | # Either data is buffered (bad), or the connection is dropped. 43 | return True 44 | 45 | 46 | # This function is copied from socket.py in the Python 2.7 standard 47 | # library scanner suite. Added to its signature is only `socket_options`. 48 | def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, 49 | source_address=None, socket_options=None): 50 | """Connect to *address* and return the socket object. 51 | 52 | Convenience function. Connect to *address* (a 2-tuple ``(host, 53 | port)``) and return the socket object. Passing the optional 54 | *timeout* parameter will set the timeout on the socket instance 55 | before attempting to connect. If no *timeout* is supplied, the 56 | global default timeout setting returned by :func:`getdefaulttimeout` 57 | is used. If *source_address* is set it must be a tuple of (host, port) 58 | for the socket to bind as a source address before making the connection. 59 | An host of '' or port 0 tells the OS to use the default. 60 | """ 61 | 62 | host, port = address 63 | err = None 64 | for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM): 65 | af, socktype, proto, canonname, sa = res 66 | sock = None 67 | try: 68 | sock = socket.socket(af, socktype, proto) 69 | 70 | # If provided, set socket level options before connecting. 71 | # This is the only addition urllib3 makes to this function. 72 | _set_socket_options(sock, socket_options) 73 | 74 | if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT: 75 | sock.settimeout(timeout) 76 | if source_address: 77 | sock.bind(source_address) 78 | sock.connect(sa) 79 | return sock 80 | 81 | except socket.error as _: 82 | err = _ 83 | if sock is not None: 84 | sock.close() 85 | sock = None 86 | 87 | if err is not None: 88 | raise err 89 | else: 90 | raise socket.error("getaddrinfo returns an empty list") 91 | 92 | 93 | def _set_socket_options(sock, options): 94 | if options is None: 95 | return 96 | 97 | for opt in options: 98 | sock.setsockopt(*opt) 99 | -------------------------------------------------------------------------------- /thirdparty/chardet/mbcharsetprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # Proofpoint, Inc. 13 | # 14 | # This library is free software; you can redistribute it and/or 15 | # modify it under the terms of the GNU Lesser General Public 16 | # License as published by the Free Software Foundation; either 17 | # version 2.1 of the License, or (at your option) any later version. 18 | # 19 | # This library is distributed in the hope that it will be useful, 20 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 21 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 | # Lesser General Public License for more details. 23 | # 24 | # You should have received a copy of the GNU Lesser General Public 25 | # License along with this library; if not, write to the Free Software 26 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 27 | # 02110-1301 USA 28 | ######################### END LICENSE BLOCK ######################### 29 | 30 | from .charsetprober import CharSetProber 31 | from .enums import ProbingState, MachineState 32 | 33 | 34 | class MultiByteCharSetProber(CharSetProber): 35 | """ 36 | MultiByteCharSetProber 37 | """ 38 | 39 | def __init__(self, lang_filter=None): 40 | super(MultiByteCharSetProber, self).__init__(lang_filter=lang_filter) 41 | self.distribution_analyzer = None 42 | self.coding_sm = None 43 | self._last_char = [0, 0] 44 | 45 | def reset(self): 46 | super(MultiByteCharSetProber, self).reset() 47 | if self.coding_sm: 48 | self.coding_sm.reset() 49 | if self.distribution_analyzer: 50 | self.distribution_analyzer.reset() 51 | self._last_char = [0, 0] 52 | 53 | @property 54 | def charset_name(self): 55 | raise NotImplementedError 56 | 57 | @property 58 | def language(self): 59 | raise NotImplementedError 60 | 61 | def feed(self, byte_str): 62 | for i in range(len(byte_str)): 63 | coding_state = self.coding_sm.next_state(byte_str[i]) 64 | if coding_state == MachineState.ERROR: 65 | self.logger.debug('%s %s prober hit error at byte %s', 66 | self.charset_name, self.language, i) 67 | self._state = ProbingState.NOT_ME 68 | break 69 | elif coding_state == MachineState.ITS_ME: 70 | self._state = ProbingState.FOUND_IT 71 | break 72 | elif coding_state == MachineState.START: 73 | char_len = self.coding_sm.get_current_charlen() 74 | if i == 0: 75 | self._last_char[1] = byte_str[0] 76 | self.distribution_analyzer.feed(self._last_char, char_len) 77 | else: 78 | self.distribution_analyzer.feed(byte_str[i - 1:i + 1], 79 | char_len) 80 | 81 | self._last_char[0] = byte_str[-1] 82 | 83 | if self.state == ProbingState.DETECTING: 84 | if (self.distribution_analyzer.got_enough_data() and 85 | (self.get_confidence() > self.SHORTCUT_THRESHOLD)): 86 | self._state = ProbingState.FOUND_IT 87 | 88 | return self.state 89 | 90 | def get_confidence(self): 91 | return self.distribution_analyzer.get_confidence() 92 | -------------------------------------------------------------------------------- /thirdparty/chardet/sbcsgroupprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # 13 | # This library is free software; you can redistribute it and/or 14 | # modify it under the terms of the GNU Lesser General Public 15 | # License as published by the Free Software Foundation; either 16 | # version 2.1 of the License, or (at your option) any later version. 17 | # 18 | # This library is distributed in the hope that it will be useful, 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 | # Lesser General Public License for more details. 22 | # 23 | # You should have received a copy of the GNU Lesser General Public 24 | # License along with this library; if not, write to the Free Software 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 26 | # 02110-1301 USA 27 | ######################### END LICENSE BLOCK ######################### 28 | 29 | from .charsetgroupprober import CharSetGroupProber 30 | from .sbcharsetprober import SingleByteCharSetProber 31 | from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel, 32 | Latin5CyrillicModel, MacCyrillicModel, 33 | Ibm866Model, Ibm855Model) 34 | from .langgreekmodel import Latin7GreekModel, Win1253GreekModel 35 | from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel 36 | # from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel 37 | from .langthaimodel import TIS620ThaiModel 38 | from .langhebrewmodel import Win1255HebrewModel 39 | from .hebrewprober import HebrewProber 40 | from .langturkishmodel import Latin5TurkishModel 41 | 42 | 43 | class SBCSGroupProber(CharSetGroupProber): 44 | def __init__(self): 45 | super(SBCSGroupProber, self).__init__() 46 | self.probers = [ 47 | SingleByteCharSetProber(Win1251CyrillicModel), 48 | SingleByteCharSetProber(Koi8rModel), 49 | SingleByteCharSetProber(Latin5CyrillicModel), 50 | SingleByteCharSetProber(MacCyrillicModel), 51 | SingleByteCharSetProber(Ibm866Model), 52 | SingleByteCharSetProber(Ibm855Model), 53 | SingleByteCharSetProber(Latin7GreekModel), 54 | SingleByteCharSetProber(Win1253GreekModel), 55 | SingleByteCharSetProber(Latin5BulgarianModel), 56 | SingleByteCharSetProber(Win1251BulgarianModel), 57 | # TODO: Restore Hungarian encodings (iso-8859-2 and windows-1250) 58 | # after we retrain model. 59 | # SingleByteCharSetProber(Latin2HungarianModel), 60 | # SingleByteCharSetProber(Win1250HungarianModel), 61 | SingleByteCharSetProber(TIS620ThaiModel), 62 | SingleByteCharSetProber(Latin5TurkishModel), 63 | ] 64 | hebrew_prober = HebrewProber() 65 | logical_hebrew_prober = SingleByteCharSetProber(Win1255HebrewModel, 66 | False, hebrew_prober) 67 | visual_hebrew_prober = SingleByteCharSetProber(Win1255HebrewModel, True, 68 | hebrew_prober) 69 | hebrew_prober.set_model_probers(logical_hebrew_prober, visual_hebrew_prober) 70 | self.probers.extend([hebrew_prober, logical_hebrew_prober, 71 | visual_hebrew_prober]) 72 | 73 | self.reset() 74 | -------------------------------------------------------------------------------- /thirdparty/chardet/codingstatemachine.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | import logging 29 | 30 | from .enums import MachineState 31 | 32 | 33 | class CodingStateMachine(object): 34 | """ 35 | A state machine to verify a byte sequence for a particular encoding. For 36 | each byte the detector receives, it will feed that byte to every active 37 | state machine available, one byte at a time. The state machine changes its 38 | state based on its previous state and the byte it receives. There are 3 39 | states in a state machine that are of interest to an auto-detector: 40 | 41 | START state: This is the state to start with, or a legal byte sequence 42 | (i.e. a valid code point) for character has been identified. 43 | 44 | ME state: This indicates that the state machine identified a byte sequence 45 | that is specific to the charset it is designed for and that 46 | there is no other possible encoding which can contain this byte 47 | sequence. This will to lead to an immediate positive answer for 48 | the detector. 49 | 50 | ERROR state: This indicates the state machine identified an illegal byte 51 | sequence for that encoding. This will lead to an immediate 52 | negative answer for this encoding. Detector will exclude this 53 | encoding from consideration from here on. 54 | """ 55 | def __init__(self, sm): 56 | self._model = sm 57 | self._curr_byte_pos = 0 58 | self._curr_char_len = 0 59 | self._curr_state = None 60 | self.logger = logging.getLogger(__name__) 61 | self.reset() 62 | 63 | def reset(self): 64 | self._curr_state = MachineState.START 65 | 66 | def next_state(self, c): 67 | # for each byte we get its class 68 | # if it is first byte, we also get byte length 69 | byte_class = self._model['class_table'][c] 70 | if self._curr_state == MachineState.START: 71 | self._curr_byte_pos = 0 72 | self._curr_char_len = self._model['char_len_table'][byte_class] 73 | # from byte's class and state_table, we get its next state 74 | curr_state = (self._curr_state * self._model['class_factor'] 75 | + byte_class) 76 | self._curr_state = self._model['state_table'][curr_state] 77 | self._curr_byte_pos += 1 78 | return self._curr_state 79 | 80 | def get_current_charlen(self): 81 | return self._curr_char_len 82 | 83 | def get_coding_state_machine(self): 84 | return self._model['name'] 85 | 86 | @property 87 | def language(self): 88 | return self._model['language'] 89 | -------------------------------------------------------------------------------- /lib/utils/TerminalSize.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This program is free software; you can redistribute it and/or modify 3 | # it under the terms of the GNU General Public License as published by 4 | # the Free Software Foundation; either version 2 of the License, or 5 | # (at your option) any later version. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU General Public License 13 | # along with this program; if not, write to the Free Software 14 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 15 | # MA 02110-1301, USA. 16 | # 17 | # Author: Mauro Soria 18 | 19 | import os 20 | import shlex 21 | import struct 22 | import platform 23 | import subprocess 24 | 25 | 26 | def get_terminal_size(): 27 | """ getTerminalSize() 28 | - get width and height of console 29 | - works on linux,os x,windows,cygwin(windows) 30 | originally retrieved from: 31 | http://stackoverflow.com/questions/566746/how-to-get-console-window-width-in-python 32 | """ 33 | current_os = platform.system() 34 | tuple_xy = None 35 | if current_os == 'Windows': 36 | tuple_xy = _get_terminal_size_windows() 37 | if tuple_xy is None: 38 | tuple_xy = _get_terminal_size_tput() 39 | # needed for window's python in cygwin's xterm! 40 | if current_os in ['Linux', 'Darwin', 'FreeBSD'] or current_os.startswith('CYGWIN'): 41 | tuple_xy = _get_terminal_size_linux() 42 | if tuple_xy is None: 43 | tuple_xy = (80, 25) # default value 44 | return tuple_xy 45 | 46 | 47 | def _get_terminal_size_windows(): 48 | try: 49 | from ctypes import windll, create_string_buffer 50 | # stdin handle is -10 51 | # stdout handle is -11 52 | # stderr handle is -12 53 | h = windll.kernel32.GetStdHandle(-12) 54 | csbi = create_string_buffer(22) 55 | res = windll.kernel32.GetConsoleScreenBufferInfo(h, csbi) 56 | if res: 57 | (bufx, bufy, curx, cury, wattr, 58 | left, top, right, bottom, 59 | maxx, maxy) = struct.unpack("hhhhHhhhhhh", csbi.raw) 60 | sizex = right - left + 1 61 | sizey = bottom - top + 1 62 | return sizex, sizey 63 | except: 64 | pass 65 | 66 | 67 | def _get_terminal_size_tput(): 68 | # get terminal width 69 | # src: http://stackoverflow.com/questions/263890/how-do-i-find-the-width-height-of-a-terminal-window 70 | try: 71 | cols = int(subprocess.check_call(shlex.split('tput cols'))) 72 | rows = int(subprocess.check_call(shlex.split('tput lines'))) 73 | return (cols, rows) 74 | except: 75 | pass 76 | 77 | 78 | def _get_terminal_size_linux(): 79 | def ioctl_GWINSZ(fd): 80 | try: 81 | import fcntl 82 | import termios 83 | cr = struct.unpack('hh', 84 | fcntl.ioctl(fd, termios.TIOCGWINSZ, '1234')) 85 | return cr 86 | except: 87 | pass 88 | cr = ioctl_GWINSZ(0) or ioctl_GWINSZ(1) or ioctl_GWINSZ(2) 89 | if not cr: 90 | try: 91 | fd = os.open(os.ctermid(), os.O_RDONLY) 92 | cr = ioctl_GWINSZ(fd) 93 | os.close(fd) 94 | except: 95 | pass 96 | if not cr: 97 | try: 98 | cr = (os.environ['LINES'], os.environ['COLUMNS']) 99 | except: 100 | return None 101 | return int(cr[1]), int(cr[0]) 102 | 103 | if __name__ == "__main__": 104 | sizex, sizey = get_terminal_size() 105 | print('width =', sizex, 'height =', sizey) 106 | -------------------------------------------------------------------------------- /thirdparty/requests/packages/chardet/eucjpprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | import sys 29 | from . import constants 30 | from .mbcharsetprober import MultiByteCharSetProber 31 | from .codingstatemachine import CodingStateMachine 32 | from .chardistribution import EUCJPDistributionAnalysis 33 | from .jpcntx import EUCJPContextAnalysis 34 | from .mbcssm import EUCJPSMModel 35 | 36 | 37 | class EUCJPProber(MultiByteCharSetProber): 38 | def __init__(self): 39 | MultiByteCharSetProber.__init__(self) 40 | self._mCodingSM = CodingStateMachine(EUCJPSMModel) 41 | self._mDistributionAnalyzer = EUCJPDistributionAnalysis() 42 | self._mContextAnalyzer = EUCJPContextAnalysis() 43 | self.reset() 44 | 45 | def reset(self): 46 | MultiByteCharSetProber.reset(self) 47 | self._mContextAnalyzer.reset() 48 | 49 | def get_charset_name(self): 50 | return "EUC-JP" 51 | 52 | def feed(self, aBuf): 53 | aLen = len(aBuf) 54 | for i in range(0, aLen): 55 | # PY3K: aBuf is a byte array, so aBuf[i] is an int, not a byte 56 | codingState = self._mCodingSM.next_state(aBuf[i]) 57 | if codingState == constants.eError: 58 | if constants._debug: 59 | sys.stderr.write(self.get_charset_name() 60 | + ' prober hit error at byte ' + str(i) 61 | + '\n') 62 | self._mState = constants.eNotMe 63 | break 64 | elif codingState == constants.eItsMe: 65 | self._mState = constants.eFoundIt 66 | break 67 | elif codingState == constants.eStart: 68 | charLen = self._mCodingSM.get_current_charlen() 69 | if i == 0: 70 | self._mLastChar[1] = aBuf[0] 71 | self._mContextAnalyzer.feed(self._mLastChar, charLen) 72 | self._mDistributionAnalyzer.feed(self._mLastChar, charLen) 73 | else: 74 | self._mContextAnalyzer.feed(aBuf[i - 1:i + 1], charLen) 75 | self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1], 76 | charLen) 77 | 78 | self._mLastChar[0] = aBuf[aLen - 1] 79 | 80 | if self.get_state() == constants.eDetecting: 81 | if (self._mContextAnalyzer.got_enough_data() and 82 | (self.get_confidence() > constants.SHORTCUT_THRESHOLD)): 83 | self._mState = constants.eFoundIt 84 | 85 | return self.get_state() 86 | 87 | def get_confidence(self): 88 | contxtCf = self._mContextAnalyzer.get_confidence() 89 | distribCf = self._mDistributionAnalyzer.get_confidence() 90 | return max(contxtCf, distribCf) 91 | -------------------------------------------------------------------------------- /thirdparty/chardet/eucjpprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .enums import ProbingState, MachineState 29 | from .mbcharsetprober import MultiByteCharSetProber 30 | from .codingstatemachine import CodingStateMachine 31 | from .chardistribution import EUCJPDistributionAnalysis 32 | from .jpcntx import EUCJPContextAnalysis 33 | from .mbcssm import EUCJP_SM_MODEL 34 | 35 | 36 | class EUCJPProber(MultiByteCharSetProber): 37 | def __init__(self): 38 | super(EUCJPProber, self).__init__() 39 | self.coding_sm = CodingStateMachine(EUCJP_SM_MODEL) 40 | self.distribution_analyzer = EUCJPDistributionAnalysis() 41 | self.context_analyzer = EUCJPContextAnalysis() 42 | self.reset() 43 | 44 | def reset(self): 45 | super(EUCJPProber, self).reset() 46 | self.context_analyzer.reset() 47 | 48 | @property 49 | def charset_name(self): 50 | return "EUC-JP" 51 | 52 | @property 53 | def language(self): 54 | return "Japanese" 55 | 56 | def feed(self, byte_str): 57 | for i in range(len(byte_str)): 58 | # PY3K: byte_str is a byte array, so byte_str[i] is an int, not a byte 59 | coding_state = self.coding_sm.next_state(byte_str[i]) 60 | if coding_state == MachineState.ERROR: 61 | self.logger.debug('%s %s prober hit error at byte %s', 62 | self.charset_name, self.language, i) 63 | self._state = ProbingState.NOT_ME 64 | break 65 | elif coding_state == MachineState.ITS_ME: 66 | self._state = ProbingState.FOUND_IT 67 | break 68 | elif coding_state == MachineState.START: 69 | char_len = self.coding_sm.get_current_charlen() 70 | if i == 0: 71 | self._last_char[1] = byte_str[0] 72 | self.context_analyzer.feed(self._last_char, char_len) 73 | self.distribution_analyzer.feed(self._last_char, char_len) 74 | else: 75 | self.context_analyzer.feed(byte_str[i - 1:i + 1], 76 | char_len) 77 | self.distribution_analyzer.feed(byte_str[i - 1:i + 1], 78 | char_len) 79 | 80 | self._last_char[0] = byte_str[-1] 81 | 82 | if self.state == ProbingState.DETECTING: 83 | if (self.context_analyzer.got_enough_data() and 84 | (self.get_confidence() > self.SHORTCUT_THRESHOLD)): 85 | self._state = ProbingState.FOUND_IT 86 | 87 | return self.state 88 | 89 | def get_confidence(self): 90 | context_conf = self.context_analyzer.get_confidence() 91 | distrib_conf = self.distribution_analyzer.get_confidence() 92 | return max(context_conf, distrib_conf) 93 | -------------------------------------------------------------------------------- /thirdparty/chardet/sjisprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import SJISDistributionAnalysis 31 | from .jpcntx import SJISContextAnalysis 32 | from .mbcssm import SJIS_SM_MODEL 33 | from .enums import ProbingState, MachineState 34 | 35 | 36 | class SJISProber(MultiByteCharSetProber): 37 | def __init__(self): 38 | super(SJISProber, self).__init__() 39 | self.coding_sm = CodingStateMachine(SJIS_SM_MODEL) 40 | self.distribution_analyzer = SJISDistributionAnalysis() 41 | self.context_analyzer = SJISContextAnalysis() 42 | self.reset() 43 | 44 | def reset(self): 45 | super(SJISProber, self).reset() 46 | self.context_analyzer.reset() 47 | 48 | @property 49 | def charset_name(self): 50 | return self.context_analyzer.charset_name 51 | 52 | @property 53 | def language(self): 54 | return "Japanese" 55 | 56 | def feed(self, byte_str): 57 | for i in range(len(byte_str)): 58 | coding_state = self.coding_sm.next_state(byte_str[i]) 59 | if coding_state == MachineState.ERROR: 60 | self.logger.debug('%s %s prober hit error at byte %s', 61 | self.charset_name, self.language, i) 62 | self._state = ProbingState.NOT_ME 63 | break 64 | elif coding_state == MachineState.ITS_ME: 65 | self._state = ProbingState.FOUND_IT 66 | break 67 | elif coding_state == MachineState.START: 68 | char_len = self.coding_sm.get_current_charlen() 69 | if i == 0: 70 | self._last_char[1] = byte_str[0] 71 | self.context_analyzer.feed(self._last_char[2 - char_len:], 72 | char_len) 73 | self.distribution_analyzer.feed(self._last_char, char_len) 74 | else: 75 | self.context_analyzer.feed(byte_str[i + 1 - char_len:i + 3 76 | - char_len], char_len) 77 | self.distribution_analyzer.feed(byte_str[i - 1:i + 1], 78 | char_len) 79 | 80 | self._last_char[0] = byte_str[-1] 81 | 82 | if self.state == ProbingState.DETECTING: 83 | if (self.context_analyzer.got_enough_data() and 84 | (self.get_confidence() > self.SHORTCUT_THRESHOLD)): 85 | self._state = ProbingState.FOUND_IT 86 | 87 | return self.state 88 | 89 | def get_confidence(self): 90 | context_conf = self.context_analyzer.get_confidence() 91 | distrib_conf = self.distribution_analyzer.get_confidence() 92 | return max(context_conf, distrib_conf) 93 | -------------------------------------------------------------------------------- /thirdparty/requests/packages/chardet/sjisprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | import sys 29 | from .mbcharsetprober import MultiByteCharSetProber 30 | from .codingstatemachine import CodingStateMachine 31 | from .chardistribution import SJISDistributionAnalysis 32 | from .jpcntx import SJISContextAnalysis 33 | from .mbcssm import SJISSMModel 34 | from . import constants 35 | 36 | 37 | class SJISProber(MultiByteCharSetProber): 38 | def __init__(self): 39 | MultiByteCharSetProber.__init__(self) 40 | self._mCodingSM = CodingStateMachine(SJISSMModel) 41 | self._mDistributionAnalyzer = SJISDistributionAnalysis() 42 | self._mContextAnalyzer = SJISContextAnalysis() 43 | self.reset() 44 | 45 | def reset(self): 46 | MultiByteCharSetProber.reset(self) 47 | self._mContextAnalyzer.reset() 48 | 49 | def get_charset_name(self): 50 | return self._mContextAnalyzer.get_charset_name() 51 | 52 | def feed(self, aBuf): 53 | aLen = len(aBuf) 54 | for i in range(0, aLen): 55 | codingState = self._mCodingSM.next_state(aBuf[i]) 56 | if codingState == constants.eError: 57 | if constants._debug: 58 | sys.stderr.write(self.get_charset_name() 59 | + ' prober hit error at byte ' + str(i) 60 | + '\n') 61 | self._mState = constants.eNotMe 62 | break 63 | elif codingState == constants.eItsMe: 64 | self._mState = constants.eFoundIt 65 | break 66 | elif codingState == constants.eStart: 67 | charLen = self._mCodingSM.get_current_charlen() 68 | if i == 0: 69 | self._mLastChar[1] = aBuf[0] 70 | self._mContextAnalyzer.feed(self._mLastChar[2 - charLen:], 71 | charLen) 72 | self._mDistributionAnalyzer.feed(self._mLastChar, charLen) 73 | else: 74 | self._mContextAnalyzer.feed(aBuf[i + 1 - charLen:i + 3 75 | - charLen], charLen) 76 | self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1], 77 | charLen) 78 | 79 | self._mLastChar[0] = aBuf[aLen - 1] 80 | 81 | if self.get_state() == constants.eDetecting: 82 | if (self._mContextAnalyzer.got_enough_data() and 83 | (self.get_confidence() > constants.SHORTCUT_THRESHOLD)): 84 | self._mState = constants.eFoundIt 85 | 86 | return self.get_state() 87 | 88 | def get_confidence(self): 89 | contxtCf = self._mContextAnalyzer.get_confidence() 90 | distribCf = self._mDistributionAnalyzer.get_confidence() 91 | return max(contxtCf, distribCf) 92 | -------------------------------------------------------------------------------- /thirdparty/chardet/charsetgroupprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Communicator client code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .enums import ProbingState 29 | from .charsetprober import CharSetProber 30 | 31 | 32 | class CharSetGroupProber(CharSetProber): 33 | def __init__(self, lang_filter=None): 34 | super(CharSetGroupProber, self).__init__(lang_filter=lang_filter) 35 | self._active_num = 0 36 | self.probers = [] 37 | self._best_guess_prober = None 38 | 39 | def reset(self): 40 | super(CharSetGroupProber, self).reset() 41 | self._active_num = 0 42 | for prober in self.probers: 43 | if prober: 44 | prober.reset() 45 | prober.active = True 46 | self._active_num += 1 47 | self._best_guess_prober = None 48 | 49 | @property 50 | def charset_name(self): 51 | if not self._best_guess_prober: 52 | self.get_confidence() 53 | if not self._best_guess_prober: 54 | return None 55 | return self._best_guess_prober.charset_name 56 | 57 | @property 58 | def language(self): 59 | if not self._best_guess_prober: 60 | self.get_confidence() 61 | if not self._best_guess_prober: 62 | return None 63 | return self._best_guess_prober.language 64 | 65 | def feed(self, byte_str): 66 | for prober in self.probers: 67 | if not prober: 68 | continue 69 | if not prober.active: 70 | continue 71 | state = prober.feed(byte_str) 72 | if not state: 73 | continue 74 | if state == ProbingState.FOUND_IT: 75 | self._best_guess_prober = prober 76 | return self.state 77 | elif state == ProbingState.NOT_ME: 78 | prober.active = False 79 | self._active_num -= 1 80 | if self._active_num <= 0: 81 | self._state = ProbingState.NOT_ME 82 | return self.state 83 | return self.state 84 | 85 | def get_confidence(self): 86 | state = self.state 87 | if state == ProbingState.FOUND_IT: 88 | return 0.99 89 | elif state == ProbingState.NOT_ME: 90 | return 0.01 91 | best_conf = 0.0 92 | self._best_guess_prober = None 93 | for prober in self.probers: 94 | if not prober: 95 | continue 96 | if not prober.active: 97 | self.logger.debug('%s not active', prober.charset_name) 98 | continue 99 | conf = prober.get_confidence() 100 | self.logger.debug('%s %s confidence = %s', prober.charset_name, prober.language, conf) 101 | if best_conf < conf: 102 | best_conf = conf 103 | self._best_guess_prober = prober 104 | if not self._best_guess_prober: 105 | return 0.0 106 | return best_conf 107 | -------------------------------------------------------------------------------- /thirdparty/requests/packages/chardet/charsetgroupprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Communicator client code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from . import constants 29 | import sys 30 | from .charsetprober import CharSetProber 31 | 32 | 33 | class CharSetGroupProber(CharSetProber): 34 | def __init__(self): 35 | CharSetProber.__init__(self) 36 | self._mActiveNum = 0 37 | self._mProbers = [] 38 | self._mBestGuessProber = None 39 | 40 | def reset(self): 41 | CharSetProber.reset(self) 42 | self._mActiveNum = 0 43 | for prober in self._mProbers: 44 | if prober: 45 | prober.reset() 46 | prober.active = True 47 | self._mActiveNum += 1 48 | self._mBestGuessProber = None 49 | 50 | def get_charset_name(self): 51 | if not self._mBestGuessProber: 52 | self.get_confidence() 53 | if not self._mBestGuessProber: 54 | return None 55 | # self._mBestGuessProber = self._mProbers[0] 56 | return self._mBestGuessProber.get_charset_name() 57 | 58 | def feed(self, aBuf): 59 | for prober in self._mProbers: 60 | if not prober: 61 | continue 62 | if not prober.active: 63 | continue 64 | st = prober.feed(aBuf) 65 | if not st: 66 | continue 67 | if st == constants.eFoundIt: 68 | self._mBestGuessProber = prober 69 | return self.get_state() 70 | elif st == constants.eNotMe: 71 | prober.active = False 72 | self._mActiveNum -= 1 73 | if self._mActiveNum <= 0: 74 | self._mState = constants.eNotMe 75 | return self.get_state() 76 | return self.get_state() 77 | 78 | def get_confidence(self): 79 | st = self.get_state() 80 | if st == constants.eFoundIt: 81 | return 0.99 82 | elif st == constants.eNotMe: 83 | return 0.01 84 | bestConf = 0.0 85 | self._mBestGuessProber = None 86 | for prober in self._mProbers: 87 | if not prober: 88 | continue 89 | if not prober.active: 90 | if constants._debug: 91 | sys.stderr.write(prober.get_charset_name() 92 | + ' not active\n') 93 | continue 94 | cf = prober.get_confidence() 95 | if constants._debug: 96 | sys.stderr.write('%s confidence = %s\n' % 97 | (prober.get_charset_name(), cf)) 98 | if bestConf < cf: 99 | bestConf = cf 100 | self._mBestGuessProber = prober 101 | if not self._mBestGuessProber: 102 | return 0.0 103 | return bestConf 104 | # else: 105 | # self._mBestGuessProber = self._mProbers[0] 106 | # return self._mBestGuessProber.get_confidence() 107 | -------------------------------------------------------------------------------- /thirdparty/requests/packages/urllib3/packages/ssl_match_hostname/_implementation.py: -------------------------------------------------------------------------------- 1 | """The match_hostname() function from Python 3.3.3, essential when using SSL.""" 2 | 3 | # Note: This file is under the PSF license as the code comes from the python 4 | # stdlib. http://docs.python.org/3/license.html 5 | 6 | import re 7 | 8 | __version__ = '3.4.0.2' 9 | 10 | class CertificateError(ValueError): 11 | pass 12 | 13 | 14 | def _dnsname_match(dn, hostname, max_wildcards=1): 15 | """Matching according to RFC 6125, section 6.4.3 16 | 17 | http://tools.ietf.org/html/rfc6125#section-6.4.3 18 | """ 19 | pats = [] 20 | if not dn: 21 | return False 22 | 23 | # Ported from python3-syntax: 24 | # leftmost, *remainder = dn.split(r'.') 25 | parts = dn.split(r'.') 26 | leftmost = parts[0] 27 | remainder = parts[1:] 28 | 29 | wildcards = leftmost.count('*') 30 | if wildcards > max_wildcards: 31 | # Issue #17980: avoid denials of service by refusing more 32 | # than one wildcard per fragment. A survey of established 33 | # policy among SSL implementations showed it to be a 34 | # reasonable choice. 35 | raise CertificateError( 36 | "too many wildcards in certificate DNS name: " + repr(dn)) 37 | 38 | # speed up common case w/o wildcards 39 | if not wildcards: 40 | return dn.lower() == hostname.lower() 41 | 42 | # RFC 6125, section 6.4.3, subitem 1. 43 | # The client SHOULD NOT attempt to match a presented identifier in which 44 | # the wildcard character comprises a label other than the left-most label. 45 | if leftmost == '*': 46 | # When '*' is a fragment by itself, it matches a non-empty dotless 47 | # fragment. 48 | pats.append('[^.]+') 49 | elif leftmost.startswith('xn--') or hostname.startswith('xn--'): 50 | # RFC 6125, section 6.4.3, subitem 3. 51 | # The client SHOULD NOT attempt to match a presented identifier 52 | # where the wildcard character is embedded within an A-label or 53 | # U-label of an internationalized domain name. 54 | pats.append(re.escape(leftmost)) 55 | else: 56 | # Otherwise, '*' matches any dotless string, e.g. www* 57 | pats.append(re.escape(leftmost).replace(r'\*', '[^.]*')) 58 | 59 | # add the remaining fragments, ignore any wildcards 60 | for frag in remainder: 61 | pats.append(re.escape(frag)) 62 | 63 | pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) 64 | return pat.match(hostname) 65 | 66 | 67 | def match_hostname(cert, hostname): 68 | """Verify that *cert* (in decoded format as returned by 69 | SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125 70 | rules are followed, but IP addresses are not accepted for *hostname*. 71 | 72 | CertificateError is raised on failure. On success, the function 73 | returns nothing. 74 | """ 75 | if not cert: 76 | raise ValueError("empty or no certificate") 77 | dnsnames = [] 78 | san = cert.get('subjectAltName', ()) 79 | for key, value in san: 80 | if key == 'DNS': 81 | if _dnsname_match(value, hostname): 82 | return 83 | dnsnames.append(value) 84 | if not dnsnames: 85 | # The subject is only checked when there is no dNSName entry 86 | # in subjectAltName 87 | for sub in cert.get('subject', ()): 88 | for key, value in sub: 89 | # XXX according to RFC 2818, the most specific Common Name 90 | # must be used. 91 | if key == 'commonName': 92 | if _dnsname_match(value, hostname): 93 | return 94 | dnsnames.append(value) 95 | if len(dnsnames) > 1: 96 | raise CertificateError("hostname %r " 97 | "doesn't match either of %s" 98 | % (hostname, ', '.join(map(repr, dnsnames)))) 99 | elif len(dnsnames) == 1: 100 | raise CertificateError("hostname %r " 101 | "doesn't match %r" 102 | % (hostname, dnsnames[0])) 103 | else: 104 | raise CertificateError("no appropriate commonName or " 105 | "subjectAltName fields were found") 106 | -------------------------------------------------------------------------------- /thirdparty/chardet/escprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .charsetprober import CharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .enums import LanguageFilter, ProbingState, MachineState 31 | from .escsm import (HZ_SM_MODEL, ISO2022CN_SM_MODEL, ISO2022JP_SM_MODEL, 32 | ISO2022KR_SM_MODEL) 33 | 34 | 35 | class EscCharSetProber(CharSetProber): 36 | """ 37 | This CharSetProber uses a "code scheme" approach for detecting encodings, 38 | whereby easily recognizable escape or shift sequences are relied on to 39 | identify these encodings. 40 | """ 41 | 42 | def __init__(self, lang_filter=None): 43 | super(EscCharSetProber, self).__init__(lang_filter=lang_filter) 44 | self.coding_sm = [] 45 | if self.lang_filter & LanguageFilter.CHINESE_SIMPLIFIED: 46 | self.coding_sm.append(CodingStateMachine(HZ_SM_MODEL)) 47 | self.coding_sm.append(CodingStateMachine(ISO2022CN_SM_MODEL)) 48 | if self.lang_filter & LanguageFilter.JAPANESE: 49 | self.coding_sm.append(CodingStateMachine(ISO2022JP_SM_MODEL)) 50 | if self.lang_filter & LanguageFilter.KOREAN: 51 | self.coding_sm.append(CodingStateMachine(ISO2022KR_SM_MODEL)) 52 | self.active_sm_count = None 53 | self._detected_charset = None 54 | self._detected_language = None 55 | self._state = None 56 | self.reset() 57 | 58 | def reset(self): 59 | super(EscCharSetProber, self).reset() 60 | for coding_sm in self.coding_sm: 61 | if not coding_sm: 62 | continue 63 | coding_sm.active = True 64 | coding_sm.reset() 65 | self.active_sm_count = len(self.coding_sm) 66 | self._detected_charset = None 67 | self._detected_language = None 68 | 69 | @property 70 | def charset_name(self): 71 | return self._detected_charset 72 | 73 | @property 74 | def language(self): 75 | return self._detected_language 76 | 77 | def get_confidence(self): 78 | if self._detected_charset: 79 | return 0.99 80 | else: 81 | return 0.00 82 | 83 | def feed(self, byte_str): 84 | for c in byte_str: 85 | for coding_sm in self.coding_sm: 86 | if not coding_sm or not coding_sm.active: 87 | continue 88 | coding_state = coding_sm.next_state(c) 89 | if coding_state == MachineState.ERROR: 90 | coding_sm.active = False 91 | self.active_sm_count -= 1 92 | if self.active_sm_count <= 0: 93 | self._state = ProbingState.NOT_ME 94 | return self.state 95 | elif coding_state == MachineState.ITS_ME: 96 | self._state = ProbingState.FOUND_IT 97 | self._detected_charset = coding_sm.get_coding_state_machine() 98 | self._detected_language = coding_sm.language 99 | return self.state 100 | 101 | return self.state 102 | -------------------------------------------------------------------------------- /lib/utils/FileUtils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This program is free software; you can redistribute it and/or modify 3 | # it under the terms of the GNU General Public License as published by 4 | # the Free Software Foundation; either version 2 of the License, or 5 | # (at your option) any later version. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU General Public License 13 | # along with this program; if not, write to the Free Software 14 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 15 | # MA 02110-1301, USA. 16 | # 17 | # Author: Mauro Soria 18 | 19 | import os 20 | import os.path 21 | 22 | 23 | class File(object): 24 | def __init__(self, *pathComponents): 25 | self._path = FileUtils.buildPath(*pathComponents) 26 | self.content = None 27 | 28 | @property 29 | def path(self): 30 | return self._path 31 | 32 | @path.setter 33 | def path(self, value): 34 | raise NotImplemented 35 | 36 | def isValid(self): 37 | return FileUtils.isFile(self.path) 38 | 39 | def exists(self): 40 | return FileUtils.exists(self.path) 41 | 42 | def canRead(self): 43 | return FileUtils.canRead(self.path) 44 | 45 | def canWrite(self): 46 | return FileUtils.canWrite(self.path) 47 | 48 | def read(self): 49 | return FileUtils.read(self.path) 50 | 51 | def update(self): 52 | self.content = self.read() 53 | 54 | def content(self): 55 | if not self.content: 56 | self.content = FileUtils.read() 57 | return self.content() 58 | 59 | def getLines(self): 60 | for line in FileUtils.getLines(self.path): 61 | yield line 62 | 63 | def __cmp__(self, other): 64 | if not isinstance(other, File): 65 | raise NotImplemented 66 | return cmp(self.content(), other.content()) 67 | 68 | def __enter__(self): 69 | return self 70 | 71 | def __exit__(self, type, value, tb): 72 | pass 73 | 74 | 75 | class FileUtils(object): 76 | @staticmethod 77 | def buildPath(*pathComponents): 78 | if pathComponents: 79 | path = os.path.join(*pathComponents) 80 | else: 81 | path = '' 82 | return path 83 | 84 | @staticmethod 85 | def exists(fileName): 86 | return os.access(fileName, os.F_OK) 87 | 88 | @staticmethod 89 | def canRead(fileName): 90 | if not os.access(fileName, os.R_OK): 91 | return False 92 | try: 93 | with open(fileName): 94 | pass 95 | except IOError: 96 | return False 97 | return True 98 | 99 | @staticmethod 100 | def canWrite(fileName): 101 | return os.access(fileName, os.W_OK) 102 | 103 | @staticmethod 104 | def read(fileName): 105 | result = '' 106 | with open(fileName, 'r') as fd: 107 | for line in fd.readlines(): 108 | result += line 109 | return result 110 | 111 | @staticmethod 112 | def getLines(fileName): 113 | with open(fileName, 'r', errors="replace") as fd: 114 | return fd.read().splitlines() 115 | 116 | @staticmethod 117 | def isDir(fileName): 118 | return os.path.isdir(fileName) 119 | 120 | @staticmethod 121 | def isFile(fileName): 122 | return os.path.isfile(fileName) 123 | 124 | @staticmethod 125 | def createDirectory(directory): 126 | if not FileUtils.exists(directory): 127 | os.makedirs(directory) 128 | 129 | @staticmethod 130 | def sizeHuman(num): 131 | base = 1024 132 | for x in ['B ', 'KB', 'MB', 'GB']: 133 | if num < base and num > -base: 134 | return "%3.0f%s" % (num, x) 135 | num /= base 136 | return "%3.0f %s" % (num, 'TB') 137 | 138 | @staticmethod 139 | def writeLines(fileName, lines): 140 | content = None 141 | if type(lines) is list: 142 | content = "\n".join(lines) 143 | else: 144 | content = lines 145 | with open(fileName, "w") as f: 146 | f.writelines(content) 147 | -------------------------------------------------------------------------------- /lib/core/Scanner.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This program is free software; you can redistribute it and/or modify 3 | # it under the terms of the GNU General Public License as published by 4 | # the Free Software Foundation; either version 2 of the License, or 5 | # (at your option) any later version. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU General Public License 13 | # along with this program; if not, write to the Free Software 14 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 15 | # MA 02110-1301, USA. 16 | # 17 | # Author: Mauro Soria 18 | 19 | import re 20 | from difflib import SequenceMatcher 21 | 22 | from lib.utils import RandomUtils 23 | from thirdparty.sqlmap import DynamicContentParser 24 | 25 | 26 | class ScannerException(Exception): 27 | pass 28 | 29 | 30 | class Scanner(object): 31 | def __init__(self, requester, testPath=None, suffix=None): 32 | if testPath is None or testPath is "": 33 | self.testPath = RandomUtils.randString() 34 | else: 35 | self.testPath = testPath 36 | 37 | self.suffix = suffix if suffix is not None else "" 38 | self.requester = requester 39 | self.tester = None 40 | self.redirectRegExp = None 41 | self.invalidStatus = None 42 | self.dynamicParser = None 43 | self.ratio = 0.98 44 | self.redirectStatusCodes = [301, 302, 307] 45 | self.setup() 46 | 47 | def setup(self): 48 | firstPath = self.testPath + self.suffix 49 | firstResponse = self.requester.request(firstPath) 50 | self.invalidStatus = firstResponse.status 51 | 52 | if self.invalidStatus == 404: 53 | # Using the response status code is enough :-} 54 | return 55 | 56 | # look for redirects 57 | secondPath = RandomUtils.randString(omit=self.testPath) + self.suffix 58 | secondResponse = self.requester.request(secondPath) 59 | 60 | if firstResponse.status in self.redirectStatusCodes and firstResponse.redirect and secondResponse.redirect: 61 | self.redirectRegExp = self.generateRedirectRegExp(firstResponse.redirect, secondResponse.redirect) 62 | 63 | # Analyze response bodies 64 | self.dynamicParser = DynamicContentParser(self.requester, firstPath, firstResponse.body, secondResponse.body) 65 | 66 | baseRatio = float("{0:.2f}".format(self.dynamicParser.comparisonRatio)) # Rounding to 2 decimals 67 | 68 | # If response length is small, adjust ratio 69 | if len(firstResponse) < 2000: 70 | baseRatio -= 0.1 71 | 72 | if baseRatio < self.ratio: 73 | self.ratio = baseRatio 74 | 75 | def generateRedirectRegExp(self, firstLocation, secondLocation): 76 | if firstLocation is None or secondLocation is None: 77 | return None 78 | 79 | sm = SequenceMatcher(None, firstLocation, secondLocation) 80 | marks = [] 81 | 82 | for blocks in sm.get_matching_blocks(): 83 | i = blocks[0] 84 | n = blocks[2] 85 | # empty block 86 | 87 | if n == 0: 88 | continue 89 | 90 | mark = firstLocation[i:i + n] 91 | marks.append(mark) 92 | 93 | regexp = "^.*{0}.*$".format(".*".join(map(re.escape, marks))) 94 | return regexp 95 | 96 | def scan(self, path, response): 97 | if self.invalidStatus == 404 and response.status == 404: 98 | return False 99 | 100 | if self.invalidStatus != response.status: 101 | return True 102 | 103 | redirectToInvalid = False 104 | 105 | if self.redirectRegExp is not None and response.redirect is not None: 106 | redirectToInvalid = re.match(self.redirectRegExp, response.redirect) is not None 107 | # If redirection doesn't match the rule, mark as found 108 | 109 | if not redirectToInvalid: 110 | return True 111 | 112 | ratio = self.dynamicParser.compareTo(response.body) 113 | 114 | if ratio >= self.ratio: 115 | return False 116 | 117 | elif redirectToInvalid and ratio >= (self.ratio - 0.15): 118 | return False 119 | 120 | return True 121 | -------------------------------------------------------------------------------- /thirdparty/sqlmap/DynamicContentParser.py: -------------------------------------------------------------------------------- 1 | from difflib import SequenceMatcher 2 | import re 3 | 4 | from thirdparty import chardet 5 | 6 | 7 | class DynamicContentParser: 8 | def __init__(self, requester, path, firstPage, secondPage, comparisons=2): 9 | self.DYNAMICITY_MARK_LENGTH = 32 10 | self.UPPER_RATIO_BOUND = 0.98 11 | self.requester = requester 12 | self.keyCallback = path 13 | self.comparisons = comparisons 14 | self.dynamicMarks = [] 15 | self.seqMatcher = SequenceMatcher() 16 | self.generateDynamicMarks(firstPage, secondPage) 17 | 18 | def generateDynamicMarks(self, firstPage, secondPage): 19 | if any(page is None for page in (firstPage, secondPage)): 20 | # No content 21 | return 22 | 23 | self.seqMatcher.set_seq1(firstPage) 24 | self.seqMatcher.set_seq2(secondPage) 25 | ratio = self.seqMatcher.quick_ratio() 26 | # In case of an intolerable difference turn on dynamicity removal engine 27 | if ratio <= self.UPPER_RATIO_BOUND: 28 | self.dynamicMarks += self.findDynamicContent(firstPage, secondPage) 29 | for i in range(self.comparisons): 30 | response = self.requester.request(self.keyCallback) 31 | secondPage = response.body 32 | self.dynamicMarks += self.findDynamicContent(firstPage, secondPage) 33 | self.cleanPage = self.removeDynamicContent(firstPage, self.dynamicMarks) 34 | self.seqMatcher.set_seq1(self.cleanPage) 35 | self.seqMatcher.set_seq2(self.removeDynamicContent(secondPage, self.dynamicMarks)) 36 | ratio = self.seqMatcher.quick_ratio() 37 | else: 38 | self.cleanPage = firstPage 39 | self.comparisonRatio = ratio 40 | 41 | def compareTo(self, page): 42 | seqMatcher = SequenceMatcher() 43 | seqMatcher.set_seq1(self.cleanPage) 44 | seqMatcher.set_seq2(self.removeDynamicContent(page, self.dynamicMarks)) 45 | ratio = seqMatcher.quick_ratio() 46 | return ratio 47 | 48 | def findDynamicContent(self, firstPage, secondPage): 49 | dynamicMarks = [] 50 | 51 | blocks = list(SequenceMatcher(None, firstPage, secondPage).get_matching_blocks()) 52 | 53 | # Removing too small matching blocks 54 | for block in blocks[:]: 55 | (_, _, length) = block 56 | 57 | if length <= self.DYNAMICITY_MARK_LENGTH: 58 | blocks.remove(block) 59 | 60 | # Making of dynamic markings based on prefix/suffix principle 61 | if len(blocks) > 0: 62 | blocks.insert(0, None) 63 | blocks.append(None) 64 | 65 | for i in range(len(blocks) - 1): 66 | prefix = firstPage[blocks[i][0]:blocks[i][0] + blocks[i][2]] if blocks[i] else None 67 | suffix = firstPage[blocks[i + 1][0]:blocks[i + 1][0] + blocks[i + 1][2]] if blocks[i + 1] else None 68 | 69 | if prefix is None and blocks[i + 1][0] == 0: 70 | continue 71 | 72 | if suffix is None and (blocks[i][0] + blocks[i][2] >= len(firstPage)): 73 | continue 74 | 75 | dynamicMarks.append((re.escape(prefix[int(-self.DYNAMICITY_MARK_LENGTH / 2):]) if prefix else None, 76 | re.escape(suffix[:int(self.DYNAMICITY_MARK_LENGTH / 2)]) if suffix else None)) 77 | 78 | return dynamicMarks 79 | 80 | def removeDynamicContent(self, page, dynamicMarks): 81 | """ 82 | Removing dynamic content from supplied page basing removal on 83 | precalculated dynamic markings 84 | """ 85 | if page and len(dynamicMarks) > 0: 86 | encoding = chardet.detect(page)['encoding'] 87 | page = page.decode(encoding, errors='replace') 88 | for item in dynamicMarks: 89 | prefix, suffix = item 90 | if prefix is not None: 91 | prefix = prefix.decode(encoding, errors='replace') 92 | if suffix is not None: 93 | suffix = suffix.decode(encoding, errors='replace') 94 | 95 | if prefix is None and suffix is None: 96 | continue 97 | elif prefix is None: 98 | page = re.sub(r'(?s)^.+{0}'.format(re.escape(suffix)), suffix.replace('\\', r'\\'), page) 99 | elif suffix is None: 100 | page = re.sub(r'(?s){0}.+$'.format(re.escape(prefix)), prefix.replace('\\', r'\\'), page) 101 | else: 102 | page = re.sub(r'(?s){0}.+{1}'.format(re.escape(prefix), re.escape(suffix)), "{0}{1}".format(prefix.replace('\\', r'\\'), suffix.replace('\\', r'\\')), page) 103 | 104 | page = page.encode() 105 | 106 | return page 107 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | dirsearch 2 | ========= 3 | 4 | Current Release: v0.3.8 (2017.07.25) 5 | 6 | 7 | Overview 8 | -------- 9 | dirsearch is a simple command line tool designed to brute force directories and files in websites. 10 | 11 | 12 | Installation & Usage 13 | ------------ 14 | 15 | ``` 16 | git clone https://github.com/maurosoria/dirsearch.git 17 | cd dirsearch 18 | python3 dirsearch.py -u -e 19 | ``` 20 | 21 | you can also use this alias to send directly to proxy 22 | `python3 /path/to/dirsearch/dirsearch.py --http-proxy=localhost:8080` 23 | 24 | 25 | Options 26 | ------- 27 | 28 | 29 | ``` 30 | Options: 31 | -h, --help show this help message and exit 32 | 33 | Mandatory: 34 | -u URL, --url=URL URL target 35 | -L URLLIST, --url-list=URLLIST 36 | URL list target 37 | -e EXTENSIONS, --extensions=EXTENSIONS 38 | Extension list separated by comma (Example: php,asp) 39 | 40 | Dictionary Settings: 41 | -w WORDLIST, --wordlist=WORDLIST 42 | -l, --lowercase 43 | -f, --force-extensions 44 | Force extensions for every wordlist entry (like in 45 | DirBuster) 46 | 47 | General Settings: 48 | -s DELAY, --delay=DELAY 49 | Delay between requests (float number) 50 | -r, --recursive Bruteforce recursively 51 | --suppress-empty, --suppress-empty 52 | --scan-subdir=SCANSUBDIRS, --scan-subdirs=SCANSUBDIRS 53 | Scan subdirectories of the given -u|--url (separated 54 | by comma) 55 | --exclude-subdir=EXCLUDESUBDIRS, --exclude-subdirs=EXCLUDESUBDIRS 56 | Exclude the following subdirectories during recursive 57 | scan (separated by comma) 58 | --exclude-texts='Not found', 'Error' 59 | Exclude results by text in response 60 | --exclude-regexps='Not foun[a-z]{1}', '^Error$' 61 | Exclude results by text regexp in response 62 | -t THREADSCOUNT, --threads=THREADSCOUNT 63 | Number of Threads 64 | -x EXCLUDESTATUSCODES, --exclude-status=EXCLUDESTATUSCODES 65 | Exclude status code, separated by comma (example: 301, 66 | 500) 67 | -c COOKIE, --cookie=COOKIE 68 | --ua=USERAGENT, --user-agent=USERAGENT 69 | -F, --follow-redirects 70 | -H HEADERS, --header=HEADERS 71 | Headers to add (example: --header "Referer: 72 | example.com" --header "User-Agent: IE" 73 | --random-agents, --random-user-agents 74 | 75 | Connection Settings: 76 | --timeout=TIMEOUT Connection timeout 77 | --ip=IP Resolve name to IP address 78 | --proxy=HTTPPROXY, --http-proxy=HTTPPROXY 79 | Http Proxy (example: localhost:8080 80 | --max-retries=MAXRETRIES 81 | -b, --request-by-hostname 82 | By default dirsearch will request by IP for speed. 83 | This forces requests by hostname 84 | 85 | Reports: 86 | --simple-report=SIMPLEOUTPUTFILE 87 | Only found paths 88 | --plain-text-report=PLAINTEXTOUTPUTFILE 89 | Found paths with status codes 90 | --json-report=JSONOUTPUTFILE 91 | 92 | ``` 93 | 94 | 95 | Operating Systems supported 96 | --------------------------- 97 | - Windows XP/7/8/10 98 | - GNU/Linux 99 | - MacOSX 100 | 101 | Features 102 | -------- 103 | - Multithreaded 104 | - Keep alive connections 105 | - Support for multiple extensions (-e|--extensions asp,php) 106 | - Reporting (plain text, JSON) 107 | - Heuristically detects invalid web pages 108 | - Recursive brute forcing 109 | - HTTP proxy support 110 | - User agent randomization 111 | - Batch processing 112 | - Request delaying 113 | 114 | About wordlists 115 | --------------- 116 | Dictionaries must be text files. Each line will be processed as such, except that the special word %EXT% is used, which will generate one entry for each extension (-e | --extension) passed as an argument. 117 | 118 | Example: 119 | - example/ 120 | - example.%EXT% 121 | 122 | Passing the extensions "asp" and "aspx" will generate the following dictionary: 123 | - example/ 124 | - example.asp 125 | - example.aspx 126 | 127 | You can also use -f | --force-extensions switch to append extensions to every word in the wordlists (like DirBuster). 128 | 129 | License 130 | ------- 131 | Copyright (C) Mauro Soria (maurosoria at gmail dot com) 132 | 133 | License: GNU General Public License, version 2 134 | 135 | 136 | Contributors 137 | --------- 138 | Special thanks for these people. 139 | 140 | - Damian89 141 | - Bo0oM 142 | - liamosaur 143 | - redshark1802 144 | - SUHAR1K 145 | - FireFart 146 | - k2l8m11n2 147 | - vlohacks 148 | - r0p0s3c 149 | -------------------------------------------------------------------------------- /thirdparty/requests/packages/urllib3/contrib/ntlmpool.py: -------------------------------------------------------------------------------- 1 | """ 2 | NTLM authenticating pool, contributed by erikcederstran 3 | 4 | Issue #10, see: http://code.google.com/p/urllib3/issues/detail?id=10 5 | """ 6 | 7 | try: 8 | from http.client import HTTPSConnection 9 | except ImportError: 10 | from httplib import HTTPSConnection 11 | from logging import getLogger 12 | from ntlm import ntlm 13 | 14 | from urllib3 import HTTPSConnectionPool 15 | 16 | 17 | log = getLogger(__name__) 18 | 19 | 20 | class NTLMConnectionPool(HTTPSConnectionPool): 21 | """ 22 | Implements an NTLM authentication version of an urllib3 connection pool 23 | """ 24 | 25 | scheme = 'https' 26 | 27 | def __init__(self, user, pw, authurl, *args, **kwargs): 28 | """ 29 | authurl is a random URL on the server that is protected by NTLM. 30 | user is the Windows user, probably in the DOMAIN\\username format. 31 | pw is the password for the user. 32 | """ 33 | super(NTLMConnectionPool, self).__init__(*args, **kwargs) 34 | self.authurl = authurl 35 | self.rawuser = user 36 | user_parts = user.split('\\', 1) 37 | self.domain = user_parts[0].upper() 38 | self.user = user_parts[1] 39 | self.pw = pw 40 | 41 | def _new_conn(self): 42 | # Performs the NTLM handshake that secures the connection. The socket 43 | # must be kept open while requests are performed. 44 | self.num_connections += 1 45 | log.debug('Starting NTLM HTTPS connection no. %d: https://%s%s' % 46 | (self.num_connections, self.host, self.authurl)) 47 | 48 | headers = {} 49 | headers['Connection'] = 'Keep-Alive' 50 | req_header = 'Authorization' 51 | resp_header = 'www-authenticate' 52 | 53 | conn = HTTPSConnection(host=self.host, port=self.port) 54 | 55 | # Send negotiation message 56 | headers[req_header] = ( 57 | 'NTLM %s' % ntlm.create_NTLM_NEGOTIATE_MESSAGE(self.rawuser)) 58 | log.debug('Request headers: %s' % headers) 59 | conn.request('GET', self.authurl, None, headers) 60 | res = conn.getresponse() 61 | reshdr = dict(res.getheaders()) 62 | log.debug('Response status: %s %s' % (res.status, res.reason)) 63 | log.debug('Response headers: %s' % reshdr) 64 | log.debug('Response data: %s [...]' % res.read(100)) 65 | 66 | # Remove the reference to the socket, so that it can not be closed by 67 | # the response object (we want to keep the socket open) 68 | res.fp = None 69 | 70 | # Server should respond with a challenge message 71 | auth_header_values = reshdr[resp_header].split(', ') 72 | auth_header_value = None 73 | for s in auth_header_values: 74 | if s[:5] == 'NTLM ': 75 | auth_header_value = s[5:] 76 | if auth_header_value is None: 77 | raise Exception('Unexpected %s response header: %s' % 78 | (resp_header, reshdr[resp_header])) 79 | 80 | # Send authentication message 81 | ServerChallenge, NegotiateFlags = \ 82 | ntlm.parse_NTLM_CHALLENGE_MESSAGE(auth_header_value) 83 | auth_msg = ntlm.create_NTLM_AUTHENTICATE_MESSAGE(ServerChallenge, 84 | self.user, 85 | self.domain, 86 | self.pw, 87 | NegotiateFlags) 88 | headers[req_header] = 'NTLM %s' % auth_msg 89 | log.debug('Request headers: %s' % headers) 90 | conn.request('GET', self.authurl, None, headers) 91 | res = conn.getresponse() 92 | log.debug('Response status: %s %s' % (res.status, res.reason)) 93 | log.debug('Response headers: %s' % dict(res.getheaders())) 94 | log.debug('Response data: %s [...]' % res.read()[:100]) 95 | if res.status != 200: 96 | if res.status == 401: 97 | raise Exception('Server rejected request: wrong ' 98 | 'username or password') 99 | raise Exception('Wrong server response: %s %s' % 100 | (res.status, res.reason)) 101 | 102 | res.fp = None 103 | log.debug('Connection established') 104 | return conn 105 | 106 | def urlopen(self, method, url, body=None, headers=None, retries=3, 107 | redirect=True, assert_same_host=True): 108 | if headers is None: 109 | headers = {} 110 | headers['Connection'] = 'Keep-Alive' 111 | return super(NTLMConnectionPool, self).urlopen(method, url, body, 112 | headers, retries, 113 | redirect, 114 | assert_same_host) 115 | -------------------------------------------------------------------------------- /thirdparty/requests/packages/urllib3/exceptions.py: -------------------------------------------------------------------------------- 1 | 2 | ## Base Exceptions 3 | 4 | class HTTPError(Exception): 5 | "Base exception used by this module." 6 | pass 7 | 8 | class HTTPWarning(Warning): 9 | "Base warning used by this module." 10 | pass 11 | 12 | 13 | 14 | class PoolError(HTTPError): 15 | "Base exception for errors caused within a pool." 16 | def __init__(self, pool, message): 17 | self.pool = pool 18 | HTTPError.__init__(self, "%s: %s" % (pool, message)) 19 | 20 | def __reduce__(self): 21 | # For pickling purposes. 22 | return self.__class__, (None, None) 23 | 24 | 25 | class RequestError(PoolError): 26 | "Base exception for PoolErrors that have associated URLs." 27 | def __init__(self, pool, url, message): 28 | self.url = url 29 | PoolError.__init__(self, pool, message) 30 | 31 | def __reduce__(self): 32 | # For pickling purposes. 33 | return self.__class__, (None, self.url, None) 34 | 35 | 36 | class SSLError(HTTPError): 37 | "Raised when SSL certificate fails in an HTTPS connection." 38 | pass 39 | 40 | 41 | class ProxyError(HTTPError): 42 | "Raised when the connection to a proxy fails." 43 | pass 44 | 45 | 46 | class DecodeError(HTTPError): 47 | "Raised when automatic decoding based on Content-Type fails." 48 | pass 49 | 50 | 51 | class ProtocolError(HTTPError): 52 | "Raised when something unexpected happens mid-request/response." 53 | pass 54 | 55 | 56 | #: Renamed to ProtocolError but aliased for backwards compatibility. 57 | ConnectionError = ProtocolError 58 | 59 | 60 | ## Leaf Exceptions 61 | 62 | class MaxRetryError(RequestError): 63 | """Raised when the maximum number of retries is exceeded. 64 | 65 | :param pool: The connection pool 66 | :type pool: :class:`~urllib3.connectionpool.HTTPConnectionPool` 67 | :param string url: The requested Url 68 | :param exceptions.Exception reason: The underlying error 69 | 70 | """ 71 | 72 | def __init__(self, pool, url, reason=None): 73 | self.reason = reason 74 | 75 | message = "Max retries exceeded with url: %s (Caused by %r)" % ( 76 | url, reason) 77 | 78 | RequestError.__init__(self, pool, url, message) 79 | 80 | 81 | class HostChangedError(RequestError): 82 | "Raised when an existing pool gets a request for a foreign host." 83 | 84 | def __init__(self, pool, url, retries=3): 85 | message = "Tried to open a foreign host with url: %s" % url 86 | RequestError.__init__(self, pool, url, message) 87 | self.retries = retries 88 | 89 | 90 | class TimeoutStateError(HTTPError): 91 | """ Raised when passing an invalid state to a timeout """ 92 | pass 93 | 94 | 95 | class TimeoutError(HTTPError): 96 | """ Raised when a socket timeout error occurs. 97 | 98 | Catching this error will catch both :exc:`ReadTimeoutErrors 99 | ` and :exc:`ConnectTimeoutErrors `. 100 | """ 101 | pass 102 | 103 | 104 | class ReadTimeoutError(TimeoutError, RequestError): 105 | "Raised when a socket timeout occurs while receiving data from a server" 106 | pass 107 | 108 | 109 | # This timeout error does not have a URL attached and needs to inherit from the 110 | # base HTTPError 111 | class ConnectTimeoutError(TimeoutError): 112 | "Raised when a socket timeout occurs while connecting to a server" 113 | pass 114 | 115 | 116 | class EmptyPoolError(PoolError): 117 | "Raised when a pool runs out of connections and no more are allowed." 118 | pass 119 | 120 | 121 | class ClosedPoolError(PoolError): 122 | "Raised when a request enters a pool after the pool has been closed." 123 | pass 124 | 125 | 126 | class LocationValueError(ValueError, HTTPError): 127 | "Raised when there is something wrong with a given URL input." 128 | pass 129 | 130 | 131 | class LocationParseError(LocationValueError): 132 | "Raised when get_host or similar fails to parse the URL input." 133 | 134 | def __init__(self, location): 135 | message = "Failed to parse: %s" % location 136 | HTTPError.__init__(self, message) 137 | 138 | self.location = location 139 | 140 | 141 | class ResponseError(HTTPError): 142 | "Used as a container for an error reason supplied in a MaxRetryError." 143 | GENERIC_ERROR = 'too many error responses' 144 | SPECIFIC_ERROR = 'too many {status_code} error responses' 145 | 146 | 147 | class SecurityWarning(HTTPWarning): 148 | "Warned when perfoming security reducing actions" 149 | pass 150 | 151 | 152 | class InsecureRequestWarning(SecurityWarning): 153 | "Warned when making an unverified HTTPS request." 154 | pass 155 | 156 | 157 | class SystemTimeWarning(SecurityWarning): 158 | "Warned when system time is suspected to be wrong" 159 | pass 160 | 161 | 162 | class InsecurePlatformWarning(SecurityWarning): 163 | "Warned when certain SSL configuration is not available on a platform." 164 | pass 165 | 166 | 167 | class ResponseNotChunked(ProtocolError, ValueError): 168 | "Response needs to be chunked in order to read it as chunks." 169 | pass 170 | -------------------------------------------------------------------------------- /thirdparty/requests/packages/chardet/sbcharsetprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # 13 | # This library is free software; you can redistribute it and/or 14 | # modify it under the terms of the GNU Lesser General Public 15 | # License as published by the Free Software Foundation; either 16 | # version 2.1 of the License, or (at your option) any later version. 17 | # 18 | # This library is distributed in the hope that it will be useful, 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 | # Lesser General Public License for more details. 22 | # 23 | # You should have received a copy of the GNU Lesser General Public 24 | # License along with this library; if not, write to the Free Software 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 26 | # 02110-1301 USA 27 | ######################### END LICENSE BLOCK ######################### 28 | 29 | import sys 30 | from . import constants 31 | from .charsetprober import CharSetProber 32 | from .compat import wrap_ord 33 | 34 | SAMPLE_SIZE = 64 35 | SB_ENOUGH_REL_THRESHOLD = 1024 36 | POSITIVE_SHORTCUT_THRESHOLD = 0.95 37 | NEGATIVE_SHORTCUT_THRESHOLD = 0.05 38 | SYMBOL_CAT_ORDER = 250 39 | NUMBER_OF_SEQ_CAT = 4 40 | POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1 41 | #NEGATIVE_CAT = 0 42 | 43 | 44 | class SingleByteCharSetProber(CharSetProber): 45 | def __init__(self, model, reversed=False, nameProber=None): 46 | CharSetProber.__init__(self) 47 | self._mModel = model 48 | # TRUE if we need to reverse every pair in the model lookup 49 | self._mReversed = reversed 50 | # Optional auxiliary prober for name decision 51 | self._mNameProber = nameProber 52 | self.reset() 53 | 54 | def reset(self): 55 | CharSetProber.reset(self) 56 | # char order of last character 57 | self._mLastOrder = 255 58 | self._mSeqCounters = [0] * NUMBER_OF_SEQ_CAT 59 | self._mTotalSeqs = 0 60 | self._mTotalChar = 0 61 | # characters that fall in our sampling range 62 | self._mFreqChar = 0 63 | 64 | def get_charset_name(self): 65 | if self._mNameProber: 66 | return self._mNameProber.get_charset_name() 67 | else: 68 | return self._mModel['charsetName'] 69 | 70 | def feed(self, aBuf): 71 | if not self._mModel['keepEnglishLetter']: 72 | aBuf = self.filter_without_english_letters(aBuf) 73 | aLen = len(aBuf) 74 | if not aLen: 75 | return self.get_state() 76 | for c in aBuf: 77 | order = self._mModel['charToOrderMap'][wrap_ord(c)] 78 | if order < SYMBOL_CAT_ORDER: 79 | self._mTotalChar += 1 80 | if order < SAMPLE_SIZE: 81 | self._mFreqChar += 1 82 | if self._mLastOrder < SAMPLE_SIZE: 83 | self._mTotalSeqs += 1 84 | if not self._mReversed: 85 | i = (self._mLastOrder * SAMPLE_SIZE) + order 86 | model = self._mModel['precedenceMatrix'][i] 87 | else: # reverse the order of the letters in the lookup 88 | i = (order * SAMPLE_SIZE) + self._mLastOrder 89 | model = self._mModel['precedenceMatrix'][i] 90 | self._mSeqCounters[model] += 1 91 | self._mLastOrder = order 92 | 93 | if self.get_state() == constants.eDetecting: 94 | if self._mTotalSeqs > SB_ENOUGH_REL_THRESHOLD: 95 | cf = self.get_confidence() 96 | if cf > POSITIVE_SHORTCUT_THRESHOLD: 97 | if constants._debug: 98 | sys.stderr.write('%s confidence = %s, we have a' 99 | 'winner\n' % 100 | (self._mModel['charsetName'], cf)) 101 | self._mState = constants.eFoundIt 102 | elif cf < NEGATIVE_SHORTCUT_THRESHOLD: 103 | if constants._debug: 104 | sys.stderr.write('%s confidence = %s, below negative' 105 | 'shortcut threshhold %s\n' % 106 | (self._mModel['charsetName'], cf, 107 | NEGATIVE_SHORTCUT_THRESHOLD)) 108 | self._mState = constants.eNotMe 109 | 110 | return self.get_state() 111 | 112 | def get_confidence(self): 113 | r = 0.01 114 | if self._mTotalSeqs > 0: 115 | r = ((1.0 * self._mSeqCounters[POSITIVE_CAT]) / self._mTotalSeqs 116 | / self._mModel['mTypicalPositiveRatio']) 117 | r = r * self._mFreqChar / self._mTotalChar 118 | if r >= 1.0: 119 | r = 0.99 120 | return r 121 | -------------------------------------------------------------------------------- /lib/core/Dictionary.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This program is free software; you can redistribute it and/or modify 3 | # it under the terms of the GNU General Public License as published by 4 | # the Free Software Foundation; either version 2 of the License, or 5 | # (at your option) any later version. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU General Public License 13 | # along with this program; if not, write to the Free Software 14 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 15 | # MA 02110-1301, USA. 16 | # 17 | # Author: Mauro Soria 18 | 19 | import threading 20 | 21 | import urllib.error 22 | import urllib.parse 23 | import urllib.request 24 | 25 | from lib.utils.FileUtils import File 26 | from thirdparty.oset import * 27 | 28 | 29 | class Dictionary(object): 30 | 31 | def __init__(self, path, extensions, lowercase=False, forcedExtensions=False): 32 | self.entries = [] 33 | self.currentIndex = 0 34 | self.condition = threading.Lock() 35 | self._extensions = extensions 36 | self._path = path 37 | self._forcedExtensions = forcedExtensions 38 | self.lowercase = lowercase 39 | self.dictionaryFile = File(self.path) 40 | self.generate() 41 | 42 | @property 43 | def extensions(self): 44 | return self._extensions 45 | 46 | @extensions.setter 47 | def extensions(self, value): 48 | self._extensions = value 49 | 50 | @property 51 | def path(self): 52 | return self._path 53 | 54 | @path.setter 55 | def path(self, path): 56 | self._path = path 57 | 58 | @classmethod 59 | def quote(cls, string): 60 | return urllib.parse.quote(string, safe=":/~?%&+-=$") 61 | 62 | """ 63 | Dictionary.generate() behaviour 64 | 65 | Classic dirsearch wordlist: 66 | 1. If %EXT% keyword is present, append one with each extension REPLACED. 67 | 2. If the special word is no present, append line unmodified. 68 | 69 | Forced extensions wordlist (NEW): 70 | This type of wordlist processing is a mix between classic processing 71 | and DirBuster processing. 72 | 1. If %EXT% keyword is present in the line, immediately process as "classic dirsearch" (1). 73 | 2. If the line does not include the special word AND is NOT terminated by a slash, 74 | append one with each extension APPENDED (line.ext) and ONLYE ONE with a slash. 75 | 3. If the line does not include the special word and IS ALREADY terminated by slash, 76 | append line unmodified. 77 | """ 78 | 79 | def generate(self): 80 | result = [] 81 | for line in self.dictionaryFile.getLines(): 82 | 83 | # Skip comments 84 | if line.lstrip().startswith("#"): 85 | continue 86 | 87 | # Classic dirsearch wordlist processing (with %EXT% keyword) 88 | if '%EXT%' in line or '%ext%' in line: 89 | for extension in self._extensions: 90 | if '%EXT%' in line: 91 | newline = line.replace('%EXT%', extension) 92 | 93 | if '%ext%' in line: 94 | newline = line.replace('%ext%', extension) 95 | 96 | quote = self.quote(newline) 97 | result.append(quote) 98 | 99 | # If forced extensions is used and the path is not a directory ... (terminated by /) 100 | # process line like a forced extension. 101 | elif self._forcedExtensions and not line.rstrip().endswith("/"): 102 | quoted = self.quote(line) 103 | 104 | for extension in self._extensions: 105 | # Why? check https://github.com/maurosoria/dirsearch/issues/70 106 | if extension.strip() == '': 107 | result.append(quoted) 108 | else: 109 | result.append(quoted + '.' + extension) 110 | 111 | if quoted.strip() not in ['']: 112 | result.append(quoted + "/") 113 | 114 | # Append line unmodified. 115 | else: 116 | result.append(self.quote(line)) 117 | 118 | # oset library provides inserted ordered and unique collection. 119 | if self.lowercase: 120 | self.entries = list(oset(map(lambda l: l.lower(), result))) 121 | 122 | else: 123 | self.entries = list(oset(result)) 124 | 125 | del (result) 126 | 127 | def regenerate(self): 128 | self.generate(lowercase=self.lowercase) 129 | self.reset() 130 | 131 | def nextWithIndex(self, basePath=None): 132 | self.condition.acquire() 133 | 134 | try: 135 | result = self.entries[self.currentIndex] 136 | 137 | except IndexError: 138 | self.condition.release() 139 | raise StopIteration 140 | 141 | self.currentIndex = self.currentIndex + 1 142 | currentIndex = self.currentIndex 143 | self.condition.release() 144 | return currentIndex, result 145 | 146 | def __next__(self, basePath=None): 147 | _, path = self.nextWithIndex(basePath) 148 | return path 149 | 150 | def reset(self): 151 | self.condition.acquire() 152 | self.currentIndex = 0 153 | self.condition.release() 154 | 155 | def __len__(self): 156 | return len(self.entries) 157 | -------------------------------------------------------------------------------- /thirdparty/chardet/charsetprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # 13 | # This library is free software; you can redistribute it and/or 14 | # modify it under the terms of the GNU Lesser General Public 15 | # License as published by the Free Software Foundation; either 16 | # version 2.1 of the License, or (at your option) any later version. 17 | # 18 | # This library is distributed in the hope that it will be useful, 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 | # Lesser General Public License for more details. 22 | # 23 | # You should have received a copy of the GNU Lesser General Public 24 | # License along with this library; if not, write to the Free Software 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 26 | # 02110-1301 USA 27 | ######################### END LICENSE BLOCK ######################### 28 | 29 | import logging 30 | import re 31 | 32 | from .enums import ProbingState 33 | 34 | 35 | class CharSetProber(object): 36 | 37 | SHORTCUT_THRESHOLD = 0.95 38 | 39 | def __init__(self, lang_filter=None): 40 | self._state = None 41 | self.lang_filter = lang_filter 42 | self.logger = logging.getLogger(__name__) 43 | 44 | def reset(self): 45 | self._state = ProbingState.DETECTING 46 | 47 | @property 48 | def charset_name(self): 49 | return None 50 | 51 | def feed(self, buf): 52 | pass 53 | 54 | @property 55 | def state(self): 56 | return self._state 57 | 58 | def get_confidence(self): 59 | return 0.0 60 | 61 | @staticmethod 62 | def filter_high_byte_only(buf): 63 | buf = re.sub(b'([\x00-\x7F])+', b' ', buf) 64 | return buf 65 | 66 | @staticmethod 67 | def filter_international_words(buf): 68 | """ 69 | We define three types of bytes: 70 | alphabet: english alphabets [a-zA-Z] 71 | international: international characters [\x80-\xFF] 72 | marker: everything else [^a-zA-Z\x80-\xFF] 73 | 74 | The input buffer can be thought to contain a series of words delimited 75 | by markers. This function works to filter all words that contain at 76 | least one international character. All contiguous sequences of markers 77 | are replaced by a single space ascii character. 78 | 79 | This filter applies to all scripts which do not use English characters. 80 | """ 81 | filtered = bytearray() 82 | 83 | # This regex expression filters out only words that have at-least one 84 | # international character. The word may include one marker character at 85 | # the end. 86 | words = re.findall(b'[a-zA-Z]*[\x80-\xFF]+[a-zA-Z]*[^a-zA-Z\x80-\xFF]?', 87 | buf) 88 | 89 | for word in words: 90 | filtered.extend(word[:-1]) 91 | 92 | # If the last character in the word is a marker, replace it with a 93 | # space as markers shouldn't affect our analysis (they are used 94 | # similarly across all languages and may thus have similar 95 | # frequencies). 96 | last_char = word[-1:] 97 | if not last_char.isalpha() and last_char < b'\x80': 98 | last_char = b' ' 99 | filtered.extend(last_char) 100 | 101 | return filtered 102 | 103 | @staticmethod 104 | def filter_with_english_letters(buf): 105 | """ 106 | Returns a copy of ``buf`` that retains only the sequences of English 107 | alphabet and high byte characters that are not between <> characters. 108 | Also retains English alphabet and high byte characters immediately 109 | before occurrences of >. 110 | 111 | This filter can be applied to all scripts which contain both English 112 | characters and extended ASCII characters, but is currently only used by 113 | ``Latin1Prober``. 114 | """ 115 | filtered = bytearray() 116 | in_tag = False 117 | prev = 0 118 | 119 | for curr in range(len(buf)): 120 | # Slice here to get bytes instead of an int with Python 3 121 | buf_char = buf[curr:curr + 1] 122 | # Check if we're coming out of or entering an HTML tag 123 | if buf_char == b'>': 124 | in_tag = False 125 | elif buf_char == b'<': 126 | in_tag = True 127 | 128 | # If current character is not extended-ASCII and not alphabetic... 129 | if buf_char < b'\x80' and not buf_char.isalpha(): 130 | # ...and we're not in a tag 131 | if curr > prev and not in_tag: 132 | # Keep everything after last non-extended-ASCII, 133 | # non-alphabetic character 134 | filtered.extend(buf[prev:curr]) 135 | # Output a space to delimit stretch we kept 136 | filtered.extend(b' ') 137 | prev = curr + 1 138 | 139 | # If we're not in a tag... 140 | if not in_tag: 141 | # Keep everything after last non-extended-ASCII, non-alphabetic 142 | # character 143 | filtered.extend(buf[prev:]) 144 | 145 | return filtered 146 | -------------------------------------------------------------------------------- /thirdparty/requests/packages/chardet/latin1prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # 13 | # This library is free software; you can redistribute it and/or 14 | # modify it under the terms of the GNU Lesser General Public 15 | # License as published by the Free Software Foundation; either 16 | # version 2.1 of the License, or (at your option) any later version. 17 | # 18 | # This library is distributed in the hope that it will be useful, 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 | # Lesser General Public License for more details. 22 | # 23 | # You should have received a copy of the GNU Lesser General Public 24 | # License along with this library; if not, write to the Free Software 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 26 | # 02110-1301 USA 27 | ######################### END LICENSE BLOCK ######################### 28 | 29 | from .charsetprober import CharSetProber 30 | from .constants import eNotMe 31 | from .compat import wrap_ord 32 | 33 | FREQ_CAT_NUM = 4 34 | 35 | UDF = 0 # undefined 36 | OTH = 1 # other 37 | ASC = 2 # ascii capital letter 38 | ASS = 3 # ascii small letter 39 | ACV = 4 # accent capital vowel 40 | ACO = 5 # accent capital other 41 | ASV = 6 # accent small vowel 42 | ASO = 7 # accent small other 43 | CLASS_NUM = 8 # total classes 44 | 45 | Latin1_CharToClass = ( 46 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 00 - 07 47 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 08 - 0F 48 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 10 - 17 49 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 18 - 1F 50 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 20 - 27 51 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 28 - 2F 52 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 30 - 37 53 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 38 - 3F 54 | OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 40 - 47 55 | ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 48 - 4F 56 | ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 50 - 57 57 | ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, # 58 - 5F 58 | OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 60 - 67 59 | ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 68 - 6F 60 | ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 70 - 77 61 | ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, # 78 - 7F 62 | OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH, # 80 - 87 63 | OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF, # 88 - 8F 64 | UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 90 - 97 65 | OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO, # 98 - 9F 66 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A0 - A7 67 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A8 - AF 68 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B0 - B7 69 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B8 - BF 70 | ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO, # C0 - C7 71 | ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, # C8 - CF 72 | ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH, # D0 - D7 73 | ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO, # D8 - DF 74 | ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO, # E0 - E7 75 | ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, # E8 - EF 76 | ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH, # F0 - F7 77 | ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, # F8 - FF 78 | ) 79 | 80 | # 0 : illegal 81 | # 1 : very unlikely 82 | # 2 : normal 83 | # 3 : very likely 84 | Latin1ClassModel = ( 85 | # UDF OTH ASC ASS ACV ACO ASV ASO 86 | 0, 0, 0, 0, 0, 0, 0, 0, # UDF 87 | 0, 3, 3, 3, 3, 3, 3, 3, # OTH 88 | 0, 3, 3, 3, 3, 3, 3, 3, # ASC 89 | 0, 3, 3, 3, 1, 1, 3, 3, # ASS 90 | 0, 3, 3, 3, 1, 2, 1, 2, # ACV 91 | 0, 3, 3, 3, 3, 3, 3, 3, # ACO 92 | 0, 3, 1, 3, 1, 1, 1, 3, # ASV 93 | 0, 3, 1, 3, 1, 1, 3, 3, # ASO 94 | ) 95 | 96 | 97 | class Latin1Prober(CharSetProber): 98 | def __init__(self): 99 | CharSetProber.__init__(self) 100 | self.reset() 101 | 102 | def reset(self): 103 | self._mLastCharClass = OTH 104 | self._mFreqCounter = [0] * FREQ_CAT_NUM 105 | CharSetProber.reset(self) 106 | 107 | def get_charset_name(self): 108 | return "windows-1252" 109 | 110 | def feed(self, aBuf): 111 | aBuf = self.filter_with_english_letters(aBuf) 112 | for c in aBuf: 113 | charClass = Latin1_CharToClass[wrap_ord(c)] 114 | freq = Latin1ClassModel[(self._mLastCharClass * CLASS_NUM) 115 | + charClass] 116 | if freq == 0: 117 | self._mState = eNotMe 118 | break 119 | self._mFreqCounter[freq] += 1 120 | self._mLastCharClass = charClass 121 | 122 | return self.get_state() 123 | 124 | def get_confidence(self): 125 | if self.get_state() == eNotMe: 126 | return 0.01 127 | 128 | total = sum(self._mFreqCounter) 129 | if total < 0.01: 130 | confidence = 0.0 131 | else: 132 | confidence = ((self._mFreqCounter[3] - self._mFreqCounter[1] * 20.0) 133 | / total) 134 | if confidence < 0.0: 135 | confidence = 0.0 136 | # lower the confidence of latin1 so that other more accurate 137 | # detector can take priority. 138 | confidence = confidence * 0.73 139 | return confidence 140 | --------------------------------------------------------------------------------