├── .gitignore ├── README.md ├── config.conf ├── dicts ├── web_dir.dic └── web_path.dic ├── lib ├── __init__.py ├── common │ ├── __init__.py │ ├── fuzzy_string_cmp.py │ ├── myexception.py │ ├── myfile.py │ ├── output.py │ ├── terminalsize.py │ └── util.py ├── controller │ ├── __init__.py │ ├── controller.py │ └── scanner.py ├── core │ ├── __init__.py │ ├── argument.py │ └── webscan.py └── net │ ├── __init__.py │ ├── myrequests.py │ └── myresponse.py ├── logs └── __init__.py ├── result └── __init__.py ├── thirdparty_libs ├── __init__.py ├── chardet │ ├── __init__.py │ ├── big5freq.py │ ├── big5prober.py │ ├── chardistribution.py │ ├── charsetgroupprober.py │ ├── charsetprober.py │ ├── cli │ │ ├── __init__.py │ │ └── chardetect.py │ ├── codingstatemachine.py │ ├── compat.py │ ├── cp949prober.py │ ├── enums.py │ ├── escprober.py │ ├── escsm.py │ ├── eucjpprober.py │ ├── euckrfreq.py │ ├── euckrprober.py │ ├── euctwfreq.py │ ├── euctwprober.py │ ├── gb2312freq.py │ ├── gb2312prober.py │ ├── hebrewprober.py │ ├── jisfreq.py │ ├── jpcntx.py │ ├── langbulgarianmodel.py │ ├── langcyrillicmodel.py │ ├── langgreekmodel.py │ ├── langhebrewmodel.py │ ├── langhungarianmodel.py │ ├── langthaimodel.py │ ├── langturkishmodel.py │ ├── latin1prober.py │ ├── mbcharsetprober.py │ ├── mbcsgroupprober.py │ ├── mbcssm.py │ ├── sbcharsetprober.py │ ├── sbcsgroupprober.py │ ├── sjisprober.py │ ├── universaldetector.py │ ├── utf8prober.py │ └── version.py ├── colorama │ ├── __init__.py │ ├── ansi.py │ ├── ansitowin32.py │ ├── initialise.py │ ├── win32.py │ └── winterm.py └── requests │ ├── __init__.py │ ├── adapters.py │ ├── api.py │ ├── auth.py │ ├── cacert.pem │ ├── certs.py │ ├── compat.py │ ├── cookies.py │ ├── exceptions.py │ ├── hooks.py │ ├── models.py │ ├── packages │ ├── __init__.py │ ├── charade │ │ ├── __init__.py │ │ ├── big5freq.py │ │ ├── big5prober.py │ │ ├── chardistribution.py │ │ ├── charsetgroupprober.py │ │ ├── charsetprober.py │ │ ├── codingstatemachine.py │ │ ├── compat.py │ │ ├── constants.py │ │ ├── cp949prober.py │ │ ├── escprober.py │ │ ├── escsm.py │ │ ├── eucjpprober.py │ │ ├── euckrfreq.py │ │ ├── euckrprober.py │ │ ├── euctwfreq.py │ │ ├── euctwprober.py │ │ ├── gb2312freq.py │ │ ├── gb2312prober.py │ │ ├── hebrewprober.py │ │ ├── jisfreq.py │ │ ├── jpcntx.py │ │ ├── langbulgarianmodel.py │ │ ├── langcyrillicmodel.py │ │ ├── langgreekmodel.py │ │ ├── langhebrewmodel.py │ │ ├── langhungarianmodel.py │ │ ├── langthaimodel.py │ │ ├── latin1prober.py │ │ ├── mbcharsetprober.py │ │ ├── mbcsgroupprober.py │ │ ├── mbcssm.py │ │ ├── sbcharsetprober.py │ │ ├── sbcsgroupprober.py │ │ ├── sjisprober.py │ │ ├── universaldetector.py │ │ └── utf8prober.py │ └── urllib3 │ │ ├── __init__.py │ │ ├── _collections.py │ │ ├── connectionpool.py │ │ ├── exceptions.py │ │ ├── filepost.py │ │ ├── packages │ │ ├── __init__.py │ │ ├── ordered_dict.py │ │ ├── six.py │ │ └── ssl_match_hostname │ │ │ └── __init__.py │ │ ├── poolmanager.py │ │ ├── request.py │ │ ├── response.py │ │ └── util.py │ ├── sessions.py │ ├── status_codes.py │ ├── structures.py │ └── utils.py ├── unittest ├── __init__.py └── webscan_test.py └── webdirdig.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | #env/ 11 | #bin/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | eggs/ 16 | lib64/ 17 | parts/ 18 | sdist/ 19 | var/ 20 | *.egg-info/ 21 | .installed.cfg 22 | *.egg 23 | #test 24 | temp/ 25 | output/ 26 | 27 | # Installer logs 28 | pip-log.txt 29 | pip-delete-this-directory.txt 30 | 31 | # Unit test / coverage reports 32 | htmlcov/ 33 | .tox/ 34 | .coverage 35 | nosetests.xml 36 | coverage.xml 37 | 38 | # Translations 39 | *.mo 40 | 41 | # Mr Developer 42 | .mr.developer.cfg 43 | .project 44 | .pydevproject 45 | 46 | # Rope 47 | .ropeproject 48 | 49 | # Django stuff: 50 | *.log 51 | *.pot 52 | 53 | # Sphinx documentation 54 | #docs/_build/ 55 | 56 | # CTag 57 | .tags* 58 | 59 | # mac 60 | .DS_Store 61 | 62 | #pycharm 63 | .idea/ 64 | 65 | #pyc 66 | .pyc 67 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | webdirdig 2 | =================================== 3 | web敏感目录\信息泄漏扫描脚本 4 | 5 | Basic usage 6 | =================================== 7 | 8 | ``` 9 | python webdirdig.py http://www.baidu.com 10 | ``` -------------------------------------------------------------------------------- /config.conf: -------------------------------------------------------------------------------- 1 | [dict] 2 | bakdir_exts = ['.zip', '.tar','.rar','.tar.gz','.tar.bz2', '.log'] 3 | bakfile_exts = ['.bak', '.swp', '.1' , '.old'] 4 | web_dic_path = ./dicts/web_dir.dic 5 | path_dic_path = ./dicts/web_path.dic -------------------------------------------------------------------------------- /dicts/web_dir.dic: -------------------------------------------------------------------------------- 1 | .cvs 2 | log 3 | logs 4 | 2014 5 | monitor 6 | invoker 7 | phpmyredis 8 | phpldapadmin 9 | .CVS 10 | zabbix 11 | nagios 12 | 0 13 | 1 14 | 10 15 | 100 16 | pmadb 17 | resin-admin 18 | resin-doc 19 | java 20 | Java 21 | simple 22 | soft 23 | Soft 24 | 2012 25 | 2013 26 | 01 27 | 02 28 | 03 29 | 04 30 | 05 31 | 06 32 | 07 33 | 08 34 | 09 35 | 1 36 | 2 37 | 3 38 | 4 39 | 5 40 | 6 41 | 7 42 | 8 43 | 9 44 | 11 45 | 10 46 | 12 47 | a 48 | A 49 | Add 50 | add 51 | Adm 52 | adm 53 | administrator 54 | admin 55 | Admin 56 | admin_bak 57 | Admin_Bak 58 | Admin_Login 59 | admin_login 60 | admin_user 61 | Admin_user 62 | admin1 63 | Admin1 64 | admin2 65 | Admin2 66 | app 67 | apps 68 | api 69 | archive 70 | archives 71 | article 72 | ajax 73 | BackUp 74 | backup 75 | Bak 76 | bak 77 | boss 78 | Boss 79 | bbs 80 | cache 81 | caches 82 | cacti 83 | Cacti 84 | cgi-bin 85 | check 86 | Check 87 | ctc 88 | classes 89 | convert 90 | console 91 | conf 92 | Conf 93 | config 94 | Config 95 | data 96 | database 97 | dblog 98 | dashboard 99 | developer 100 | dede 101 | download 102 | Edit 103 | edit 104 | Editor 105 | editor 106 | events 107 | eWebEditor 108 | ewebeditor 109 | FCKEditor 110 | fckeditor 111 | FCKeditor 112 | file 113 | File 114 | Files 115 | files 116 | gamelog 117 | graphlot 118 | help 119 | htdocs 120 | HouTai 121 | houtai 122 | houtaiguanli 123 | inc 124 | Inc 125 | include 126 | Include 127 | install 128 | jmx-console 129 | jenkins 130 | kindeditor 131 | listinfo 132 | Local 133 | local 134 | Log 135 | logs 136 | log 137 | login 138 | Login 139 | m 140 | M 141 | main 142 | Main 143 | mailman 144 | mailman 145 | Manage 146 | manage 147 | manager 148 | Manager 149 | manager_login 150 | Manager_Login 151 | master 152 | member 153 | Member 154 | MemberLogin 155 | memberlogin 156 | members 157 | Members 158 | Mgr 159 | mgr 160 | My 161 | my 162 | MyAdmin 163 | myadmin 164 | MySQL 165 | Mysql 166 | mysql 167 | mysqlserver 168 | new 169 | news 170 | news_admin 171 | News_Admin 172 | newsadmin 173 | NewsAdmin 174 | output 175 | phpMyAdmin 176 | phpmyadmin 177 | phpRedisAdmin 178 | PMA 179 | pma 180 | popup 181 | printenv 182 | Q 183 | q 184 | R 185 | r 186 | root 187 | Root 188 | script 189 | Script 190 | scripts 191 | servlet 192 | search 193 | sectool 194 | source 195 | shopadmin 196 | Site 197 | site 198 | sql 199 | Sql 200 | SQL_Manager 201 | SQL_System 202 | SqlConf 203 | sqlconf 204 | sqlmanager 205 | static 206 | status 207 | stats 208 | Sys 209 | sys 210 | sys_login 211 | Sys_Login 212 | T 213 | t 214 | Temp 215 | temp 216 | test 217 | Test 218 | tmp 219 | Tmp 220 | tool 221 | tools 222 | Top 223 | top 224 | Upload 225 | upload_files 226 | upload_images 227 | uploadfiles 228 | UploadFiles 229 | uploadimages 230 | ucenter 231 | update 232 | user 233 | User 234 | user_admin 235 | User_Admin 236 | user_login 237 | User_Login 238 | userfiles 239 | UserFiles 240 | users 241 | Users 242 | uc_server 243 | web 244 | Web 245 | web-console 246 | WEB-INF 247 | Web1 248 | web1 249 | Web2 250 | web2 251 | weblogs 252 | wp-content 253 | wp-admin 254 | wwwlog 255 | wwwlogs 256 | X 257 | x 258 | Xampp 259 | xampp 260 | Y 261 | y 262 | -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env/python 2 | #-*- coding:utf-8 -*- 3 | 4 | __author__ = 'BlackYe.' 5 | -------------------------------------------------------------------------------- /lib/common/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env/python 2 | #-*- coding:utf-8 -*- 3 | 4 | __author__ = 'BlackYe.' 5 | -------------------------------------------------------------------------------- /lib/common/fuzzy_string_cmp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env/python 2 | #-*- coding:utf-8 -*- 3 | 4 | __author__ = 'BlackYe.' 5 | 6 | from difflib import SequenceMatcher 7 | import re 8 | 9 | from thirdparty_libs import chardet 10 | 11 | 12 | class DynamicContentParser: 13 | def __init__(self, requester, path, firstPage, secondPage, comparisons=2): 14 | self.DYNAMICITY_MARK_LENGTH = 32 15 | self.UPPER_RATIO_BOUND = 0.98 16 | self.requester = requester 17 | self.keyCallback = path 18 | self.comparisons = comparisons 19 | self.diff_marks = [] 20 | self.seqMatcher = SequenceMatcher() 21 | self.relative_distance_marks(firstPage, secondPage) 22 | 23 | def relative_distance_marks(self, firstPage, secondPage): 24 | if any(page is None for page in (firstPage, secondPage)): 25 | # No content 26 | return 27 | 28 | self.seqMatcher.set_seq1(firstPage) 29 | self.seqMatcher.set_seq2(secondPage) 30 | ratio = self.seqMatcher.quick_ratio() 31 | # In case of an intolerable difference turn on dynamicity removal engine 32 | if ratio <= self.UPPER_RATIO_BOUND: 33 | self.diff_marks += self.compare_diff(firstPage, secondPage) 34 | for i in range(self.comparisons): 35 | response = self.requester.request(self.keyCallback) 36 | secondPage = response.body 37 | self.diff_marks += self.compare_diff(firstPage, secondPage) 38 | self.cleanPage = self.remove_dynamic_content(firstPage, self.diff_marks) 39 | self.seqMatcher.set_seq1(self.cleanPage) 40 | self.seqMatcher.set_seq2(self.remove_dynamic_content(secondPage, self.diff_marks)) 41 | ratio = self.seqMatcher.quick_ratio() 42 | else: 43 | self.cleanPage = firstPage 44 | self.comparisonRatio = ratio 45 | 46 | def relative_distance(self, page): 47 | seqMatcher = SequenceMatcher() 48 | seqMatcher.set_seq1(self.cleanPage) 49 | seqMatcher.set_seq2(self.remove_dynamic_content(page, self.diff_marks)) 50 | ratio = seqMatcher.quick_ratio() 51 | return ratio 52 | 53 | def compare_diff(self, firstPage, secondPage): 54 | diff_marks = [] 55 | 56 | blocks = list(SequenceMatcher(None, firstPage, secondPage).get_matching_blocks()) 57 | 58 | # Removing too small matching blocks 59 | for block in blocks[:]: 60 | (_, _, length) = block 61 | 62 | if length <= self.DYNAMICITY_MARK_LENGTH: 63 | blocks.remove(block) 64 | 65 | # Making of dynamic markings based on prefix/suffix principle 66 | if len(blocks) > 0: 67 | blocks.insert(0, None) 68 | blocks.append(None) 69 | 70 | for i in range(len(blocks) - 1): 71 | prefix = firstPage[blocks[i][0]:blocks[i][0] + blocks[i][2]] if blocks[i] else None 72 | suffix = firstPage[blocks[i + 1][0]:blocks[i + 1][0] + blocks[i + 1][2]] if blocks[i + 1] else None 73 | 74 | if prefix is None and blocks[i + 1][0] == 0: 75 | continue 76 | 77 | if suffix is None and (blocks[i][0] + blocks[i][2] >= len(firstPage)): 78 | continue 79 | 80 | diff_marks.append((re.escape(prefix[int(-self.DYNAMICITY_MARK_LENGTH / 2):]) if prefix else None, 81 | re.escape(suffix[:int(self.DYNAMICITY_MARK_LENGTH / 2)]) if suffix else None)) 82 | 83 | return diff_marks 84 | 85 | def remove_dynamic_content(self, page, diff_marks): 86 | """ 87 | Removing dynamic content from supplied page basing removal on 88 | precalculated dynamic markings 89 | """ 90 | if page: 91 | encoding = chardet.detect(page)['encoding'] 92 | page = page.decode(encoding, errors='replace') 93 | for item in diff_marks: 94 | prefix, suffix = item 95 | if prefix is not None: 96 | prefix = prefix.decode(encoding, errors='replace') 97 | if suffix is not None: 98 | suffix = suffix.decode(encoding, errors='replace') 99 | 100 | if prefix is None and suffix is None: 101 | continue 102 | elif prefix is None: 103 | page = re.sub(r'(?s)^.+{0}'.format(re.escape(suffix)), suffix.replace('\\', r'\\'), page) 104 | elif suffix is None: 105 | page = re.sub(r'(?s){0}.+$'.format(re.escape(prefix)), prefix.replace('\\', r'\\'), page) 106 | else: 107 | page = re.sub(r'(?s){0}.+{1}'.format(re.escape(prefix), re.escape(suffix)), "{0}{1}".format(prefix.replace('\\', r'\\'), suffix.replace('\\', r'\\')), page) 108 | 109 | 110 | return page 111 | -------------------------------------------------------------------------------- /lib/common/myexception.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env/python 2 | #-*- coding:utf-8 -*- 3 | 4 | __author__ = 'BlackYe.' 5 | 6 | class RequestException(Exception): 7 | pass 8 | 9 | class SkipTargetInterrupt(Exception): 10 | pass -------------------------------------------------------------------------------- /lib/common/myfile.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # This program is free software; you can redistribute it and/or modify 3 | # it under the terms of the GNU General Public License as published by 4 | # the Free Software Foundation; either version 2 of the License, or 5 | # (at your option) any later version. 6 | # 7 | # This program is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | # GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU General Public License 13 | # along with this program; if not, write to the Free Software 14 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 15 | # MA 02110-1301, USA. 16 | # 17 | # Author: Mauro Soria 18 | 19 | import os 20 | import os.path 21 | 22 | 23 | class File(object): 24 | def __init__(self, *pathComponents): 25 | self._path = FileUtils.buildPath(*pathComponents) 26 | self.content = None 27 | 28 | @property 29 | def path(self): 30 | return self._path 31 | 32 | @path.setter 33 | def path(self, value): 34 | raise NotImplemented 35 | 36 | def isValid(self): 37 | return FileUtils.isFile(self.path) 38 | 39 | def exists(self): 40 | return FileUtils.exists(self.path) 41 | 42 | def canRead(self): 43 | return FileUtils.canRead(self.path) 44 | 45 | def canWrite(self): 46 | return FileUtils.canWrite(self.path) 47 | 48 | def read(self): 49 | return FileUtils.read(self.path) 50 | 51 | def update(self): 52 | self.content = self.read() 53 | 54 | def content(self): 55 | if not self.content: 56 | self.content = FileUtils.read() 57 | return self.content() 58 | 59 | def getLines(self): 60 | for line in FileUtils.getLines(self.path): 61 | yield line 62 | 63 | def __cmp__(self, other): 64 | if not isinstance(other, File): 65 | raise NotImplemented 66 | return cmp(self.content(), other.content()) 67 | 68 | def __enter__(self): 69 | return self 70 | 71 | def __exit__(self, type, value, tb): 72 | pass 73 | 74 | 75 | class FileUtils(object): 76 | @staticmethod 77 | def buildPath(*pathComponents): 78 | if pathComponents: 79 | path = os.path.join(*pathComponents) 80 | else: 81 | path = '' 82 | return path 83 | 84 | @staticmethod 85 | def exists(fileName): 86 | return os.access(fileName, os.F_OK) 87 | 88 | @staticmethod 89 | def canRead(fileName): 90 | if not os.access(fileName, os.R_OK): 91 | return False 92 | try: 93 | with open(fileName): 94 | pass 95 | except IOError: 96 | return False 97 | return True 98 | 99 | @staticmethod 100 | def canWrite(fileName): 101 | return os.access(fileName, os.W_OK) 102 | 103 | @staticmethod 104 | def read(fileName): 105 | result = '' 106 | with open(fileName, 'r') as fd: 107 | for line in fd.readlines(): 108 | result += line 109 | return result 110 | 111 | @staticmethod 112 | def getLines(fileName): 113 | with open(fileName, 'r', errors="replace") as fd: 114 | return fd.read().splitlines() 115 | 116 | @staticmethod 117 | def isDir(fileName): 118 | return os.path.isdir(fileName) 119 | 120 | @staticmethod 121 | def isFile(fileName): 122 | return os.path.isfile(fileName) 123 | 124 | @staticmethod 125 | def createDirectory(directory): 126 | if not FileUtils.exists(directory): 127 | os.makedirs(directory) 128 | 129 | @staticmethod 130 | def sizeHuman(num): 131 | base = 1024 132 | for x in ['B ', 'KB', 'MB', 'GB']: 133 | if num < base and num > -base: 134 | return "%3.0f%s" % (num, x) 135 | num /= base 136 | return "%3.0f %s" % (num, 'TB') 137 | 138 | @staticmethod 139 | def writeLines(fileName, lines): 140 | content = None 141 | if type(lines) is list: 142 | content = "\n".join(lines) 143 | else: 144 | content = lines 145 | with open(fileName, "w") as f: 146 | f.writelines(content) 147 | -------------------------------------------------------------------------------- /lib/common/output.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env/python 2 | #-*- coding:utf-8 -*- 3 | 4 | __author__ = 'BlackYe.' 5 | 6 | import threading 7 | import time 8 | import sys 9 | import platform 10 | from urlparse import urljoin 11 | 12 | from lib.common.myfile import * 13 | from thirdparty_libs.colorama import * 14 | from lib.common.terminalsize import get_terminal_size 15 | 16 | if platform.system() == 'Windows': 17 | from thirdparty_libs.colorama.win32 import * 18 | 19 | 20 | class ConsoleOutput(object): 21 | def __init__(self): 22 | init() 23 | self.lastLength = 0 24 | self.lastOutput = '' 25 | self.lastInLine = False 26 | self.mutex = threading.Lock() 27 | self.blacklists = {} 28 | self.mutexCheckedPaths = threading.Lock() 29 | self.basePath = None 30 | self.errors = 0 31 | 32 | def inLine(self, string): 33 | self.erase() 34 | sys.stdout.write(string) 35 | sys.stdout.flush() 36 | self.lastInLine = True 37 | 38 | def erase(self): 39 | if platform.system() == 'Windows': 40 | csbi = GetConsoleScreenBufferInfo() 41 | line = "\b" * int(csbi.dwCursorPosition.X) 42 | sys.stdout.write(line) 43 | width = csbi.dwCursorPosition.X 44 | csbi.dwCursorPosition.X = 0 45 | FillConsoleOutputCharacter(STDOUT, ' ', width, csbi.dwCursorPosition) 46 | sys.stdout.write(line) 47 | sys.stdout.flush() 48 | else: 49 | sys.stdout.write('\033[1K') 50 | sys.stdout.write('\033[0G') 51 | 52 | def newLine(self, string): 53 | if self.lastInLine == True: 54 | self.erase() 55 | if platform.system() == 'Windows': 56 | sys.stdout.write(string) 57 | sys.stdout.flush() 58 | sys.stdout.write('\n') 59 | sys.stdout.flush() 60 | else: 61 | sys.stdout.write(string + '\n') 62 | sys.stdout.flush() 63 | self.lastInLine = False 64 | sys.stdout.flush() 65 | 66 | def statusReport(self, path, response): 67 | with self.mutex: 68 | contentLength = None 69 | status = response.status 70 | 71 | # Check blacklist 72 | if status in self.blacklists and path in self.blacklists[status]: 73 | return 74 | 75 | # Format message 76 | try: 77 | size = int(response.headers['content-length']) 78 | except (KeyError, ValueError): 79 | size = len(response.body) 80 | finally: 81 | contentLength = FileUtils.sizeHuman(size) 82 | 83 | if self.basePath is None: 84 | showPath = urljoin("/", path) 85 | else: 86 | showPath = urljoin("/", self.basePath) 87 | showPath = urljoin(showPath, path) 88 | message = '[{0}] {1} - {2} - {3}'.format( 89 | time.strftime('%H:%M:%S'), 90 | status, 91 | contentLength.rjust(6, ' '), 92 | showPath 93 | ) 94 | 95 | if status == 200: 96 | message = Fore.GREEN + message + Style.RESET_ALL 97 | elif status == 403: 98 | message = Fore.BLUE + message + Style.RESET_ALL 99 | elif status == 401: 100 | message = Fore.YELLOW + message + Style.RESET_ALL 101 | # Check if redirect 102 | elif status in [301, 302, 307] and 'location' in [h.lower() for h in response.headers]: 103 | message = Fore.CYAN + message + Style.RESET_ALL 104 | message += ' -> {0}'.format(response.headers['location']) 105 | 106 | self.newLine(message) 107 | 108 | def lastPath(self, path, index, length): 109 | with self.mutex: 110 | percentage = lambda x, y: float(x) / float(y) * 100 111 | x, y = get_terminal_size() 112 | message = '{0:.2f}% - '.format(percentage(index, length)) 113 | if self.errors > 0: 114 | message += Style.BRIGHT + Fore.RED 115 | message += 'Errors: {0}'.format(self.errors) 116 | message += Style.RESET_ALL 117 | message += ' - ' 118 | message += 'Last request to: {0}'.format(path) 119 | if len(message) > x: 120 | message = message[:x] 121 | self.inLine(message) 122 | 123 | def addConnectionError(self): 124 | self.errors += 1 125 | 126 | def error(self, reason): 127 | with self.mutex: 128 | stripped = reason.strip() 129 | start = reason.find(stripped[0]) 130 | end = reason.find(stripped[-1]) + 1 131 | message = reason[0:start] 132 | message += Style.BRIGHT + Fore.WHITE + Back.RED 133 | message += reason[start:end] 134 | message += Style.RESET_ALL 135 | message += reason[end:] 136 | self.newLine(message) 137 | 138 | def warning(self, reason): 139 | message = Style.BRIGHT + Fore.YELLOW + reason + Style.RESET_ALL 140 | self.newLine(message) 141 | 142 | def header(self, text): 143 | message = Style.BRIGHT + Fore.MAGENTA + text + Style.RESET_ALL 144 | self.newLine(message) 145 | 146 | def config(self, extensions, threads, wordlistSize): 147 | separator = Fore.MAGENTA + ' | ' + Fore.YELLOW 148 | config = Style.BRIGHT + Fore.YELLOW 149 | config += 'Extensions: {0}'.format(Fore.CYAN + extensions + Fore.YELLOW) 150 | config += separator 151 | config += 'Threads: {0}'.format(Fore.CYAN + threads + Fore.YELLOW) 152 | config += separator 153 | config += 'Wordlist size: {0}'.format(Fore.CYAN + wordlistSize + Fore.YELLOW) 154 | config += Style.RESET_ALL 155 | self.newLine(config) 156 | 157 | def target(self, target): 158 | config = Style.BRIGHT + Fore.YELLOW 159 | config += '\nTarget: {0}\n'.format(Fore.CYAN + target + Fore.YELLOW) 160 | config += Style.RESET_ALL 161 | self.newLine(config) 162 | 163 | def debug(self, info): 164 | line = "[{0}] - {1}".format(time.strftime('%H:%M:%S'), info) 165 | self.newLine(line) 166 | 167 | output = ConsoleOutput() -------------------------------------------------------------------------------- /lib/common/terminalsize.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env/python 2 | #-*- coding:utf-8 -*- 3 | 4 | __author__ = 'BlackYe.' 5 | 6 | import os 7 | import shlex 8 | import struct 9 | import platform 10 | import subprocess 11 | 12 | 13 | def get_terminal_size(): 14 | """ getTerminalSize() 15 | - get width and height of console 16 | - works on linux,os x,windows,cygwin(windows) 17 | originally retrieved from: 18 | http://stackoverflow.com/questions/566746/how-to-get-console-window-width-in-python 19 | """ 20 | current_os = platform.system() 21 | tuple_xy = None 22 | if current_os == 'Windows': 23 | tuple_xy = _get_terminal_size_windows() 24 | if tuple_xy is None: 25 | tuple_xy = _get_terminal_size_tput() 26 | # needed for window's python in cygwin's xterm! 27 | if current_os in ['Linux', 'Darwin', 'FreeBSD'] or current_os.startswith('CYGWIN'): 28 | tuple_xy = _get_terminal_size_linux() 29 | if tuple_xy is None: 30 | tuple_xy = (80, 25) # default value 31 | return tuple_xy 32 | 33 | 34 | def _get_terminal_size_windows(): 35 | try: 36 | from ctypes import windll, create_string_buffer 37 | # stdin handle is -10 38 | # stdout handle is -11 39 | # stderr handle is -12 40 | h = windll.kernel32.GetStdHandle(-12) 41 | csbi = create_string_buffer(22) 42 | res = windll.kernel32.GetConsoleScreenBufferInfo(h, csbi) 43 | if res: 44 | (bufx, bufy, curx, cury, wattr, 45 | left, top, right, bottom, 46 | maxx, maxy) = struct.unpack("hhhhHhhhhhh", csbi.raw) 47 | sizex = right - left + 1 48 | sizey = bottom - top + 1 49 | return sizex, sizey 50 | except: 51 | pass 52 | 53 | 54 | def _get_terminal_size_tput(): 55 | # get terminal width 56 | # src: http://stackoverflow.com/questions/263890/how-do-i-find-the-width-height-of-a-terminal-window 57 | try: 58 | cols = int(subprocess.check_call(shlex.split('tput cols'))) 59 | rows = int(subprocess.check_call(shlex.split('tput lines'))) 60 | return (cols, rows) 61 | except: 62 | pass 63 | 64 | 65 | def _get_terminal_size_linux(): 66 | def ioctl_GWINSZ(fd): 67 | try: 68 | import fcntl 69 | import termios 70 | cr = struct.unpack('hh', 71 | fcntl.ioctl(fd, termios.TIOCGWINSZ, '1234')) 72 | return cr 73 | except: 74 | pass 75 | cr = ioctl_GWINSZ(0) or ioctl_GWINSZ(1) or ioctl_GWINSZ(2) 76 | if not cr: 77 | try: 78 | fd = os.open(os.ctermid(), os.O_RDONLY) 79 | cr = ioctl_GWINSZ(fd) 80 | os.close(fd) 81 | except: 82 | pass 83 | if not cr: 84 | try: 85 | cr = (os.environ['LINES'], os.environ['COLUMNS']) 86 | except: 87 | return None 88 | return int(cr[1]), int(cr[0]) 89 | 90 | if __name__ == "__main__": 91 | sizex, sizey = get_terminal_size() 92 | print('width =', sizex, 'height =', sizey) 93 | -------------------------------------------------------------------------------- /lib/common/util.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env/python 2 | #-*- coding:utf-8 -*- 3 | 4 | __author__ = 'BlackYe.' 5 | 6 | 7 | import random 8 | import string 9 | 10 | 11 | class RandomUtils(object): 12 | @classmethod 13 | def randString(cls, n=12, omit=None): 14 | seq = string.ascii_lowercase + string.ascii_uppercase + string.digits 15 | if omit: 16 | seq = list(set(seq) - set(omit)) 17 | return ''.join(random.choice(seq) for _ in range(n)) -------------------------------------------------------------------------------- /lib/controller/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env/python 2 | #-*- coding:utf-8 -*- 3 | 4 | __author__ = 'BlackYe.' 5 | -------------------------------------------------------------------------------- /lib/controller/controller.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env/python 2 | #-*- coding:utf-8 -*- 3 | 4 | __author__ = 'BlackYe.' 5 | 6 | from lib.core.argument import ArgumentParse as Argument 7 | from lib.controller.scanner import Scanner 8 | 9 | from lib.common.output import ConsoleOutput 10 | from lib.net.myrequests import Requester 11 | from lib.common.myexception import RequestException, SkipTargetInterrupt 12 | from lib.common.output import output 13 | 14 | class Controller(object): 15 | 16 | def __init__(self, url): 17 | 18 | self.arguments = Argument(url) 19 | 20 | output.debug('Start scan......') 21 | try: 22 | self.requester = Requester(url, cookie = self.arguments.cookie, 23 | useragent = self.arguments.useragent, 24 | maxPool = self.arguments.max_threads, 25 | maxRetries = self.arguments.max_retrys, 26 | delay = self.arguments.delay, 27 | timeout = self.arguments.http_timeout, 28 | proxy=self.arguments.proxy, 29 | redirect = True) 30 | self.requester.request("/") 31 | 32 | except RequestException as e: 33 | output.error(e.args[0]['message']) 34 | raise SkipTargetInterrupt 35 | 36 | #matchCallbacks = [self.matchCallback] 37 | 38 | self.scanner = Scanner(self.requester, 39 | concurrent_num = 20, 40 | internal_timeout = 60, 41 | dictionary = {'dir_dic' : self.arguments.dir_dic, 42 | 'file_dic' : self.arguments.file_dic, 43 | 'bakdir_exts' : self.arguments.bakdir_exts, 44 | 'bakfile_exts' : self.arguments.bakfile_exts}, 45 | match_callbacks = []) 46 | 47 | try: 48 | self.run() 49 | except RequestException as e: 50 | output.error("Fatal error during site scanning: " + e.args[0]['message']) 51 | raise SkipTargetInterrupt 52 | finally: 53 | pass 54 | 55 | output.warning('\nTask Completed') 56 | 57 | 58 | def run(self): 59 | self.scanner.start() -------------------------------------------------------------------------------- /lib/core/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env/python 2 | #-*- coding:utf-8 -*- 3 | 4 | __author__ = 'BlackYe.' 5 | -------------------------------------------------------------------------------- /lib/core/argument.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env/python 2 | #-*- coding:utf-8 -*- 3 | 4 | __author__ = 'BlackYe.' 5 | 6 | from ConfigParser import ConfigParser 7 | 8 | class ArgumentParse(object): 9 | 10 | def __init__(self, url): 11 | self.cookie = '' 12 | self.useragent = '' 13 | self.max_threads = 5 14 | self.max_retrys = 3 15 | self.delay = 0.5 16 | self.http_timeout = 30 17 | 18 | self.proxy = None 19 | 20 | conf = ConfigParser() 21 | conf.read("config.conf") 22 | self.bakdir_exts = eval(conf.get('dict', 'bakdir_exts')) 23 | self.bakfile_exts = eval(conf.get('dict', 'bakfile_exts')) 24 | self.__load_scan_dic(url, conf.get('dict', 'web_dic_path'), conf.get('dict', 'path_dic_path')) 25 | 26 | def __load_scan_dic(self, url, path_dic, file_dic): 27 | ''' 28 | 加载路径探测字典 29 | :param path_dic: 30 | :param file_dic:/ 31 | :return: 32 | ''' 33 | from urlparse import urlparse 34 | from IPy import IP 35 | with open(path_dic, 'r') as file: 36 | self.dir_dic = list(set([each.strip(' \r\n') for each in file.readlines()])) 37 | file.close() 38 | 39 | with open(file_dic, 'r') as file: 40 | self.file_dic = list(set([each.strip(' \r\n') for each in file.readlines()])) 41 | try: 42 | IP(urlparse(url).netloc.split(':')[0]) #域名形式 www.baidu.com.tar.gz 43 | except ValueError: 44 | self.file_dic.extend(['%s%s' % (urlparse(url).netloc.split(':')[0], webfile) for webfile in self.bakdir_exts]) 45 | file.close() 46 | 47 | -------------------------------------------------------------------------------- /lib/core/webscan.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env/python 2 | #-*- coding:utf-8 -*- 3 | 4 | __author__ = 'BlackYe.' 5 | 6 | 7 | from lib.common.util import RandomUtils 8 | from lib.common.fuzzy_string_cmp import DynamicContentParser 9 | from difflib import SequenceMatcher 10 | 11 | import re 12 | 13 | class WebScan(object): 14 | 15 | def __init__(self, requester, test_path = None, suffix = None, bdir = False): 16 | 17 | ''' 18 | if test_path is None or test_path is "": 19 | self.test_path = RandomUtils.randString() 20 | else: 21 | self.test_path = test_path 22 | ''' 23 | self.test_path = test_path if test_path is not None else "" 24 | if suffix is None: 25 | self.suffix = RandomUtils.randString() 26 | else: 27 | self.suffix = suffix 28 | 29 | self.bdir = bdir 30 | self.requester = requester 31 | self.tester = None 32 | self.redirect_regexp = None 33 | self.invalid_status = None 34 | self.dynamic_parser = None 35 | self.ratio = 0.98 36 | self.redirect_status_codes = [301, 302, 307] 37 | self.__init_env() 38 | 39 | def __init_env(self): 40 | first_path = self.test_path + self.suffix + '%s' % ('/' if self.bdir else '') 41 | first_response = second_response = None 42 | try: 43 | first_response = self.requester.request(first_path) 44 | except Exception,e: 45 | return 46 | self.invalid_status = first_response.status 47 | if self.invalid_status == 404: 48 | # Using the response status code is enough :-} 49 | return 50 | 51 | # look for redirects 52 | second_path = self.test_path + RandomUtils.randString(omit=self.test_path) + '%s' % ('/' if self.bdir else '') 53 | try: 54 | second_response = self.requester.request(second_path) 55 | except Exception: 56 | return 57 | if first_response.status in self.redirect_status_codes and first_response.redirect and second_response.redirect: 58 | self.redirect_regexp = self.generate_redirect_regexp(first_response.redirect, second_response.redirect) 59 | 60 | # Analyze response bodies 61 | self.dynamic_parser = DynamicContentParser(self.requester, first_path, first_response.body, second_response.body) 62 | base_ratio = float("{0:.2f}".format(self.dynamic_parser.comparisonRatio)) # Rounding to 2 decimals 63 | # If response length is small, adjust ratio 64 | if len(first_response) < 2000: 65 | base_ratio -= 0.1 66 | if base_ratio < self.ratio: 67 | self.ratio = base_ratio 68 | 69 | def generate_redirect_regexp(self, first_location, second_location): 70 | if first_location is None or second_location is None: 71 | return None 72 | sm = SequenceMatcher(None, first_location, second_location) 73 | marks = [] 74 | for blocks in sm.get_matching_blocks(): 75 | i = blocks[0] 76 | n = blocks[2] 77 | # empty block 78 | if n == 0: 79 | continue 80 | mark = first_location[i:i + n] 81 | marks.append(mark) 82 | regexp = "^.*{0}.*$".format(".*".join(map(re.escape, marks))) 83 | return regexp 84 | 85 | def scan(self, path): 86 | response = None 87 | try: 88 | response = self.requester.request(path) 89 | except Exception: 90 | return False 91 | 92 | if hasattr(response.headers, 'Content-Length') and not int(response.headers.get('Content-Length')): #过滤掉空白页面 93 | return False 94 | if self.invalid_status == 404 and response.status == 404: 95 | return False 96 | if response.status >= 400 and response.status < 404: 97 | return False 98 | if self.invalid_status != response.status: 99 | return True 100 | redirect_to_invalid = False 101 | if self.redirect_regexp is not None and response.redirect is not None: 102 | redirect_to_invalid = re.match(self.redirect_regexp, response.redirect) is not None 103 | # If redirection doesn't match the rule, mark as found 104 | if not redirect_to_invalid: 105 | return True 106 | 107 | ratio = self.dynamic_parser.relative_distance(response.body) 108 | if ratio >= self.ratio: 109 | return False 110 | elif redirect_to_invalid and ratio >= (self.ratio - 0.15): 111 | return False 112 | return True 113 | -------------------------------------------------------------------------------- /lib/net/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env/python 2 | #-*- coding:utf-8 -*- 3 | 4 | __author__ = 'BlackYe.' 5 | -------------------------------------------------------------------------------- /lib/net/myrequests.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env/python 2 | #-*- coding:utf-8 -*- 3 | 4 | __author__ = 'BlackYe.' 5 | 6 | import random 7 | import socket 8 | import time 9 | 10 | from urlparse import urlparse, urljoin 11 | import thirdparty_libs.requests as requests 12 | from requests.exceptions import RequestException 13 | from lib.net.myresponse import * 14 | from lib.common.myexception import * 15 | 16 | 17 | class Requester(object): 18 | 19 | headers = { 20 | 'User-agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1468.0 Safari/537.36', 21 | 'Accept-Language': 'en-us', 22 | 'Accept-Encoding': 'identity', 23 | 'Keep-Alive': '300', 24 | 'Connection': 'keep-alive', 25 | 'Cache-Control': 'max-age=0', 26 | } 27 | 28 | def __init__(self, url, 29 | cookie = None, 30 | useragent = None, 31 | maxPool = 1, 32 | maxRetries = 5, 33 | delay = 0, 34 | timeout = 60, 35 | proxy = None, 36 | redirect = True): 37 | # if no backslash, append one 38 | if not url.endswith('/'): 39 | url = url + '/' 40 | parsed = urlparse(url) 41 | self.basePath = parsed.path 42 | 43 | # if not protocol specified, set http by default 44 | if parsed.scheme != 'http' and parsed.scheme != 'https': 45 | parsed = urlparse('http://' + url) 46 | self.basePath = parsed.path 47 | self.protocol = parsed.scheme 48 | if self.protocol != 'http' and self.protocol != 'https': 49 | self.protocol = 'http' 50 | self.host = parsed.netloc.split(':')[0] 51 | 52 | # resolve DNS to decrease overhead 53 | ''' 54 | if ip is not None: 55 | self.ip = ip 56 | else: 57 | try: 58 | self.ip = socket.gethostbyname(self.host) 59 | except socket.gaierror: 60 | raise RequestException({'message': "Couldn't resolve DNS"}) 61 | ''' 62 | self.ip = None 63 | self.headers['Host'] = self.host 64 | 65 | # If no port specified, set default (80, 443) 66 | try: 67 | self.port = parsed.netloc.split(':')[1] 68 | except IndexError: 69 | self.port = (443 if self.protocol == 'https' else 80) 70 | 71 | # Set cookie and user-agent headers 72 | if cookie is not None: 73 | self.setHeader('Cookie', cookie) 74 | if useragent is not None: 75 | self.setHeader('User-agent', useragent) 76 | self.maxRetries = maxRetries 77 | self.maxPool = maxPool 78 | self.delay = delay 79 | self.timeout = timeout 80 | self.pool = None 81 | self.proxy = proxy 82 | self.redirect = redirect 83 | self.randomAgents = None 84 | self.session = requests.Session() 85 | 86 | def setHeader(self, header, content): 87 | self.headers[header] = content 88 | 89 | def setRandomAgents(self, agents): 90 | self.randomAgents = list(agents) 91 | 92 | def unsetRandomAgents(self): 93 | self.randomAgents = None 94 | 95 | def request(self, path): 96 | i = 0 97 | proxy = None 98 | result = None 99 | while i <= self.maxRetries: 100 | try: 101 | if self.proxy is not None: 102 | proxy = {"https" : self.proxy, "http" : self.proxy} 103 | if True: 104 | url = "{0}://{1}:{2}".format(self.protocol, self.host, self.port) 105 | else: 106 | url = "{0}://{1}:{2}".format(self.protocol, self.ip, self.port) 107 | url = urljoin(url, self.basePath) 108 | 109 | # Joining with concatenation because a urljoin bug with "::" 110 | if not url.endswith('/'): 111 | url += "/" 112 | if path.startswith('/'): 113 | path = path[1:] 114 | url += path 115 | 116 | headers = dict(self.headers) 117 | ''' 118 | if self.randomAgents is not None: 119 | headers["User-agent"] = random.choice(self.randomAgents) 120 | ''' 121 | headers["Host"] = self.host 122 | # include port in Host header if it's non-standard 123 | if (self.protocol == "https" and self.port != 443) or (self.protocol == "http" and self.port != 80): 124 | headers["Host"]+=":{0}".format(self.port) 125 | 126 | response = self.session.get(url, proxies=proxy, verify=False, allow_redirects=self.redirect, \ 127 | headers=headers, timeout=self.timeout) 128 | result = Response(response.status_code, response.reason, response.headers, response.content) 129 | time.sleep(self.delay) 130 | del headers 131 | break 132 | except RequestException,ex: 133 | continue 134 | finally: 135 | i = i + 1 136 | if i > self.maxRetries: 137 | raise RequestException(\ 138 | {'message': 'CONNECTION TIMEOUT: There was a problem in the request to: {0}'.format(path)} 139 | ) 140 | return result 141 | -------------------------------------------------------------------------------- /lib/net/myresponse.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env/python 2 | #-*- coding:utf-8 -*- 3 | 4 | __author__ = 'BlackYe.' 5 | 6 | class Response(object): 7 | 8 | def __init__(self, status, reason, headers, body): 9 | self.status = status 10 | self.reason = reason 11 | self.headers = headers 12 | self.body = body 13 | 14 | def __str__(self): 15 | return self.body 16 | 17 | def __int__(self): 18 | return self.status 19 | 20 | def __eq__(self, other): 21 | return self.status == other.status and self.body == other.body 22 | 23 | def __cmp__(self, other): 24 | return (self.body > other) - (self.body < other) 25 | 26 | def __len__(self): 27 | return len(self.body) 28 | 29 | def __hash__(self): 30 | return hash(self.body) 31 | 32 | def __del__(self): 33 | del self.body 34 | del self.headers 35 | del self.status 36 | del self.reason 37 | 38 | @property 39 | def redirect(self): 40 | headers = dict((key.lower(), value) for key, value in self.headers.items()) 41 | return headers.get("location") 42 | 43 | @property 44 | def pretty(self): 45 | try: 46 | from BeautifulSoup import BeautifulSoup 47 | except ImportError: 48 | raise Exception('BeautifulSoup must be installed to get pretty HTML =(') 49 | html = BeautifulSoup(self.body) 50 | return html.prettify() -------------------------------------------------------------------------------- /logs/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env/python 2 | #-*- coding:utf-8 -*- 3 | 4 | __author__ = 'BlackYe.' 5 | -------------------------------------------------------------------------------- /result/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env/python 2 | #-*- coding:utf-8 -*- 3 | 4 | __author__ = 'BlackYe.' 5 | -------------------------------------------------------------------------------- /thirdparty_libs/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env/python 2 | #-*- coding:utf-8 -*- 3 | 4 | __author__ = 'BlackYe.' 5 | -------------------------------------------------------------------------------- /thirdparty_libs/chardet/__init__.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # This library is free software; you can redistribute it and/or 3 | # modify it under the terms of the GNU Lesser General Public 4 | # License as published by the Free Software Foundation; either 5 | # version 2.1 of the License, or (at your option) any later version. 6 | # 7 | # This library is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 10 | # Lesser General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU Lesser General Public 13 | # License along with this library; if not, write to the Free Software 14 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 15 | # 02110-1301 USA 16 | ######################### END LICENSE BLOCK ######################### 17 | 18 | 19 | from .compat import PY2, PY3 20 | from .universaldetector import UniversalDetector 21 | from .version import __version__, VERSION 22 | 23 | 24 | def detect(byte_str): 25 | """ 26 | Detect the encoding of the given byte string. 27 | 28 | :param byte_str: The byte sequence to examine. 29 | :type byte_str: ``bytes`` or ``bytearray`` 30 | """ 31 | if not isinstance(byte_str, bytearray): 32 | if not isinstance(byte_str, bytes): 33 | raise TypeError('Expected object of type bytes or bytearray, got: ' 34 | '{0}'.format(type(byte_str))) 35 | else: 36 | byte_str = bytearray(byte_str) 37 | detector = UniversalDetector() 38 | detector.feed(byte_str) 39 | return detector.close() 40 | -------------------------------------------------------------------------------- /thirdparty_libs/chardet/big5prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Communicator client code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import Big5DistributionAnalysis 31 | from .mbcssm import BIG5_SM_MODEL 32 | 33 | 34 | class Big5Prober(MultiByteCharSetProber): 35 | def __init__(self): 36 | super(Big5Prober, self).__init__() 37 | self.coding_sm = CodingStateMachine(BIG5_SM_MODEL) 38 | self.distribution_analyzer = Big5DistributionAnalysis() 39 | self.reset() 40 | 41 | @property 42 | def charset_name(self): 43 | return "Big5" 44 | 45 | @property 46 | def language(self): 47 | return "Chinese" 48 | -------------------------------------------------------------------------------- /thirdparty_libs/chardet/charsetgroupprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Communicator client code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .enums import ProbingState 29 | from .charsetprober import CharSetProber 30 | 31 | 32 | class CharSetGroupProber(CharSetProber): 33 | def __init__(self, lang_filter=None): 34 | super(CharSetGroupProber, self).__init__(lang_filter=lang_filter) 35 | self._active_num = 0 36 | self.probers = [] 37 | self._best_guess_prober = None 38 | 39 | def reset(self): 40 | super(CharSetGroupProber, self).reset() 41 | self._active_num = 0 42 | for prober in self.probers: 43 | if prober: 44 | prober.reset() 45 | prober.active = True 46 | self._active_num += 1 47 | self._best_guess_prober = None 48 | 49 | @property 50 | def charset_name(self): 51 | if not self._best_guess_prober: 52 | self.get_confidence() 53 | if not self._best_guess_prober: 54 | return None 55 | return self._best_guess_prober.charset_name 56 | 57 | @property 58 | def language(self): 59 | if not self._best_guess_prober: 60 | self.get_confidence() 61 | if not self._best_guess_prober: 62 | return None 63 | return self._best_guess_prober.language 64 | 65 | def feed(self, byte_str): 66 | for prober in self.probers: 67 | if not prober: 68 | continue 69 | if not prober.active: 70 | continue 71 | state = prober.feed(byte_str) 72 | if not state: 73 | continue 74 | if state == ProbingState.FOUND_IT: 75 | self._best_guess_prober = prober 76 | return self.state 77 | elif state == ProbingState.NOT_ME: 78 | prober.active = False 79 | self._active_num -= 1 80 | if self._active_num <= 0: 81 | self._state = ProbingState.NOT_ME 82 | return self.state 83 | return self.state 84 | 85 | def get_confidence(self): 86 | state = self.state 87 | if state == ProbingState.FOUND_IT: 88 | return 0.99 89 | elif state == ProbingState.NOT_ME: 90 | return 0.01 91 | best_conf = 0.0 92 | self._best_guess_prober = None 93 | for prober in self.probers: 94 | if not prober: 95 | continue 96 | if not prober.active: 97 | self.logger.debug('%s not active', prober.charset_name) 98 | continue 99 | conf = prober.get_confidence() 100 | self.logger.debug('%s %s confidence = %s', prober.charset_name, prober.language, conf) 101 | if best_conf < conf: 102 | best_conf = conf 103 | self._best_guess_prober = prober 104 | if not self._best_guess_prober: 105 | return 0.0 106 | return best_conf 107 | -------------------------------------------------------------------------------- /thirdparty_libs/chardet/charsetprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # 13 | # This library is free software; you can redistribute it and/or 14 | # modify it under the terms of the GNU Lesser General Public 15 | # License as published by the Free Software Foundation; either 16 | # version 2.1 of the License, or (at your option) any later version. 17 | # 18 | # This library is distributed in the hope that it will be useful, 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 | # Lesser General Public License for more details. 22 | # 23 | # You should have received a copy of the GNU Lesser General Public 24 | # License along with this library; if not, write to the Free Software 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 26 | # 02110-1301 USA 27 | ######################### END LICENSE BLOCK ######################### 28 | 29 | import logging 30 | import re 31 | 32 | from .enums import ProbingState 33 | 34 | 35 | class CharSetProber(object): 36 | 37 | SHORTCUT_THRESHOLD = 0.95 38 | 39 | def __init__(self, lang_filter=None): 40 | self._state = None 41 | self.lang_filter = lang_filter 42 | self.logger = logging.getLogger(__name__) 43 | 44 | def reset(self): 45 | self._state = ProbingState.DETECTING 46 | 47 | @property 48 | def charset_name(self): 49 | return None 50 | 51 | def feed(self, buf): 52 | pass 53 | 54 | @property 55 | def state(self): 56 | return self._state 57 | 58 | def get_confidence(self): 59 | return 0.0 60 | 61 | @staticmethod 62 | def filter_high_byte_only(buf): 63 | buf = re.sub(b'([\x00-\x7F])+', b' ', buf) 64 | return buf 65 | 66 | @staticmethod 67 | def filter_international_words(buf): 68 | """ 69 | We define three types of bytes: 70 | alphabet: english alphabets [a-zA-Z] 71 | international: international characters [\x80-\xFF] 72 | marker: everything else [^a-zA-Z\x80-\xFF] 73 | 74 | The input buffer can be thought to contain a series of words delimited 75 | by markers. This function works to filter all words that contain at 76 | least one international character. All contiguous sequences of markers 77 | are replaced by a single space ascii character. 78 | 79 | This filter applies to all scripts which do not use English characters. 80 | """ 81 | filtered = bytearray() 82 | 83 | # This regex expression filters out only words that have at-least one 84 | # international character. The word may include one marker character at 85 | # the end. 86 | words = re.findall(b'[a-zA-Z]*[\x80-\xFF]+[a-zA-Z]*[^a-zA-Z\x80-\xFF]?', 87 | buf) 88 | 89 | for word in words: 90 | filtered.extend(word[:-1]) 91 | 92 | # If the last character in the word is a marker, replace it with a 93 | # space as markers shouldn't affect our analysis (they are used 94 | # similarly across all languages and may thus have similar 95 | # frequencies). 96 | last_char = word[-1:] 97 | if not last_char.isalpha() and last_char < b'\x80': 98 | last_char = b' ' 99 | filtered.extend(last_char) 100 | 101 | return filtered 102 | 103 | @staticmethod 104 | def filter_with_english_letters(buf): 105 | """ 106 | Returns a copy of ``buf`` that retains only the sequences of English 107 | alphabet and high byte characters that are not between <> characters. 108 | Also retains English alphabet and high byte characters immediately 109 | before occurrences of >. 110 | 111 | This filter can be applied to all scripts which contain both English 112 | characters and extended ASCII characters, but is currently only used by 113 | ``Latin1Prober``. 114 | """ 115 | filtered = bytearray() 116 | in_tag = False 117 | prev = 0 118 | 119 | for curr in range(len(buf)): 120 | # Slice here to get bytes instead of an int with Python 3 121 | buf_char = buf[curr:curr + 1] 122 | # Check if we're coming out of or entering an HTML tag 123 | if buf_char == b'>': 124 | in_tag = False 125 | elif buf_char == b'<': 126 | in_tag = True 127 | 128 | # If current character is not extended-ASCII and not alphabetic... 129 | if buf_char < b'\x80' and not buf_char.isalpha(): 130 | # ...and we're not in a tag 131 | if curr > prev and not in_tag: 132 | # Keep everything after last non-extended-ASCII, 133 | # non-alphabetic character 134 | filtered.extend(buf[prev:curr]) 135 | # Output a space to delimit stretch we kept 136 | filtered.extend(b' ') 137 | prev = curr + 1 138 | 139 | # If we're not in a tag... 140 | if not in_tag: 141 | # Keep everything after last non-extended-ASCII, non-alphabetic 142 | # character 143 | filtered.extend(buf[prev:]) 144 | 145 | return filtered 146 | -------------------------------------------------------------------------------- /thirdparty_libs/chardet/cli/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /thirdparty_libs/chardet/cli/chardetect.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Script which takes one or more file paths and reports on their detected 4 | encodings 5 | 6 | Example:: 7 | 8 | % chardetect somefile someotherfile 9 | somefile: windows-1252 with confidence 0.5 10 | someotherfile: ascii with confidence 1.0 11 | 12 | If no paths are provided, it takes its input from stdin. 13 | 14 | """ 15 | 16 | from __future__ import absolute_import, print_function, unicode_literals 17 | 18 | import argparse 19 | import sys 20 | 21 | from chardet import __version__ 22 | from chardet.compat import PY2 23 | from chardet.universaldetector import UniversalDetector 24 | 25 | 26 | def description_of(lines, name='stdin'): 27 | """ 28 | Return a string describing the probable encoding of a file or 29 | list of strings. 30 | 31 | :param lines: The lines to get the encoding of. 32 | :type lines: Iterable of bytes 33 | :param name: Name of file or collection of lines 34 | :type name: str 35 | """ 36 | u = UniversalDetector() 37 | for line in lines: 38 | line = bytearray(line) 39 | u.feed(line) 40 | # shortcut out of the loop to save reading further - particularly useful if we read a BOM. 41 | if u.done: 42 | break 43 | u.close() 44 | result = u.result 45 | if PY2: 46 | name = name.decode(sys.getfilesystemencoding(), 'ignore') 47 | if result['encoding']: 48 | return '{0}: {1} with confidence {2}'.format(name, result['encoding'], 49 | result['confidence']) 50 | else: 51 | return '{0}: no result'.format(name) 52 | 53 | 54 | def main(argv=None): 55 | """ 56 | Handles command line arguments and gets things started. 57 | 58 | :param argv: List of arguments, as if specified on the command-line. 59 | If None, ``sys.argv[1:]`` is used instead. 60 | :type argv: list of str 61 | """ 62 | # Get command line arguments 63 | parser = argparse.ArgumentParser( 64 | description="Takes one or more file paths and reports their detected \ 65 | encodings") 66 | parser.add_argument('input', 67 | help='File whose encoding we would like to determine. \ 68 | (default: stdin)', 69 | type=argparse.FileType('rb'), nargs='*', 70 | default=[sys.stdin if PY2 else sys.stdin.buffer]) 71 | parser.add_argument('--version', action='version', 72 | version='%(prog)s {0}'.format(__version__)) 73 | args = parser.parse_args(argv) 74 | 75 | for f in args.input: 76 | if f.isatty(): 77 | print("You are running chardetect interactively. Press " + 78 | "CTRL-D twice at the start of a blank line to signal the " + 79 | "end of your input. If you want help, run chardetect " + 80 | "--help\n", file=sys.stderr) 81 | print(description_of(f, f.name)) 82 | 83 | 84 | if __name__ == '__main__': 85 | main() 86 | -------------------------------------------------------------------------------- /thirdparty_libs/chardet/codingstatemachine.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | import logging 29 | 30 | from .enums import MachineState 31 | 32 | 33 | class CodingStateMachine(object): 34 | """ 35 | A state machine to verify a byte sequence for a particular encoding. For 36 | each byte the detector receives, it will feed that byte to every active 37 | state machine available, one byte at a time. The state machine changes its 38 | state based on its previous state and the byte it receives. There are 3 39 | states in a state machine that are of interest to an auto-detector: 40 | 41 | START state: This is the state to start with, or a legal byte sequence 42 | (i.e. a valid code point) for character has been identified. 43 | 44 | ME state: This indicates that the state machine identified a byte sequence 45 | that is specific to the charset it is designed for and that 46 | there is no other possible encoding which can contain this byte 47 | sequence. This will to lead to an immediate positive answer for 48 | the detector. 49 | 50 | ERROR state: This indicates the state machine identified an illegal byte 51 | sequence for that encoding. This will lead to an immediate 52 | negative answer for this encoding. Detector will exclude this 53 | encoding from consideration from here on. 54 | """ 55 | def __init__(self, sm): 56 | self._model = sm 57 | self._curr_byte_pos = 0 58 | self._curr_char_len = 0 59 | self._curr_state = None 60 | self.logger = logging.getLogger(__name__) 61 | self.reset() 62 | 63 | def reset(self): 64 | self._curr_state = MachineState.START 65 | 66 | def next_state(self, c): 67 | # for each byte we get its class 68 | # if it is first byte, we also get byte length 69 | byte_class = self._model['class_table'][c] 70 | if self._curr_state == MachineState.START: 71 | self._curr_byte_pos = 0 72 | self._curr_char_len = self._model['char_len_table'][byte_class] 73 | # from byte's class and state_table, we get its next state 74 | curr_state = (self._curr_state * self._model['class_factor'] 75 | + byte_class) 76 | self._curr_state = self._model['state_table'][curr_state] 77 | self._curr_byte_pos += 1 78 | return self._curr_state 79 | 80 | def get_current_charlen(self): 81 | return self._curr_char_len 82 | 83 | def get_coding_state_machine(self): 84 | return self._model['name'] 85 | 86 | @property 87 | def language(self): 88 | return self._model['language'] 89 | -------------------------------------------------------------------------------- /thirdparty_libs/chardet/compat.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # Contributor(s): 3 | # Dan Blanchard 4 | # Ian Cordasco 5 | # 6 | # This library is free software; you can redistribute it and/or 7 | # modify it under the terms of the GNU Lesser General Public 8 | # License as published by the Free Software Foundation; either 9 | # version 2.1 of the License, or (at your option) any later version. 10 | # 11 | # This library is distributed in the hope that it will be useful, 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | # Lesser General Public License for more details. 15 | # 16 | # You should have received a copy of the GNU Lesser General Public 17 | # License along with this library; if not, write to the Free Software 18 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 19 | # 02110-1301 USA 20 | ######################### END LICENSE BLOCK ######################### 21 | 22 | import sys 23 | 24 | 25 | if sys.version_info < (3, 0): 26 | PY2 = True 27 | PY3 = False 28 | base_str = (str, unicode) 29 | text_type = unicode 30 | else: 31 | PY2 = False 32 | PY3 = True 33 | base_str = (bytes, str) 34 | text_type = str 35 | -------------------------------------------------------------------------------- /thirdparty_libs/chardet/cp949prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .chardistribution import EUCKRDistributionAnalysis 29 | from .codingstatemachine import CodingStateMachine 30 | from .mbcharsetprober import MultiByteCharSetProber 31 | from .mbcssm import CP949_SM_MODEL 32 | 33 | 34 | class CP949Prober(MultiByteCharSetProber): 35 | def __init__(self): 36 | super(CP949Prober, self).__init__() 37 | self.coding_sm = CodingStateMachine(CP949_SM_MODEL) 38 | # NOTE: CP949 is a superset of EUC-KR, so the distribution should be 39 | # not different. 40 | self.distribution_analyzer = EUCKRDistributionAnalysis() 41 | self.reset() 42 | 43 | @property 44 | def charset_name(self): 45 | return "CP949" 46 | 47 | @property 48 | def language(self): 49 | return "Korean" 50 | -------------------------------------------------------------------------------- /thirdparty_libs/chardet/enums.py: -------------------------------------------------------------------------------- 1 | """ 2 | All of the Enums that are used throughout the chardet package. 3 | 4 | :author: Dan Blanchard (dan.blanchard@gmail.com) 5 | """ 6 | 7 | 8 | class InputState(object): 9 | """ 10 | This enum represents the different states a universal detector can be in. 11 | """ 12 | PURE_ASCII = 0 13 | ESC_ASCII = 1 14 | HIGH_BYTE = 2 15 | 16 | 17 | class LanguageFilter(object): 18 | """ 19 | This enum represents the different language filters we can apply to a 20 | ``UniversalDetector``. 21 | """ 22 | CHINESE_SIMPLIFIED = 0x01 23 | CHINESE_TRADITIONAL = 0x02 24 | JAPANESE = 0x04 25 | KOREAN = 0x08 26 | NON_CJK = 0x10 27 | ALL = 0x1F 28 | CHINESE = CHINESE_SIMPLIFIED | CHINESE_TRADITIONAL 29 | CJK = CHINESE | JAPANESE | KOREAN 30 | 31 | 32 | class ProbingState(object): 33 | """ 34 | This enum represents the different states a prober can be in. 35 | """ 36 | DETECTING = 0 37 | FOUND_IT = 1 38 | NOT_ME = 2 39 | 40 | 41 | class MachineState(object): 42 | """ 43 | This enum represents the different states a state machine can be in. 44 | """ 45 | START = 0 46 | ERROR = 1 47 | ITS_ME = 2 48 | 49 | 50 | class SequenceLikelihood(object): 51 | """ 52 | This enum represents the likelihood of a character following the previous one. 53 | """ 54 | NEGATIVE = 0 55 | UNLIKELY = 1 56 | LIKELY = 2 57 | POSITIVE = 3 58 | 59 | @classmethod 60 | def get_num_categories(cls): 61 | """:returns: The number of likelihood categories in the enum.""" 62 | return 4 63 | 64 | 65 | class CharacterCategory(object): 66 | """ 67 | This enum represents the different categories language models for 68 | ``SingleByteCharsetProber`` put characters into. 69 | 70 | Anything less than CONTROL is considered a letter. 71 | """ 72 | UNDEFINED = 255 73 | LINE_BREAK = 254 74 | SYMBOL = 253 75 | DIGIT = 252 76 | CONTROL = 251 77 | -------------------------------------------------------------------------------- /thirdparty_libs/chardet/escprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .charsetprober import CharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .enums import LanguageFilter, ProbingState, MachineState 31 | from .escsm import (HZ_SM_MODEL, ISO2022CN_SM_MODEL, ISO2022JP_SM_MODEL, 32 | ISO2022KR_SM_MODEL) 33 | 34 | 35 | class EscCharSetProber(CharSetProber): 36 | """ 37 | This CharSetProber uses a "code scheme" approach for detecting encodings, 38 | whereby easily recognizable escape or shift sequences are relied on to 39 | identify these encodings. 40 | """ 41 | 42 | def __init__(self, lang_filter=None): 43 | super(EscCharSetProber, self).__init__(lang_filter=lang_filter) 44 | self.coding_sm = [] 45 | if self.lang_filter & LanguageFilter.CHINESE_SIMPLIFIED: 46 | self.coding_sm.append(CodingStateMachine(HZ_SM_MODEL)) 47 | self.coding_sm.append(CodingStateMachine(ISO2022CN_SM_MODEL)) 48 | if self.lang_filter & LanguageFilter.JAPANESE: 49 | self.coding_sm.append(CodingStateMachine(ISO2022JP_SM_MODEL)) 50 | if self.lang_filter & LanguageFilter.KOREAN: 51 | self.coding_sm.append(CodingStateMachine(ISO2022KR_SM_MODEL)) 52 | self.active_sm_count = None 53 | self._detected_charset = None 54 | self._detected_language = None 55 | self._state = None 56 | self.reset() 57 | 58 | def reset(self): 59 | super(EscCharSetProber, self).reset() 60 | for coding_sm in self.coding_sm: 61 | if not coding_sm: 62 | continue 63 | coding_sm.active = True 64 | coding_sm.reset() 65 | self.active_sm_count = len(self.coding_sm) 66 | self._detected_charset = None 67 | self._detected_language = None 68 | 69 | @property 70 | def charset_name(self): 71 | return self._detected_charset 72 | 73 | @property 74 | def language(self): 75 | return self._detected_language 76 | 77 | def get_confidence(self): 78 | if self._detected_charset: 79 | return 0.99 80 | else: 81 | return 0.00 82 | 83 | def feed(self, byte_str): 84 | for c in byte_str: 85 | for coding_sm in self.coding_sm: 86 | if not coding_sm or not coding_sm.active: 87 | continue 88 | coding_state = coding_sm.next_state(c) 89 | if coding_state == MachineState.ERROR: 90 | coding_sm.active = False 91 | self.active_sm_count -= 1 92 | if self.active_sm_count <= 0: 93 | self._state = ProbingState.NOT_ME 94 | return self.state 95 | elif coding_state == MachineState.ITS_ME: 96 | self._state = ProbingState.FOUND_IT 97 | self._detected_charset = coding_sm.get_coding_state_machine() 98 | self._detected_language = coding_sm.language 99 | return self.state 100 | 101 | return self.state 102 | -------------------------------------------------------------------------------- /thirdparty_libs/chardet/eucjpprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .enums import ProbingState, MachineState 29 | from .mbcharsetprober import MultiByteCharSetProber 30 | from .codingstatemachine import CodingStateMachine 31 | from .chardistribution import EUCJPDistributionAnalysis 32 | from .jpcntx import EUCJPContextAnalysis 33 | from .mbcssm import EUCJP_SM_MODEL 34 | 35 | 36 | class EUCJPProber(MultiByteCharSetProber): 37 | def __init__(self): 38 | super(EUCJPProber, self).__init__() 39 | self.coding_sm = CodingStateMachine(EUCJP_SM_MODEL) 40 | self.distribution_analyzer = EUCJPDistributionAnalysis() 41 | self.context_analyzer = EUCJPContextAnalysis() 42 | self.reset() 43 | 44 | def reset(self): 45 | super(EUCJPProber, self).reset() 46 | self.context_analyzer.reset() 47 | 48 | @property 49 | def charset_name(self): 50 | return "EUC-JP" 51 | 52 | @property 53 | def language(self): 54 | return "Japanese" 55 | 56 | def feed(self, byte_str): 57 | for i in range(len(byte_str)): 58 | # PY3K: byte_str is a byte array, so byte_str[i] is an int, not a byte 59 | coding_state = self.coding_sm.next_state(byte_str[i]) 60 | if coding_state == MachineState.ERROR: 61 | self.logger.debug('%s %s prober hit error at byte %s', 62 | self.charset_name, self.language, i) 63 | self._state = ProbingState.NOT_ME 64 | break 65 | elif coding_state == MachineState.ITS_ME: 66 | self._state = ProbingState.FOUND_IT 67 | break 68 | elif coding_state == MachineState.START: 69 | char_len = self.coding_sm.get_current_charlen() 70 | if i == 0: 71 | self._last_char[1] = byte_str[0] 72 | self.context_analyzer.feed(self._last_char, char_len) 73 | self.distribution_analyzer.feed(self._last_char, char_len) 74 | else: 75 | self.context_analyzer.feed(byte_str[i - 1:i + 1], 76 | char_len) 77 | self.distribution_analyzer.feed(byte_str[i - 1:i + 1], 78 | char_len) 79 | 80 | self._last_char[0] = byte_str[-1] 81 | 82 | if self.state == ProbingState.DETECTING: 83 | if (self.context_analyzer.got_enough_data() and 84 | (self.get_confidence() > self.SHORTCUT_THRESHOLD)): 85 | self._state = ProbingState.FOUND_IT 86 | 87 | return self.state 88 | 89 | def get_confidence(self): 90 | context_conf = self.context_analyzer.get_confidence() 91 | distrib_conf = self.distribution_analyzer.get_confidence() 92 | return max(context_conf, distrib_conf) 93 | -------------------------------------------------------------------------------- /thirdparty_libs/chardet/euckrprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import EUCKRDistributionAnalysis 31 | from .mbcssm import EUCKR_SM_MODEL 32 | 33 | 34 | class EUCKRProber(MultiByteCharSetProber): 35 | def __init__(self): 36 | super(EUCKRProber, self).__init__() 37 | self.coding_sm = CodingStateMachine(EUCKR_SM_MODEL) 38 | self.distribution_analyzer = EUCKRDistributionAnalysis() 39 | self.reset() 40 | 41 | @property 42 | def charset_name(self): 43 | return "EUC-KR" 44 | 45 | @property 46 | def language(self): 47 | return "Korean" 48 | -------------------------------------------------------------------------------- /thirdparty_libs/chardet/euctwprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import EUCTWDistributionAnalysis 31 | from .mbcssm import EUCTW_SM_MODEL 32 | 33 | class EUCTWProber(MultiByteCharSetProber): 34 | def __init__(self): 35 | super(EUCTWProber, self).__init__() 36 | self.coding_sm = CodingStateMachine(EUCTW_SM_MODEL) 37 | self.distribution_analyzer = EUCTWDistributionAnalysis() 38 | self.reset() 39 | 40 | @property 41 | def charset_name(self): 42 | return "EUC-TW" 43 | 44 | @property 45 | def language(self): 46 | return "Taiwan" 47 | -------------------------------------------------------------------------------- /thirdparty_libs/chardet/gb2312prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import GB2312DistributionAnalysis 31 | from .mbcssm import GB2312_SM_MODEL 32 | 33 | class GB2312Prober(MultiByteCharSetProber): 34 | def __init__(self): 35 | super(GB2312Prober, self).__init__() 36 | self.coding_sm = CodingStateMachine(GB2312_SM_MODEL) 37 | self.distribution_analyzer = GB2312DistributionAnalysis() 38 | self.reset() 39 | 40 | @property 41 | def charset_name(self): 42 | return "GB2312" 43 | 44 | @property 45 | def language(self): 46 | return "Chinese" 47 | -------------------------------------------------------------------------------- /thirdparty_libs/chardet/latin1prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # 13 | # This library is free software; you can redistribute it and/or 14 | # modify it under the terms of the GNU Lesser General Public 15 | # License as published by the Free Software Foundation; either 16 | # version 2.1 of the License, or (at your option) any later version. 17 | # 18 | # This library is distributed in the hope that it will be useful, 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 | # Lesser General Public License for more details. 22 | # 23 | # You should have received a copy of the GNU Lesser General Public 24 | # License along with this library; if not, write to the Free Software 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 26 | # 02110-1301 USA 27 | ######################### END LICENSE BLOCK ######################### 28 | 29 | from .charsetprober import CharSetProber 30 | from .enums import ProbingState 31 | 32 | FREQ_CAT_NUM = 4 33 | 34 | UDF = 0 # undefined 35 | OTH = 1 # other 36 | ASC = 2 # ascii capital letter 37 | ASS = 3 # ascii small letter 38 | ACV = 4 # accent capital vowel 39 | ACO = 5 # accent capital other 40 | ASV = 6 # accent small vowel 41 | ASO = 7 # accent small other 42 | CLASS_NUM = 8 # total classes 43 | 44 | Latin1_CharToClass = ( 45 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 00 - 07 46 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 08 - 0F 47 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 10 - 17 48 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 18 - 1F 49 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 20 - 27 50 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 28 - 2F 51 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 30 - 37 52 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 38 - 3F 53 | OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 40 - 47 54 | ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 48 - 4F 55 | ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 50 - 57 56 | ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, # 58 - 5F 57 | OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 60 - 67 58 | ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 68 - 6F 59 | ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 70 - 77 60 | ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, # 78 - 7F 61 | OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH, # 80 - 87 62 | OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF, # 88 - 8F 63 | UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 90 - 97 64 | OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO, # 98 - 9F 65 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A0 - A7 66 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A8 - AF 67 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B0 - B7 68 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B8 - BF 69 | ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO, # C0 - C7 70 | ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, # C8 - CF 71 | ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH, # D0 - D7 72 | ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO, # D8 - DF 73 | ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO, # E0 - E7 74 | ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, # E8 - EF 75 | ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH, # F0 - F7 76 | ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, # F8 - FF 77 | ) 78 | 79 | # 0 : illegal 80 | # 1 : very unlikely 81 | # 2 : normal 82 | # 3 : very likely 83 | Latin1ClassModel = ( 84 | # UDF OTH ASC ASS ACV ACO ASV ASO 85 | 0, 0, 0, 0, 0, 0, 0, 0, # UDF 86 | 0, 3, 3, 3, 3, 3, 3, 3, # OTH 87 | 0, 3, 3, 3, 3, 3, 3, 3, # ASC 88 | 0, 3, 3, 3, 1, 1, 3, 3, # ASS 89 | 0, 3, 3, 3, 1, 2, 1, 2, # ACV 90 | 0, 3, 3, 3, 3, 3, 3, 3, # ACO 91 | 0, 3, 1, 3, 1, 1, 1, 3, # ASV 92 | 0, 3, 1, 3, 1, 1, 3, 3, # ASO 93 | ) 94 | 95 | 96 | class Latin1Prober(CharSetProber): 97 | def __init__(self): 98 | super(Latin1Prober, self).__init__() 99 | self._last_char_class = None 100 | self._freq_counter = None 101 | self.reset() 102 | 103 | def reset(self): 104 | self._last_char_class = OTH 105 | self._freq_counter = [0] * FREQ_CAT_NUM 106 | CharSetProber.reset(self) 107 | 108 | @property 109 | def charset_name(self): 110 | return "ISO-8859-1" 111 | 112 | @property 113 | def language(self): 114 | return "" 115 | 116 | def feed(self, byte_str): 117 | byte_str = self.filter_with_english_letters(byte_str) 118 | for c in byte_str: 119 | char_class = Latin1_CharToClass[c] 120 | freq = Latin1ClassModel[(self._last_char_class * CLASS_NUM) 121 | + char_class] 122 | if freq == 0: 123 | self._state = ProbingState.NOT_ME 124 | break 125 | self._freq_counter[freq] += 1 126 | self._last_char_class = char_class 127 | 128 | return self.state 129 | 130 | def get_confidence(self): 131 | if self.state == ProbingState.NOT_ME: 132 | return 0.01 133 | 134 | total = sum(self._freq_counter) 135 | if total < 0.01: 136 | confidence = 0.0 137 | else: 138 | confidence = ((self._freq_counter[3] - self._freq_counter[1] * 20.0) 139 | / total) 140 | if confidence < 0.0: 141 | confidence = 0.0 142 | # lower the confidence of latin1 so that other more accurate 143 | # detector can take priority. 144 | confidence = confidence * 0.73 145 | return confidence 146 | -------------------------------------------------------------------------------- /thirdparty_libs/chardet/mbcharsetprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # Proofpoint, Inc. 13 | # 14 | # This library is free software; you can redistribute it and/or 15 | # modify it under the terms of the GNU Lesser General Public 16 | # License as published by the Free Software Foundation; either 17 | # version 2.1 of the License, or (at your option) any later version. 18 | # 19 | # This library is distributed in the hope that it will be useful, 20 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 21 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 | # Lesser General Public License for more details. 23 | # 24 | # You should have received a copy of the GNU Lesser General Public 25 | # License along with this library; if not, write to the Free Software 26 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 27 | # 02110-1301 USA 28 | ######################### END LICENSE BLOCK ######################### 29 | 30 | from .charsetprober import CharSetProber 31 | from .enums import ProbingState, MachineState 32 | 33 | 34 | class MultiByteCharSetProber(CharSetProber): 35 | """ 36 | MultiByteCharSetProber 37 | """ 38 | 39 | def __init__(self, lang_filter=None): 40 | super(MultiByteCharSetProber, self).__init__(lang_filter=lang_filter) 41 | self.distribution_analyzer = None 42 | self.coding_sm = None 43 | self._last_char = [0, 0] 44 | 45 | def reset(self): 46 | super(MultiByteCharSetProber, self).reset() 47 | if self.coding_sm: 48 | self.coding_sm.reset() 49 | if self.distribution_analyzer: 50 | self.distribution_analyzer.reset() 51 | self._last_char = [0, 0] 52 | 53 | @property 54 | def charset_name(self): 55 | raise NotImplementedError 56 | 57 | @property 58 | def language(self): 59 | raise NotImplementedError 60 | 61 | def feed(self, byte_str): 62 | for i in range(len(byte_str)): 63 | coding_state = self.coding_sm.next_state(byte_str[i]) 64 | if coding_state == MachineState.ERROR: 65 | self.logger.debug('%s %s prober hit error at byte %s', 66 | self.charset_name, self.language, i) 67 | self._state = ProbingState.NOT_ME 68 | break 69 | elif coding_state == MachineState.ITS_ME: 70 | self._state = ProbingState.FOUND_IT 71 | break 72 | elif coding_state == MachineState.START: 73 | char_len = self.coding_sm.get_current_charlen() 74 | if i == 0: 75 | self._last_char[1] = byte_str[0] 76 | self.distribution_analyzer.feed(self._last_char, char_len) 77 | else: 78 | self.distribution_analyzer.feed(byte_str[i - 1:i + 1], 79 | char_len) 80 | 81 | self._last_char[0] = byte_str[-1] 82 | 83 | if self.state == ProbingState.DETECTING: 84 | if (self.distribution_analyzer.got_enough_data() and 85 | (self.get_confidence() > self.SHORTCUT_THRESHOLD)): 86 | self._state = ProbingState.FOUND_IT 87 | 88 | return self.state 89 | 90 | def get_confidence(self): 91 | return self.distribution_analyzer.get_confidence() 92 | -------------------------------------------------------------------------------- /thirdparty_libs/chardet/mbcsgroupprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # Proofpoint, Inc. 13 | # 14 | # This library is free software; you can redistribute it and/or 15 | # modify it under the terms of the GNU Lesser General Public 16 | # License as published by the Free Software Foundation; either 17 | # version 2.1 of the License, or (at your option) any later version. 18 | # 19 | # This library is distributed in the hope that it will be useful, 20 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 21 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 | # Lesser General Public License for more details. 23 | # 24 | # You should have received a copy of the GNU Lesser General Public 25 | # License along with this library; if not, write to the Free Software 26 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 27 | # 02110-1301 USA 28 | ######################### END LICENSE BLOCK ######################### 29 | 30 | from .charsetgroupprober import CharSetGroupProber 31 | from .utf8prober import UTF8Prober 32 | from .sjisprober import SJISProber 33 | from .eucjpprober import EUCJPProber 34 | from .gb2312prober import GB2312Prober 35 | from .euckrprober import EUCKRProber 36 | from .cp949prober import CP949Prober 37 | from .big5prober import Big5Prober 38 | from .euctwprober import EUCTWProber 39 | 40 | 41 | class MBCSGroupProber(CharSetGroupProber): 42 | def __init__(self, lang_filter=None): 43 | super(MBCSGroupProber, self).__init__(lang_filter=lang_filter) 44 | self.probers = [ 45 | UTF8Prober(), 46 | SJISProber(), 47 | EUCJPProber(), 48 | GB2312Prober(), 49 | EUCKRProber(), 50 | CP949Prober(), 51 | Big5Prober(), 52 | EUCTWProber() 53 | ] 54 | self.reset() 55 | -------------------------------------------------------------------------------- /thirdparty_libs/chardet/sbcharsetprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # 13 | # This library is free software; you can redistribute it and/or 14 | # modify it under the terms of the GNU Lesser General Public 15 | # License as published by the Free Software Foundation; either 16 | # version 2.1 of the License, or (at your option) any later version. 17 | # 18 | # This library is distributed in the hope that it will be useful, 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 | # Lesser General Public License for more details. 22 | # 23 | # You should have received a copy of the GNU Lesser General Public 24 | # License along with this library; if not, write to the Free Software 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 26 | # 02110-1301 USA 27 | ######################### END LICENSE BLOCK ######################### 28 | 29 | from .charsetprober import CharSetProber 30 | from .enums import CharacterCategory, ProbingState, SequenceLikelihood 31 | 32 | 33 | class SingleByteCharSetProber(CharSetProber): 34 | SAMPLE_SIZE = 64 35 | SB_ENOUGH_REL_THRESHOLD = 1024 # 0.25 * SAMPLE_SIZE^2 36 | POSITIVE_SHORTCUT_THRESHOLD = 0.95 37 | NEGATIVE_SHORTCUT_THRESHOLD = 0.05 38 | 39 | def __init__(self, model, reversed=False, name_prober=None): 40 | super(SingleByteCharSetProber, self).__init__() 41 | self._model = model 42 | # TRUE if we need to reverse every pair in the model lookup 43 | self._reversed = reversed 44 | # Optional auxiliary prober for name decision 45 | self._name_prober = name_prober 46 | self._last_order = None 47 | self._seq_counters = None 48 | self._total_seqs = None 49 | self._total_char = None 50 | self._freq_char = None 51 | self.reset() 52 | 53 | def reset(self): 54 | super(SingleByteCharSetProber, self).reset() 55 | # char order of last character 56 | self._last_order = 255 57 | self._seq_counters = [0] * SequenceLikelihood.get_num_categories() 58 | self._total_seqs = 0 59 | self._total_char = 0 60 | # characters that fall in our sampling range 61 | self._freq_char = 0 62 | 63 | @property 64 | def charset_name(self): 65 | if self._name_prober: 66 | return self._name_prober.charset_name 67 | else: 68 | return self._model['charset_name'] 69 | 70 | @property 71 | def language(self): 72 | if self._name_prober: 73 | return self._name_prober.language 74 | else: 75 | return self._model.get('language') 76 | 77 | def feed(self, byte_str): 78 | if not self._model['keep_english_letter']: 79 | byte_str = self.filter_international_words(byte_str) 80 | if not byte_str: 81 | return self.state 82 | char_to_order_map = self._model['char_to_order_map'] 83 | for i, c in enumerate(byte_str): 84 | # XXX: Order is in range 1-64, so one would think we want 0-63 here, 85 | # but that leads to 27 more test failures than before. 86 | order = char_to_order_map[c] 87 | # XXX: This was SYMBOL_CAT_ORDER before, with a value of 250, but 88 | # CharacterCategory.SYMBOL is actually 253, so we use CONTROL 89 | # to make it closer to the original intent. The only difference 90 | # is whether or not we count digits and control characters for 91 | # _total_char purposes. 92 | if order < CharacterCategory.CONTROL: 93 | self._total_char += 1 94 | if order < self.SAMPLE_SIZE: 95 | self._freq_char += 1 96 | if self._last_order < self.SAMPLE_SIZE: 97 | self._total_seqs += 1 98 | if not self._reversed: 99 | i = (self._last_order * self.SAMPLE_SIZE) + order 100 | model = self._model['precedence_matrix'][i] 101 | else: # reverse the order of the letters in the lookup 102 | i = (order * self.SAMPLE_SIZE) + self._last_order 103 | model = self._model['precedence_matrix'][i] 104 | self._seq_counters[model] += 1 105 | self._last_order = order 106 | 107 | charset_name = self._model['charset_name'] 108 | if self.state == ProbingState.DETECTING: 109 | if self._total_seqs > self.SB_ENOUGH_REL_THRESHOLD: 110 | confidence = self.get_confidence() 111 | if confidence > self.POSITIVE_SHORTCUT_THRESHOLD: 112 | self.logger.debug('%s confidence = %s, we have a winner', 113 | charset_name, confidence) 114 | self._state = ProbingState.FOUND_IT 115 | elif confidence < self.NEGATIVE_SHORTCUT_THRESHOLD: 116 | self.logger.debug('%s confidence = %s, below negative ' 117 | 'shortcut threshhold %s', charset_name, 118 | confidence, 119 | self.NEGATIVE_SHORTCUT_THRESHOLD) 120 | self._state = ProbingState.NOT_ME 121 | 122 | return self.state 123 | 124 | def get_confidence(self): 125 | r = 0.01 126 | if self._total_seqs > 0: 127 | r = ((1.0 * self._seq_counters[SequenceLikelihood.POSITIVE]) / 128 | self._total_seqs / self._model['typical_positive_ratio']) 129 | r = r * self._freq_char / self._total_char 130 | if r >= 1.0: 131 | r = 0.99 132 | return r 133 | -------------------------------------------------------------------------------- /thirdparty_libs/chardet/sbcsgroupprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # 13 | # This library is free software; you can redistribute it and/or 14 | # modify it under the terms of the GNU Lesser General Public 15 | # License as published by the Free Software Foundation; either 16 | # version 2.1 of the License, or (at your option) any later version. 17 | # 18 | # This library is distributed in the hope that it will be useful, 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 | # Lesser General Public License for more details. 22 | # 23 | # You should have received a copy of the GNU Lesser General Public 24 | # License along with this library; if not, write to the Free Software 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 26 | # 02110-1301 USA 27 | ######################### END LICENSE BLOCK ######################### 28 | 29 | from .charsetgroupprober import CharSetGroupProber 30 | from .sbcharsetprober import SingleByteCharSetProber 31 | from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel, 32 | Latin5CyrillicModel, MacCyrillicModel, 33 | Ibm866Model, Ibm855Model) 34 | from .langgreekmodel import Latin7GreekModel, Win1253GreekModel 35 | from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel 36 | # from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel 37 | from .langthaimodel import TIS620ThaiModel 38 | from .langhebrewmodel import Win1255HebrewModel 39 | from .hebrewprober import HebrewProber 40 | from .langturkishmodel import Latin5TurkishModel 41 | 42 | 43 | class SBCSGroupProber(CharSetGroupProber): 44 | def __init__(self): 45 | super(SBCSGroupProber, self).__init__() 46 | self.probers = [ 47 | SingleByteCharSetProber(Win1251CyrillicModel), 48 | SingleByteCharSetProber(Koi8rModel), 49 | SingleByteCharSetProber(Latin5CyrillicModel), 50 | SingleByteCharSetProber(MacCyrillicModel), 51 | SingleByteCharSetProber(Ibm866Model), 52 | SingleByteCharSetProber(Ibm855Model), 53 | SingleByteCharSetProber(Latin7GreekModel), 54 | SingleByteCharSetProber(Win1253GreekModel), 55 | SingleByteCharSetProber(Latin5BulgarianModel), 56 | SingleByteCharSetProber(Win1251BulgarianModel), 57 | # TODO: Restore Hungarian encodings (iso-8859-2 and windows-1250) 58 | # after we retrain model. 59 | # SingleByteCharSetProber(Latin2HungarianModel), 60 | # SingleByteCharSetProber(Win1250HungarianModel), 61 | SingleByteCharSetProber(TIS620ThaiModel), 62 | SingleByteCharSetProber(Latin5TurkishModel), 63 | ] 64 | hebrew_prober = HebrewProber() 65 | logical_hebrew_prober = SingleByteCharSetProber(Win1255HebrewModel, 66 | False, hebrew_prober) 67 | visual_hebrew_prober = SingleByteCharSetProber(Win1255HebrewModel, True, 68 | hebrew_prober) 69 | hebrew_prober.set_model_probers(logical_hebrew_prober, visual_hebrew_prober) 70 | self.probers.extend([hebrew_prober, logical_hebrew_prober, 71 | visual_hebrew_prober]) 72 | 73 | self.reset() 74 | -------------------------------------------------------------------------------- /thirdparty_libs/chardet/sjisprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import SJISDistributionAnalysis 31 | from .jpcntx import SJISContextAnalysis 32 | from .mbcssm import SJIS_SM_MODEL 33 | from .enums import ProbingState, MachineState 34 | 35 | 36 | class SJISProber(MultiByteCharSetProber): 37 | def __init__(self): 38 | super(SJISProber, self).__init__() 39 | self.coding_sm = CodingStateMachine(SJIS_SM_MODEL) 40 | self.distribution_analyzer = SJISDistributionAnalysis() 41 | self.context_analyzer = SJISContextAnalysis() 42 | self.reset() 43 | 44 | def reset(self): 45 | super(SJISProber, self).reset() 46 | self.context_analyzer.reset() 47 | 48 | @property 49 | def charset_name(self): 50 | return self.context_analyzer.charset_name 51 | 52 | @property 53 | def language(self): 54 | return "Japanese" 55 | 56 | def feed(self, byte_str): 57 | for i in range(len(byte_str)): 58 | coding_state = self.coding_sm.next_state(byte_str[i]) 59 | if coding_state == MachineState.ERROR: 60 | self.logger.debug('%s %s prober hit error at byte %s', 61 | self.charset_name, self.language, i) 62 | self._state = ProbingState.NOT_ME 63 | break 64 | elif coding_state == MachineState.ITS_ME: 65 | self._state = ProbingState.FOUND_IT 66 | break 67 | elif coding_state == MachineState.START: 68 | char_len = self.coding_sm.get_current_charlen() 69 | if i == 0: 70 | self._last_char[1] = byte_str[0] 71 | self.context_analyzer.feed(self._last_char[2 - char_len:], 72 | char_len) 73 | self.distribution_analyzer.feed(self._last_char, char_len) 74 | else: 75 | self.context_analyzer.feed(byte_str[i + 1 - char_len:i + 3 76 | - char_len], char_len) 77 | self.distribution_analyzer.feed(byte_str[i - 1:i + 1], 78 | char_len) 79 | 80 | self._last_char[0] = byte_str[-1] 81 | 82 | if self.state == ProbingState.DETECTING: 83 | if (self.context_analyzer.got_enough_data() and 84 | (self.get_confidence() > self.SHORTCUT_THRESHOLD)): 85 | self._state = ProbingState.FOUND_IT 86 | 87 | return self.state 88 | 89 | def get_confidence(self): 90 | context_conf = self.context_analyzer.get_confidence() 91 | distrib_conf = self.distribution_analyzer.get_confidence() 92 | return max(context_conf, distrib_conf) 93 | -------------------------------------------------------------------------------- /thirdparty_libs/chardet/utf8prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .charsetprober import CharSetProber 29 | from .enums import ProbingState, MachineState 30 | from .codingstatemachine import CodingStateMachine 31 | from .mbcssm import UTF8_SM_MODEL 32 | 33 | 34 | 35 | class UTF8Prober(CharSetProber): 36 | ONE_CHAR_PROB = 0.5 37 | 38 | def __init__(self): 39 | super(UTF8Prober, self).__init__() 40 | self.coding_sm = CodingStateMachine(UTF8_SM_MODEL) 41 | self._num_mb_chars = None 42 | self.reset() 43 | 44 | def reset(self): 45 | super(UTF8Prober, self).reset() 46 | self.coding_sm.reset() 47 | self._num_mb_chars = 0 48 | 49 | @property 50 | def charset_name(self): 51 | return "utf-8" 52 | 53 | @property 54 | def language(self): 55 | return "" 56 | 57 | def feed(self, byte_str): 58 | for c in byte_str: 59 | coding_state = self.coding_sm.next_state(c) 60 | if coding_state == MachineState.ERROR: 61 | self._state = ProbingState.NOT_ME 62 | break 63 | elif coding_state == MachineState.ITS_ME: 64 | self._state = ProbingState.FOUND_IT 65 | break 66 | elif coding_state == MachineState.START: 67 | if self.coding_sm.get_current_charlen() >= 2: 68 | self._num_mb_chars += 1 69 | 70 | if self.state == ProbingState.DETECTING: 71 | if self.get_confidence() > self.SHORTCUT_THRESHOLD: 72 | self._state = ProbingState.FOUND_IT 73 | 74 | return self.state 75 | 76 | def get_confidence(self): 77 | unlike = 0.99 78 | if self._num_mb_chars < 6: 79 | unlike *= self.ONE_CHAR_PROB ** self._num_mb_chars 80 | return 1.0 - unlike 81 | else: 82 | return unlike 83 | -------------------------------------------------------------------------------- /thirdparty_libs/chardet/version.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module exists only to simplify retrieving the version number of chardet 3 | from within setup.py and from chardet subpackages. 4 | 5 | :author: Dan Blanchard (dan.blanchard@gmail.com) 6 | """ 7 | 8 | __version__ = "3.0.4" 9 | VERSION = __version__.split('.') 10 | -------------------------------------------------------------------------------- /thirdparty_libs/colorama/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. 2 | from .initialise import init, deinit, reinit, colorama_text 3 | from .ansi import Fore, Back, Style, Cursor 4 | from .ansitowin32 import AnsiToWin32 5 | 6 | __version__ = '0.3.3' 7 | 8 | -------------------------------------------------------------------------------- /thirdparty_libs/colorama/ansi.py: -------------------------------------------------------------------------------- 1 | # Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. 2 | ''' 3 | This module generates ANSI character codes to printing colors to terminals. 4 | See: http://en.wikipedia.org/wiki/ANSI_escape_code 5 | ''' 6 | 7 | CSI = '\033[' 8 | OSC = '\033]' 9 | BEL = '\007' 10 | 11 | 12 | def code_to_chars(code): 13 | return CSI + str(code) + 'm' 14 | 15 | def set_title(title): 16 | return OSC + '2;' + title + BEL 17 | 18 | def clear_screen(mode=2): 19 | return CSI + str(mode) + 'J' 20 | 21 | def clear_line(mode=2): 22 | return CSI + str(mode) + 'K' 23 | 24 | 25 | class AnsiCodes(object): 26 | def __init__(self): 27 | # the subclasses declare class attributes which are numbers. 28 | # Upon instantiation we define instance attributes, which are the same 29 | # as the class attributes but wrapped with the ANSI escape sequence 30 | for name in dir(self): 31 | if not name.startswith('_'): 32 | value = getattr(self, name) 33 | setattr(self, name, code_to_chars(value)) 34 | 35 | 36 | class AnsiCursor(object): 37 | def UP(self, n=1): 38 | return CSI + str(n) + 'A' 39 | def DOWN(self, n=1): 40 | return CSI + str(n) + 'B' 41 | def FORWARD(self, n=1): 42 | return CSI + str(n) + 'C' 43 | def BACK(self, n=1): 44 | return CSI + str(n) + 'D' 45 | def POS(self, x=1, y=1): 46 | return CSI + str(y) + ';' + str(x) + 'H' 47 | 48 | 49 | class AnsiFore(AnsiCodes): 50 | BLACK = 30 51 | RED = 31 52 | GREEN = 32 53 | YELLOW = 33 54 | BLUE = 34 55 | MAGENTA = 35 56 | CYAN = 36 57 | WHITE = 37 58 | RESET = 39 59 | 60 | # These are fairly well supported, but not part of the standard. 61 | LIGHTBLACK_EX = 90 62 | LIGHTRED_EX = 91 63 | LIGHTGREEN_EX = 92 64 | LIGHTYELLOW_EX = 93 65 | LIGHTBLUE_EX = 94 66 | LIGHTMAGENTA_EX = 95 67 | LIGHTCYAN_EX = 96 68 | LIGHTWHITE_EX = 97 69 | 70 | 71 | class AnsiBack(AnsiCodes): 72 | BLACK = 40 73 | RED = 41 74 | GREEN = 42 75 | YELLOW = 43 76 | BLUE = 44 77 | MAGENTA = 45 78 | CYAN = 46 79 | WHITE = 47 80 | RESET = 49 81 | 82 | # These are fairly well supported, but not part of the standard. 83 | LIGHTBLACK_EX = 100 84 | LIGHTRED_EX = 101 85 | LIGHTGREEN_EX = 102 86 | LIGHTYELLOW_EX = 103 87 | LIGHTBLUE_EX = 104 88 | LIGHTMAGENTA_EX = 105 89 | LIGHTCYAN_EX = 106 90 | LIGHTWHITE_EX = 107 91 | 92 | 93 | class AnsiStyle(AnsiCodes): 94 | BRIGHT = 1 95 | DIM = 2 96 | NORMAL = 22 97 | RESET_ALL = 0 98 | 99 | Fore = AnsiFore() 100 | Back = AnsiBack() 101 | Style = AnsiStyle() 102 | Cursor = AnsiCursor() 103 | -------------------------------------------------------------------------------- /thirdparty_libs/colorama/initialise.py: -------------------------------------------------------------------------------- 1 | # Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. 2 | import atexit 3 | import contextlib 4 | import sys 5 | 6 | from .ansitowin32 import AnsiToWin32 7 | 8 | 9 | orig_stdout = None 10 | orig_stderr = None 11 | 12 | wrapped_stdout = None 13 | wrapped_stderr = None 14 | 15 | atexit_done = False 16 | 17 | 18 | def reset_all(): 19 | AnsiToWin32(orig_stdout).reset_all() 20 | 21 | 22 | def init(autoreset=False, convert=None, strip=None, wrap=True): 23 | 24 | if not wrap and any([autoreset, convert, strip]): 25 | raise ValueError('wrap=False conflicts with any other arg=True') 26 | 27 | global wrapped_stdout, wrapped_stderr 28 | global orig_stdout, orig_stderr 29 | 30 | orig_stdout = sys.stdout 31 | orig_stderr = sys.stderr 32 | 33 | if sys.stdout is None: 34 | wrapped_stdout = None 35 | else: 36 | sys.stdout = wrapped_stdout = \ 37 | wrap_stream(orig_stdout, convert, strip, autoreset, wrap) 38 | if sys.stderr is None: 39 | wrapped_stderr = None 40 | else: 41 | sys.stderr = wrapped_stderr = \ 42 | wrap_stream(orig_stderr, convert, strip, autoreset, wrap) 43 | 44 | global atexit_done 45 | if not atexit_done: 46 | atexit.register(reset_all) 47 | atexit_done = True 48 | 49 | 50 | def deinit(): 51 | if orig_stdout is not None: 52 | sys.stdout = orig_stdout 53 | if orig_stderr is not None: 54 | sys.stderr = orig_stderr 55 | 56 | 57 | @contextlib.contextmanager 58 | def colorama_text(*args, **kwargs): 59 | init(*args, **kwargs) 60 | try: 61 | yield 62 | finally: 63 | deinit() 64 | 65 | 66 | def reinit(): 67 | if wrapped_stdout is not None: 68 | sys.stdout = wrapped_stdout 69 | if wrapped_stderr is not None: 70 | sys.stderr = wrapped_stderr 71 | 72 | 73 | def wrap_stream(stream, convert, strip, autoreset, wrap): 74 | if wrap: 75 | wrapper = AnsiToWin32(stream, 76 | convert=convert, strip=strip, autoreset=autoreset) 77 | if wrapper.should_wrap(): 78 | stream = wrapper.stream 79 | return stream 80 | 81 | 82 | -------------------------------------------------------------------------------- /thirdparty_libs/colorama/win32.py: -------------------------------------------------------------------------------- 1 | # Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. 2 | 3 | # from winbase.h 4 | STDOUT = -11 5 | STDERR = -12 6 | 7 | try: 8 | import ctypes 9 | from ctypes import LibraryLoader 10 | windll = LibraryLoader(ctypes.WinDLL) 11 | from ctypes import wintypes 12 | except (AttributeError, ImportError): 13 | windll = None 14 | SetConsoleTextAttribute = lambda *_: None 15 | winapi_test = lambda *_: None 16 | else: 17 | from ctypes import byref, Structure, c_char, POINTER 18 | 19 | COORD = wintypes._COORD 20 | 21 | class CONSOLE_SCREEN_BUFFER_INFO(Structure): 22 | """struct in wincon.h.""" 23 | _fields_ = [ 24 | ("dwSize", COORD), 25 | ("dwCursorPosition", COORD), 26 | ("wAttributes", wintypes.WORD), 27 | ("srWindow", wintypes.SMALL_RECT), 28 | ("dwMaximumWindowSize", COORD), 29 | ] 30 | def __str__(self): 31 | return '(%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d)' % ( 32 | self.dwSize.Y, self.dwSize.X 33 | , self.dwCursorPosition.Y, self.dwCursorPosition.X 34 | , self.wAttributes 35 | , self.srWindow.Top, self.srWindow.Left, self.srWindow.Bottom, self.srWindow.Right 36 | , self.dwMaximumWindowSize.Y, self.dwMaximumWindowSize.X 37 | ) 38 | 39 | _GetStdHandle = windll.kernel32.GetStdHandle 40 | _GetStdHandle.argtypes = [ 41 | wintypes.DWORD, 42 | ] 43 | _GetStdHandle.restype = wintypes.HANDLE 44 | 45 | _GetConsoleScreenBufferInfo = windll.kernel32.GetConsoleScreenBufferInfo 46 | _GetConsoleScreenBufferInfo.argtypes = [ 47 | wintypes.HANDLE, 48 | POINTER(CONSOLE_SCREEN_BUFFER_INFO), 49 | ] 50 | _GetConsoleScreenBufferInfo.restype = wintypes.BOOL 51 | 52 | _SetConsoleTextAttribute = windll.kernel32.SetConsoleTextAttribute 53 | _SetConsoleTextAttribute.argtypes = [ 54 | wintypes.HANDLE, 55 | wintypes.WORD, 56 | ] 57 | _SetConsoleTextAttribute.restype = wintypes.BOOL 58 | 59 | _SetConsoleCursorPosition = windll.kernel32.SetConsoleCursorPosition 60 | _SetConsoleCursorPosition.argtypes = [ 61 | wintypes.HANDLE, 62 | COORD, 63 | ] 64 | _SetConsoleCursorPosition.restype = wintypes.BOOL 65 | 66 | _FillConsoleOutputCharacterA = windll.kernel32.FillConsoleOutputCharacterA 67 | _FillConsoleOutputCharacterA.argtypes = [ 68 | wintypes.HANDLE, 69 | c_char, 70 | wintypes.DWORD, 71 | COORD, 72 | POINTER(wintypes.DWORD), 73 | ] 74 | _FillConsoleOutputCharacterA.restype = wintypes.BOOL 75 | 76 | _FillConsoleOutputAttribute = windll.kernel32.FillConsoleOutputAttribute 77 | _FillConsoleOutputAttribute.argtypes = [ 78 | wintypes.HANDLE, 79 | wintypes.WORD, 80 | wintypes.DWORD, 81 | COORD, 82 | POINTER(wintypes.DWORD), 83 | ] 84 | _FillConsoleOutputAttribute.restype = wintypes.BOOL 85 | 86 | _SetConsoleTitleW = windll.kernel32.SetConsoleTitleA 87 | _SetConsoleTitleW.argtypes = [ 88 | wintypes.LPCSTR 89 | ] 90 | _SetConsoleTitleW.restype = wintypes.BOOL 91 | 92 | handles = { 93 | STDOUT: _GetStdHandle(STDOUT), 94 | STDERR: _GetStdHandle(STDERR), 95 | } 96 | 97 | def winapi_test(): 98 | handle = handles[STDOUT] 99 | csbi = CONSOLE_SCREEN_BUFFER_INFO() 100 | success = _GetConsoleScreenBufferInfo( 101 | handle, byref(csbi)) 102 | return bool(success) 103 | 104 | def GetConsoleScreenBufferInfo(stream_id=STDOUT): 105 | handle = handles[stream_id] 106 | csbi = CONSOLE_SCREEN_BUFFER_INFO() 107 | success = _GetConsoleScreenBufferInfo( 108 | handle, byref(csbi)) 109 | return csbi 110 | 111 | def SetConsoleTextAttribute(stream_id, attrs): 112 | handle = handles[stream_id] 113 | return _SetConsoleTextAttribute(handle, attrs) 114 | 115 | def SetConsoleCursorPosition(stream_id, position, adjust=True): 116 | position = COORD(*position) 117 | # If the position is out of range, do nothing. 118 | if position.Y <= 0 or position.X <= 0: 119 | return 120 | # Adjust for Windows' SetConsoleCursorPosition: 121 | # 1. being 0-based, while ANSI is 1-based. 122 | # 2. expecting (x,y), while ANSI uses (y,x). 123 | adjusted_position = COORD(position.Y - 1, position.X - 1) 124 | if adjust: 125 | # Adjust for viewport's scroll position 126 | sr = GetConsoleScreenBufferInfo(STDOUT).srWindow 127 | adjusted_position.Y += sr.Top 128 | adjusted_position.X += sr.Left 129 | # Resume normal processing 130 | handle = handles[stream_id] 131 | return _SetConsoleCursorPosition(handle, adjusted_position) 132 | 133 | def FillConsoleOutputCharacter(stream_id, char, length, start): 134 | handle = handles[stream_id] 135 | char = c_char(char.encode()) 136 | length = wintypes.DWORD(length) 137 | num_written = wintypes.DWORD(0) 138 | # Note that this is hard-coded for ANSI (vs wide) bytes. 139 | success = _FillConsoleOutputCharacterA( 140 | handle, char, length, start, byref(num_written)) 141 | return num_written.value 142 | 143 | def FillConsoleOutputAttribute(stream_id, attr, length, start): 144 | ''' FillConsoleOutputAttribute( hConsole, csbi.wAttributes, dwConSize, coordScreen, &cCharsWritten )''' 145 | handle = handles[stream_id] 146 | attribute = wintypes.WORD(attr) 147 | length = wintypes.DWORD(length) 148 | num_written = wintypes.DWORD(0) 149 | # Note that this is hard-coded for ANSI (vs wide) bytes. 150 | return _FillConsoleOutputAttribute( 151 | handle, attribute, length, start, byref(num_written)) 152 | 153 | def SetConsoleTitle(title): 154 | return _SetConsoleTitleW(title) 155 | -------------------------------------------------------------------------------- /thirdparty_libs/requests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # __ 4 | # /__) _ _ _ _ _/ _ 5 | # / ( (- (/ (/ (- _) / _) 6 | # / 7 | 8 | """ 9 | requests HTTP library 10 | ~~~~~~~~~~~~~~~~~~~~~ 11 | 12 | Requests is an HTTP library, written in Python, for human beings. Basic GET 13 | usage: 14 | 15 | >>> import requests 16 | >>> r = requests.get('http://python.org') 17 | >>> r.status_code 18 | 200 19 | >>> 'Python is a programming language' in r.content 20 | True 21 | 22 | ... or POST: 23 | 24 | >>> payload = dict(key1='value1', key2='value2') 25 | >>> r = requests.post("http://httpbin.org/post", data=payload) 26 | >>> print r.text 27 | { 28 | ... 29 | "form": { 30 | "key2": "value2", 31 | "key1": "value1" 32 | }, 33 | ... 34 | } 35 | 36 | The other HTTP methods are supported - see `requests.api`. Full documentation 37 | is at . 38 | 39 | :copyright: (c) 2013 by Kenneth Reitz. 40 | :license: Apache 2.0, see LICENSE for more details. 41 | 42 | """ 43 | 44 | __title__ = 'requests' 45 | __version__ = '1.2.3' 46 | __build__ = 0x010203 47 | __author__ = 'Kenneth Reitz' 48 | __license__ = 'Apache 2.0' 49 | __copyright__ = 'Copyright 2013 Kenneth Reitz' 50 | 51 | # Attempt to enable urllib3's SNI support, if possible 52 | try: 53 | from requests.packages.urllib3.contrib import pyopenssl 54 | pyopenssl.inject_into_urllib3() 55 | except ImportError: 56 | pass 57 | 58 | from . import utils 59 | from .models import Request, Response, PreparedRequest 60 | from .api import request, get, head, post, patch, put, delete, options 61 | from .sessions import session, Session 62 | from .status_codes import codes 63 | from .exceptions import ( 64 | RequestException, Timeout, URLRequired, 65 | TooManyRedirects, HTTPError, ConnectionError 66 | ) 67 | 68 | # Set default logging handler to avoid "No handler found" warnings. 69 | import logging 70 | try: # Python 2.7+ 71 | from logging import NullHandler 72 | except ImportError: 73 | class NullHandler(logging.Handler): 74 | def emit(self, record): 75 | pass 76 | 77 | logging.getLogger(__name__).addHandler(NullHandler()) 78 | -------------------------------------------------------------------------------- /thirdparty_libs/requests/api.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | requests.api 5 | ~~~~~~~~~~~~ 6 | 7 | This module implements the Requests API. 8 | 9 | :copyright: (c) 2012 by Kenneth Reitz. 10 | :license: Apache2, see LICENSE for more details. 11 | 12 | """ 13 | 14 | from . import sessions 15 | 16 | 17 | def request(method, url, **kwargs): 18 | """Constructs and sends a :class:`Request `. 19 | Returns :class:`Response ` object. 20 | 21 | :param method: method for the new :class:`Request` object. 22 | :param url: URL for the new :class:`Request` object. 23 | :param params: (optional) Dictionary or bytes to be sent in the query string for the :class:`Request`. 24 | :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. 25 | :param headers: (optional) Dictionary of HTTP Headers to send with the :class:`Request`. 26 | :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`. 27 | :param files: (optional) Dictionary of 'name': file-like-objects (or {'name': ('filename', fileobj)}) for multipart encoding upload. 28 | :param auth: (optional) Auth tuple to enable Basic/Digest/Custom HTTP Auth. 29 | :param timeout: (optional) Float describing the timeout of the request. 30 | :param allow_redirects: (optional) Boolean. Set to True if POST/PUT/DELETE redirect following is allowed. 31 | :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy. 32 | :param verify: (optional) if ``True``, the SSL cert will be verified. A CA_BUNDLE path can also be provided. 33 | :param stream: (optional) if ``False``, the response content will be immediately downloaded. 34 | :param cert: (optional) if String, path to ssl client cert file (.pem). If Tuple, ('cert', 'key') pair. 35 | 36 | Usage:: 37 | 38 | >>> import requests 39 | >>> req = requests.request('GET', 'http://httpbin.org/get') 40 | 41 | """ 42 | 43 | session = sessions.Session() 44 | return session.request(method=method, url=url, **kwargs) 45 | 46 | 47 | def get(url, **kwargs): 48 | """Sends a GET request. Returns :class:`Response` object. 49 | 50 | :param url: URL for the new :class:`Request` object. 51 | :param \*\*kwargs: Optional arguments that ``request`` takes. 52 | """ 53 | 54 | kwargs.setdefault('allow_redirects', True) 55 | return request('get', url, **kwargs) 56 | 57 | 58 | def options(url, **kwargs): 59 | """Sends a OPTIONS request. Returns :class:`Response` object. 60 | 61 | :param url: URL for the new :class:`Request` object. 62 | :param \*\*kwargs: Optional arguments that ``request`` takes. 63 | """ 64 | 65 | kwargs.setdefault('allow_redirects', True) 66 | return request('options', url, **kwargs) 67 | 68 | 69 | def head(url, **kwargs): 70 | """Sends a HEAD request. Returns :class:`Response` object. 71 | 72 | :param url: URL for the new :class:`Request` object. 73 | :param \*\*kwargs: Optional arguments that ``request`` takes. 74 | """ 75 | 76 | kwargs.setdefault('allow_redirects', False) 77 | return request('head', url, **kwargs) 78 | 79 | 80 | def post(url, data=None, **kwargs): 81 | """Sends a POST request. Returns :class:`Response` object. 82 | 83 | :param url: URL for the new :class:`Request` object. 84 | :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. 85 | :param \*\*kwargs: Optional arguments that ``request`` takes. 86 | """ 87 | 88 | return request('post', url, data=data, **kwargs) 89 | 90 | 91 | def put(url, data=None, **kwargs): 92 | """Sends a PUT request. Returns :class:`Response` object. 93 | 94 | :param url: URL for the new :class:`Request` object. 95 | :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. 96 | :param \*\*kwargs: Optional arguments that ``request`` takes. 97 | """ 98 | 99 | return request('put', url, data=data, **kwargs) 100 | 101 | 102 | def patch(url, data=None, **kwargs): 103 | """Sends a PATCH request. Returns :class:`Response` object. 104 | 105 | :param url: URL for the new :class:`Request` object. 106 | :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`. 107 | :param \*\*kwargs: Optional arguments that ``request`` takes. 108 | """ 109 | 110 | return request('patch', url, data=data, **kwargs) 111 | 112 | 113 | def delete(url, **kwargs): 114 | """Sends a DELETE request. Returns :class:`Response` object. 115 | 116 | :param url: URL for the new :class:`Request` object. 117 | :param \*\*kwargs: Optional arguments that ``request`` takes. 118 | """ 119 | 120 | return request('delete', url, **kwargs) 121 | -------------------------------------------------------------------------------- /thirdparty_libs/requests/certs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | certs.py 6 | ~~~~~~~~ 7 | 8 | This module returns the preferred default CA certificate bundle. 9 | 10 | If you are packaging Requests, e.g., for a Linux distribution or a managed 11 | environment, you can change the definition of where() to return a separately 12 | packaged CA bundle. 13 | """ 14 | 15 | import os.path 16 | 17 | 18 | def where(): 19 | """Return the preferred certificate bundle.""" 20 | # vendored bundle inside Requests 21 | return os.path.join(os.path.dirname(__file__), 'cacert.pem') 22 | 23 | if __name__ == '__main__': 24 | print(where()) 25 | -------------------------------------------------------------------------------- /thirdparty_libs/requests/compat.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | pythoncompat 5 | """ 6 | 7 | from .packages import charade as chardet 8 | 9 | import sys 10 | 11 | # ------- 12 | # Pythons 13 | # ------- 14 | 15 | # Syntax sugar. 16 | _ver = sys.version_info 17 | 18 | #: Python 2.x? 19 | is_py2 = (_ver[0] == 2) 20 | 21 | #: Python 3.x? 22 | is_py3 = (_ver[0] == 3) 23 | 24 | #: Python 3.0.x 25 | is_py30 = (is_py3 and _ver[1] == 0) 26 | 27 | #: Python 3.1.x 28 | is_py31 = (is_py3 and _ver[1] == 1) 29 | 30 | #: Python 3.2.x 31 | is_py32 = (is_py3 and _ver[1] == 2) 32 | 33 | #: Python 3.3.x 34 | is_py33 = (is_py3 and _ver[1] == 3) 35 | 36 | #: Python 3.4.x 37 | is_py34 = (is_py3 and _ver[1] == 4) 38 | 39 | #: Python 2.7.x 40 | is_py27 = (is_py2 and _ver[1] == 7) 41 | 42 | #: Python 2.6.x 43 | is_py26 = (is_py2 and _ver[1] == 6) 44 | 45 | #: Python 2.5.x 46 | is_py25 = (is_py2 and _ver[1] == 5) 47 | 48 | #: Python 2.4.x 49 | is_py24 = (is_py2 and _ver[1] == 4) # I'm assuming this is not by choice. 50 | 51 | 52 | # --------- 53 | # Platforms 54 | # --------- 55 | 56 | 57 | # Syntax sugar. 58 | _ver = sys.version.lower() 59 | 60 | is_pypy = ('pypy' in _ver) 61 | is_jython = ('jython' in _ver) 62 | is_ironpython = ('iron' in _ver) 63 | 64 | # Assume CPython, if nothing else. 65 | is_cpython = not any((is_pypy, is_jython, is_ironpython)) 66 | 67 | # Windows-based system. 68 | is_windows = 'win32' in str(sys.platform).lower() 69 | 70 | # Standard Linux 2+ system. 71 | is_linux = ('linux' in str(sys.platform).lower()) 72 | is_osx = ('darwin' in str(sys.platform).lower()) 73 | is_hpux = ('hpux' in str(sys.platform).lower()) # Complete guess. 74 | is_solaris = ('solar==' in str(sys.platform).lower()) # Complete guess. 75 | 76 | try: 77 | import simplejson as json 78 | except ImportError: 79 | import json 80 | 81 | # --------- 82 | # Specifics 83 | # --------- 84 | 85 | if is_py2: 86 | from urllib import quote, unquote, quote_plus, unquote_plus, urlencode, getproxies, proxy_bypass 87 | from urlparse import urlparse, urlunparse, urljoin, urlsplit, urldefrag 88 | from urllib2 import parse_http_list 89 | import cookielib 90 | from Cookie import Morsel 91 | from StringIO import StringIO 92 | from .packages.urllib3.packages.ordered_dict import OrderedDict 93 | from httplib import IncompleteRead 94 | 95 | builtin_str = str 96 | bytes = str 97 | str = unicode 98 | basestring = basestring 99 | numeric_types = (int, long, float) 100 | 101 | 102 | elif is_py3: 103 | from urllib.parse import urlparse, urlunparse, urljoin, urlsplit, urlencode, quote, unquote, quote_plus, unquote_plus, urldefrag 104 | from urllib.request import parse_http_list, getproxies, proxy_bypass 105 | from http import cookiejar as cookielib 106 | from http.cookies import Morsel 107 | from io import StringIO 108 | from collections import OrderedDict 109 | from http.client import IncompleteRead 110 | 111 | builtin_str = str 112 | str = str 113 | bytes = bytes 114 | basestring = (str, bytes) 115 | numeric_types = (int, float) 116 | -------------------------------------------------------------------------------- /thirdparty_libs/requests/exceptions.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | requests.exceptions 5 | ~~~~~~~~~~~~~~~~~~~ 6 | 7 | This module contains the set of Requests' exceptions. 8 | 9 | """ 10 | 11 | 12 | class RequestException(RuntimeError): 13 | """There was an ambiguous exception that occurred while handling your 14 | request.""" 15 | 16 | 17 | class HTTPError(RequestException): 18 | """An HTTP error occurred.""" 19 | 20 | def __init__(self, *args, **kwargs): 21 | """ Initializes HTTPError with optional `response` object. """ 22 | self.response = kwargs.pop('response', None) 23 | super(HTTPError, self).__init__(*args, **kwargs) 24 | 25 | 26 | class ConnectionError(RequestException): 27 | """A Connection error occurred.""" 28 | 29 | 30 | class SSLError(ConnectionError): 31 | """An SSL error occurred.""" 32 | 33 | 34 | class Timeout(RequestException): 35 | """The request timed out.""" 36 | 37 | 38 | class URLRequired(RequestException): 39 | """A valid URL is required to make a request.""" 40 | 41 | 42 | class TooManyRedirects(RequestException): 43 | """Too many redirects.""" 44 | 45 | 46 | class MissingSchema(RequestException, ValueError): 47 | """The URL schema (e.g. http or https) is missing.""" 48 | 49 | 50 | class InvalidSchema(RequestException, ValueError): 51 | """See defaults.py for valid schemas.""" 52 | 53 | 54 | class InvalidURL(RequestException, ValueError): 55 | """ The URL provided was somehow invalid. """ 56 | 57 | 58 | class ChunkedEncodingError(RequestException): 59 | """The server declared chunked encoding but sent an invalid chunk.""" 60 | -------------------------------------------------------------------------------- /thirdparty_libs/requests/hooks.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | requests.hooks 5 | ~~~~~~~~~~~~~~ 6 | 7 | This module provides the capabilities for the Requests hooks system. 8 | 9 | Available hooks: 10 | 11 | ``response``: 12 | The response generated from a Request. 13 | 14 | """ 15 | 16 | 17 | HOOKS = ['response'] 18 | 19 | 20 | def default_hooks(): 21 | hooks = {} 22 | for event in HOOKS: 23 | hooks[event] = [] 24 | return hooks 25 | 26 | # TODO: response is the only one 27 | 28 | 29 | def dispatch_hook(key, hooks, hook_data, **kwargs): 30 | """Dispatches a hook dictionary on a given piece of data.""" 31 | 32 | hooks = hooks or dict() 33 | 34 | if key in hooks: 35 | hooks = hooks.get(key) 36 | 37 | if hasattr(hooks, '__call__'): 38 | hooks = [hooks] 39 | 40 | for hook in hooks: 41 | _hook_data = hook(hook_data, **kwargs) 42 | if _hook_data is not None: 43 | hook_data = _hook_data 44 | 45 | return hook_data 46 | -------------------------------------------------------------------------------- /thirdparty_libs/requests/packages/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from . import urllib3 4 | -------------------------------------------------------------------------------- /thirdparty_libs/requests/packages/charade/__init__.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # This library is free software; you can redistribute it and/or 3 | # modify it under the terms of the GNU Lesser General Public 4 | # License as published by the Free Software Foundation; either 5 | # version 2.1 of the License, or (at your option) any later version. 6 | # 7 | # This library is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 10 | # Lesser General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU Lesser General Public 13 | # License along with this library; if not, write to the Free Software 14 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 15 | # 02110-1301 USA 16 | ######################### END LICENSE BLOCK ######################### 17 | 18 | __version__ = "1.0.3" 19 | from sys import version_info 20 | 21 | 22 | def detect(aBuf): 23 | if ((version_info < (3, 0) and isinstance(aBuf, unicode)) or 24 | (version_info >= (3, 0) and not isinstance(aBuf, bytes))): 25 | raise ValueError('Expected a bytes object, not a unicode object') 26 | 27 | from . import universaldetector 28 | u = universaldetector.UniversalDetector() 29 | u.reset() 30 | u.feed(aBuf) 31 | u.close() 32 | return u.result 33 | -------------------------------------------------------------------------------- /thirdparty_libs/requests/packages/charade/big5prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Communicator client code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import Big5DistributionAnalysis 31 | from .mbcssm import Big5SMModel 32 | 33 | 34 | class Big5Prober(MultiByteCharSetProber): 35 | def __init__(self): 36 | MultiByteCharSetProber.__init__(self) 37 | self._mCodingSM = CodingStateMachine(Big5SMModel) 38 | self._mDistributionAnalyzer = Big5DistributionAnalysis() 39 | self.reset() 40 | 41 | def get_charset_name(self): 42 | return "Big5" 43 | -------------------------------------------------------------------------------- /thirdparty_libs/requests/packages/charade/charsetgroupprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Communicator client code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from . import constants 29 | import sys 30 | from .charsetprober import CharSetProber 31 | 32 | 33 | class CharSetGroupProber(CharSetProber): 34 | def __init__(self): 35 | CharSetProber.__init__(self) 36 | self._mActiveNum = 0 37 | self._mProbers = [] 38 | self._mBestGuessProber = None 39 | 40 | def reset(self): 41 | CharSetProber.reset(self) 42 | self._mActiveNum = 0 43 | for prober in self._mProbers: 44 | if prober: 45 | prober.reset() 46 | prober.active = True 47 | self._mActiveNum += 1 48 | self._mBestGuessProber = None 49 | 50 | def get_charset_name(self): 51 | if not self._mBestGuessProber: 52 | self.get_confidence() 53 | if not self._mBestGuessProber: 54 | return None 55 | # self._mBestGuessProber = self._mProbers[0] 56 | return self._mBestGuessProber.get_charset_name() 57 | 58 | def feed(self, aBuf): 59 | for prober in self._mProbers: 60 | if not prober: 61 | continue 62 | if not prober.active: 63 | continue 64 | st = prober.feed(aBuf) 65 | if not st: 66 | continue 67 | if st == constants.eFoundIt: 68 | self._mBestGuessProber = prober 69 | return self.get_state() 70 | elif st == constants.eNotMe: 71 | prober.active = False 72 | self._mActiveNum -= 1 73 | if self._mActiveNum <= 0: 74 | self._mState = constants.eNotMe 75 | return self.get_state() 76 | return self.get_state() 77 | 78 | def get_confidence(self): 79 | st = self.get_state() 80 | if st == constants.eFoundIt: 81 | return 0.99 82 | elif st == constants.eNotMe: 83 | return 0.01 84 | bestConf = 0.0 85 | self._mBestGuessProber = None 86 | for prober in self._mProbers: 87 | if not prober: 88 | continue 89 | if not prober.active: 90 | if constants._debug: 91 | sys.stderr.write(prober.get_charset_name() 92 | + ' not active\n') 93 | continue 94 | cf = prober.get_confidence() 95 | if constants._debug: 96 | sys.stderr.write('%s confidence = %s\n' % 97 | (prober.get_charset_name(), cf)) 98 | if bestConf < cf: 99 | bestConf = cf 100 | self._mBestGuessProber = prober 101 | if not self._mBestGuessProber: 102 | return 0.0 103 | return bestConf 104 | # else: 105 | # self._mBestGuessProber = self._mProbers[0] 106 | # return self._mBestGuessProber.get_confidence() 107 | -------------------------------------------------------------------------------- /thirdparty_libs/requests/packages/charade/charsetprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # 13 | # This library is free software; you can redistribute it and/or 14 | # modify it under the terms of the GNU Lesser General Public 15 | # License as published by the Free Software Foundation; either 16 | # version 2.1 of the License, or (at your option) any later version. 17 | # 18 | # This library is distributed in the hope that it will be useful, 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 | # Lesser General Public License for more details. 22 | # 23 | # You should have received a copy of the GNU Lesser General Public 24 | # License along with this library; if not, write to the Free Software 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 26 | # 02110-1301 USA 27 | ######################### END LICENSE BLOCK ######################### 28 | 29 | from . import constants 30 | import re 31 | 32 | 33 | class CharSetProber: 34 | def __init__(self): 35 | pass 36 | 37 | def reset(self): 38 | self._mState = constants.eDetecting 39 | 40 | def get_charset_name(self): 41 | return None 42 | 43 | def feed(self, aBuf): 44 | pass 45 | 46 | def get_state(self): 47 | return self._mState 48 | 49 | def get_confidence(self): 50 | return 0.0 51 | 52 | def filter_high_bit_only(self, aBuf): 53 | aBuf = re.sub(b'([\x00-\x7F])+', b' ', aBuf) 54 | return aBuf 55 | 56 | def filter_without_english_letters(self, aBuf): 57 | aBuf = re.sub(b'([A-Za-z])+', b' ', aBuf) 58 | return aBuf 59 | 60 | def filter_with_english_letters(self, aBuf): 61 | # TODO 62 | return aBuf 63 | -------------------------------------------------------------------------------- /thirdparty_libs/requests/packages/charade/codingstatemachine.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .constants import eStart 29 | from .compat import wrap_ord 30 | 31 | 32 | class CodingStateMachine: 33 | def __init__(self, sm): 34 | self._mModel = sm 35 | self._mCurrentBytePos = 0 36 | self._mCurrentCharLen = 0 37 | self.reset() 38 | 39 | def reset(self): 40 | self._mCurrentState = eStart 41 | 42 | def next_state(self, c): 43 | # for each byte we get its class 44 | # if it is first byte, we also get byte length 45 | # PY3K: aBuf is a byte stream, so c is an int, not a byte 46 | byteCls = self._mModel['classTable'][wrap_ord(c)] 47 | if self._mCurrentState == eStart: 48 | self._mCurrentBytePos = 0 49 | self._mCurrentCharLen = self._mModel['charLenTable'][byteCls] 50 | # from byte's class and stateTable, we get its next state 51 | curr_state = (self._mCurrentState * self._mModel['classFactor'] 52 | + byteCls) 53 | self._mCurrentState = self._mModel['stateTable'][curr_state] 54 | self._mCurrentBytePos += 1 55 | return self._mCurrentState 56 | 57 | def get_current_charlen(self): 58 | return self._mCurrentCharLen 59 | 60 | def get_coding_state_machine(self): 61 | return self._mModel['name'] 62 | -------------------------------------------------------------------------------- /thirdparty_libs/requests/packages/charade/compat.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # Contributor(s): 3 | # Ian Cordasco - port to Python 4 | # 5 | # This library is free software; you can redistribute it and/or 6 | # modify it under the terms of the GNU Lesser General Public 7 | # License as published by the Free Software Foundation; either 8 | # version 2.1 of the License, or (at your option) any later version. 9 | # 10 | # This library is distributed in the hope that it will be useful, 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 | # Lesser General Public License for more details. 14 | # 15 | # You should have received a copy of the GNU Lesser General Public 16 | # License along with this library; if not, write to the Free Software 17 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 18 | # 02110-1301 USA 19 | ######################### END LICENSE BLOCK ######################### 20 | 21 | import sys 22 | 23 | 24 | if sys.version_info < (3, 0): 25 | base_str = (str, unicode) 26 | else: 27 | base_str = (bytes, str) 28 | 29 | 30 | def wrap_ord(a): 31 | if sys.version_info < (3, 0) and isinstance(a, base_str): 32 | return ord(a) 33 | else: 34 | return a 35 | -------------------------------------------------------------------------------- /thirdparty_libs/requests/packages/charade/constants.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # 13 | # This library is free software; you can redistribute it and/or 14 | # modify it under the terms of the GNU Lesser General Public 15 | # License as published by the Free Software Foundation; either 16 | # version 2.1 of the License, or (at your option) any later version. 17 | # 18 | # This library is distributed in the hope that it will be useful, 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 | # Lesser General Public License for more details. 22 | # 23 | # You should have received a copy of the GNU Lesser General Public 24 | # License along with this library; if not, write to the Free Software 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 26 | # 02110-1301 USA 27 | ######################### END LICENSE BLOCK ######################### 28 | 29 | _debug = 0 30 | 31 | eDetecting = 0 32 | eFoundIt = 1 33 | eNotMe = 2 34 | 35 | eStart = 0 36 | eError = 1 37 | eItsMe = 2 38 | 39 | SHORTCUT_THRESHOLD = 0.95 40 | -------------------------------------------------------------------------------- /thirdparty_libs/requests/packages/charade/cp949prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import EUCKRDistributionAnalysis 31 | from .mbcssm import CP949SMModel 32 | 33 | 34 | class CP949Prober(MultiByteCharSetProber): 35 | def __init__(self): 36 | MultiByteCharSetProber.__init__(self) 37 | self._mCodingSM = CodingStateMachine(CP949SMModel) 38 | # NOTE: CP949 is a superset of EUC-KR, so the distribution should be 39 | # not different. 40 | self._mDistributionAnalyzer = EUCKRDistributionAnalysis() 41 | self.reset() 42 | 43 | def get_charset_name(self): 44 | return "CP949" 45 | -------------------------------------------------------------------------------- /thirdparty_libs/requests/packages/charade/escprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from . import constants 29 | from .escsm import (HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel, 30 | ISO2022KRSMModel) 31 | from .charsetprober import CharSetProber 32 | from .codingstatemachine import CodingStateMachine 33 | from .compat import wrap_ord 34 | 35 | 36 | class EscCharSetProber(CharSetProber): 37 | def __init__(self): 38 | CharSetProber.__init__(self) 39 | self._mCodingSM = [ 40 | CodingStateMachine(HZSMModel), 41 | CodingStateMachine(ISO2022CNSMModel), 42 | CodingStateMachine(ISO2022JPSMModel), 43 | CodingStateMachine(ISO2022KRSMModel) 44 | ] 45 | self.reset() 46 | 47 | def reset(self): 48 | CharSetProber.reset(self) 49 | for codingSM in self._mCodingSM: 50 | if not codingSM: 51 | continue 52 | codingSM.active = True 53 | codingSM.reset() 54 | self._mActiveSM = len(self._mCodingSM) 55 | self._mDetectedCharset = None 56 | 57 | def get_charset_name(self): 58 | return self._mDetectedCharset 59 | 60 | def get_confidence(self): 61 | if self._mDetectedCharset: 62 | return 0.99 63 | else: 64 | return 0.00 65 | 66 | def feed(self, aBuf): 67 | for c in aBuf: 68 | # PY3K: aBuf is a byte array, so c is an int, not a byte 69 | for codingSM in self._mCodingSM: 70 | if not codingSM: 71 | continue 72 | if not codingSM.active: 73 | continue 74 | codingState = codingSM.next_state(wrap_ord(c)) 75 | if codingState == constants.eError: 76 | codingSM.active = False 77 | self._mActiveSM -= 1 78 | if self._mActiveSM <= 0: 79 | self._mState = constants.eNotMe 80 | return self.get_state() 81 | elif codingState == constants.eItsMe: 82 | self._mState = constants.eFoundIt 83 | self._mDetectedCharset = codingSM.get_coding_state_machine() # nopep8 84 | return self.get_state() 85 | 86 | return self.get_state() 87 | -------------------------------------------------------------------------------- /thirdparty_libs/requests/packages/charade/eucjpprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | import sys 29 | from . import constants 30 | from .mbcharsetprober import MultiByteCharSetProber 31 | from .codingstatemachine import CodingStateMachine 32 | from .chardistribution import EUCJPDistributionAnalysis 33 | from .jpcntx import EUCJPContextAnalysis 34 | from .mbcssm import EUCJPSMModel 35 | 36 | 37 | class EUCJPProber(MultiByteCharSetProber): 38 | def __init__(self): 39 | MultiByteCharSetProber.__init__(self) 40 | self._mCodingSM = CodingStateMachine(EUCJPSMModel) 41 | self._mDistributionAnalyzer = EUCJPDistributionAnalysis() 42 | self._mContextAnalyzer = EUCJPContextAnalysis() 43 | self.reset() 44 | 45 | def reset(self): 46 | MultiByteCharSetProber.reset(self) 47 | self._mContextAnalyzer.reset() 48 | 49 | def get_charset_name(self): 50 | return "EUC-JP" 51 | 52 | def feed(self, aBuf): 53 | aLen = len(aBuf) 54 | for i in range(0, aLen): 55 | # PY3K: aBuf is a byte array, so aBuf[i] is an int, not a byte 56 | codingState = self._mCodingSM.next_state(aBuf[i]) 57 | if codingState == constants.eError: 58 | if constants._debug: 59 | sys.stderr.write(self.get_charset_name() 60 | + ' prober hit error at byte ' + str(i) 61 | + '\n') 62 | self._mState = constants.eNotMe 63 | break 64 | elif codingState == constants.eItsMe: 65 | self._mState = constants.eFoundIt 66 | break 67 | elif codingState == constants.eStart: 68 | charLen = self._mCodingSM.get_current_charlen() 69 | if i == 0: 70 | self._mLastChar[1] = aBuf[0] 71 | self._mContextAnalyzer.feed(self._mLastChar, charLen) 72 | self._mDistributionAnalyzer.feed(self._mLastChar, charLen) 73 | else: 74 | self._mContextAnalyzer.feed(aBuf[i - 1:i + 1], charLen) 75 | self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1], 76 | charLen) 77 | 78 | self._mLastChar[0] = aBuf[aLen - 1] 79 | 80 | if self.get_state() == constants.eDetecting: 81 | if (self._mContextAnalyzer.got_enough_data() and 82 | (self.get_confidence() > constants.SHORTCUT_THRESHOLD)): 83 | self._mState = constants.eFoundIt 84 | 85 | return self.get_state() 86 | 87 | def get_confidence(self): 88 | contxtCf = self._mContextAnalyzer.get_confidence() 89 | distribCf = self._mDistributionAnalyzer.get_confidence() 90 | return max(contxtCf, distribCf) 91 | -------------------------------------------------------------------------------- /thirdparty_libs/requests/packages/charade/euckrprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import EUCKRDistributionAnalysis 31 | from .mbcssm import EUCKRSMModel 32 | 33 | 34 | class EUCKRProber(MultiByteCharSetProber): 35 | def __init__(self): 36 | MultiByteCharSetProber.__init__(self) 37 | self._mCodingSM = CodingStateMachine(EUCKRSMModel) 38 | self._mDistributionAnalyzer = EUCKRDistributionAnalysis() 39 | self.reset() 40 | 41 | def get_charset_name(self): 42 | return "EUC-KR" 43 | -------------------------------------------------------------------------------- /thirdparty_libs/requests/packages/charade/euctwprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import EUCTWDistributionAnalysis 31 | from .mbcssm import EUCTWSMModel 32 | 33 | class EUCTWProber(MultiByteCharSetProber): 34 | def __init__(self): 35 | MultiByteCharSetProber.__init__(self) 36 | self._mCodingSM = CodingStateMachine(EUCTWSMModel) 37 | self._mDistributionAnalyzer = EUCTWDistributionAnalysis() 38 | self.reset() 39 | 40 | def get_charset_name(self): 41 | return "EUC-TW" 42 | -------------------------------------------------------------------------------- /thirdparty_libs/requests/packages/charade/gb2312prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import GB2312DistributionAnalysis 31 | from .mbcssm import GB2312SMModel 32 | 33 | class GB2312Prober(MultiByteCharSetProber): 34 | def __init__(self): 35 | MultiByteCharSetProber.__init__(self) 36 | self._mCodingSM = CodingStateMachine(GB2312SMModel) 37 | self._mDistributionAnalyzer = GB2312DistributionAnalysis() 38 | self.reset() 39 | 40 | def get_charset_name(self): 41 | return "GB2312" 42 | -------------------------------------------------------------------------------- /thirdparty_libs/requests/packages/charade/latin1prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # 13 | # This library is free software; you can redistribute it and/or 14 | # modify it under the terms of the GNU Lesser General Public 15 | # License as published by the Free Software Foundation; either 16 | # version 2.1 of the License, or (at your option) any later version. 17 | # 18 | # This library is distributed in the hope that it will be useful, 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 | # Lesser General Public License for more details. 22 | # 23 | # You should have received a copy of the GNU Lesser General Public 24 | # License along with this library; if not, write to the Free Software 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 26 | # 02110-1301 USA 27 | ######################### END LICENSE BLOCK ######################### 28 | 29 | from .charsetprober import CharSetProber 30 | from .constants import eNotMe 31 | from .compat import wrap_ord 32 | 33 | FREQ_CAT_NUM = 4 34 | 35 | UDF = 0 # undefined 36 | OTH = 1 # other 37 | ASC = 2 # ascii capital letter 38 | ASS = 3 # ascii small letter 39 | ACV = 4 # accent capital vowel 40 | ACO = 5 # accent capital other 41 | ASV = 6 # accent small vowel 42 | ASO = 7 # accent small other 43 | CLASS_NUM = 8 # total classes 44 | 45 | Latin1_CharToClass = ( 46 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 00 - 07 47 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 08 - 0F 48 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 10 - 17 49 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 18 - 1F 50 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 20 - 27 51 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 28 - 2F 52 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 30 - 37 53 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 38 - 3F 54 | OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 40 - 47 55 | ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 48 - 4F 56 | ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 50 - 57 57 | ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, # 58 - 5F 58 | OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 60 - 67 59 | ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 68 - 6F 60 | ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 70 - 77 61 | ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, # 78 - 7F 62 | OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH, # 80 - 87 63 | OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF, # 88 - 8F 64 | UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 90 - 97 65 | OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO, # 98 - 9F 66 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A0 - A7 67 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A8 - AF 68 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B0 - B7 69 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B8 - BF 70 | ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO, # C0 - C7 71 | ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, # C8 - CF 72 | ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH, # D0 - D7 73 | ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO, # D8 - DF 74 | ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO, # E0 - E7 75 | ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, # E8 - EF 76 | ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH, # F0 - F7 77 | ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, # F8 - FF 78 | ) 79 | 80 | # 0 : illegal 81 | # 1 : very unlikely 82 | # 2 : normal 83 | # 3 : very likely 84 | Latin1ClassModel = ( 85 | # UDF OTH ASC ASS ACV ACO ASV ASO 86 | 0, 0, 0, 0, 0, 0, 0, 0, # UDF 87 | 0, 3, 3, 3, 3, 3, 3, 3, # OTH 88 | 0, 3, 3, 3, 3, 3, 3, 3, # ASC 89 | 0, 3, 3, 3, 1, 1, 3, 3, # ASS 90 | 0, 3, 3, 3, 1, 2, 1, 2, # ACV 91 | 0, 3, 3, 3, 3, 3, 3, 3, # ACO 92 | 0, 3, 1, 3, 1, 1, 1, 3, # ASV 93 | 0, 3, 1, 3, 1, 1, 3, 3, # ASO 94 | ) 95 | 96 | 97 | class Latin1Prober(CharSetProber): 98 | def __init__(self): 99 | CharSetProber.__init__(self) 100 | self.reset() 101 | 102 | def reset(self): 103 | self._mLastCharClass = OTH 104 | self._mFreqCounter = [0] * FREQ_CAT_NUM 105 | CharSetProber.reset(self) 106 | 107 | def get_charset_name(self): 108 | return "windows-1252" 109 | 110 | def feed(self, aBuf): 111 | aBuf = self.filter_with_english_letters(aBuf) 112 | for c in aBuf: 113 | charClass = Latin1_CharToClass[wrap_ord(c)] 114 | freq = Latin1ClassModel[(self._mLastCharClass * CLASS_NUM) 115 | + charClass] 116 | if freq == 0: 117 | self._mState = eNotMe 118 | break 119 | self._mFreqCounter[freq] += 1 120 | self._mLastCharClass = charClass 121 | 122 | return self.get_state() 123 | 124 | def get_confidence(self): 125 | if self.get_state() == eNotMe: 126 | return 0.01 127 | 128 | total = sum(self._mFreqCounter) 129 | if total < 0.01: 130 | confidence = 0.0 131 | else: 132 | confidence = ((self._mFreqCounter[3] / total) 133 | - (self._mFreqCounter[1] * 20.0 / total)) 134 | if confidence < 0.0: 135 | confidence = 0.0 136 | # lower the confidence of latin1 so that other more accurate 137 | # detector can take priority. 138 | confidence = confidence * 0.5 139 | return confidence 140 | -------------------------------------------------------------------------------- /thirdparty_libs/requests/packages/charade/mbcharsetprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # Proofpoint, Inc. 13 | # 14 | # This library is free software; you can redistribute it and/or 15 | # modify it under the terms of the GNU Lesser General Public 16 | # License as published by the Free Software Foundation; either 17 | # version 2.1 of the License, or (at your option) any later version. 18 | # 19 | # This library is distributed in the hope that it will be useful, 20 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 21 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 | # Lesser General Public License for more details. 23 | # 24 | # You should have received a copy of the GNU Lesser General Public 25 | # License along with this library; if not, write to the Free Software 26 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 27 | # 02110-1301 USA 28 | ######################### END LICENSE BLOCK ######################### 29 | 30 | import sys 31 | from . import constants 32 | from .charsetprober import CharSetProber 33 | 34 | 35 | class MultiByteCharSetProber(CharSetProber): 36 | def __init__(self): 37 | CharSetProber.__init__(self) 38 | self._mDistributionAnalyzer = None 39 | self._mCodingSM = None 40 | self._mLastChar = [0, 0] 41 | 42 | def reset(self): 43 | CharSetProber.reset(self) 44 | if self._mCodingSM: 45 | self._mCodingSM.reset() 46 | if self._mDistributionAnalyzer: 47 | self._mDistributionAnalyzer.reset() 48 | self._mLastChar = [0, 0] 49 | 50 | def get_charset_name(self): 51 | pass 52 | 53 | def feed(self, aBuf): 54 | aLen = len(aBuf) 55 | for i in range(0, aLen): 56 | codingState = self._mCodingSM.next_state(aBuf[i]) 57 | if codingState == constants.eError: 58 | if constants._debug: 59 | sys.stderr.write(self.get_charset_name() 60 | + ' prober hit error at byte ' + str(i) 61 | + '\n') 62 | self._mState = constants.eNotMe 63 | break 64 | elif codingState == constants.eItsMe: 65 | self._mState = constants.eFoundIt 66 | break 67 | elif codingState == constants.eStart: 68 | charLen = self._mCodingSM.get_current_charlen() 69 | if i == 0: 70 | self._mLastChar[1] = aBuf[0] 71 | self._mDistributionAnalyzer.feed(self._mLastChar, charLen) 72 | else: 73 | self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1], 74 | charLen) 75 | 76 | self._mLastChar[0] = aBuf[aLen - 1] 77 | 78 | if self.get_state() == constants.eDetecting: 79 | if (self._mDistributionAnalyzer.got_enough_data() and 80 | (self.get_confidence() > constants.SHORTCUT_THRESHOLD)): 81 | self._mState = constants.eFoundIt 82 | 83 | return self.get_state() 84 | 85 | def get_confidence(self): 86 | return self._mDistributionAnalyzer.get_confidence() 87 | -------------------------------------------------------------------------------- /thirdparty_libs/requests/packages/charade/mbcsgroupprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # Proofpoint, Inc. 13 | # 14 | # This library is free software; you can redistribute it and/or 15 | # modify it under the terms of the GNU Lesser General Public 16 | # License as published by the Free Software Foundation; either 17 | # version 2.1 of the License, or (at your option) any later version. 18 | # 19 | # This library is distributed in the hope that it will be useful, 20 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 21 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 | # Lesser General Public License for more details. 23 | # 24 | # You should have received a copy of the GNU Lesser General Public 25 | # License along with this library; if not, write to the Free Software 26 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 27 | # 02110-1301 USA 28 | ######################### END LICENSE BLOCK ######################### 29 | 30 | from .charsetgroupprober import CharSetGroupProber 31 | from .utf8prober import UTF8Prober 32 | from .sjisprober import SJISProber 33 | from .eucjpprober import EUCJPProber 34 | from .gb2312prober import GB2312Prober 35 | from .euckrprober import EUCKRProber 36 | from .cp949prober import CP949Prober 37 | from .big5prober import Big5Prober 38 | from .euctwprober import EUCTWProber 39 | 40 | 41 | class MBCSGroupProber(CharSetGroupProber): 42 | def __init__(self): 43 | CharSetGroupProber.__init__(self) 44 | self._mProbers = [ 45 | UTF8Prober(), 46 | SJISProber(), 47 | EUCJPProber(), 48 | GB2312Prober(), 49 | EUCKRProber(), 50 | CP949Prober(), 51 | Big5Prober(), 52 | EUCTWProber() 53 | ] 54 | self.reset() 55 | -------------------------------------------------------------------------------- /thirdparty_libs/requests/packages/charade/sbcharsetprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # 13 | # This library is free software; you can redistribute it and/or 14 | # modify it under the terms of the GNU Lesser General Public 15 | # License as published by the Free Software Foundation; either 16 | # version 2.1 of the License, or (at your option) any later version. 17 | # 18 | # This library is distributed in the hope that it will be useful, 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 | # Lesser General Public License for more details. 22 | # 23 | # You should have received a copy of the GNU Lesser General Public 24 | # License along with this library; if not, write to the Free Software 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 26 | # 02110-1301 USA 27 | ######################### END LICENSE BLOCK ######################### 28 | 29 | import sys 30 | from . import constants 31 | from .charsetprober import CharSetProber 32 | from .compat import wrap_ord 33 | 34 | SAMPLE_SIZE = 64 35 | SB_ENOUGH_REL_THRESHOLD = 1024 36 | POSITIVE_SHORTCUT_THRESHOLD = 0.95 37 | NEGATIVE_SHORTCUT_THRESHOLD = 0.05 38 | SYMBOL_CAT_ORDER = 250 39 | NUMBER_OF_SEQ_CAT = 4 40 | POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1 41 | #NEGATIVE_CAT = 0 42 | 43 | 44 | class SingleByteCharSetProber(CharSetProber): 45 | def __init__(self, model, reversed=False, nameProber=None): 46 | CharSetProber.__init__(self) 47 | self._mModel = model 48 | # TRUE if we need to reverse every pair in the model lookup 49 | self._mReversed = reversed 50 | # Optional auxiliary prober for name decision 51 | self._mNameProber = nameProber 52 | self.reset() 53 | 54 | def reset(self): 55 | CharSetProber.reset(self) 56 | # char order of last character 57 | self._mLastOrder = 255 58 | self._mSeqCounters = [0] * NUMBER_OF_SEQ_CAT 59 | self._mTotalSeqs = 0 60 | self._mTotalChar = 0 61 | # characters that fall in our sampling range 62 | self._mFreqChar = 0 63 | 64 | def get_charset_name(self): 65 | if self._mNameProber: 66 | return self._mNameProber.get_charset_name() 67 | else: 68 | return self._mModel['charsetName'] 69 | 70 | def feed(self, aBuf): 71 | if not self._mModel['keepEnglishLetter']: 72 | aBuf = self.filter_without_english_letters(aBuf) 73 | aLen = len(aBuf) 74 | if not aLen: 75 | return self.get_state() 76 | for c in aBuf: 77 | order = self._mModel['charToOrderMap'][wrap_ord(c)] 78 | if order < SYMBOL_CAT_ORDER: 79 | self._mTotalChar += 1 80 | if order < SAMPLE_SIZE: 81 | self._mFreqChar += 1 82 | if self._mLastOrder < SAMPLE_SIZE: 83 | self._mTotalSeqs += 1 84 | if not self._mReversed: 85 | i = (self._mLastOrder * SAMPLE_SIZE) + order 86 | model = self._mModel['precedenceMatrix'][i] 87 | else: # reverse the order of the letters in the lookup 88 | i = (order * SAMPLE_SIZE) + self._mLastOrder 89 | model = self._mModel['precedenceMatrix'][i] 90 | self._mSeqCounters[model] += 1 91 | self._mLastOrder = order 92 | 93 | if self.get_state() == constants.eDetecting: 94 | if self._mTotalSeqs > SB_ENOUGH_REL_THRESHOLD: 95 | cf = self.get_confidence() 96 | if cf > POSITIVE_SHORTCUT_THRESHOLD: 97 | if constants._debug: 98 | sys.stderr.write('%s confidence = %s, we have a' 99 | 'winner\n' % 100 | (self._mModel['charsetName'], cf)) 101 | self._mState = constants.eFoundIt 102 | elif cf < NEGATIVE_SHORTCUT_THRESHOLD: 103 | if constants._debug: 104 | sys.stderr.write('%s confidence = %s, below negative' 105 | 'shortcut threshhold %s\n' % 106 | (self._mModel['charsetName'], cf, 107 | NEGATIVE_SHORTCUT_THRESHOLD)) 108 | self._mState = constants.eNotMe 109 | 110 | return self.get_state() 111 | 112 | def get_confidence(self): 113 | r = 0.01 114 | if self._mTotalSeqs > 0: 115 | r = ((1.0 * self._mSeqCounters[POSITIVE_CAT]) / self._mTotalSeqs 116 | / self._mModel['mTypicalPositiveRatio']) 117 | r = r * self._mFreqChar / self._mTotalChar 118 | if r >= 1.0: 119 | r = 0.99 120 | return r 121 | -------------------------------------------------------------------------------- /thirdparty_libs/requests/packages/charade/sbcsgroupprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # 13 | # This library is free software; you can redistribute it and/or 14 | # modify it under the terms of the GNU Lesser General Public 15 | # License as published by the Free Software Foundation; either 16 | # version 2.1 of the License, or (at your option) any later version. 17 | # 18 | # This library is distributed in the hope that it will be useful, 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 | # Lesser General Public License for more details. 22 | # 23 | # You should have received a copy of the GNU Lesser General Public 24 | # License along with this library; if not, write to the Free Software 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 26 | # 02110-1301 USA 27 | ######################### END LICENSE BLOCK ######################### 28 | 29 | from .charsetgroupprober import CharSetGroupProber 30 | from .sbcharsetprober import SingleByteCharSetProber 31 | from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel, 32 | Latin5CyrillicModel, MacCyrillicModel, 33 | Ibm866Model, Ibm855Model) 34 | from .langgreekmodel import Latin7GreekModel, Win1253GreekModel 35 | from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel 36 | from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel 37 | from .langthaimodel import TIS620ThaiModel 38 | from .langhebrewmodel import Win1255HebrewModel 39 | from .hebrewprober import HebrewProber 40 | 41 | 42 | class SBCSGroupProber(CharSetGroupProber): 43 | def __init__(self): 44 | CharSetGroupProber.__init__(self) 45 | self._mProbers = [ 46 | SingleByteCharSetProber(Win1251CyrillicModel), 47 | SingleByteCharSetProber(Koi8rModel), 48 | SingleByteCharSetProber(Latin5CyrillicModel), 49 | SingleByteCharSetProber(MacCyrillicModel), 50 | SingleByteCharSetProber(Ibm866Model), 51 | SingleByteCharSetProber(Ibm855Model), 52 | SingleByteCharSetProber(Latin7GreekModel), 53 | SingleByteCharSetProber(Win1253GreekModel), 54 | SingleByteCharSetProber(Latin5BulgarianModel), 55 | SingleByteCharSetProber(Win1251BulgarianModel), 56 | SingleByteCharSetProber(Latin2HungarianModel), 57 | SingleByteCharSetProber(Win1250HungarianModel), 58 | SingleByteCharSetProber(TIS620ThaiModel), 59 | ] 60 | hebrewProber = HebrewProber() 61 | logicalHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, 62 | False, hebrewProber) 63 | visualHebrewProber = SingleByteCharSetProber(Win1255HebrewModel, True, 64 | hebrewProber) 65 | hebrewProber.set_model_probers(logicalHebrewProber, visualHebrewProber) 66 | self._mProbers.extend([hebrewProber, logicalHebrewProber, 67 | visualHebrewProber]) 68 | 69 | self.reset() 70 | -------------------------------------------------------------------------------- /thirdparty_libs/requests/packages/charade/sjisprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | import sys 29 | from .mbcharsetprober import MultiByteCharSetProber 30 | from .codingstatemachine import CodingStateMachine 31 | from .chardistribution import SJISDistributionAnalysis 32 | from .jpcntx import SJISContextAnalysis 33 | from .mbcssm import SJISSMModel 34 | from . import constants 35 | 36 | 37 | class SJISProber(MultiByteCharSetProber): 38 | def __init__(self): 39 | MultiByteCharSetProber.__init__(self) 40 | self._mCodingSM = CodingStateMachine(SJISSMModel) 41 | self._mDistributionAnalyzer = SJISDistributionAnalysis() 42 | self._mContextAnalyzer = SJISContextAnalysis() 43 | self.reset() 44 | 45 | def reset(self): 46 | MultiByteCharSetProber.reset(self) 47 | self._mContextAnalyzer.reset() 48 | 49 | def get_charset_name(self): 50 | return "SHIFT_JIS" 51 | 52 | def feed(self, aBuf): 53 | aLen = len(aBuf) 54 | for i in range(0, aLen): 55 | codingState = self._mCodingSM.next_state(aBuf[i]) 56 | if codingState == constants.eError: 57 | if constants._debug: 58 | sys.stderr.write(self.get_charset_name() 59 | + ' prober hit error at byte ' + str(i) 60 | + '\n') 61 | self._mState = constants.eNotMe 62 | break 63 | elif codingState == constants.eItsMe: 64 | self._mState = constants.eFoundIt 65 | break 66 | elif codingState == constants.eStart: 67 | charLen = self._mCodingSM.get_current_charlen() 68 | if i == 0: 69 | self._mLastChar[1] = aBuf[0] 70 | self._mContextAnalyzer.feed(self._mLastChar[2 - charLen:], 71 | charLen) 72 | self._mDistributionAnalyzer.feed(self._mLastChar, charLen) 73 | else: 74 | self._mContextAnalyzer.feed(aBuf[i + 1 - charLen:i + 3 75 | - charLen], charLen) 76 | self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1], 77 | charLen) 78 | 79 | self._mLastChar[0] = aBuf[aLen - 1] 80 | 81 | if self.get_state() == constants.eDetecting: 82 | if (self._mContextAnalyzer.got_enough_data() and 83 | (self.get_confidence() > constants.SHORTCUT_THRESHOLD)): 84 | self._mState = constants.eFoundIt 85 | 86 | return self.get_state() 87 | 88 | def get_confidence(self): 89 | contxtCf = self._mContextAnalyzer.get_confidence() 90 | distribCf = self._mDistributionAnalyzer.get_confidence() 91 | return max(contxtCf, distribCf) 92 | -------------------------------------------------------------------------------- /thirdparty_libs/requests/packages/charade/utf8prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from . import constants 29 | from .charsetprober import CharSetProber 30 | from .codingstatemachine import CodingStateMachine 31 | from .mbcssm import UTF8SMModel 32 | 33 | ONE_CHAR_PROB = 0.5 34 | 35 | 36 | class UTF8Prober(CharSetProber): 37 | def __init__(self): 38 | CharSetProber.__init__(self) 39 | self._mCodingSM = CodingStateMachine(UTF8SMModel) 40 | self.reset() 41 | 42 | def reset(self): 43 | CharSetProber.reset(self) 44 | self._mCodingSM.reset() 45 | self._mNumOfMBChar = 0 46 | 47 | def get_charset_name(self): 48 | return "utf-8" 49 | 50 | def feed(self, aBuf): 51 | for c in aBuf: 52 | codingState = self._mCodingSM.next_state(c) 53 | if codingState == constants.eError: 54 | self._mState = constants.eNotMe 55 | break 56 | elif codingState == constants.eItsMe: 57 | self._mState = constants.eFoundIt 58 | break 59 | elif codingState == constants.eStart: 60 | if self._mCodingSM.get_current_charlen() >= 2: 61 | self._mNumOfMBChar += 1 62 | 63 | if self.get_state() == constants.eDetecting: 64 | if self.get_confidence() > constants.SHORTCUT_THRESHOLD: 65 | self._mState = constants.eFoundIt 66 | 67 | return self.get_state() 68 | 69 | def get_confidence(self): 70 | unlike = 0.99 71 | if self._mNumOfMBChar < 6: 72 | for i in range(0, self._mNumOfMBChar): 73 | unlike = unlike * ONE_CHAR_PROB 74 | return 1.0 - unlike 75 | else: 76 | return unlike 77 | -------------------------------------------------------------------------------- /thirdparty_libs/requests/packages/urllib3/__init__.py: -------------------------------------------------------------------------------- 1 | # urllib3/__init__.py 2 | # Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) 3 | # 4 | # This module is part of urllib3 and is released under 5 | # the MIT License: http://www.opensource.org/licenses/mit-license.php 6 | 7 | """ 8 | urllib3 - Thread-safe connection pooling and re-using. 9 | """ 10 | 11 | __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' 12 | __license__ = 'MIT' 13 | __version__ = 'dev' 14 | 15 | 16 | from .connectionpool import ( 17 | HTTPConnectionPool, 18 | HTTPSConnectionPool, 19 | connection_from_url 20 | ) 21 | 22 | from . import exceptions 23 | from .filepost import encode_multipart_formdata 24 | from .poolmanager import PoolManager, ProxyManager, proxy_from_url 25 | from .response import HTTPResponse 26 | from .util import make_headers, get_host 27 | 28 | 29 | # Set default logging handler to avoid "No handler found" warnings. 30 | import logging 31 | try: # Python 2.7+ 32 | from logging import NullHandler 33 | except ImportError: 34 | class NullHandler(logging.Handler): 35 | def emit(self, record): 36 | pass 37 | 38 | logging.getLogger(__name__).addHandler(NullHandler()) 39 | 40 | def add_stderr_logger(level=logging.DEBUG): 41 | """ 42 | Helper for quickly adding a StreamHandler to the logger. Useful for 43 | debugging. 44 | 45 | Returns the handler after adding it. 46 | """ 47 | # This method needs to be in this __init__.py to get the __name__ correct 48 | # even if urllib3 is vendored within another package. 49 | logger = logging.getLogger(__name__) 50 | handler = logging.StreamHandler() 51 | handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s')) 52 | logger.addHandler(handler) 53 | logger.setLevel(level) 54 | logger.debug('Added an stderr logging handler to logger: %s' % __name__) 55 | return handler 56 | 57 | # ... Clean up. 58 | del NullHandler 59 | -------------------------------------------------------------------------------- /thirdparty_libs/requests/packages/urllib3/_collections.py: -------------------------------------------------------------------------------- 1 | # urllib3/_collections.py 2 | # Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) 3 | # 4 | # This module is part of urllib3 and is released under 5 | # the MIT License: http://www.opensource.org/licenses/mit-license.php 6 | 7 | from collections import MutableMapping 8 | from threading import RLock 9 | 10 | try: # Python 2.7+ 11 | from collections import OrderedDict 12 | except ImportError: 13 | from .packages.ordered_dict import OrderedDict 14 | 15 | 16 | __all__ = ['RecentlyUsedContainer'] 17 | 18 | 19 | _Null = object() 20 | 21 | 22 | class RecentlyUsedContainer(MutableMapping): 23 | """ 24 | Provides a thread-safe dict-like container which maintains up to 25 | ``maxsize`` keys while throwing away the least-recently-used keys beyond 26 | ``maxsize``. 27 | 28 | :param maxsize: 29 | Maximum number of recent elements to retain. 30 | 31 | :param dispose_func: 32 | Every time an item is evicted from the container, 33 | ``dispose_func(value)`` is called. Callback which will get called 34 | """ 35 | 36 | ContainerCls = OrderedDict 37 | 38 | def __init__(self, maxsize=10, dispose_func=None): 39 | self._maxsize = maxsize 40 | self.dispose_func = dispose_func 41 | 42 | self._container = self.ContainerCls() 43 | self.lock = RLock() 44 | 45 | def __getitem__(self, key): 46 | # Re-insert the item, moving it to the end of the eviction line. 47 | with self.lock: 48 | item = self._container.pop(key) 49 | self._container[key] = item 50 | return item 51 | 52 | def __setitem__(self, key, value): 53 | evicted_value = _Null 54 | with self.lock: 55 | # Possibly evict the existing value of 'key' 56 | evicted_value = self._container.get(key, _Null) 57 | self._container[key] = value 58 | 59 | # If we didn't evict an existing value, we might have to evict the 60 | # least recently used item from the beginning of the container. 61 | if len(self._container) > self._maxsize: 62 | _key, evicted_value = self._container.popitem(last=False) 63 | 64 | if self.dispose_func and evicted_value is not _Null: 65 | self.dispose_func(evicted_value) 66 | 67 | def __delitem__(self, key): 68 | with self.lock: 69 | value = self._container.pop(key) 70 | 71 | if self.dispose_func: 72 | self.dispose_func(value) 73 | 74 | def __len__(self): 75 | with self.lock: 76 | return len(self._container) 77 | 78 | def __iter__(self): 79 | raise NotImplementedError('Iteration over this class is unlikely to be threadsafe.') 80 | 81 | def clear(self): 82 | with self.lock: 83 | # Copy pointers to all values, then wipe the mapping 84 | # under Python 2, this copies the list of values twice :-| 85 | values = list(self._container.values()) 86 | self._container.clear() 87 | 88 | if self.dispose_func: 89 | for value in values: 90 | self.dispose_func(value) 91 | 92 | def keys(self): 93 | with self.lock: 94 | return self._container.keys() 95 | -------------------------------------------------------------------------------- /thirdparty_libs/requests/packages/urllib3/exceptions.py: -------------------------------------------------------------------------------- 1 | # urllib3/exceptions.py 2 | # Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) 3 | # 4 | # This module is part of urllib3 and is released under 5 | # the MIT License: http://www.opensource.org/licenses/mit-license.php 6 | 7 | 8 | ## Base Exceptions 9 | 10 | class HTTPError(Exception): 11 | "Base exception used by this module." 12 | pass 13 | 14 | 15 | class PoolError(HTTPError): 16 | "Base exception for errors caused within a pool." 17 | def __init__(self, pool, message): 18 | self.pool = pool 19 | HTTPError.__init__(self, "%s: %s" % (pool, message)) 20 | 21 | def __reduce__(self): 22 | # For pickling purposes. 23 | return self.__class__, (None, None) 24 | 25 | 26 | class RequestError(PoolError): 27 | "Base exception for PoolErrors that have associated URLs." 28 | def __init__(self, pool, url, message): 29 | self.url = url 30 | PoolError.__init__(self, pool, message) 31 | 32 | def __reduce__(self): 33 | # For pickling purposes. 34 | return self.__class__, (None, self.url, None) 35 | 36 | 37 | class SSLError(HTTPError): 38 | "Raised when SSL certificate fails in an HTTPS connection." 39 | pass 40 | 41 | 42 | class DecodeError(HTTPError): 43 | "Raised when automatic decoding based on Content-Type fails." 44 | pass 45 | 46 | 47 | ## Leaf Exceptions 48 | 49 | class MaxRetryError(RequestError): 50 | "Raised when the maximum number of retries is exceeded." 51 | 52 | def __init__(self, pool, url, reason=None): 53 | self.reason = reason 54 | 55 | message = "Max retries exceeded with url: %s" % url 56 | if reason: 57 | message += " (Caused by %s: %s)" % (type(reason), reason) 58 | else: 59 | message += " (Caused by redirect)" 60 | 61 | RequestError.__init__(self, pool, url, message) 62 | 63 | 64 | class HostChangedError(RequestError): 65 | "Raised when an existing pool gets a request for a foreign host." 66 | 67 | def __init__(self, pool, url, retries=3): 68 | message = "Tried to open a foreign host with url: %s" % url 69 | RequestError.__init__(self, pool, url, message) 70 | self.retries = retries 71 | 72 | 73 | class TimeoutError(RequestError): 74 | "Raised when a socket timeout occurs." 75 | pass 76 | 77 | 78 | class EmptyPoolError(PoolError): 79 | "Raised when a pool runs out of connections and no more are allowed." 80 | pass 81 | 82 | 83 | class ClosedPoolError(PoolError): 84 | "Raised when a request enters a pool after the pool has been closed." 85 | pass 86 | 87 | 88 | class LocationParseError(ValueError, HTTPError): 89 | "Raised when get_host or similar fails to parse the URL input." 90 | 91 | def __init__(self, location): 92 | message = "Failed to parse: %s" % location 93 | HTTPError.__init__(self, message) 94 | 95 | self.location = location 96 | -------------------------------------------------------------------------------- /thirdparty_libs/requests/packages/urllib3/filepost.py: -------------------------------------------------------------------------------- 1 | # urllib3/filepost.py 2 | # Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt) 3 | # 4 | # This module is part of urllib3 and is released under 5 | # the MIT License: http://www.opensource.org/licenses/mit-license.php 6 | 7 | import codecs 8 | import mimetypes 9 | 10 | from uuid import uuid4 11 | from io import BytesIO 12 | 13 | from .packages import six 14 | from .packages.six import b 15 | 16 | writer = codecs.lookup('utf-8')[3] 17 | 18 | 19 | def choose_boundary(): 20 | """ 21 | Our embarassingly-simple replacement for mimetools.choose_boundary. 22 | """ 23 | return uuid4().hex 24 | 25 | 26 | def get_content_type(filename): 27 | return mimetypes.guess_type(filename)[0] or 'application/octet-stream' 28 | 29 | 30 | def iter_fields(fields): 31 | """ 32 | Iterate over fields. 33 | 34 | Supports list of (k, v) tuples and dicts. 35 | """ 36 | if isinstance(fields, dict): 37 | return ((k, v) for k, v in six.iteritems(fields)) 38 | 39 | return ((k, v) for k, v in fields) 40 | 41 | 42 | def encode_multipart_formdata(fields, boundary=None): 43 | """ 44 | Encode a dictionary of ``fields`` using the multipart/form-data MIME format. 45 | 46 | :param fields: 47 | Dictionary of fields or list of (key, value) or (key, value, MIME type) 48 | field tuples. The key is treated as the field name, and the value as 49 | the body of the form-data bytes. If the value is a tuple of two 50 | elements, then the first element is treated as the filename of the 51 | form-data section and a suitable MIME type is guessed based on the 52 | filename. If the value is a tuple of three elements, then the third 53 | element is treated as an explicit MIME type of the form-data section. 54 | 55 | Field names and filenames must be unicode. 56 | 57 | :param boundary: 58 | If not specified, then a random boundary will be generated using 59 | :func:`mimetools.choose_boundary`. 60 | """ 61 | body = BytesIO() 62 | if boundary is None: 63 | boundary = choose_boundary() 64 | 65 | for fieldname, value in iter_fields(fields): 66 | body.write(b('--%s\r\n' % (boundary))) 67 | 68 | if isinstance(value, tuple): 69 | if len(value) == 3: 70 | filename, data, content_type = value 71 | else: 72 | filename, data = value 73 | content_type = get_content_type(filename) 74 | writer(body).write('Content-Disposition: form-data; name="%s"; ' 75 | 'filename="%s"\r\n' % (fieldname, filename)) 76 | body.write(b('Content-Type: %s\r\n\r\n' % 77 | (content_type,))) 78 | else: 79 | data = value 80 | writer(body).write('Content-Disposition: form-data; name="%s"\r\n' 81 | % (fieldname)) 82 | body.write(b'\r\n') 83 | 84 | if isinstance(data, int): 85 | data = str(data) # Backwards compatibility 86 | 87 | if isinstance(data, six.text_type): 88 | writer(body).write(data) 89 | else: 90 | body.write(data) 91 | 92 | body.write(b'\r\n') 93 | 94 | body.write(b('--%s--\r\n' % (boundary))) 95 | 96 | content_type = str('multipart/form-data; boundary=%s' % boundary) 97 | 98 | return body.getvalue(), content_type 99 | -------------------------------------------------------------------------------- /thirdparty_libs/requests/packages/urllib3/packages/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from . import ssl_match_hostname 4 | 5 | -------------------------------------------------------------------------------- /thirdparty_libs/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py: -------------------------------------------------------------------------------- 1 | """The match_hostname() function from Python 3.2, essential when using SSL.""" 2 | 3 | import re 4 | 5 | __version__ = '3.2.2' 6 | 7 | class CertificateError(ValueError): 8 | pass 9 | 10 | def _dnsname_to_pat(dn): 11 | pats = [] 12 | for frag in dn.split(r'.'): 13 | if frag == '*': 14 | # When '*' is a fragment by itself, it matches a non-empty dotless 15 | # fragment. 16 | pats.append('[^.]+') 17 | else: 18 | # Otherwise, '*' matches any dotless fragment. 19 | frag = re.escape(frag) 20 | pats.append(frag.replace(r'\*', '[^.]*')) 21 | return re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) 22 | 23 | def match_hostname(cert, hostname): 24 | """Verify that *cert* (in decoded format as returned by 25 | SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 rules 26 | are mostly followed, but IP addresses are not accepted for *hostname*. 27 | 28 | CertificateError is raised on failure. On success, the function 29 | returns nothing. 30 | """ 31 | if not cert: 32 | raise ValueError("empty or no certificate") 33 | dnsnames = [] 34 | san = cert.get('subjectAltName', ()) 35 | for key, value in san: 36 | if key == 'DNS': 37 | if _dnsname_to_pat(value).match(hostname): 38 | return 39 | dnsnames.append(value) 40 | if not dnsnames: 41 | # The subject is only checked when there is no dNSName entry 42 | # in subjectAltName 43 | for sub in cert.get('subject', ()): 44 | for key, value in sub: 45 | # XXX according to RFC 2818, the most specific Common Name 46 | # must be used. 47 | if key == 'commonName': 48 | if _dnsname_to_pat(value).match(hostname): 49 | return 50 | dnsnames.append(value) 51 | if len(dnsnames) > 1: 52 | raise CertificateError("hostname %r " 53 | "doesn't match either of %s" 54 | % (hostname, ', '.join(map(repr, dnsnames)))) 55 | elif len(dnsnames) == 1: 56 | raise CertificateError("hostname %r " 57 | "doesn't match %r" 58 | % (hostname, dnsnames[0])) 59 | else: 60 | raise CertificateError("no appropriate commonName or " 61 | "subjectAltName fields were found") 62 | -------------------------------------------------------------------------------- /thirdparty_libs/requests/status_codes.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .structures import LookupDict 4 | 5 | _codes = { 6 | 7 | # Informational. 8 | 100: ('continue',), 9 | 101: ('switching_protocols',), 10 | 102: ('processing',), 11 | 103: ('checkpoint',), 12 | 122: ('uri_too_long', 'request_uri_too_long'), 13 | 200: ('ok', 'okay', 'all_ok', 'all_okay', 'all_good', '\\o/', '✓'), 14 | 201: ('created',), 15 | 202: ('accepted',), 16 | 203: ('non_authoritative_info', 'non_authoritative_information'), 17 | 204: ('no_content',), 18 | 205: ('reset_content', 'reset'), 19 | 206: ('partial_content', 'partial'), 20 | 207: ('multi_status', 'multiple_status', 'multi_stati', 'multiple_stati'), 21 | 208: ('already_reported',), 22 | 226: ('im_used',), 23 | 24 | # Redirection. 25 | 300: ('multiple_choices',), 26 | 301: ('moved_permanently', 'moved', '\\o-'), 27 | 302: ('found',), 28 | 303: ('see_other', 'other'), 29 | 304: ('not_modified',), 30 | 305: ('use_proxy',), 31 | 306: ('switch_proxy',), 32 | 307: ('temporary_redirect', 'temporary_moved', 'temporary'), 33 | 308: ('resume_incomplete', 'resume'), 34 | 35 | # Client Error. 36 | 400: ('bad_request', 'bad'), 37 | 401: ('unauthorized',), 38 | 402: ('payment_required', 'payment'), 39 | 403: ('forbidden',), 40 | 404: ('not_found', '-o-'), 41 | 405: ('method_not_allowed', 'not_allowed'), 42 | 406: ('not_acceptable',), 43 | 407: ('proxy_authentication_required', 'proxy_auth', 'proxy_authentication'), 44 | 408: ('request_timeout', 'timeout'), 45 | 409: ('conflict',), 46 | 410: ('gone',), 47 | 411: ('length_required',), 48 | 412: ('precondition_failed', 'precondition'), 49 | 413: ('request_entity_too_large',), 50 | 414: ('request_uri_too_large',), 51 | 415: ('unsupported_media_type', 'unsupported_media', 'media_type'), 52 | 416: ('requested_range_not_satisfiable', 'requested_range', 'range_not_satisfiable'), 53 | 417: ('expectation_failed',), 54 | 418: ('im_a_teapot', 'teapot', 'i_am_a_teapot'), 55 | 422: ('unprocessable_entity', 'unprocessable'), 56 | 423: ('locked',), 57 | 424: ('failed_dependency', 'dependency'), 58 | 425: ('unordered_collection', 'unordered'), 59 | 426: ('upgrade_required', 'upgrade'), 60 | 428: ('precondition_required', 'precondition'), 61 | 429: ('too_many_requests', 'too_many'), 62 | 431: ('header_fields_too_large', 'fields_too_large'), 63 | 444: ('no_response', 'none'), 64 | 449: ('retry_with', 'retry'), 65 | 450: ('blocked_by_windows_parental_controls', 'parental_controls'), 66 | 451: ('unavailable_for_legal_reasons', 'legal_reasons'), 67 | 499: ('client_closed_request',), 68 | 69 | # Server Error. 70 | 500: ('internal_server_error', 'server_error', '/o\\', '✗'), 71 | 501: ('not_implemented',), 72 | 502: ('bad_gateway',), 73 | 503: ('service_unavailable', 'unavailable'), 74 | 504: ('gateway_timeout',), 75 | 505: ('http_version_not_supported', 'http_version'), 76 | 506: ('variant_also_negotiates',), 77 | 507: ('insufficient_storage',), 78 | 509: ('bandwidth_limit_exceeded', 'bandwidth'), 79 | 510: ('not_extended',), 80 | } 81 | 82 | codes = LookupDict(name='status_codes') 83 | 84 | for (code, titles) in list(_codes.items()): 85 | for title in titles: 86 | setattr(codes, title, code) 87 | if not title.startswith('\\'): 88 | setattr(codes, title.upper(), code) 89 | -------------------------------------------------------------------------------- /thirdparty_libs/requests/structures.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """ 4 | requests.structures 5 | ~~~~~~~~~~~~~~~~~~~ 6 | 7 | Data structures that power Requests. 8 | 9 | """ 10 | 11 | import os 12 | import collections 13 | from itertools import islice 14 | 15 | 16 | class IteratorProxy(object): 17 | """docstring for IteratorProxy""" 18 | def __init__(self, i): 19 | self.i = i 20 | # self.i = chain.from_iterable(i) 21 | 22 | def __iter__(self): 23 | return self.i 24 | 25 | def __len__(self): 26 | if hasattr(self.i, '__len__'): 27 | return len(self.i) 28 | if hasattr(self.i, 'len'): 29 | return self.i.len 30 | if hasattr(self.i, 'fileno'): 31 | return os.fstat(self.i.fileno()).st_size 32 | 33 | def read(self, n): 34 | return "".join(islice(self.i, None, n)) 35 | 36 | 37 | class CaseInsensitiveDict(collections.MutableMapping): 38 | """ 39 | A case-insensitive ``dict``-like object. 40 | 41 | Implements all methods and operations of 42 | ``collections.MutableMapping`` as well as dict's ``copy``. Also 43 | provides ``lower_items``. 44 | 45 | All keys are expected to be strings. The structure remembers the 46 | case of the last key to be set, and ``iter(instance)``, 47 | ``keys()``, ``items()``, ``iterkeys()``, and ``iteritems()`` 48 | will contain case-sensitive keys. However, querying and contains 49 | testing is case insensitive: 50 | 51 | cid = CaseInsensitiveDict() 52 | cid['Accept'] = 'application/json' 53 | cid['aCCEPT'] == 'application/json' # True 54 | list(cid) == ['Accept'] # True 55 | 56 | For example, ``headers['content-encoding']`` will return the 57 | value of a ``'Content-Encoding'`` response header, regardless 58 | of how the header name was originally stored. 59 | 60 | If the constructor, ``.update``, or equality comparison 61 | operations are given keys that have equal ``.lower()``s, the 62 | behavior is undefined. 63 | 64 | """ 65 | def __init__(self, data=None, **kwargs): 66 | self._store = dict() 67 | if data is None: 68 | data = {} 69 | self.update(data, **kwargs) 70 | 71 | def __setitem__(self, key, value): 72 | # Use the lowercased key for lookups, but store the actual 73 | # key alongside the value. 74 | self._store[key.lower()] = (key, value) 75 | 76 | def __getitem__(self, key): 77 | return self._store[key.lower()][1] 78 | 79 | def __delitem__(self, key): 80 | del self._store[key.lower()] 81 | 82 | def __iter__(self): 83 | return (casedkey for casedkey, mappedvalue in self._store.values()) 84 | 85 | def __len__(self): 86 | return len(self._store) 87 | 88 | def lower_items(self): 89 | """Like iteritems(), but with all lowercase keys.""" 90 | return ( 91 | (lowerkey, keyval[1]) 92 | for (lowerkey, keyval) 93 | in self._store.items() 94 | ) 95 | 96 | def __eq__(self, other): 97 | if isinstance(other, collections.Mapping): 98 | other = CaseInsensitiveDict(other) 99 | else: 100 | return NotImplemented 101 | # Compare insensitively 102 | return dict(self.lower_items()) == dict(other.lower_items()) 103 | 104 | # Copy is required 105 | def copy(self): 106 | return CaseInsensitiveDict(self._store.values()) 107 | 108 | def __repr__(self): 109 | return '%s(%r)' % (self.__class__.__name__, dict(self.items())) 110 | 111 | 112 | class LookupDict(dict): 113 | """Dictionary lookup object.""" 114 | 115 | def __init__(self, name=None): 116 | self.name = name 117 | super(LookupDict, self).__init__() 118 | 119 | def __repr__(self): 120 | return '' % (self.name) 121 | 122 | def __getitem__(self, key): 123 | # We allow fall-through here, so values default to None 124 | 125 | return self.__dict__.get(key, None) 126 | 127 | def get(self, key, default=None): 128 | return self.__dict__.get(key, default) 129 | -------------------------------------------------------------------------------- /unittest/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env/python 2 | #-*- coding:utf-8 -*- 3 | 4 | __author__ = 'BlackYe.' 5 | 6 | import sys 7 | sys.path.append("/data/project/webdirdig") -------------------------------------------------------------------------------- /unittest/webscan_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env/python 2 | #-*- coding:utf-8 -*- 3 | 4 | __author__ = 'BlackYe.' 5 | 6 | import sys 7 | sys.path.append("/data/project/webdirdig") 8 | 9 | from lib.core.webscan import WebScan 10 | from lib.net.myrequests import Requester 11 | 12 | def f(): 13 | print "haha" 14 | 15 | def g(): 16 | print 111 17 | 18 | 19 | def test(t_queue): 20 | t_queue.put("hahah") 21 | t_queue.put("bbbb") 22 | t_queue.put("cccc") 23 | 24 | if __name__ == '__main__': 25 | 26 | from gevent import queue 27 | s = queue.Queue() 28 | print s.qsize() 29 | test(s) 30 | print s.qsize() 31 | 32 | i = 0 33 | while i < s.qsize(): 34 | print s.peek() 35 | i = i+1 36 | 37 | ''' 38 | url = 'http://tx3.cbg.163.com/' 39 | try: 40 | requester = Requester(url) 41 | requester.request("/help/") 42 | 43 | except Exception as e: 44 | print (e.args[0]['message']) 45 | 46 | webscan = WebScan(requester, test_path = '/help/', suffix= None, bdir = True) 47 | print webscan.scan("/help/1/") 48 | 49 | for bakdir_ext in ['.tar.gz', '.zip']: 50 | exist_dir_suffix = '/help//' 51 | ox = exist_dir_suffix.split('/') 52 | print ox 53 | ox.remove('') 54 | ooxx = '' 55 | 56 | ooxx = ''.join(('/' + _) if _ != '' else (_ + bakdir_ext) for _ in ox) 57 | 58 | 59 | print ooxx 60 | ''' 61 | """ 62 | 63 | from gevent import queue 64 | from copy import deepcopy 65 | s = queue.PriorityQueue() 66 | p = queue.Queue() 67 | s.put("a") 68 | s.put("b") 69 | p.queue = deepcopy(s.queue) 70 | print p 71 | 72 | s.get() 73 | print p.qsize() 74 | p.put("test") 75 | print p.queue 76 | """ -------------------------------------------------------------------------------- /webdirdig.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env/python 2 | #-*- coding:utf-8 -*- 3 | 4 | __author__ = 'BlackYe.' 5 | 6 | 7 | from lib.controller.controller import Controller 8 | import sys 9 | 10 | def main(): 11 | Controller(sys.argv[1]) 12 | 13 | 14 | if __name__ == '__main__': main() --------------------------------------------------------------------------------