├── .gitignore
├── README.md
├── config.conf
├── dicts
    ├── web_dir.dic
    └── web_path.dic
├── lib
    ├── __init__.py
    ├── common
    │   ├── __init__.py
    │   ├── fuzzy_string_cmp.py
    │   ├── myexception.py
    │   ├── myfile.py
    │   ├── output.py
    │   ├── terminalsize.py
    │   └── util.py
    ├── controller
    │   ├── __init__.py
    │   ├── controller.py
    │   └── scanner.py
    ├── core
    │   ├── __init__.py
    │   ├── argument.py
    │   └── webscan.py
    └── net
    │   ├── __init__.py
    │   ├── myrequests.py
    │   └── myresponse.py
├── logs
    └── __init__.py
├── result
    └── __init__.py
├── thirdparty_libs
    ├── __init__.py
    ├── chardet
    │   ├── __init__.py
    │   ├── big5freq.py
    │   ├── big5prober.py
    │   ├── chardistribution.py
    │   ├── charsetgroupprober.py
    │   ├── charsetprober.py
    │   ├── cli
    │   │   ├── __init__.py
    │   │   └── chardetect.py
    │   ├── codingstatemachine.py
    │   ├── compat.py
    │   ├── cp949prober.py
    │   ├── enums.py
    │   ├── escprober.py
    │   ├── escsm.py
    │   ├── eucjpprober.py
    │   ├── euckrfreq.py
    │   ├── euckrprober.py
    │   ├── euctwfreq.py
    │   ├── euctwprober.py
    │   ├── gb2312freq.py
    │   ├── gb2312prober.py
    │   ├── hebrewprober.py
    │   ├── jisfreq.py
    │   ├── jpcntx.py
    │   ├── langbulgarianmodel.py
    │   ├── langcyrillicmodel.py
    │   ├── langgreekmodel.py
    │   ├── langhebrewmodel.py
    │   ├── langhungarianmodel.py
    │   ├── langthaimodel.py
    │   ├── langturkishmodel.py
    │   ├── latin1prober.py
    │   ├── mbcharsetprober.py
    │   ├── mbcsgroupprober.py
    │   ├── mbcssm.py
    │   ├── sbcharsetprober.py
    │   ├── sbcsgroupprober.py
    │   ├── sjisprober.py
    │   ├── universaldetector.py
    │   ├── utf8prober.py
    │   └── version.py
    ├── colorama
    │   ├── __init__.py
    │   ├── ansi.py
    │   ├── ansitowin32.py
    │   ├── initialise.py
    │   ├── win32.py
    │   └── winterm.py
    └── requests
    │   ├── __init__.py
    │   ├── adapters.py
    │   ├── api.py
    │   ├── auth.py
    │   ├── cacert.pem
    │   ├── certs.py
    │   ├── compat.py
    │   ├── cookies.py
    │   ├── exceptions.py
    │   ├── hooks.py
    │   ├── models.py
    │   ├── packages
    │       ├── __init__.py
    │       ├── charade
    │       │   ├── __init__.py
    │       │   ├── big5freq.py
    │       │   ├── big5prober.py
    │       │   ├── chardistribution.py
    │       │   ├── charsetgroupprober.py
    │       │   ├── charsetprober.py
    │       │   ├── codingstatemachine.py
    │       │   ├── compat.py
    │       │   ├── constants.py
    │       │   ├── cp949prober.py
    │       │   ├── escprober.py
    │       │   ├── escsm.py
    │       │   ├── eucjpprober.py
    │       │   ├── euckrfreq.py
    │       │   ├── euckrprober.py
    │       │   ├── euctwfreq.py
    │       │   ├── euctwprober.py
    │       │   ├── gb2312freq.py
    │       │   ├── gb2312prober.py
    │       │   ├── hebrewprober.py
    │       │   ├── jisfreq.py
    │       │   ├── jpcntx.py
    │       │   ├── langbulgarianmodel.py
    │       │   ├── langcyrillicmodel.py
    │       │   ├── langgreekmodel.py
    │       │   ├── langhebrewmodel.py
    │       │   ├── langhungarianmodel.py
    │       │   ├── langthaimodel.py
    │       │   ├── latin1prober.py
    │       │   ├── mbcharsetprober.py
    │       │   ├── mbcsgroupprober.py
    │       │   ├── mbcssm.py
    │       │   ├── sbcharsetprober.py
    │       │   ├── sbcsgroupprober.py
    │       │   ├── sjisprober.py
    │       │   ├── universaldetector.py
    │       │   └── utf8prober.py
    │       └── urllib3
    │       │   ├── __init__.py
    │       │   ├── _collections.py
    │       │   ├── connectionpool.py
    │       │   ├── exceptions.py
    │       │   ├── filepost.py
    │       │   ├── packages
    │       │       ├── __init__.py
    │       │       ├── ordered_dict.py
    │       │       ├── six.py
    │       │       └── ssl_match_hostname
    │       │       │   └── __init__.py
    │       │   ├── poolmanager.py
    │       │   ├── request.py
    │       │   ├── response.py
    │       │   └── util.py
    │   ├── sessions.py
    │   ├── status_codes.py
    │   ├── structures.py
    │   └── utils.py
├── unittest
    ├── __init__.py
    └── webscan_test.py
└── webdirdig.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | 
 5 | # C extensions
 6 | *.so
 7 | 
 8 | # Distribution / packaging
 9 | .Python
10 | #env/
11 | #bin/
12 | build/
13 | develop-eggs/
14 | dist/
15 | eggs/
16 | lib64/
17 | parts/
18 | sdist/
19 | var/
20 | *.egg-info/
21 | .installed.cfg
22 | *.egg
23 | #test
24 | temp/
25 | output/
26 | 
27 | # Installer logs
28 | pip-log.txt
29 | pip-delete-this-directory.txt
30 | 
31 | # Unit test / coverage reports
32 | htmlcov/
33 | .tox/
34 | .coverage
35 | nosetests.xml
36 | coverage.xml
37 | 
38 | # Translations
39 | *.mo
40 | 
41 | # Mr Developer
42 | .mr.developer.cfg
43 | .project
44 | .pydevproject
45 | 
46 | # Rope
47 | .ropeproject
48 | 
49 | # Django stuff:
50 | *.log
51 | *.pot
52 | 
53 | # Sphinx documentation
54 | #docs/_build/
55 | 
56 | # CTag
57 | .tags*
58 | 
59 | # mac
60 | .DS_Store
61 | 
62 | #pycharm
63 | .idea/
64 | 
65 | #pyc 
66 | .pyc
67 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | webdirdig
 2 | ===================================  
 3 | web敏感目录\信息泄漏扫描脚本
 4 | 
 5 | Basic usage
 6 | ===================================  
 7 | 
 8 | ```
 9 | python webdirdig.py http://www.baidu.com
10 | ```


--------------------------------------------------------------------------------
/config.conf:
--------------------------------------------------------------------------------
1 | [dict]
2 | bakdir_exts = ['.zip', '.tar','.rar','.tar.gz','.tar.bz2', '.log']
3 | bakfile_exts = ['.bak', '.swp', '.1' , '.old']
4 | web_dic_path = ./dicts/web_dir.dic
5 | path_dic_path = ./dicts/web_path.dic


--------------------------------------------------------------------------------
/dicts/web_dir.dic:
--------------------------------------------------------------------------------
  1 | .cvs
  2 | log
  3 | logs
  4 | 2014
  5 | monitor
  6 | invoker
  7 | phpmyredis
  8 | phpldapadmin
  9 | .CVS
 10 | zabbix
 11 | nagios
 12 | 0
 13 | 1
 14 | 10
 15 | 100
 16 | pmadb
 17 | resin-admin
 18 | resin-doc
 19 | java
 20 | Java
 21 | simple
 22 | soft
 23 | Soft
 24 | 2012
 25 | 2013
 26 | 01
 27 | 02
 28 | 03
 29 | 04
 30 | 05
 31 | 06
 32 | 07
 33 | 08
 34 | 09
 35 | 1
 36 | 2
 37 | 3
 38 | 4
 39 | 5
 40 | 6
 41 | 7
 42 | 8
 43 | 9
 44 | 11
 45 | 10
 46 | 12
 47 | a
 48 | A
 49 | Add
 50 | add
 51 | Adm
 52 | adm
 53 | administrator
 54 | admin
 55 | Admin
 56 | admin_bak
 57 | Admin_Bak
 58 | Admin_Login
 59 | admin_login
 60 | admin_user
 61 | Admin_user
 62 | admin1
 63 | Admin1
 64 | admin2
 65 | Admin2
 66 | app
 67 | apps
 68 | api
 69 | archive
 70 | archives
 71 | article
 72 | ajax
 73 | BackUp
 74 | backup
 75 | Bak
 76 | bak
 77 | boss
 78 | Boss
 79 | bbs
 80 | cache
 81 | caches
 82 | cacti
 83 | Cacti
 84 | cgi-bin
 85 | check
 86 | Check
 87 | ctc
 88 | classes
 89 | convert
 90 | console
 91 | conf
 92 | Conf
 93 | config
 94 | Config
 95 | data
 96 | database
 97 | dblog
 98 | dashboard
 99 | developer
100 | dede
101 | download
102 | Edit
103 | edit
104 | Editor
105 | editor
106 | events
107 | eWebEditor
108 | ewebeditor
109 | FCKEditor
110 | fckeditor
111 | FCKeditor
112 | file
113 | File
114 | Files
115 | files
116 | gamelog
117 | graphlot
118 | help
119 | htdocs
120 | HouTai
121 | houtai
122 | houtaiguanli
123 | inc
124 | Inc
125 | include
126 | Include
127 | install
128 | jmx-console
129 | jenkins
130 | kindeditor
131 | listinfo
132 | Local
133 | local
134 | Log
135 | logs
136 | log
137 | login
138 | Login
139 | m
140 | M
141 | main
142 | Main
143 | mailman
144 | mailman
145 | Manage
146 | manage
147 | manager
148 | Manager
149 | manager_login
150 | Manager_Login
151 | master
152 | member
153 | Member
154 | MemberLogin
155 | memberlogin
156 | members
157 | Members
158 | Mgr
159 | mgr
160 | My
161 | my
162 | MyAdmin
163 | myadmin
164 | MySQL
165 | Mysql
166 | mysql
167 | mysqlserver
168 | new
169 | news
170 | news_admin
171 | News_Admin
172 | newsadmin
173 | NewsAdmin
174 | output
175 | phpMyAdmin
176 | phpmyadmin
177 | phpRedisAdmin
178 | PMA
179 | pma
180 | popup
181 | printenv
182 | Q
183 | q
184 | R
185 | r
186 | root
187 | Root
188 | script
189 | Script
190 | scripts
191 | servlet
192 | search
193 | sectool
194 | source
195 | shopadmin
196 | Site
197 | site
198 | sql
199 | Sql
200 | SQL_Manager
201 | SQL_System
202 | SqlConf
203 | sqlconf
204 | sqlmanager
205 | static
206 | status
207 | stats
208 | Sys
209 | sys
210 | sys_login
211 | Sys_Login
212 | T
213 | t
214 | Temp
215 | temp
216 | test
217 | Test
218 | tmp
219 | Tmp
220 | tool
221 | tools
222 | Top
223 | top
224 | Upload
225 | upload_files
226 | upload_images
227 | uploadfiles
228 | UploadFiles
229 | uploadimages
230 | ucenter
231 | update
232 | user
233 | User
234 | user_admin
235 | User_Admin
236 | user_login
237 | User_Login
238 | userfiles
239 | UserFiles
240 | users
241 | Users
242 | uc_server
243 | web
244 | Web
245 | web-console
246 | WEB-INF
247 | Web1
248 | web1
249 | Web2
250 | web2
251 | weblogs
252 | wp-content
253 | wp-admin
254 | wwwlog
255 | wwwlogs
256 | X
257 | x
258 | Xampp
259 | xampp
260 | Y
261 | y
262 | 


--------------------------------------------------------------------------------
/lib/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #-*- coding:utf-8 -*-
3 | 
4 | __author__ = 'BlackYe.'
5 | 


--------------------------------------------------------------------------------
/lib/common/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #-*- coding:utf-8 -*-
3 | 
4 | __author__ = 'BlackYe.'
5 | 


--------------------------------------------------------------------------------
/lib/common/fuzzy_string_cmp.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #-*- coding:utf-8 -*-
  3 | 
  4 | __author__ = 'BlackYe.'
  5 | 
  6 | from difflib import SequenceMatcher
  7 | import re
  8 | 
  9 | from thirdparty_libs import chardet
 10 | 
 11 | 
 12 | class DynamicContentParser:
 13 |     def __init__(self, requester, path, firstPage, secondPage, comparisons=2):
 14 |         self.DYNAMICITY_MARK_LENGTH = 32
 15 |         self.UPPER_RATIO_BOUND = 0.98
 16 |         self.requester = requester
 17 |         self.keyCallback = path
 18 |         self.comparisons = comparisons
 19 |         self.diff_marks = []
 20 |         self.seqMatcher = SequenceMatcher()
 21 |         self.relative_distance_marks(firstPage, secondPage)
 22 | 
 23 |     def relative_distance_marks(self, firstPage, secondPage):
 24 |         if any(page is None for page in (firstPage, secondPage)):
 25 |             # No content
 26 |             return
 27 | 
 28 |         self.seqMatcher.set_seq1(firstPage)
 29 |         self.seqMatcher.set_seq2(secondPage)
 30 |         ratio = self.seqMatcher.quick_ratio()
 31 |         # In case of an intolerable difference turn on dynamicity removal engine
 32 |         if ratio <= self.UPPER_RATIO_BOUND:
 33 |             self.diff_marks += self.compare_diff(firstPage, secondPage)
 34 |             for i in range(self.comparisons):
 35 |                 response = self.requester.request(self.keyCallback)
 36 |                 secondPage = response.body
 37 |                 self.diff_marks += self.compare_diff(firstPage, secondPage)
 38 |             self.cleanPage = self.remove_dynamic_content(firstPage, self.diff_marks)
 39 |             self.seqMatcher.set_seq1(self.cleanPage)
 40 |             self.seqMatcher.set_seq2(self.remove_dynamic_content(secondPage, self.diff_marks))
 41 |             ratio = self.seqMatcher.quick_ratio()
 42 |         else:
 43 |             self.cleanPage = firstPage
 44 |         self.comparisonRatio = ratio
 45 | 
 46 |     def relative_distance(self, page):
 47 |         seqMatcher = SequenceMatcher()
 48 |         seqMatcher.set_seq1(self.cleanPage)
 49 |         seqMatcher.set_seq2(self.remove_dynamic_content(page, self.diff_marks))
 50 |         ratio = seqMatcher.quick_ratio()
 51 |         return ratio
 52 | 
 53 |     def compare_diff(self, firstPage, secondPage):
 54 |         diff_marks = []
 55 | 
 56 |         blocks = list(SequenceMatcher(None, firstPage, secondPage).get_matching_blocks())
 57 | 
 58 |         # Removing too small matching blocks
 59 |         for block in blocks[:]:
 60 |             (_, _, length) = block
 61 | 
 62 |             if length <= self.DYNAMICITY_MARK_LENGTH:
 63 |                 blocks.remove(block)
 64 | 
 65 |         # Making of dynamic markings based on prefix/suffix principle
 66 |         if len(blocks) > 0:
 67 |             blocks.insert(0, None)
 68 |             blocks.append(None)
 69 | 
 70 |             for i in range(len(blocks) - 1):
 71 |                 prefix = firstPage[blocks[i][0]:blocks[i][0] + blocks[i][2]] if blocks[i] else None
 72 |                 suffix = firstPage[blocks[i + 1][0]:blocks[i + 1][0] + blocks[i + 1][2]] if blocks[i + 1] else None
 73 | 
 74 |                 if prefix is None and blocks[i + 1][0] == 0:
 75 |                     continue
 76 | 
 77 |                 if suffix is None and (blocks[i][0] + blocks[i][2] >= len(firstPage)):
 78 |                     continue
 79 | 
 80 |                 diff_marks.append((re.escape(prefix[int(-self.DYNAMICITY_MARK_LENGTH / 2):]) if prefix else None,
 81 |                                      re.escape(suffix[:int(self.DYNAMICITY_MARK_LENGTH / 2)]) if suffix else None))
 82 | 
 83 |         return diff_marks
 84 | 
 85 |     def remove_dynamic_content(self, page, diff_marks):
 86 |         """
 87 |         Removing dynamic content from supplied page basing removal on
 88 |         precalculated dynamic markings
 89 |         """
 90 |         if page:
 91 |             encoding = chardet.detect(page)['encoding']
 92 |             page = page.decode(encoding, errors='replace')
 93 |             for item in diff_marks:
 94 |                 prefix, suffix = item
 95 |                 if prefix is not None:
 96 |                     prefix = prefix.decode(encoding, errors='replace')
 97 |                 if suffix is not None:
 98 |                     suffix = suffix.decode(encoding, errors='replace')
 99 | 
100 |                 if prefix is None and suffix is None:
101 |                     continue
102 |                 elif prefix is None:
103 |                     page = re.sub(r'(?s)^.+{0}'.format(re.escape(suffix)), suffix.replace('\\', r'\\'), page)
104 |                 elif suffix is None:
105 |                     page = re.sub(r'(?s){0}.+$'.format(re.escape(prefix)), prefix.replace('\\', r'\\'), page)
106 |                 else:
107 |                     page = re.sub(r'(?s){0}.+{1}'.format(re.escape(prefix), re.escape(suffix)), "{0}{1}".format(prefix.replace('\\', r'\\'), suffix.replace('\\', r'\\')), page)
108 | 
109 | 
110 |         return page
111 | 


--------------------------------------------------------------------------------
/lib/common/myexception.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | #-*- coding:utf-8 -*-
 3 | 
 4 | __author__ = 'BlackYe.'
 5 | 
 6 | class RequestException(Exception):
 7 |     pass
 8 | 
 9 | class SkipTargetInterrupt(Exception):
10 |     pass


--------------------------------------------------------------------------------
/lib/common/myfile.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #  This program is free software; you can redistribute it and/or modify
  3 | #  it under the terms of the GNU General Public License as published by
  4 | #  the Free Software Foundation; either version 2 of the License, or
  5 | #  (at your option) any later version.
  6 | #
  7 | #  This program is distributed in the hope that it will be useful,
  8 | #  but WITHOUT ANY WARRANTY; without even the implied warranty of
  9 | #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 10 | #  GNU General Public License for more details.
 11 | #
 12 | #  You should have received a copy of the GNU General Public License
 13 | #  along with this program; if not, write to the Free Software
 14 | #  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 15 | #  MA 02110-1301, USA.
 16 | #
 17 | #  Author: Mauro Soria
 18 | 
 19 | import os
 20 | import os.path
 21 | 
 22 | 
 23 | class File(object):
 24 |     def __init__(self, *pathComponents):
 25 |         self._path = FileUtils.buildPath(*pathComponents)
 26 |         self.content = None
 27 | 
 28 |     @property
 29 |     def path(self):
 30 |         return self._path
 31 | 
 32 |     @path.setter
 33 |     def path(self, value):
 34 |         raise NotImplemented
 35 | 
 36 |     def isValid(self):
 37 |         return FileUtils.isFile(self.path)
 38 | 
 39 |     def exists(self):
 40 |         return FileUtils.exists(self.path)
 41 | 
 42 |     def canRead(self):
 43 |         return FileUtils.canRead(self.path)
 44 | 
 45 |     def canWrite(self):
 46 |         return FileUtils.canWrite(self.path)
 47 | 
 48 |     def read(self):
 49 |         return FileUtils.read(self.path)
 50 | 
 51 |     def update(self):
 52 |         self.content = self.read()
 53 | 
 54 |     def content(self):
 55 |         if not self.content:
 56 |             self.content = FileUtils.read()
 57 |         return self.content()
 58 | 
 59 |     def getLines(self):
 60 |         for line in FileUtils.getLines(self.path):
 61 |             yield line
 62 | 
 63 |     def __cmp__(self, other):
 64 |         if not isinstance(other, File):
 65 |             raise NotImplemented
 66 |         return cmp(self.content(), other.content())
 67 | 
 68 |     def __enter__(self):
 69 |         return self
 70 | 
 71 |     def __exit__(self, type, value, tb):
 72 |         pass
 73 | 
 74 | 
 75 | class FileUtils(object):
 76 |     @staticmethod
 77 |     def buildPath(*pathComponents):
 78 |         if pathComponents:
 79 |             path = os.path.join(*pathComponents)
 80 |         else:
 81 |             path = ''
 82 |         return path
 83 | 
 84 |     @staticmethod
 85 |     def exists(fileName):
 86 |         return os.access(fileName, os.F_OK)
 87 | 
 88 |     @staticmethod
 89 |     def canRead(fileName):
 90 |         if not os.access(fileName, os.R_OK):
 91 |             return False
 92 |         try:
 93 |             with open(fileName):
 94 |                 pass
 95 |         except IOError:
 96 |             return False
 97 |         return True
 98 | 
 99 |     @staticmethod
100 |     def canWrite(fileName):
101 |         return os.access(fileName, os.W_OK)
102 | 
103 |     @staticmethod
104 |     def read(fileName):
105 |         result = ''
106 |         with open(fileName, 'r') as fd:
107 |             for line in fd.readlines():
108 |                 result += line
109 |         return result
110 | 
111 |     @staticmethod
112 |     def getLines(fileName):
113 |         with open(fileName, 'r', errors="replace") as fd:
114 |             return fd.read().splitlines()
115 | 
116 |     @staticmethod
117 |     def isDir(fileName):
118 |         return os.path.isdir(fileName)
119 | 
120 |     @staticmethod
121 |     def isFile(fileName):
122 |         return os.path.isfile(fileName)
123 | 
124 |     @staticmethod
125 |     def createDirectory(directory):
126 |         if not FileUtils.exists(directory):
127 |             os.makedirs(directory)
128 | 
129 |     @staticmethod
130 |     def sizeHuman(num):
131 |         base = 1024
132 |         for x in ['B ', 'KB', 'MB', 'GB']:
133 |             if num < base and num > -base:
134 |                 return "%3.0f%s" % (num, x)
135 |             num /= base
136 |         return "%3.0f %s" % (num, 'TB')
137 | 
138 |     @staticmethod
139 |     def writeLines(fileName, lines):
140 |         content = None
141 |         if type(lines) is list:
142 |             content = "\n".join(lines)
143 |         else:
144 |             content = lines
145 |         with open(fileName, "w") as f:
146 |             f.writelines(content)
147 | 


--------------------------------------------------------------------------------
/lib/common/output.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #-*- coding:utf-8 -*-
  3 | 
  4 | __author__ = 'BlackYe.'
  5 | 
  6 | import threading
  7 | import time
  8 | import sys
  9 | import platform
 10 | from urlparse import urljoin
 11 | 
 12 | from lib.common.myfile import *
 13 | from thirdparty_libs.colorama import *
 14 | from lib.common.terminalsize import get_terminal_size
 15 | 
 16 | if platform.system() == 'Windows':
 17 |     from thirdparty_libs.colorama.win32 import *
 18 | 
 19 | 
 20 | class ConsoleOutput(object):
 21 |     def __init__(self):
 22 |         init()
 23 |         self.lastLength = 0
 24 |         self.lastOutput = ''
 25 |         self.lastInLine = False
 26 |         self.mutex = threading.Lock()
 27 |         self.blacklists = {}
 28 |         self.mutexCheckedPaths = threading.Lock()
 29 |         self.basePath = None
 30 |         self.errors = 0
 31 | 
 32 |     def inLine(self, string):
 33 |         self.erase()
 34 |         sys.stdout.write(string)
 35 |         sys.stdout.flush()
 36 |         self.lastInLine = True
 37 | 
 38 |     def erase(self):
 39 |         if platform.system() == 'Windows':
 40 |             csbi = GetConsoleScreenBufferInfo()
 41 |             line = "\b" * int(csbi.dwCursorPosition.X)
 42 |             sys.stdout.write(line)
 43 |             width = csbi.dwCursorPosition.X
 44 |             csbi.dwCursorPosition.X = 0
 45 |             FillConsoleOutputCharacter(STDOUT, ' ', width, csbi.dwCursorPosition)
 46 |             sys.stdout.write(line)
 47 |             sys.stdout.flush()
 48 |         else:
 49 |             sys.stdout.write('\033[1K')
 50 |             sys.stdout.write('\033[0G')
 51 | 
 52 |     def newLine(self, string):
 53 |         if self.lastInLine == True:
 54 |             self.erase()
 55 |         if platform.system() == 'Windows':
 56 |             sys.stdout.write(string)
 57 |             sys.stdout.flush()
 58 |             sys.stdout.write('\n')
 59 |             sys.stdout.flush()
 60 |         else:
 61 |             sys.stdout.write(string + '\n')
 62 |         sys.stdout.flush()
 63 |         self.lastInLine = False
 64 |         sys.stdout.flush()
 65 | 
 66 |     def statusReport(self, path, response):
 67 |         with self.mutex:
 68 |             contentLength = None
 69 |             status = response.status
 70 | 
 71 |             # Check blacklist
 72 |             if status in self.blacklists and path in self.blacklists[status]:
 73 |                 return
 74 | 
 75 |             # Format message
 76 |             try:
 77 |                 size = int(response.headers['content-length'])
 78 |             except (KeyError, ValueError):
 79 |                 size = len(response.body)
 80 |             finally:
 81 |                 contentLength = FileUtils.sizeHuman(size)
 82 | 
 83 |             if self.basePath is None:
 84 |                 showPath = urljoin("/", path)
 85 |             else:
 86 |                 showPath = urljoin("/", self.basePath)
 87 |                 showPath = urljoin(showPath, path)
 88 |             message = '[{0}] {1} - {2} - {3}'.format(
 89 |                 time.strftime('%H:%M:%S'),
 90 |                 status,
 91 |                 contentLength.rjust(6, ' '),
 92 |                 showPath
 93 |             )
 94 | 
 95 |             if status == 200:
 96 |                 message = Fore.GREEN + message + Style.RESET_ALL
 97 |             elif status == 403:
 98 |                 message = Fore.BLUE + message + Style.RESET_ALL
 99 |             elif status == 401:
100 |                 message = Fore.YELLOW + message + Style.RESET_ALL
101 |             # Check if redirect
102 |             elif status in [301, 302, 307] and 'location' in [h.lower() for h in response.headers]:
103 |                 message = Fore.CYAN + message + Style.RESET_ALL
104 |                 message += '  ->  {0}'.format(response.headers['location'])
105 | 
106 |             self.newLine(message)
107 | 
108 |     def lastPath(self, path, index, length):
109 |         with self.mutex:
110 |             percentage = lambda x, y: float(x) / float(y) * 100
111 |             x, y = get_terminal_size()
112 |             message = '{0:.2f}% - '.format(percentage(index, length))
113 |             if self.errors > 0:
114 |                 message += Style.BRIGHT + Fore.RED
115 |                 message += 'Errors: {0}'.format(self.errors)
116 |                 message += Style.RESET_ALL
117 |                 message += ' - '
118 |             message += 'Last request to: {0}'.format(path)
119 |             if len(message) > x:
120 |                 message = message[:x]
121 |             self.inLine(message)
122 | 
123 |     def addConnectionError(self):
124 |         self.errors += 1
125 | 
126 |     def error(self, reason):
127 |         with self.mutex:
128 |             stripped = reason.strip()
129 |             start = reason.find(stripped[0])
130 |             end = reason.find(stripped[-1]) + 1
131 |             message = reason[0:start]
132 |             message += Style.BRIGHT + Fore.WHITE + Back.RED
133 |             message += reason[start:end]
134 |             message += Style.RESET_ALL
135 |             message += reason[end:]
136 |             self.newLine(message)
137 | 
138 |     def warning(self, reason):
139 |         message = Style.BRIGHT + Fore.YELLOW + reason + Style.RESET_ALL
140 |         self.newLine(message)
141 | 
142 |     def header(self, text):
143 |         message = Style.BRIGHT + Fore.MAGENTA + text + Style.RESET_ALL
144 |         self.newLine(message)
145 | 
146 |     def config(self, extensions, threads, wordlistSize):
147 |         separator = Fore.MAGENTA + ' | ' + Fore.YELLOW
148 |         config = Style.BRIGHT + Fore.YELLOW
149 |         config += 'Extensions: {0}'.format(Fore.CYAN + extensions + Fore.YELLOW)
150 |         config += separator
151 |         config += 'Threads: {0}'.format(Fore.CYAN + threads + Fore.YELLOW)
152 |         config += separator
153 |         config += 'Wordlist size: {0}'.format(Fore.CYAN + wordlistSize + Fore.YELLOW)
154 |         config += Style.RESET_ALL
155 |         self.newLine(config)
156 | 
157 |     def target(self, target):
158 |         config = Style.BRIGHT + Fore.YELLOW
159 |         config += '\nTarget: {0}\n'.format(Fore.CYAN + target + Fore.YELLOW)
160 |         config += Style.RESET_ALL
161 |         self.newLine(config)
162 | 
163 |     def debug(self, info):
164 |         line = "[{0}] - {1}".format(time.strftime('%H:%M:%S'), info)
165 |         self.newLine(line)
166 | 
# Module-level shared reporter; other modules use it via
# ``from lib.common.output import output``.
output = ConsoleOutput()


--------------------------------------------------------------------------------
/lib/common/terminalsize.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | #-*- coding:utf-8 -*-
 3 | 
 4 | __author__ = 'BlackYe.'
 5 | 
 6 | import os
 7 | import shlex
 8 | import struct
 9 | import platform
10 | import subprocess
11 | 
12 | 
def get_terminal_size():
    """Return the console size as a ``(width, height)`` tuple.

    Windows is probed through the console API first and ``tput`` second
    (the latter covers Windows Python running inside a Cygwin xterm).
    Linux, macOS, FreeBSD and Cygwin use the ioctl-based probe.  When no
    probe yields a size, ``(80, 25)`` is returned.

    Originally retrieved from:
    http://stackoverflow.com/questions/566746/how-to-get-console-window-width-in-python
    """
    system_name = platform.system()
    size = None

    if system_name == 'Windows':
        size = _get_terminal_size_windows()
        if size is None:
            # needed for window's python in cygwin's xterm!
            size = _get_terminal_size_tput()

    is_unix_like = system_name in ('Linux', 'Darwin', 'FreeBSD')
    if is_unix_like or system_name.startswith('CYGWIN'):
        size = _get_terminal_size_linux()

    # default value when every probe failed
    return (80, 25) if size is None else size
32 | 
33 | 
34 | def _get_terminal_size_windows():
35 |     try:
36 |         from ctypes import windll, create_string_buffer
37 |         # stdin handle is -10
38 |         # stdout handle is -11
39 |         # stderr handle is -12
40 |         h = windll.kernel32.GetStdHandle(-12)
41 |         csbi = create_string_buffer(22)
42 |         res = windll.kernel32.GetConsoleScreenBufferInfo(h, csbi)
43 |         if res:
44 |             (bufx, bufy, curx, cury, wattr,
45 |              left, top, right, bottom,
46 |              maxx, maxy) = struct.unpack("hhhhHhhhhhh", csbi.raw)
47 |             sizex = right - left + 1
48 |             sizey = bottom - top + 1
49 |             return sizex, sizey
50 |     except:
51 |         pass
52 | 
53 | 
54 | def _get_terminal_size_tput():
55 |     # get terminal width
56 |     # src: http://stackoverflow.com/questions/263890/how-do-i-find-the-width-height-of-a-terminal-window
57 |     try:
58 |         cols = int(subprocess.check_call(shlex.split('tput cols')))
59 |         rows = int(subprocess.check_call(shlex.split('tput lines')))
60 |         return (cols, rows)
61 |     except:
62 |         pass
63 | 
64 | 
65 | def _get_terminal_size_linux():
66 |     def ioctl_GWINSZ(fd):
67 |         try:
68 |             import fcntl
69 |             import termios
70 |             cr = struct.unpack('hh',
71 |                                fcntl.ioctl(fd, termios.TIOCGWINSZ, '1234'))
72 |             return cr
73 |         except:
74 |             pass
75 |     cr = ioctl_GWINSZ(0) or ioctl_GWINSZ(1) or ioctl_GWINSZ(2)
76 |     if not cr:
77 |         try:
78 |             fd = os.open(os.ctermid(), os.O_RDONLY)
79 |             cr = ioctl_GWINSZ(fd)
80 |             os.close(fd)
81 |         except:
82 |             pass
83 |     if not cr:
84 |         try:
85 |             cr = (os.environ['LINES'], os.environ['COLUMNS'])
86 |         except:
87 |             return None
88 |     return int(cr[1]), int(cr[0])
89 | 
90 | if __name__ == "__main__":
91 |     sizex, sizey = get_terminal_size()
92 |     print('width =', sizex, 'height =', sizey)
93 | 


--------------------------------------------------------------------------------
/lib/common/util.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | #-*- coding:utf-8 -*-
 3 | 
 4 | __author__ = 'BlackYe.'
 5 | 
 6 | 
 7 | import random
 8 | import string
 9 | 
10 | 
class RandomUtils(object):
    """Random-value helpers."""

    @classmethod
    def randString(cls, n=12, omit=None):
        """Return ``n`` random characters drawn from ASCII letters and digits.

        Characters contained in ``omit`` (any iterable of characters) are
        never used.
        """
        alphabet = string.ascii_lowercase + string.ascii_uppercase + string.digits
        if omit:
            # Set difference drops the unwanted characters.
            alphabet = list(set(alphabet) - set(omit))
        picked = [random.choice(alphabet) for _ in range(n)]
        return ''.join(picked)


--------------------------------------------------------------------------------
/lib/controller/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env/python
2 | #-*- coding:utf-8 -*-
3 | 
4 | __author__ = 'BlackYe.'
5 | 


--------------------------------------------------------------------------------
/lib/controller/controller.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env/python
 2 | #-*- coding:utf-8 -*-
 3 | 
 4 | __author__ = 'BlackYe.'
 5 | 
 6 | from lib.core.argument import ArgumentParse as Argument
 7 | from lib.controller.scanner import Scanner
 8 | 
 9 | from lib.common.output import ConsoleOutput
10 | from lib.net.myrequests import Requester
11 | from lib.common.myexception import RequestException, SkipTargetInterrupt
12 | from lib.common.output import output
13 | 
class Controller(object):
    """Run a complete scan against one target URL.

    Constructing the object does all the work: it loads the scan settings,
    builds a ``Requester``, probes the root path, builds a ``Scanner`` and
    starts it.
    """

    def __init__(self, url):
        """Scan ``url``.

        :param url: target URL handed to ``ArgumentParse`` and ``Requester``.
        :raises SkipTargetInterrupt: when the initial probe or the scan itself
                                     fails with a ``RequestException``.
        """
        self.arguments = Argument(url)

        output.debug('Start scan......')
        try:
            self.requester = Requester(url, cookie=self.arguments.cookie,
                                       useragent=self.arguments.useragent,
                                       maxPool=self.arguments.max_threads,
                                       maxRetries=self.arguments.max_retrys,
                                       delay=self.arguments.delay,
                                       timeout=self.arguments.http_timeout,
                                       proxy=self.arguments.proxy,
                                       redirect=True)
            # Request the root path before building the scanner so that a
            # failing target is skipped here.
            self.requester.request("/")
        except RequestException as e:
            output.error(self._error_message(e))
            raise SkipTargetInterrupt

        self.scanner = Scanner(self.requester,
                               concurrent_num=20,
                               internal_timeout=60,
                               dictionary={'dir_dic': self.arguments.dir_dic,
                                           'file_dic': self.arguments.file_dic,
                                           'bakdir_exts': self.arguments.bakdir_exts,
                                           'bakfile_exts': self.arguments.bakfile_exts},
                               match_callbacks=[])

        try:
            self.run()
        except RequestException as e:
            output.error("Fatal error during site scanning: " + self._error_message(e))
            raise SkipTargetInterrupt

        output.warning('\nTask Completed')

    @staticmethod
    def _error_message(error):
        """Return the text carried by ``error``.

        The expected payload is ``error.args[0]['message']``; when the
        exception does not have that shape, fall back to ``str(error)`` so the
        original failure is reported instead of a secondary lookup error.
        """
        try:
            return error.args[0]['message']
        except (IndexError, KeyError, TypeError):
            return str(error)

    def run(self):
        """Start the scanner."""
        self.scanner.start()


--------------------------------------------------------------------------------
/lib/core/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env/python
2 | #-*- coding:utf-8 -*-
3 | 
4 | __author__ = 'BlackYe.'
5 | 


--------------------------------------------------------------------------------
/lib/core/argument.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env/python
 2 | #-*- coding:utf-8 -*-
 3 | 
 4 | __author__ = 'BlackYe.'
 5 | 
 6 | from ConfigParser import ConfigParser
 7 | 
 8 | class ArgumentParse(object):
 9 | 
10 |     def __init__(self, url):
11 |         self.cookie = ''
12 |         self.useragent = ''
13 |         self.max_threads = 5
14 |         self.max_retrys =  3
15 |         self.delay = 0.5
16 |         self.http_timeout = 30
17 | 
18 |         self.proxy = None
19 | 
20 |         conf = ConfigParser()
21 |         conf.read("config.conf")
22 |         self.bakdir_exts = eval(conf.get('dict', 'bakdir_exts'))
23 |         self.bakfile_exts = eval(conf.get('dict', 'bakfile_exts'))
24 |         self.__load_scan_dic(url, conf.get('dict', 'web_dic_path'), conf.get('dict', 'path_dic_path'))
25 | 
26 |     def __load_scan_dic(self, url, path_dic, file_dic):
27 |         '''
28 |         加载路径探测字典
29 |         :param path_dic:
30 |         :param file_dic:/
31 |         :return:
32 |         '''
33 |         from urlparse import urlparse
34 |         from IPy import IP
35 |         with open(path_dic, 'r') as file:
36 |             self.dir_dic = list(set([each.strip(' \r\n') for each in file.readlines()]))
37 |         file.close()
38 | 
39 |         with open(file_dic, 'r') as file:
40 |             self.file_dic = list(set([each.strip(' \r\n') for each in file.readlines()]))
41 |             try:
42 |                 IP(urlparse(url).netloc.split(':')[0]) #域名形式 www.baidu.com.tar.gz
43 |             except ValueError:
44 |                 self.file_dic.extend(['%s%s' % (urlparse(url).netloc.split(':')[0], webfile) for webfile in self.bakdir_exts])
45 |         file.close()
46 | 
47 | 


--------------------------------------------------------------------------------
/lib/core/webscan.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env/python
  2 | #-*- coding:utf-8 -*-
  3 | 
  4 | __author__ = 'BlackYe.'
  5 | 
  6 | 
  7 | from lib.common.util import RandomUtils
  8 | from lib.common.fuzzy_string_cmp import DynamicContentParser
  9 | from difflib import SequenceMatcher
 10 | 
 11 | import re
 12 | 
 13 | class WebScan(object):
 14 | 
 15 |     def __init__(self, requester, test_path = None, suffix = None, bdir = False):
 16 | 
 17 |         '''
 18 |         if test_path is None or test_path is "":
 19 |             self.test_path = RandomUtils.randString()
 20 |         else:
 21 |             self.test_path = test_path
 22 |         '''
 23 |         self.test_path = test_path if test_path is not None else ""
 24 |         if suffix is None:
 25 |             self.suffix = RandomUtils.randString()
 26 |         else:
 27 |             self.suffix = suffix
 28 | 
 29 |         self.bdir = bdir
 30 |         self.requester = requester
 31 |         self.tester = None
 32 |         self.redirect_regexp = None
 33 |         self.invalid_status = None
 34 |         self.dynamic_parser = None
 35 |         self.ratio = 0.98
 36 |         self.redirect_status_codes = [301, 302, 307]
 37 |         self.__init_env()
 38 | 
 39 |     def __init_env(self):
 40 |         first_path = self.test_path + self.suffix + '%s' % ('/' if self.bdir else '')
 41 |         first_response = second_response = None
 42 |         try:
 43 |             first_response = self.requester.request(first_path)
 44 |         except Exception,e:
 45 |             return
 46 |         self.invalid_status = first_response.status
 47 |         if self.invalid_status == 404:
 48 |             # Using the response status code is enough :-}
 49 |             return
 50 | 
 51 |         # look for redirects
 52 |         second_path = self.test_path + RandomUtils.randString(omit=self.test_path) + '%s' % ('/' if self.bdir else '')
 53 |         try:
 54 |             second_response = self.requester.request(second_path)
 55 |         except Exception:
 56 |             return
 57 |         if first_response.status in self.redirect_status_codes and first_response.redirect and second_response.redirect:
 58 |             self.redirect_regexp = self.generate_redirect_regexp(first_response.redirect, second_response.redirect)
 59 | 
 60 |         # Analyze response bodies
 61 |         self.dynamic_parser = DynamicContentParser(self.requester, first_path, first_response.body, second_response.body)
 62 |         base_ratio = float("{0:.2f}".format(self.dynamic_parser.comparisonRatio))  # Rounding to 2 decimals
 63 |         # If response length is small, adjust ratio
 64 |         if len(first_response) < 2000:
 65 |             base_ratio -= 0.1
 66 |         if base_ratio < self.ratio:
 67 |             self.ratio = base_ratio
 68 | 
 69 |     def generate_redirect_regexp(self, first_location, second_location):
 70 |         if first_location is None or second_location is None:
 71 |             return None
 72 |         sm = SequenceMatcher(None, first_location, second_location)
 73 |         marks = []
 74 |         for blocks in sm.get_matching_blocks():
 75 |             i = blocks[0]
 76 |             n = blocks[2]
 77 |             # empty block
 78 |             if n == 0:
 79 |                 continue
 80 |             mark = first_location[i:i + n]
 81 |             marks.append(mark)
 82 |         regexp = "^.*{0}.*$".format(".*".join(map(re.escape, marks)))
 83 |         return regexp
 84 | 
 85 |     def scan(self, path):
 86 |         response = None
 87 |         try:
 88 |             response = self.requester.request(path)
 89 |         except Exception:
 90 |             return False
 91 | 
 92 |         if hasattr(response.headers, 'Content-Length') and not int(response.headers.get('Content-Length')): #过滤掉空白页面
 93 |             return False
 94 |         if self.invalid_status == 404 and response.status == 404:
 95 |             return False
 96 |         if response.status >= 400 and response.status < 404:
 97 |             return False
 98 |         if self.invalid_status != response.status:
 99 |             return True
100 |         redirect_to_invalid = False
101 |         if self.redirect_regexp is not None and response.redirect is not None:
102 |             redirect_to_invalid = re.match(self.redirect_regexp, response.redirect) is not None
103 |             # If redirection doesn't match the rule, mark as found
104 |             if not redirect_to_invalid:
105 |                 return True
106 | 
107 |         ratio = self.dynamic_parser.relative_distance(response.body)
108 |         if ratio >= self.ratio:
109 |             return False
110 |         elif redirect_to_invalid and ratio >= (self.ratio - 0.15):
111 |             return False
112 |         return True
113 | 


--------------------------------------------------------------------------------
/lib/net/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env/python
2 | #-*- coding:utf-8 -*-
3 | 
4 | __author__ = 'BlackYe.'
5 | 


--------------------------------------------------------------------------------
/lib/net/myrequests.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env/python
  2 | #-*- coding:utf-8 -*-
  3 | 
  4 | __author__ = 'BlackYe.'
  5 | 
  6 | import random
  7 | import socket
  8 | import time
  9 | 
 10 | from urlparse import urlparse, urljoin
 11 | import thirdparty_libs.requests as requests
 12 | from requests.exceptions import RequestException
 13 | from lib.net.myresponse import *
 14 | from lib.common.myexception import *
 15 | 
 16 | 
 17 | class Requester(object):
 18 | 
 19 |     headers = {
 20 |         'User-agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1468.0 Safari/537.36',
 21 |         'Accept-Language': 'en-us',
 22 |         'Accept-Encoding': 'identity',
 23 |         'Keep-Alive': '300',
 24 |         'Connection': 'keep-alive',
 25 |         'Cache-Control': 'max-age=0',
 26 |         }
 27 | 
 28 |     def __init__(self, url,
 29 |                      cookie = None,
 30 |                      useragent = None,
 31 |                      maxPool = 1,
 32 |                      maxRetries = 5,
 33 |                      delay = 0,
 34 |                      timeout = 60,
 35 |                      proxy = None,
 36 |                      redirect = True):
 37 |         # if no backslash, append one
 38 |         if not url.endswith('/'):
 39 |             url = url + '/'
 40 |         parsed = urlparse(url)
 41 |         self.basePath = parsed.path
 42 | 
 43 |         # if not protocol specified, set http by default
 44 |         if parsed.scheme != 'http' and parsed.scheme != 'https':
 45 |             parsed = urlparse('http://' + url)
 46 |             self.basePath = parsed.path
 47 |         self.protocol = parsed.scheme
 48 |         if self.protocol != 'http' and self.protocol != 'https':
 49 |             self.protocol = 'http'
 50 |         self.host = parsed.netloc.split(':')[0]
 51 | 
 52 |         # resolve DNS to decrease overhead
 53 |         '''
 54 |         if ip is not None:
 55 |             self.ip = ip
 56 |         else:
 57 |             try:
 58 |                 self.ip = socket.gethostbyname(self.host)
 59 |             except socket.gaierror:
 60 |                 raise RequestException({'message': "Couldn't resolve DNS"})
 61 |         '''
 62 |         self.ip = None
 63 |         self.headers['Host'] = self.host
 64 | 
 65 |         # If no port specified, set default (80, 443)
 66 |         try:
 67 |             self.port = parsed.netloc.split(':')[1]
 68 |         except IndexError:
 69 |             self.port = (443 if self.protocol == 'https' else 80)
 70 | 
 71 |         # Set cookie and user-agent headers
 72 |         if cookie is not None:
 73 |             self.setHeader('Cookie', cookie)
 74 |         if useragent is not None:
 75 |             self.setHeader('User-agent', useragent)
 76 |         self.maxRetries = maxRetries
 77 |         self.maxPool = maxPool
 78 |         self.delay = delay
 79 |         self.timeout = timeout
 80 |         self.pool = None
 81 |         self.proxy = proxy
 82 |         self.redirect = redirect
 83 |         self.randomAgents = None
 84 |         self.session = requests.Session()
 85 | 
 86 |     def setHeader(self, header, content):
 87 |         self.headers[header] = content
 88 | 
 89 |     def setRandomAgents(self, agents):
 90 |         self.randomAgents = list(agents)
 91 | 
 92 |     def unsetRandomAgents(self):
 93 |         self.randomAgents = None
 94 | 
 95 |     def request(self, path):
 96 |         i = 0
 97 |         proxy = None
 98 |         result = None
 99 |         while i <= self.maxRetries:
100 |             try:
101 |                 if self.proxy is not None:
102 |                     proxy = {"https" : self.proxy, "http" : self.proxy}
103 |                 if True:
104 |                     url = "{0}://{1}:{2}".format(self.protocol, self.host, self.port)
105 |                 else:
106 |                     url = "{0}://{1}:{2}".format(self.protocol, self.ip, self.port)
107 |                 url = urljoin(url, self.basePath)
108 | 
109 |                 # Joining with concatenation because a urljoin bug with "::"
110 |                 if not url.endswith('/'):
111 |                     url += "/"
112 |                 if path.startswith('/'):
113 |                     path = path[1:]
114 |                 url += path
115 | 
116 |                 headers = dict(self.headers)
117 |                 '''
118 |                 if self.randomAgents is not None:
119 |                     headers["User-agent"] = random.choice(self.randomAgents)
120 |                 '''
121 |                 headers["Host"] = self.host
122 |                 # include port in Host header if it's non-standard
123 |                 if (self.protocol == "https" and self.port != 443) or (self.protocol == "http" and self.port != 80):
124 |                     headers["Host"]+=":{0}".format(self.port)
125 | 
126 |                 response = self.session.get(url, proxies=proxy, verify=False, allow_redirects=self.redirect, \
127 |                                         headers=headers, timeout=self.timeout)
128 |                 result = Response(response.status_code, response.reason, response.headers, response.content)
129 |                 time.sleep(self.delay)
130 |                 del headers
131 |                 break
132 |             except RequestException,ex:
133 |                 continue
134 |             finally:
135 |                 i = i + 1
136 |         if i > self.maxRetries:
137 |             raise RequestException(\
138 |                 {'message': 'CONNECTION TIMEOUT: There was a problem in the request to: {0}'.format(path)}
139 |                 )
140 |         return result
141 | 


--------------------------------------------------------------------------------
/lib/net/myresponse.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env/python
 2 | #-*- coding:utf-8 -*-
 3 | 
 4 | __author__ = 'BlackYe.'
 5 | 
 6 | class Response(object):
 7 | 
 8 |     def __init__(self, status, reason, headers, body):
 9 |         self.status = status
10 |         self.reason = reason
11 |         self.headers = headers
12 |         self.body = body
13 | 
14 |     def __str__(self):
15 |         return self.body
16 | 
17 |     def __int__(self):
18 |         return self.status
19 | 
20 |     def __eq__(self, other):
21 |         return self.status == other.status and self.body == other.body
22 | 
23 |     def __cmp__(self, other):
24 |         return (self.body > other) - (self.body < other)
25 | 
26 |     def __len__(self):
27 |         return len(self.body)
28 | 
29 |     def __hash__(self):
30 |         return hash(self.body)
31 | 
32 |     def __del__(self):
33 |         del self.body
34 |         del self.headers
35 |         del self.status
36 |         del self.reason
37 | 
38 |     @property
39 |     def redirect(self):
40 |         headers = dict((key.lower(), value) for key, value in self.headers.items())
41 |         return headers.get("location")
42 | 
43 |     @property
44 |     def pretty(self):
45 |         try:
46 |             from BeautifulSoup import BeautifulSoup
47 |         except ImportError:
48 |             raise Exception('BeautifulSoup must be installed to get pretty HTML =(')
49 |         html = BeautifulSoup(self.body)
50 |         return html.prettify()


--------------------------------------------------------------------------------
/logs/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env/python
2 | #-*- coding:utf-8 -*-
3 | 
4 | __author__ = 'BlackYe.'
5 | 


--------------------------------------------------------------------------------
/result/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env/python
2 | #-*- coding:utf-8 -*-
3 | 
4 | __author__ = 'BlackYe.'
5 | 


--------------------------------------------------------------------------------
/thirdparty_libs/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env/python
2 | #-*- coding:utf-8 -*-
3 | 
4 | __author__ = 'BlackYe.'
5 | 


--------------------------------------------------------------------------------
/thirdparty_libs/chardet/__init__.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # This library is free software; you can redistribute it and/or
 3 | # modify it under the terms of the GNU Lesser General Public
 4 | # License as published by the Free Software Foundation; either
 5 | # version 2.1 of the License, or (at your option) any later version.
 6 | #
 7 | # This library is distributed in the hope that it will be useful,
 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
10 | # Lesser General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Lesser General Public
13 | # License along with this library; if not, write to the Free Software
14 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
15 | # 02110-1301  USA
16 | ######################### END LICENSE BLOCK #########################
17 | 
18 | 
19 | from .compat import PY2, PY3
20 | from .universaldetector import UniversalDetector
21 | from .version import __version__, VERSION
22 | 
23 | 
def detect(byte_str):
    """
    Guess the encoding of a byte string.

    :param byte_str:     The byte sequence to examine.
    :type byte_str:      ``bytes`` or ``bytearray``
    :raises TypeError:   If ``byte_str`` is neither ``bytes`` nor
                         ``bytearray``.
    :return:             Whatever ``UniversalDetector.close()`` returns
                         after being fed the data.
    """
    if isinstance(byte_str, bytearray):
        data = byte_str
    elif isinstance(byte_str, bytes):
        # The detector is fed a bytearray, so convert immutable bytes.
        data = bytearray(byte_str)
    else:
        raise TypeError('Expected object of type bytes or bytearray, got: '
                        '{0}'.format(type(byte_str)))
    detector = UniversalDetector()
    detector.feed(data)
    return detector.close()
40 | 


--------------------------------------------------------------------------------
/thirdparty_libs/chardet/big5prober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is Mozilla Communicator client code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 1998
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301  USA
26 | ######################### END LICENSE BLOCK #########################
27 | 
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import Big5DistributionAnalysis
31 | from .mbcssm import BIG5_SM_MODEL
32 | 
33 | 
class Big5Prober(MultiByteCharSetProber):
    """Multi-byte prober wired up for Big5.

    Uses a ``CodingStateMachine`` built from ``BIG5_SM_MODEL`` and a
    ``Big5DistributionAnalysis`` as its distribution analyzer.
    """

    def __init__(self):
        super(Big5Prober, self).__init__()
        self.coding_sm = CodingStateMachine(BIG5_SM_MODEL)
        self.distribution_analyzer = Big5DistributionAnalysis()
        # Start from a clean state now that both helpers are in place.
        self.reset()

    @property
    def language(self):
        """Language name reported by this prober."""
        return "Chinese"

    @property
    def charset_name(self):
        """Charset name reported by this prober."""
        return "Big5"
48 | 


--------------------------------------------------------------------------------
/thirdparty_libs/chardet/charsetgroupprober.py:
--------------------------------------------------------------------------------
  1 | ######################## BEGIN LICENSE BLOCK ########################
  2 | # The Original Code is Mozilla Communicator client code.
  3 | #
  4 | # The Initial Developer of the Original Code is
  5 | # Netscape Communications Corporation.
  6 | # Portions created by the Initial Developer are Copyright (C) 1998
  7 | # the Initial Developer. All Rights Reserved.
  8 | #
  9 | # Contributor(s):
 10 | #   Mark Pilgrim - port to Python
 11 | #
 12 | # This library is free software; you can redistribute it and/or
 13 | # modify it under the terms of the GNU Lesser General Public
 14 | # License as published by the Free Software Foundation; either
 15 | # version 2.1 of the License, or (at your option) any later version.
 16 | #
 17 | # This library is distributed in the hope that it will be useful,
 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 20 | # Lesser General Public License for more details.
 21 | #
 22 | # You should have received a copy of the GNU Lesser General Public
 23 | # License along with this library; if not, write to the Free Software
 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 25 | # 02110-1301  USA
 26 | ######################### END LICENSE BLOCK #########################
 27 | 
 28 | from .enums import ProbingState
 29 | from .charsetprober import CharSetProber
 30 | 
 31 | 
 32 | class CharSetGroupProber(CharSetProber):
 33 |     def __init__(self, lang_filter=None):
 34 |         super(CharSetGroupProber, self).__init__(lang_filter=lang_filter)
 35 |         self._active_num = 0
 36 |         self.probers = []
 37 |         self._best_guess_prober = None
 38 | 
 39 |     def reset(self):
 40 |         super(CharSetGroupProber, self).reset()
 41 |         self._active_num = 0
 42 |         for prober in self.probers:
 43 |             if prober:
 44 |                 prober.reset()
 45 |                 prober.active = True
 46 |                 self._active_num += 1
 47 |         self._best_guess_prober = None
 48 | 
 49 |     @property
 50 |     def charset_name(self):
 51 |         if not self._best_guess_prober:
 52 |             self.get_confidence()
 53 |             if not self._best_guess_prober:
 54 |                 return None
 55 |         return self._best_guess_prober.charset_name
 56 | 
 57 |     @property
 58 |     def language(self):
 59 |         if not self._best_guess_prober:
 60 |             self.get_confidence()
 61 |             if not self._best_guess_prober:
 62 |                 return None
 63 |         return self._best_guess_prober.language
 64 | 
 65 |     def feed(self, byte_str):
 66 |         for prober in self.probers:
 67 |             if not prober:
 68 |                 continue
 69 |             if not prober.active:
 70 |                 continue
 71 |             state = prober.feed(byte_str)
 72 |             if not state:
 73 |                 continue
 74 |             if state == ProbingState.FOUND_IT:
 75 |                 self._best_guess_prober = prober
 76 |                 return self.state
 77 |             elif state == ProbingState.NOT_ME:
 78 |                 prober.active = False
 79 |                 self._active_num -= 1
 80 |                 if self._active_num <= 0:
 81 |                     self._state = ProbingState.NOT_ME
 82 |                     return self.state
 83 |         return self.state
 84 | 
 85 |     def get_confidence(self):
 86 |         state = self.state
 87 |         if state == ProbingState.FOUND_IT:
 88 |             return 0.99
 89 |         elif state == ProbingState.NOT_ME:
 90 |             return 0.01
 91 |         best_conf = 0.0
 92 |         self._best_guess_prober = None
 93 |         for prober in self.probers:
 94 |             if not prober:
 95 |                 continue
 96 |             if not prober.active:
 97 |                 self.logger.debug('%s not active', prober.charset_name)
 98 |                 continue
 99 |             conf = prober.get_confidence()
100 |             self.logger.debug('%s %s confidence = %s', prober.charset_name, prober.language, conf)
101 |             if best_conf < conf:
102 |                 best_conf = conf
103 |                 self._best_guess_prober = prober
104 |         if not self._best_guess_prober:
105 |             return 0.0
106 |         return best_conf
107 | 


--------------------------------------------------------------------------------
/thirdparty_libs/chardet/charsetprober.py:
--------------------------------------------------------------------------------
  1 | ######################## BEGIN LICENSE BLOCK ########################
  2 | # The Original Code is Mozilla Universal charset detector code.
  3 | #
  4 | # The Initial Developer of the Original Code is
  5 | # Netscape Communications Corporation.
  6 | # Portions created by the Initial Developer are Copyright (C) 2001
  7 | # the Initial Developer. All Rights Reserved.
  8 | #
  9 | # Contributor(s):
 10 | #   Mark Pilgrim - port to Python
 11 | #   Shy Shalom - original C code
 12 | #
 13 | # This library is free software; you can redistribute it and/or
 14 | # modify it under the terms of the GNU Lesser General Public
 15 | # License as published by the Free Software Foundation; either
 16 | # version 2.1 of the License, or (at your option) any later version.
 17 | #
 18 | # This library is distributed in the hope that it will be useful,
 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 21 | # Lesser General Public License for more details.
 22 | #
 23 | # You should have received a copy of the GNU Lesser General Public
 24 | # License along with this library; if not, write to the Free Software
 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 26 | # 02110-1301  USA
 27 | ######################### END LICENSE BLOCK #########################
 28 | 
 29 | import logging
 30 | import re
 31 | 
 32 | from .enums import ProbingState
 33 | 
 34 | 
 35 | class CharSetProber(object):
 36 | 
 37 |     SHORTCUT_THRESHOLD = 0.95
 38 | 
 39 |     def __init__(self, lang_filter=None):
 40 |         self._state = None
 41 |         self.lang_filter = lang_filter
 42 |         self.logger = logging.getLogger(__name__)
 43 | 
 44 |     def reset(self):
 45 |         self._state = ProbingState.DETECTING
 46 | 
 47 |     @property
 48 |     def charset_name(self):
 49 |         return None
 50 | 
 51 |     def feed(self, buf):
 52 |         pass
 53 | 
 54 |     @property
 55 |     def state(self):
 56 |         return self._state
 57 | 
 58 |     def get_confidence(self):
 59 |         return 0.0
 60 | 
 61 |     @staticmethod
 62 |     def filter_high_byte_only(buf):
 63 |         buf = re.sub(b'([\x00-\x7F])+', b' ', buf)
 64 |         return buf
 65 | 
 66 |     @staticmethod
 67 |     def filter_international_words(buf):
 68 |         """
 69 |         We define three types of bytes:
 70 |         alphabet: english alphabets [a-zA-Z]
 71 |         international: international characters [\x80-\xFF]
 72 |         marker: everything else [^a-zA-Z\x80-\xFF]
 73 | 
 74 |         The input buffer can be thought to contain a series of words delimited
 75 |         by markers. This function works to filter all words that contain at
 76 |         least one international character. All contiguous sequences of markers
 77 |         are replaced by a single space ascii character.
 78 | 
 79 |         This filter applies to all scripts which do not use English characters.
 80 |         """
 81 |         filtered = bytearray()
 82 | 
 83 |         # This regex expression filters out only words that have at-least one
 84 |         # international character. The word may include one marker character at
 85 |         # the end.
 86 |         words = re.findall(b'[a-zA-Z]*[\x80-\xFF]+[a-zA-Z]*[^a-zA-Z\x80-\xFF]?',
 87 |                            buf)
 88 | 
 89 |         for word in words:
 90 |             filtered.extend(word[:-1])
 91 | 
 92 |             # If the last character in the word is a marker, replace it with a
 93 |             # space as markers shouldn't affect our analysis (they are used
 94 |             # similarly across all languages and may thus have similar
 95 |             # frequencies).
 96 |             last_char = word[-1:]
 97 |             if not last_char.isalpha() and last_char < b'\x80':
 98 |                 last_char = b' '
 99 |             filtered.extend(last_char)
100 | 
101 |         return filtered
102 | 
103 |     @staticmethod
104 |     def filter_with_english_letters(buf):
105 |         """
106 |         Returns a copy of ``buf`` that retains only the sequences of English
107 |         alphabet and high byte characters that are not between <> characters.
108 |         Also retains English alphabet and high byte characters immediately
109 |         before occurrences of >.
110 | 
111 |         This filter can be applied to all scripts which contain both English
112 |         characters and extended ASCII characters, but is currently only used by
113 |         ``Latin1Prober``.
114 |         """
115 |         filtered = bytearray()
116 |         in_tag = False
117 |         prev = 0
118 | 
119 |         for curr in range(len(buf)):
120 |             # Slice here to get bytes instead of an int with Python 3
121 |             buf_char = buf[curr:curr + 1]
122 |             # Check if we're coming out of or entering an HTML tag
123 |             if buf_char == b'>':
124 |                 in_tag = False
125 |             elif buf_char == b'<':
126 |                 in_tag = True
127 | 
128 |             # If current character is not extended-ASCII and not alphabetic...
129 |             if buf_char < b'\x80' and not buf_char.isalpha():
130 |                 # ...and we're not in a tag
131 |                 if curr > prev and not in_tag:
132 |                     # Keep everything after last non-extended-ASCII,
133 |                     # non-alphabetic character
134 |                     filtered.extend(buf[prev:curr])
135 |                     # Output a space to delimit stretch we kept
136 |                     filtered.extend(b' ')
137 |                 prev = curr + 1
138 | 
139 |         # If we're not in a tag...
140 |         if not in_tag:
141 |             # Keep everything after last non-extended-ASCII, non-alphabetic
142 |             # character
143 |             filtered.extend(buf[prev:])
144 | 
145 |         return filtered
146 | 


--------------------------------------------------------------------------------
/thirdparty_libs/chardet/cli/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/thirdparty_libs/chardet/cli/chardetect.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | """
 3 | Script which takes one or more file paths and reports on their detected
 4 | encodings
 5 | 
 6 | Example::
 7 | 
 8 |     % chardetect somefile someotherfile
 9 |     somefile: windows-1252 with confidence 0.5
10 |     someotherfile: ascii with confidence 1.0
11 | 
12 | If no paths are provided, it takes its input from stdin.
13 | 
14 | """
15 | 
16 | from __future__ import absolute_import, print_function, unicode_literals
17 | 
18 | import argparse
19 | import sys
20 | 
21 | from chardet import __version__
22 | from chardet.compat import PY2
23 | from chardet.universaldetector import UniversalDetector
24 | 
25 | 
def description_of(lines, name='stdin'):
    """
    Describe the probable encoding of a file or list of byte strings.

    Each item of ``lines`` is fed to a fresh ``UniversalDetector`` until the
    detector reports it is done or the input is exhausted.

    :param lines: The lines to get the encoding of.
    :type lines: Iterable of bytes
    :param name: Name of file or collection of lines
    :type name: str
    :returns: ``"<name>: <encoding> with confidence <confidence>"``, or
              ``"<name>: no result"`` when no encoding was detected.
    """
    detector = UniversalDetector()
    for chunk in lines:
        detector.feed(bytearray(chunk))
        # Stop early once the detector is certain - particularly useful when
        # a BOM has been read.
        if detector.done:
            break
    detector.close()
    outcome = detector.result

    if PY2:
        name = name.decode(sys.getfilesystemencoding(), 'ignore')

    if not outcome['encoding']:
        return '{0}: no result'.format(name)
    return '{0}: {1} with confidence {2}'.format(name, outcome['encoding'],
                                                 outcome['confidence'])
52 | 
53 | 
def main(argv=None):
    """
    Parse the command line and print a detection report for each input.

    :param argv: List of arguments, as if specified on the command-line.
                 If None, ``sys.argv[1:]`` is used instead.
    :type argv: list of str
    """
    # With no paths given, read binary data from standard input
    default_stream = sys.stdin if PY2 else sys.stdin.buffer

    cli = argparse.ArgumentParser(
        description="Takes one or more file paths and reports their detected \
                     encodings")
    cli.add_argument('input',
                     help='File whose encoding we would like to determine. \
                              (default: stdin)',
                     type=argparse.FileType('rb'), nargs='*',
                     default=[default_stream])
    cli.add_argument('--version', action='version',
                     version='%(prog)s {0}'.format(__version__))
    options = cli.parse_args(argv)

    interactive_notice = (
        "You are running chardetect interactively. Press "
        "CTRL-D twice at the start of a blank line to signal the "
        "end of your input. If you want help, run chardetect "
        "--help\n")

    for stream in options.input:
        # Tell terminal users how to end their input
        if stream.isatty():
            print(interactive_notice, file=sys.stderr)
        print(description_of(stream, stream.name))
82 | 
83 | 
# Run the command-line interface only when this file is executed as a
# script, not when it is imported.
if __name__ == '__main__':
    main()
86 | 


--------------------------------------------------------------------------------
/thirdparty_libs/chardet/codingstatemachine.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is mozilla.org code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 1998
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301  USA
26 | ######################### END LICENSE BLOCK #########################
27 | 
28 | import logging
29 | 
30 | from .enums import MachineState
31 | 
32 | 
class CodingStateMachine(object):
    """
    A table-driven state machine that checks a byte sequence against one
    particular encoding.

    The detector feeds every byte it receives to each active state machine,
    one byte at a time. The machine moves to a new state based on its
    previous state and the class of the byte it was given. Three states are
    of interest to an auto-detector:

    START state: The initial state, also re-entered once a legal byte
                 sequence (i.e. a valid code point) has been identified.

    ME state:  The machine saw a byte sequence that is specific to the
               charset it is designed for and that no other encoding can
               contain. This will lead to an immediate positive answer for
               the detector.

    ERROR state: The machine saw an illegal byte sequence for that encoding.
                 This will lead to an immediate negative answer for this
                 encoding, and the detector excludes it from here on.

    :param sm: The model driving the machine. The keys ``class_table``,
               ``char_len_table``, ``class_factor``, ``state_table``,
               ``name`` and ``language`` are read from it.
    """
    def __init__(self, sm):
        self._model = sm
        self._curr_byte_pos = 0
        self._curr_char_len = 0
        self._curr_state = None
        self.logger = logging.getLogger(__name__)
        self.reset()

    def reset(self):
        """Put the machine back into the START state."""
        self._curr_state = MachineState.START

    def next_state(self, c):
        """
        Advance the machine by one byte and return the resulting state.

        :param c: The byte value, used as an index into ``class_table``.
        """
        model = self._model
        # Every byte is first mapped to its class
        char_class = model['class_table'][c]

        # The first byte of a character also determines the character length
        if self._curr_state == MachineState.START:
            self._curr_byte_pos = 0
            self._curr_char_len = model['char_len_table'][char_class]

        # The state table is flattened: row = current state, column = class
        table_index = (self._curr_state * model['class_factor']
                       + char_class)
        self._curr_state = model['state_table'][table_index]
        self._curr_byte_pos += 1
        return self._curr_state

    def get_current_charlen(self):
        """Return the character length recorded at the last START state."""
        return self._curr_char_len

    def get_coding_state_machine(self):
        """Return the ``name`` entry of the underlying model."""
        return self._model['name']

    @property
    def language(self):
        """The ``language`` entry of the underlying model."""
        return self._model['language']
89 | 


--------------------------------------------------------------------------------
/thirdparty_libs/chardet/compat.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # Contributor(s):
 3 | #   Dan Blanchard
 4 | #   Ian Cordasco
 5 | #
 6 | # This library is free software; you can redistribute it and/or
 7 | # modify it under the terms of the GNU Lesser General Public
 8 | # License as published by the Free Software Foundation; either
 9 | # version 2.1 of the License, or (at your option) any later version.
10 | #
11 | # This library is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 | # Lesser General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU Lesser General Public
17 | # License along with this library; if not, write to the Free Software
18 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
19 | # 02110-1301  USA
20 | ######################### END LICENSE BLOCK #########################
21 | 
22 | import sys
23 | 
24 | 
# Interpreter-version flags, followed by string type aliases that differ
# between Python 2 and Python 3.
PY2 = sys.version_info < (3, 0)
PY3 = not PY2

if PY2:
    # ``unicode`` only exists on Python 2; this branch never runs on 3.
    base_str = (str, unicode)
    text_type = unicode
else:
    base_str = (bytes, str)
    text_type = str
35 | 


--------------------------------------------------------------------------------
/thirdparty_libs/chardet/cp949prober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is mozilla.org code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 1998
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301  USA
26 | ######################### END LICENSE BLOCK #########################
27 | 
28 | from .chardistribution import EUCKRDistributionAnalysis
29 | from .codingstatemachine import CodingStateMachine
30 | from .mbcharsetprober import MultiByteCharSetProber
31 | from .mbcssm import CP949_SM_MODEL
32 | 
33 | 
class CP949Prober(MultiByteCharSetProber):
    """
    Multi-byte prober for the CP949 encoding.

    Pairs the CP949 state-machine model with the EUC-KR character
    distribution analysis.
    """

    def __init__(self):
        super(CP949Prober, self).__init__()
        self.coding_sm = CodingStateMachine(CP949_SM_MODEL)
        # NOTE: CP949 is a superset of EUC-KR, so the character distribution
        #       should not differ; the EUC-KR analyzer is reused.
        self.distribution_analyzer = EUCKRDistributionAnalysis()
        self.reset()

    @property
    def charset_name(self):
        """Name of the charset this prober detects."""
        return "CP949"

    @property
    def language(self):
        """Language associated with this prober."""
        return "Korean"
50 | 


--------------------------------------------------------------------------------
/thirdparty_libs/chardet/enums.py:
--------------------------------------------------------------------------------
 1 | """
 2 | All of the Enums that are used throughout the chardet package.
 3 | 
 4 | :author: Dan Blanchard (dan.blanchard@gmail.com)
 5 | """
 6 | 
 7 | 
 8 | class InputState(object):
 9 |     """
10 |     This enum represents the different states a universal detector can be in.
11 |     """
12 |     PURE_ASCII = 0
13 |     ESC_ASCII = 1
14 |     HIGH_BYTE = 2
15 | 
16 | 
class LanguageFilter(object):
    """
    Bit flags for the different language filters we can apply to a
    ``UniversalDetector``. Flags may be combined with ``|`` and tested
    with ``&``.
    """
    # One bit per individual filter
    CHINESE_SIMPLIFIED = 1 << 0
    CHINESE_TRADITIONAL = 1 << 1
    JAPANESE = 1 << 2
    KOREAN = 1 << 3
    NON_CJK = 1 << 4

    # Combined masks
    ALL = 0x1F
    CHINESE = CHINESE_SIMPLIFIED | CHINESE_TRADITIONAL
    CJK = CHINESE | JAPANESE | KOREAN
30 | 
31 | 
class ProbingState(object):
    """
    Enum-like namespace for the different states a prober can be in.
    Values are consecutive integers starting at 0.
    """
    DETECTING, FOUND_IT, NOT_ME = range(3)
39 | 
40 | 
class MachineState(object):
    """
    Enum-like namespace for the different states a state machine can be in.
    Values are consecutive integers starting at 0.
    """
    START, ERROR, ITS_ME = range(3)
48 | 
49 | 
class SequenceLikelihood(object):
    """
    Enum-like namespace for the likelihood of a character following the
    previous one. Values are consecutive integers starting at 0.
    """
    NEGATIVE, UNLIKELY, LIKELY, POSITIVE = range(4)

    @classmethod
    def get_num_categories(cls):
        """:returns: The number of likelihood categories in the enum."""
        return 4
63 | 
64 | 
class CharacterCategory(object):
    """
    Enum-like namespace for the categories that language models for
    ``SingleByteCharsetProber`` put characters into.

    Anything less than CONTROL is considered a letter.
    """
    # Listed in ascending order; CONTROL is the lowest category value
    CONTROL = 251
    DIGIT = 252
    SYMBOL = 253
    LINE_BREAK = 254
    UNDEFINED = 255
77 | 


--------------------------------------------------------------------------------
/thirdparty_libs/chardet/escprober.py:
--------------------------------------------------------------------------------
  1 | ######################## BEGIN LICENSE BLOCK ########################
  2 | # The Original Code is mozilla.org code.
  3 | #
  4 | # The Initial Developer of the Original Code is
  5 | # Netscape Communications Corporation.
  6 | # Portions created by the Initial Developer are Copyright (C) 1998
  7 | # the Initial Developer. All Rights Reserved.
  8 | #
  9 | # Contributor(s):
 10 | #   Mark Pilgrim - port to Python
 11 | #
 12 | # This library is free software; you can redistribute it and/or
 13 | # modify it under the terms of the GNU Lesser General Public
 14 | # License as published by the Free Software Foundation; either
 15 | # version 2.1 of the License, or (at your option) any later version.
 16 | #
 17 | # This library is distributed in the hope that it will be useful,
 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 20 | # Lesser General Public License for more details.
 21 | #
 22 | # You should have received a copy of the GNU Lesser General Public
 23 | # License along with this library; if not, write to the Free Software
 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 25 | # 02110-1301  USA
 26 | ######################### END LICENSE BLOCK #########################
 27 | 
 28 | from .charsetprober import CharSetProber
 29 | from .codingstatemachine import CodingStateMachine
 30 | from .enums import LanguageFilter, ProbingState, MachineState
 31 | from .escsm import (HZ_SM_MODEL, ISO2022CN_SM_MODEL, ISO2022JP_SM_MODEL,
 32 |                     ISO2022KR_SM_MODEL)
 33 | 
 34 | 
 35 | class EscCharSetProber(CharSetProber):
 36 |     """
 37 |     This CharSetProber uses a "code scheme" approach for detecting encodings,
 38 |     whereby easily recognizable escape or shift sequences are relied on to
 39 |     identify these encodings.
 40 |     """
 41 | 
 42 |     def __init__(self, lang_filter=None):
 43 |         super(EscCharSetProber, self).__init__(lang_filter=lang_filter)
 44 |         self.coding_sm = []
 45 |         if self.lang_filter & LanguageFilter.CHINESE_SIMPLIFIED:
 46 |             self.coding_sm.append(CodingStateMachine(HZ_SM_MODEL))
 47 |             self.coding_sm.append(CodingStateMachine(ISO2022CN_SM_MODEL))
 48 |         if self.lang_filter & LanguageFilter.JAPANESE:
 49 |             self.coding_sm.append(CodingStateMachine(ISO2022JP_SM_MODEL))
 50 |         if self.lang_filter & LanguageFilter.KOREAN:
 51 |             self.coding_sm.append(CodingStateMachine(ISO2022KR_SM_MODEL))
 52 |         self.active_sm_count = None
 53 |         self._detected_charset = None
 54 |         self._detected_language = None
 55 |         self._state = None
 56 |         self.reset()
 57 | 
 58 |     def reset(self):
 59 |         super(EscCharSetProber, self).reset()
 60 |         for coding_sm in self.coding_sm:
 61 |             if not coding_sm:
 62 |                 continue
 63 |             coding_sm.active = True
 64 |             coding_sm.reset()
 65 |         self.active_sm_count = len(self.coding_sm)
 66 |         self._detected_charset = None
 67 |         self._detected_language = None
 68 | 
 69 |     @property
 70 |     def charset_name(self):
 71 |         return self._detected_charset
 72 | 
 73 |     @property
 74 |     def language(self):
 75 |         return self._detected_language
 76 | 
 77 |     def get_confidence(self):
 78 |         if self._detected_charset:
 79 |             return 0.99
 80 |         else:
 81 |             return 0.00
 82 | 
 83 |     def feed(self, byte_str):
 84 |         for c in byte_str:
 85 |             for coding_sm in self.coding_sm:
 86 |                 if not coding_sm or not coding_sm.active:
 87 |                     continue
 88 |                 coding_state = coding_sm.next_state(c)
 89 |                 if coding_state == MachineState.ERROR:
 90 |                     coding_sm.active = False
 91 |                     self.active_sm_count -= 1
 92 |                     if self.active_sm_count <= 0:
 93 |                         self._state = ProbingState.NOT_ME
 94 |                         return self.state
 95 |                 elif coding_state == MachineState.ITS_ME:
 96 |                     self._state = ProbingState.FOUND_IT
 97 |                     self._detected_charset = coding_sm.get_coding_state_machine()
 98 |                     self._detected_language = coding_sm.language
 99 |                     return self.state
100 | 
101 |         return self.state
102 | 


--------------------------------------------------------------------------------
/thirdparty_libs/chardet/eucjpprober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is mozilla.org code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 1998
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301  USA
26 | ######################### END LICENSE BLOCK #########################
27 | 
28 | from .enums import ProbingState, MachineState
29 | from .mbcharsetprober import MultiByteCharSetProber
30 | from .codingstatemachine import CodingStateMachine
31 | from .chardistribution import EUCJPDistributionAnalysis
32 | from .jpcntx import EUCJPContextAnalysis
33 | from .mbcssm import EUCJP_SM_MODEL
34 | 
35 | 
class EUCJPProber(MultiByteCharSetProber):
    """
    Multi-byte prober for the EUC-JP encoding.

    Runs the EUC-JP coding state machine over the input and feeds each
    completed character to both a distribution analyzer and a context
    analyzer. The reported confidence is the larger of the two analyzers'
    confidences.
    """

    def __init__(self):
        super(EUCJPProber, self).__init__()
        self.coding_sm = CodingStateMachine(EUCJP_SM_MODEL)
        self.distribution_analyzer = EUCJPDistributionAnalysis()
        self.context_analyzer = EUCJPContextAnalysis()
        self.reset()

    def reset(self):
        """Reset the base prober state and the context analyzer."""
        super(EUCJPProber, self).reset()
        self.context_analyzer.reset()

    @property
    def charset_name(self):
        """Name of the charset this prober detects."""
        return "EUC-JP"

    @property
    def language(self):
        """Language associated with this prober."""
        return "Japanese"

    def feed(self, byte_str):
        """
        Feed a chunk of bytes to the prober and return its state.

        An empty chunk is accepted and leaves the remembered last byte
        untouched.

        :param byte_str: The bytes to analyse.
        :returns: The prober state after processing the chunk.
        """
        for i in range(len(byte_str)):
            # PY3K: byte_str is a byte array, so byte_str[i] is an int, not a byte
            coding_state = self.coding_sm.next_state(byte_str[i])
            if coding_state == MachineState.ERROR:
                self.logger.debug('%s %s prober hit error at byte %s',
                                  self.charset_name, self.language, i)
                self._state = ProbingState.NOT_ME
                break
            elif coding_state == MachineState.ITS_ME:
                self._state = ProbingState.FOUND_IT
                break
            elif coding_state == MachineState.START:
                # A complete character has just ended at position i
                char_len = self.coding_sm.get_current_charlen()
                if i == 0:
                    # The character may have started in the previous chunk:
                    # pair this byte with the byte remembered from it.
                    self._last_char[1] = byte_str[0]
                    self.context_analyzer.feed(self._last_char, char_len)
                    self.distribution_analyzer.feed(self._last_char, char_len)
                else:
                    self.context_analyzer.feed(byte_str[i - 1:i + 1],
                                                char_len)
                    self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
                                                     char_len)

        # Remember the final byte for the next chunk. An empty chunk has no
        # final byte; indexing it would raise IndexError.
        if len(byte_str) > 0:
            self._last_char[0] = byte_str[-1]

        if self.state == ProbingState.DETECTING:
            if (self.context_analyzer.got_enough_data() and
               (self.get_confidence() > self.SHORTCUT_THRESHOLD)):
                self._state = ProbingState.FOUND_IT

        return self.state

    def get_confidence(self):
        """Return the higher of the context and distribution confidences."""
        context_conf = self.context_analyzer.get_confidence()
        distrib_conf = self.distribution_analyzer.get_confidence()
        return max(context_conf, distrib_conf)
93 | 


--------------------------------------------------------------------------------
/thirdparty_libs/chardet/euckrprober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is mozilla.org code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 1998
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301  USA
26 | ######################### END LICENSE BLOCK #########################
27 | 
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import EUCKRDistributionAnalysis
31 | from .mbcssm import EUCKR_SM_MODEL
32 | 
33 | 
class EUCKRProber(MultiByteCharSetProber):
    """
    Multi-byte prober for the EUC-KR encoding.

    Pairs the EUC-KR state-machine model with the EUC-KR character
    distribution analysis.
    """

    def __init__(self):
        super(EUCKRProber, self).__init__()
        self.coding_sm = CodingStateMachine(EUCKR_SM_MODEL)
        self.distribution_analyzer = EUCKRDistributionAnalysis()
        self.reset()

    @property
    def charset_name(self):
        """Name of the charset this prober detects."""
        return "EUC-KR"

    @property
    def language(self):
        """Language associated with this prober."""
        return "Korean"
48 | 


--------------------------------------------------------------------------------
/thirdparty_libs/chardet/euctwprober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is mozilla.org code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 1998
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301  USA
26 | ######################### END LICENSE BLOCK #########################
27 | 
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import EUCTWDistributionAnalysis
31 | from .mbcssm import EUCTW_SM_MODEL
32 | 
class EUCTWProber(MultiByteCharSetProber):
    """
    Multi-byte prober for the EUC-TW encoding.

    Pairs the EUC-TW state-machine model with the EUC-TW character
    distribution analysis.
    """

    def __init__(self):
        super(EUCTWProber, self).__init__()
        self.coding_sm = CodingStateMachine(EUCTW_SM_MODEL)
        self.distribution_analyzer = EUCTWDistributionAnalysis()
        self.reset()

    @property
    def charset_name(self):
        """Name of the charset this prober detects."""
        return "EUC-TW"

    @property
    def language(self):
        """Language label reported by this prober."""
        return "Taiwan"
47 | 


--------------------------------------------------------------------------------
/thirdparty_libs/chardet/gb2312prober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is mozilla.org code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 1998
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301  USA
26 | ######################### END LICENSE BLOCK #########################
27 | 
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import GB2312DistributionAnalysis
31 | from .mbcssm import GB2312_SM_MODEL
32 | 
class GB2312Prober(MultiByteCharSetProber):
    """
    Multi-byte prober for the GB2312 encoding.

    Pairs the GB2312 state-machine model with the GB2312 character
    distribution analysis.
    """

    def __init__(self):
        super(GB2312Prober, self).__init__()
        self.coding_sm = CodingStateMachine(GB2312_SM_MODEL)
        self.distribution_analyzer = GB2312DistributionAnalysis()
        self.reset()

    @property
    def charset_name(self):
        """Name of the charset this prober detects."""
        return "GB2312"

    @property
    def language(self):
        """Language associated with this prober."""
        return "Chinese"
47 | 


--------------------------------------------------------------------------------
/thirdparty_libs/chardet/latin1prober.py:
--------------------------------------------------------------------------------
  1 | ######################## BEGIN LICENSE BLOCK ########################
  2 | # The Original Code is Mozilla Universal charset detector code.
  3 | #
  4 | # The Initial Developer of the Original Code is
  5 | # Netscape Communications Corporation.
  6 | # Portions created by the Initial Developer are Copyright (C) 2001
  7 | # the Initial Developer. All Rights Reserved.
  8 | #
  9 | # Contributor(s):
 10 | #   Mark Pilgrim - port to Python
 11 | #   Shy Shalom - original C code
 12 | #
 13 | # This library is free software; you can redistribute it and/or
 14 | # modify it under the terms of the GNU Lesser General Public
 15 | # License as published by the Free Software Foundation; either
 16 | # version 2.1 of the License, or (at your option) any later version.
 17 | #
 18 | # This library is distributed in the hope that it will be useful,
 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 21 | # Lesser General Public License for more details.
 22 | #
 23 | # You should have received a copy of the GNU Lesser General Public
 24 | # License along with this library; if not, write to the Free Software
 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 26 | # 02110-1301  USA
 27 | ######################### END LICENSE BLOCK #########################
 28 | 
 29 | from .charsetprober import CharSetProber
 30 | from .enums import ProbingState
 31 | 
# Number of likelihood categories stored in Latin1ClassModel (values 0..3);
# Latin1Prober keeps one counter per category.
FREQ_CAT_NUM = 4

# Character classes assigned to each byte value by Latin1_CharToClass.
UDF = 0  # undefined
OTH = 1  # other
ASC = 2  # ascii capital letter
ASS = 3  # ascii small letter
ACV = 4  # accent capital vowel
ACO = 5  # accent capital other
ASV = 6  # accent small vowel
ASO = 7  # accent small other
CLASS_NUM = 8  # total classes

# Lookup table indexed by byte value (0x00-0xFF) giving that byte's
# character class. Each row covers eight consecutive byte values.
Latin1_CharToClass = (
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 00 - 07
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 08 - 0F
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 10 - 17
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 18 - 1F
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 20 - 27
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 28 - 2F
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 30 - 37
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 38 - 3F
    OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   # 40 - 47
    ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   # 48 - 4F
    ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   # 50 - 57
    ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH,   # 58 - 5F
    OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   # 60 - 67
    ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   # 68 - 6F
    ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   # 70 - 77
    ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH,   # 78 - 7F
    OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH,   # 80 - 87
    OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF,   # 88 - 8F
    UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 90 - 97
    OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO,   # 98 - 9F
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # A0 - A7
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # A8 - AF
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # B0 - B7
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # B8 - BF
    ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO,   # C0 - C7
    ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV,   # C8 - CF
    ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH,   # D0 - D7
    ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO,   # D8 - DF
    ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO,   # E0 - E7
    ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV,   # E8 - EF
    ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH,   # F0 - F7
    ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO,   # F8 - FF
)

# Likelihood of one character class following another, stored as a flattened
# CLASS_NUM x CLASS_NUM matrix. Latin1Prober.feed() reads the entry at
# (previous_class * CLASS_NUM) + current_class, so each row below is the
# previous class and each column is the current class.
# 0 : illegal
# 1 : very unlikely
# 2 : normal
# 3 : very likely
Latin1ClassModel = (
# UDF OTH ASC ASS ACV ACO ASV ASO
    0,  0,  0,  0,  0,  0,  0,  0,  # UDF
    0,  3,  3,  3,  3,  3,  3,  3,  # OTH
    0,  3,  3,  3,  3,  3,  3,  3,  # ASC
    0,  3,  3,  3,  1,  1,  3,  3,  # ASS
    0,  3,  3,  3,  1,  2,  1,  2,  # ACV
    0,  3,  3,  3,  3,  3,  3,  3,  # ACO
    0,  3,  1,  3,  1,  1,  1,  3,  # ASV
    0,  3,  1,  3,  1,  1,  3,  3,  # ASO
)
 94 | 
 95 | 
 96 | class Latin1Prober(CharSetProber):
 97 |     def __init__(self):
 98 |         super(Latin1Prober, self).__init__()
 99 |         self._last_char_class = None
100 |         self._freq_counter = None
101 |         self.reset()
102 | 
103 |     def reset(self):
104 |         self._last_char_class = OTH
105 |         self._freq_counter = [0] * FREQ_CAT_NUM
106 |         CharSetProber.reset(self)
107 | 
108 |     @property
109 |     def charset_name(self):
110 |         return "ISO-8859-1"
111 | 
112 |     @property
113 |     def language(self):
114 |         return ""
115 | 
116 |     def feed(self, byte_str):
117 |         byte_str = self.filter_with_english_letters(byte_str)
118 |         for c in byte_str:
119 |             char_class = Latin1_CharToClass[c]
120 |             freq = Latin1ClassModel[(self._last_char_class * CLASS_NUM)
121 |                                     + char_class]
122 |             if freq == 0:
123 |                 self._state = ProbingState.NOT_ME
124 |                 break
125 |             self._freq_counter[freq] += 1
126 |             self._last_char_class = char_class
127 | 
128 |         return self.state
129 | 
130 |     def get_confidence(self):
131 |         if self.state == ProbingState.NOT_ME:
132 |             return 0.01
133 | 
134 |         total = sum(self._freq_counter)
135 |         if total < 0.01:
136 |             confidence = 0.0
137 |         else:
138 |             confidence = ((self._freq_counter[3] - self._freq_counter[1] * 20.0)
139 |                           / total)
140 |         if confidence < 0.0:
141 |             confidence = 0.0
142 |         # lower the confidence of latin1 so that other more accurate
143 |         # detector can take priority.
144 |         confidence = confidence * 0.73
145 |         return confidence
146 | 


--------------------------------------------------------------------------------
/thirdparty_libs/chardet/mbcharsetprober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is Mozilla Universal charset detector code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 2001
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #   Shy Shalom - original C code
12 | #   Proofpoint, Inc.
13 | #
14 | # This library is free software; you can redistribute it and/or
15 | # modify it under the terms of the GNU Lesser General Public
16 | # License as published by the Free Software Foundation; either
17 | # version 2.1 of the License, or (at your option) any later version.
18 | #
19 | # This library is distributed in the hope that it will be useful,
20 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
21 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22 | # Lesser General Public License for more details.
23 | #
24 | # You should have received a copy of the GNU Lesser General Public
25 | # License along with this library; if not, write to the Free Software
26 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
27 | # 02110-1301  USA
28 | ######################### END LICENSE BLOCK #########################
29 | 
30 | from .charsetprober import CharSetProber
31 | from .enums import ProbingState, MachineState
32 | 
33 | 
class MultiByteCharSetProber(CharSetProber):
    """
    Base class for probers of multi-byte encodings.

    Subclasses are expected to assign ``coding_sm`` (a coding state machine)
    and ``distribution_analyzer`` and to implement ``charset_name`` and
    ``language``.
    """

    def __init__(self, lang_filter=None):
        super(MultiByteCharSetProber, self).__init__(lang_filter=lang_filter)
        self.distribution_analyzer = None
        self.coding_sm = None
        # Two-slot buffer bridging feed() calls: slot 0 holds the last byte of
        # the previous chunk, slot 1 the first byte of the current chunk.
        self._last_char = [0, 0]

    def reset(self):
        """Restart detection, resetting whichever helpers are set."""
        super(MultiByteCharSetProber, self).reset()
        if self.coding_sm:
            self.coding_sm.reset()
        if self.distribution_analyzer:
            self.distribution_analyzer.reset()
        self._last_char = [0, 0]

    @property
    def charset_name(self):
        raise NotImplementedError

    @property
    def language(self):
        raise NotImplementedError

    def feed(self, byte_str):
        """Consume a chunk of bytes and return the current probing state.

        An empty ``byte_str`` is accepted: it leaves the carried-over last
        byte untouched instead of raising ``IndexError``.
        """
        for i, byte in enumerate(byte_str):
            coding_state = self.coding_sm.next_state(byte)
            if coding_state == MachineState.ERROR:
                self.logger.debug('%s %s prober hit error at byte %s',
                                  self.charset_name, self.language, i)
                self._state = ProbingState.NOT_ME
                break
            elif coding_state == MachineState.ITS_ME:
                self._state = ProbingState.FOUND_IT
                break
            elif coding_state == MachineState.START:
                # The state machine is back at START: hand the bytes ending
                # at this position to the distribution analyzer.
                char_len = self.coding_sm.get_current_charlen()
                if i == 0:
                    # The character may have begun in the previous chunk, so
                    # pair the carried-over byte with this chunk's first byte.
                    self._last_char[1] = byte_str[0]
                    self.distribution_analyzer.feed(self._last_char, char_len)
                else:
                    self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
                                                    char_len)

        # Remember the final byte for the next call; skipped for empty input,
        # where byte_str[-1] would raise IndexError.
        if byte_str:
            self._last_char[0] = byte_str[-1]

        if self.state == ProbingState.DETECTING:
            if (self.distribution_analyzer.got_enough_data() and
                    (self.get_confidence() > self.SHORTCUT_THRESHOLD)):
                self._state = ProbingState.FOUND_IT

        return self.state

    def get_confidence(self):
        """Return the distribution analyzer's confidence."""
        return self.distribution_analyzer.get_confidence()
92 | 


--------------------------------------------------------------------------------
/thirdparty_libs/chardet/mbcsgroupprober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is Mozilla Universal charset detector code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 2001
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #   Shy Shalom - original C code
12 | #   Proofpoint, Inc.
13 | #
14 | # This library is free software; you can redistribute it and/or
15 | # modify it under the terms of the GNU Lesser General Public
16 | # License as published by the Free Software Foundation; either
17 | # version 2.1 of the License, or (at your option) any later version.
18 | #
19 | # This library is distributed in the hope that it will be useful,
20 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
21 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22 | # Lesser General Public License for more details.
23 | #
24 | # You should have received a copy of the GNU Lesser General Public
25 | # License along with this library; if not, write to the Free Software
26 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
27 | # 02110-1301  USA
28 | ######################### END LICENSE BLOCK #########################
29 | 
30 | from .charsetgroupprober import CharSetGroupProber
31 | from .utf8prober import UTF8Prober
32 | from .sjisprober import SJISProber
33 | from .eucjpprober import EUCJPProber
34 | from .gb2312prober import GB2312Prober
35 | from .euckrprober import EUCKRProber
36 | from .cp949prober import CP949Prober
37 | from .big5prober import Big5Prober
38 | from .euctwprober import EUCTWProber
39 | 
40 | 
class MBCSGroupProber(CharSetGroupProber):
    """Group prober holding one instance of each multi-byte charset prober."""

    def __init__(self, lang_filter=None):
        super(MBCSGroupProber, self).__init__(lang_filter=lang_filter)
        # Instantiated in exactly this order.
        prober_classes = (
            UTF8Prober,
            SJISProber,
            EUCJPProber,
            GB2312Prober,
            EUCKRProber,
            CP949Prober,
            Big5Prober,
            EUCTWProber,
        )
        self.probers = [prober_class() for prober_class in prober_classes]
        self.reset()
55 | 


--------------------------------------------------------------------------------
/thirdparty_libs/chardet/sbcharsetprober.py:
--------------------------------------------------------------------------------
  1 | ######################## BEGIN LICENSE BLOCK ########################
  2 | # The Original Code is Mozilla Universal charset detector code.
  3 | #
  4 | # The Initial Developer of the Original Code is
  5 | # Netscape Communications Corporation.
  6 | # Portions created by the Initial Developer are Copyright (C) 2001
  7 | # the Initial Developer. All Rights Reserved.
  8 | #
  9 | # Contributor(s):
 10 | #   Mark Pilgrim - port to Python
 11 | #   Shy Shalom - original C code
 12 | #
 13 | # This library is free software; you can redistribute it and/or
 14 | # modify it under the terms of the GNU Lesser General Public
 15 | # License as published by the Free Software Foundation; either
 16 | # version 2.1 of the License, or (at your option) any later version.
 17 | #
 18 | # This library is distributed in the hope that it will be useful,
 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 21 | # Lesser General Public License for more details.
 22 | #
 23 | # You should have received a copy of the GNU Lesser General Public
 24 | # License along with this library; if not, write to the Free Software
 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 26 | # 02110-1301  USA
 27 | ######################### END LICENSE BLOCK #########################
 28 | 
 29 | from .charsetprober import CharSetProber
 30 | from .enums import CharacterCategory, ProbingState, SequenceLikelihood
 31 | 
 32 | 
 33 | class SingleByteCharSetProber(CharSetProber):
 34 |     SAMPLE_SIZE = 64
 35 |     SB_ENOUGH_REL_THRESHOLD = 1024  #  0.25 * SAMPLE_SIZE^2
 36 |     POSITIVE_SHORTCUT_THRESHOLD = 0.95
 37 |     NEGATIVE_SHORTCUT_THRESHOLD = 0.05
 38 | 
 39 |     def __init__(self, model, reversed=False, name_prober=None):
 40 |         super(SingleByteCharSetProber, self).__init__()
 41 |         self._model = model
 42 |         # TRUE if we need to reverse every pair in the model lookup
 43 |         self._reversed = reversed
 44 |         # Optional auxiliary prober for name decision
 45 |         self._name_prober = name_prober
 46 |         self._last_order = None
 47 |         self._seq_counters = None
 48 |         self._total_seqs = None
 49 |         self._total_char = None
 50 |         self._freq_char = None
 51 |         self.reset()
 52 | 
 53 |     def reset(self):
 54 |         super(SingleByteCharSetProber, self).reset()
 55 |         # char order of last character
 56 |         self._last_order = 255
 57 |         self._seq_counters = [0] * SequenceLikelihood.get_num_categories()
 58 |         self._total_seqs = 0
 59 |         self._total_char = 0
 60 |         # characters that fall in our sampling range
 61 |         self._freq_char = 0
 62 | 
 63 |     @property
 64 |     def charset_name(self):
 65 |         if self._name_prober:
 66 |             return self._name_prober.charset_name
 67 |         else:
 68 |             return self._model['charset_name']
 69 | 
 70 |     @property
 71 |     def language(self):
 72 |         if self._name_prober:
 73 |             return self._name_prober.language
 74 |         else:
 75 |             return self._model.get('language')
 76 | 
 77 |     def feed(self, byte_str):
 78 |         if not self._model['keep_english_letter']:
 79 |             byte_str = self.filter_international_words(byte_str)
 80 |         if not byte_str:
 81 |             return self.state
 82 |         char_to_order_map = self._model['char_to_order_map']
 83 |         for i, c in enumerate(byte_str):
 84 |             # XXX: Order is in range 1-64, so one would think we want 0-63 here,
 85 |             #      but that leads to 27 more test failures than before.
 86 |             order = char_to_order_map[c]
 87 |             # XXX: This was SYMBOL_CAT_ORDER before, with a value of 250, but
 88 |             #      CharacterCategory.SYMBOL is actually 253, so we use CONTROL
 89 |             #      to make it closer to the original intent. The only difference
 90 |             #      is whether or not we count digits and control characters for
 91 |             #      _total_char purposes.
 92 |             if order < CharacterCategory.CONTROL:
 93 |                 self._total_char += 1
 94 |             if order < self.SAMPLE_SIZE:
 95 |                 self._freq_char += 1
 96 |                 if self._last_order < self.SAMPLE_SIZE:
 97 |                     self._total_seqs += 1
 98 |                     if not self._reversed:
 99 |                         i = (self._last_order * self.SAMPLE_SIZE) + order
100 |                         model = self._model['precedence_matrix'][i]
101 |                     else:  # reverse the order of the letters in the lookup
102 |                         i = (order * self.SAMPLE_SIZE) + self._last_order
103 |                         model = self._model['precedence_matrix'][i]
104 |                     self._seq_counters[model] += 1
105 |             self._last_order = order
106 | 
107 |         charset_name = self._model['charset_name']
108 |         if self.state == ProbingState.DETECTING:
109 |             if self._total_seqs > self.SB_ENOUGH_REL_THRESHOLD:
110 |                 confidence = self.get_confidence()
111 |                 if confidence > self.POSITIVE_SHORTCUT_THRESHOLD:
112 |                     self.logger.debug('%s confidence = %s, we have a winner',
113 |                                       charset_name, confidence)
114 |                     self._state = ProbingState.FOUND_IT
115 |                 elif confidence < self.NEGATIVE_SHORTCUT_THRESHOLD:
116 |                     self.logger.debug('%s confidence = %s, below negative '
117 |                                       'shortcut threshhold %s', charset_name,
118 |                                       confidence,
119 |                                       self.NEGATIVE_SHORTCUT_THRESHOLD)
120 |                     self._state = ProbingState.NOT_ME
121 | 
122 |         return self.state
123 | 
124 |     def get_confidence(self):
125 |         r = 0.01
126 |         if self._total_seqs > 0:
127 |             r = ((1.0 * self._seq_counters[SequenceLikelihood.POSITIVE]) /
128 |                  self._total_seqs / self._model['typical_positive_ratio'])
129 |             r = r * self._freq_char / self._total_char
130 |             if r >= 1.0:
131 |                 r = 0.99
132 |         return r
133 | 


--------------------------------------------------------------------------------
/thirdparty_libs/chardet/sbcsgroupprober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is Mozilla Universal charset detector code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 2001
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #   Shy Shalom - original C code
12 | #
13 | # This library is free software; you can redistribute it and/or
14 | # modify it under the terms of the GNU Lesser General Public
15 | # License as published by the Free Software Foundation; either
16 | # version 2.1 of the License, or (at your option) any later version.
17 | #
18 | # This library is distributed in the hope that it will be useful,
19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21 | # Lesser General Public License for more details.
22 | #
23 | # You should have received a copy of the GNU Lesser General Public
24 | # License along with this library; if not, write to the Free Software
25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26 | # 02110-1301  USA
27 | ######################### END LICENSE BLOCK #########################
28 | 
29 | from .charsetgroupprober import CharSetGroupProber
30 | from .sbcharsetprober import SingleByteCharSetProber
31 | from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel,
32 |                                 Latin5CyrillicModel, MacCyrillicModel,
33 |                                 Ibm866Model, Ibm855Model)
34 | from .langgreekmodel import Latin7GreekModel, Win1253GreekModel
35 | from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel
36 | # from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel
37 | from .langthaimodel import TIS620ThaiModel
38 | from .langhebrewmodel import Win1255HebrewModel
39 | from .hebrewprober import HebrewProber
40 | from .langturkishmodel import Latin5TurkishModel
41 | 
42 | 
class SBCSGroupProber(CharSetGroupProber):
    """Group prober holding the single-byte charset probers."""

    def __init__(self):
        super(SBCSGroupProber, self).__init__()
        # TODO: Restore Hungarian encodings (iso-8859-2 and windows-1250)
        #       after we retrain model (Latin2HungarianModel and
        #       Win1250HungarianModel, which belong right after the
        #       Bulgarian models).
        language_models = (
            Win1251CyrillicModel,
            Koi8rModel,
            Latin5CyrillicModel,
            MacCyrillicModel,
            Ibm866Model,
            Ibm855Model,
            Latin7GreekModel,
            Win1253GreekModel,
            Latin5BulgarianModel,
            Win1251BulgarianModel,
            TIS620ThaiModel,
            Latin5TurkishModel,
        )
        self.probers = [SingleByteCharSetProber(language_model)
                        for language_model in language_models]

        # Hebrew uses two model probers over the same model (one of them
        # with reversed pair lookup), both sharing one HebrewProber as
        # their name prober.
        hebrew_prober = HebrewProber()
        logical_hebrew_prober = SingleByteCharSetProber(
            Win1255HebrewModel, False, hebrew_prober)
        visual_hebrew_prober = SingleByteCharSetProber(
            Win1255HebrewModel, True, hebrew_prober)
        hebrew_prober.set_model_probers(logical_hebrew_prober,
                                        visual_hebrew_prober)
        hebrew_trio = [hebrew_prober, logical_hebrew_prober,
                       visual_hebrew_prober]
        self.probers.extend(hebrew_trio)

        self.reset()
74 | 


--------------------------------------------------------------------------------
/thirdparty_libs/chardet/sjisprober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is mozilla.org code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 1998
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301  USA
26 | ######################### END LICENSE BLOCK #########################
27 | 
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import SJISDistributionAnalysis
31 | from .jpcntx import SJISContextAnalysis
32 | from .mbcssm import SJIS_SM_MODEL
33 | from .enums import ProbingState, MachineState
34 | 
35 | 
class SJISProber(MultiByteCharSetProber):
    """Multi-byte prober for Japanese text.

    Uses both a character distribution analyzer and a context analyzer, and
    reports whichever of the two confidences is higher.
    """

    def __init__(self):
        super(SJISProber, self).__init__()
        self.coding_sm = CodingStateMachine(SJIS_SM_MODEL)
        self.distribution_analyzer = SJISDistributionAnalysis()
        self.context_analyzer = SJISContextAnalysis()
        self.reset()

    def reset(self):
        """Restart detection, including the context analyzer."""
        super(SJISProber, self).reset()
        self.context_analyzer.reset()

    @property
    def charset_name(self):
        """Charset name as reported by the context analyzer."""
        return self.context_analyzer.charset_name

    @property
    def language(self):
        return "Japanese"

    def feed(self, byte_str):
        """Consume a chunk of bytes and return the current probing state.

        An empty ``byte_str`` is accepted: it leaves the carried-over last
        byte untouched instead of raising ``IndexError``.
        """
        for i, byte in enumerate(byte_str):
            coding_state = self.coding_sm.next_state(byte)
            if coding_state == MachineState.ERROR:
                self.logger.debug('%s %s prober hit error at byte %s',
                                  self.charset_name, self.language, i)
                self._state = ProbingState.NOT_ME
                break
            elif coding_state == MachineState.ITS_ME:
                self._state = ProbingState.FOUND_IT
                break
            elif coding_state == MachineState.START:
                # The state machine is back at START: pass the bytes around
                # this position to both analyzers.
                char_len = self.coding_sm.get_current_charlen()
                if i == 0:
                    # The character may have begun in the previous chunk, so
                    # pair the carried-over byte with this chunk's first byte.
                    self._last_char[1] = byte_str[0]
                    self.context_analyzer.feed(self._last_char[2 - char_len:],
                                               char_len)
                    self.distribution_analyzer.feed(self._last_char, char_len)
                else:
                    self.context_analyzer.feed(byte_str[i + 1 - char_len:i + 3
                                                        - char_len], char_len)
                    self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
                                                    char_len)

        # Remember the final byte for the next call; skipped for empty input,
        # where byte_str[-1] would raise IndexError.
        if byte_str:
            self._last_char[0] = byte_str[-1]

        if self.state == ProbingState.DETECTING:
            if (self.context_analyzer.got_enough_data() and
               (self.get_confidence() > self.SHORTCUT_THRESHOLD)):
                self._state = ProbingState.FOUND_IT

        return self.state

    def get_confidence(self):
        """Return the higher of the context and distribution confidences."""
        context_conf = self.context_analyzer.get_confidence()
        distrib_conf = self.distribution_analyzer.get_confidence()
        return max(context_conf, distrib_conf)
93 | 


--------------------------------------------------------------------------------
/thirdparty_libs/chardet/utf8prober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is mozilla.org code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 1998
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301  USA
26 | ######################### END LICENSE BLOCK #########################
27 | 
28 | from .charsetprober import CharSetProber
29 | from .enums import ProbingState, MachineState
30 | from .codingstatemachine import CodingStateMachine
31 | from .mbcssm import UTF8_SM_MODEL
32 | 
33 | 
34 | 
class UTF8Prober(CharSetProber):
    """Prober for UTF-8 that counts the multi-byte characters it has seen."""

    ONE_CHAR_PROB = 0.5

    def __init__(self):
        super(UTF8Prober, self).__init__()
        self.coding_sm = CodingStateMachine(UTF8_SM_MODEL)
        # Set to its real starting value (0) by reset().
        self._num_mb_chars = None
        self.reset()

    def reset(self):
        """Restart detection and zero the multi-byte character count."""
        super(UTF8Prober, self).reset()
        self.coding_sm.reset()
        self._num_mb_chars = 0

    @property
    def charset_name(self):
        return "utf-8"

    @property
    def language(self):
        return ""

    def feed(self, byte_str):
        """Consume a chunk of bytes and return the current probing state."""
        state_machine = self.coding_sm
        for byte in byte_str:
            coding_state = state_machine.next_state(byte)
            if coding_state == MachineState.ERROR:
                self._state = ProbingState.NOT_ME
                break
            if coding_state == MachineState.ITS_ME:
                self._state = ProbingState.FOUND_IT
                break
            if coding_state == MachineState.START:
                # Back at START: count the character if it used 2+ bytes.
                if state_machine.get_current_charlen() >= 2:
                    self._num_mb_chars += 1

        still_detecting = self.state == ProbingState.DETECTING
        if still_detecting and self.get_confidence() > self.SHORTCUT_THRESHOLD:
            self._state = ProbingState.FOUND_IT

        return self.state

    def get_confidence(self):
        """Return a confidence based on the number of multi-byte characters.

        With fewer than six of them the result is
        ``1 - 0.99 * ONE_CHAR_PROB ** count``; otherwise it is 0.99.
        """
        unlike = 0.99
        if self._num_mb_chars >= 6:
            return unlike
        unlike *= self.ONE_CHAR_PROB ** self._num_mb_chars
        return 1.0 - unlike
83 | 


--------------------------------------------------------------------------------
/thirdparty_libs/chardet/version.py:
--------------------------------------------------------------------------------
 1 | """
 2 | This module exists only to simplify retrieving the version number of chardet
 3 | from within setup.py and from chardet subpackages.
 4 | 
 5 | :author: Dan Blanchard (dan.blanchard@gmail.com)
 6 | """
 7 | 
 8 | __version__ = "3.0.4"
 9 | VERSION = __version__.split('.')
10 | 


--------------------------------------------------------------------------------
/thirdparty_libs/colorama/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
2 | from .initialise import init, deinit, reinit, colorama_text
3 | from .ansi import Fore, Back, Style, Cursor
4 | from .ansitowin32 import AnsiToWin32
5 | 
# Version string of this bundled colorama copy.
__version__ = '0.3.3'
7 | 
8 | 


--------------------------------------------------------------------------------
/thirdparty_libs/colorama/ansi.py:
--------------------------------------------------------------------------------
  1 | # Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
  2 | '''
  3 | This module generates ANSI character codes to printing colors to terminals.
  4 | See: http://en.wikipedia.org/wiki/ANSI_escape_code
  5 | '''
  6 | 
# ESC '[' : prefix used by every sequence built from CSI in this module.
CSI = '\033['
# ESC ']' : prefix used by set_title().
OSC = '\033]'
# BEL character; terminates the sequence built by set_title().
BEL = '\007'
 10 | 
 11 | 
 12 | def code_to_chars(code):
 13 |     return CSI + str(code) + 'm'
 14 | 
 15 | def set_title(title):
 16 |     return OSC + '2;' + title + BEL
 17 | 
 18 | def clear_screen(mode=2):
 19 |     return CSI + str(mode) + 'J'
 20 | 
 21 | def clear_line(mode=2):
 22 |     return CSI + str(mode) + 'K'
 23 | 
 24 | 
 25 | class AnsiCodes(object):
 26 |     def __init__(self):
 27 |         # the subclasses declare class attributes which are numbers.
 28 |         # Upon instantiation we define instance attributes, which are the same
 29 |         # as the class attributes but wrapped with the ANSI escape sequence
 30 |         for name in dir(self):
 31 |             if not name.startswith('_'):
 32 |                 value = getattr(self, name)
 33 |                 setattr(self, name, code_to_chars(value))
 34 | 
 35 | 
 36 | class AnsiCursor(object):
 37 |     def UP(self, n=1):
 38 |         return CSI + str(n) + 'A'
 39 |     def DOWN(self, n=1):
 40 |         return CSI + str(n) + 'B'
 41 |     def FORWARD(self, n=1):
 42 |         return CSI + str(n) + 'C'
 43 |     def BACK(self, n=1):
 44 |         return CSI + str(n) + 'D'
 45 |     def POS(self, x=1, y=1):
 46 |         return CSI + str(y) + ';' + str(x) + 'H'
 47 | 
 48 | 
class AnsiFore(AnsiCodes):
    # Numeric foreground codes. AnsiCodes.__init__ replaces each of these on
    # the instance with the string produced by code_to_chars().
    BLACK           = 30
    RED             = 31
    GREEN           = 32
    YELLOW          = 33
    BLUE            = 34
    MAGENTA         = 35
    CYAN            = 36
    WHITE           = 37
    RESET           = 39

    # These are fairly well supported, but not part of the standard.
    LIGHTBLACK_EX   = 90
    LIGHTRED_EX     = 91
    LIGHTGREEN_EX   = 92
    LIGHTYELLOW_EX  = 93
    LIGHTBLUE_EX    = 94
    LIGHTMAGENTA_EX = 95
    LIGHTCYAN_EX    = 96
    LIGHTWHITE_EX   = 97
 69 | 
 70 | 
class AnsiBack(AnsiCodes):
    # Numeric background codes; each value is the matching AnsiFore code
    # plus 10. AnsiCodes.__init__ converts them to escape strings per instance.
    BLACK           = 40
    RED             = 41
    GREEN           = 42
    YELLOW          = 43
    BLUE            = 44
    MAGENTA         = 45
    CYAN            = 46
    WHITE           = 47
    RESET           = 49

    # These are fairly well supported, but not part of the standard.
    LIGHTBLACK_EX   = 100
    LIGHTRED_EX     = 101
    LIGHTGREEN_EX   = 102
    LIGHTYELLOW_EX  = 103
    LIGHTBLUE_EX    = 104
    LIGHTMAGENTA_EX = 105
    LIGHTCYAN_EX    = 106
    LIGHTWHITE_EX   = 107
 91 | 
 92 | 
class AnsiStyle(AnsiCodes):
    # Numeric style codes; converted to escape strings by AnsiCodes.__init__.
    BRIGHT    = 1
    DIM       = 2
    NORMAL    = 22
    RESET_ALL = 0
 98 | 
# Module-level instances: attribute access on Fore/Back/Style yields the
# ready-made escape strings; Cursor exposes the sequence-building methods.
Fore   = AnsiFore()
Back   = AnsiBack()
Style  = AnsiStyle()
Cursor = AnsiCursor()
103 | 


--------------------------------------------------------------------------------
/thirdparty_libs/colorama/initialise.py:
--------------------------------------------------------------------------------
 1 | # Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
 2 | import atexit
 3 | import contextlib
 4 | import sys
 5 | 
 6 | from .ansitowin32 import AnsiToWin32
 7 | 
 8 | 
# sys.stdout / sys.stderr as they were when init() last ran.
orig_stdout = None
orig_stderr = None

# The streams init() installed in their place (None if the stream was None).
wrapped_stdout = None
wrapped_stderr = None

# Set once init() has registered reset_all() with atexit.
atexit_done = False
16 | 
17 | 
def reset_all():
    """Call ``reset_all()`` on a fresh AnsiToWin32 wrapper around ``orig_stdout``.

    init() registers this function with ``atexit``.
    """
    stdout_wrapper = AnsiToWin32(orig_stdout)
    stdout_wrapper.reset_all()
20 | 
21 | 
def init(autoreset=False, convert=None, strip=None, wrap=True):
    """Replace ``sys.stdout`` / ``sys.stderr`` with wrapped streams.

    The current streams are remembered in ``orig_stdout`` / ``orig_stderr``
    and the results of ``wrap_stream`` in ``wrapped_stdout`` /
    ``wrapped_stderr``. A stream that is ``None`` is left alone. On the first
    call, ``reset_all`` is registered with ``atexit``.

    :raises ValueError: if ``wrap`` is false while any of ``autoreset``,
        ``convert`` or ``strip`` is truthy.
    """
    global wrapped_stdout, wrapped_stderr
    global orig_stdout, orig_stderr
    global atexit_done

    if not wrap and (autoreset or convert or strip):
        raise ValueError('wrap=False conflicts with any other arg=True')

    orig_stdout, orig_stderr = sys.stdout, sys.stderr

    if sys.stdout is None:
        wrapped_stdout = None
    else:
        wrapped_stdout = wrap_stream(
            orig_stdout, convert, strip, autoreset, wrap)
        sys.stdout = wrapped_stdout

    if sys.stderr is None:
        wrapped_stderr = None
    else:
        wrapped_stderr = wrap_stream(
            orig_stderr, convert, strip, autoreset, wrap)
        sys.stderr = wrapped_stderr

    # Register the exit-time reset only once, however often init() runs.
    if not atexit_done:
        atexit.register(reset_all)
        atexit_done = True
48 | 
49 | 
def deinit():
    """Put the streams saved by init() back on ``sys``; skip any that are None."""
    saved_streams = (('stdout', orig_stdout), ('stderr', orig_stderr))
    for attr_name, saved in saved_streams:
        if saved is not None:
            setattr(sys, attr_name, saved)
55 | 
56 | 
@contextlib.contextmanager
def colorama_text(*args, **kwargs):
    """Context manager: call init(*args, **kwargs) on entry, deinit() on exit.

    deinit() runs from a ``finally`` clause, so it is also called when the
    body of the ``with`` block raises.
    """
    init(*args, **kwargs)
    try:
        yield
    finally:
        deinit()
64 | 
65 | 
def reinit():
    """Re-install the wrapped streams created by init(); skip any that are None."""
    wrapped_streams = (('stdout', wrapped_stdout), ('stderr', wrapped_stderr))
    for attr_name, wrapped in wrapped_streams:
        if wrapped is not None:
            setattr(sys, attr_name, wrapped)
71 | 
72 | 
def wrap_stream(stream, convert, strip, autoreset, wrap):
    """Return the stream to install in place of *stream*.

    When *wrap* is falsy the original stream is returned untouched. Otherwise
    an AnsiToWin32 wrapper is built and its ``.stream`` is returned if
    ``should_wrap()`` says so; if not, the original stream is returned.
    """
    if not wrap:
        return stream
    ansi_wrapper = AnsiToWin32(
        stream, convert=convert, strip=strip, autoreset=autoreset)
    if ansi_wrapper.should_wrap():
        return ansi_wrapper.stream
    return stream
80 | 
81 | 
82 | 


--------------------------------------------------------------------------------
/thirdparty_libs/colorama/win32.py:
--------------------------------------------------------------------------------
  1 | # Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file.
  2 | 
  3 | # from winbase.h
  4 | STDOUT = -11
  5 | STDERR = -12
  6 | 
  7 | try:
  8 |     import ctypes
  9 |     from ctypes import LibraryLoader
 10 |     windll = LibraryLoader(ctypes.WinDLL)
 11 |     from ctypes import wintypes
 12 | except (AttributeError, ImportError):
 13 |     windll = None
 14 |     SetConsoleTextAttribute = lambda *_: None
 15 |     winapi_test = lambda *_: None
 16 | else:
 17 |     from ctypes import byref, Structure, c_char, POINTER
 18 | 
 19 |     COORD = wintypes._COORD
 20 | 
 21 |     class CONSOLE_SCREEN_BUFFER_INFO(Structure):
 22 |         """struct in wincon.h."""
 23 |         _fields_ = [
 24 |             ("dwSize", COORD),
 25 |             ("dwCursorPosition", COORD),
 26 |             ("wAttributes", wintypes.WORD),
 27 |             ("srWindow", wintypes.SMALL_RECT),
 28 |             ("dwMaximumWindowSize", COORD),
 29 |         ]
 30 |         def __str__(self):
 31 |             return '(%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d)' % (
 32 |                 self.dwSize.Y, self.dwSize.X
 33 |                 , self.dwCursorPosition.Y, self.dwCursorPosition.X
 34 |                 , self.wAttributes
 35 |                 , self.srWindow.Top, self.srWindow.Left, self.srWindow.Bottom, self.srWindow.Right
 36 |                 , self.dwMaximumWindowSize.Y, self.dwMaximumWindowSize.X
 37 |             )
 38 | 
    # ctypes prototypes for the kernel32 console functions used below. Each
    # one declares argtypes/restype so ctypes converts arguments and results.

    # GetStdHandle(DWORD) -> HANDLE
    _GetStdHandle = windll.kernel32.GetStdHandle
    _GetStdHandle.argtypes = [
        wintypes.DWORD,
    ]
    _GetStdHandle.restype = wintypes.HANDLE

    # GetConsoleScreenBufferInfo(HANDLE, CONSOLE_SCREEN_BUFFER_INFO*) -> BOOL
    _GetConsoleScreenBufferInfo = windll.kernel32.GetConsoleScreenBufferInfo
    _GetConsoleScreenBufferInfo.argtypes = [
        wintypes.HANDLE,
        POINTER(CONSOLE_SCREEN_BUFFER_INFO),
    ]
    _GetConsoleScreenBufferInfo.restype = wintypes.BOOL

    # SetConsoleTextAttribute(HANDLE, WORD) -> BOOL
    _SetConsoleTextAttribute = windll.kernel32.SetConsoleTextAttribute
    _SetConsoleTextAttribute.argtypes = [
        wintypes.HANDLE,
        wintypes.WORD,
    ]
    _SetConsoleTextAttribute.restype = wintypes.BOOL

    # SetConsoleCursorPosition(HANDLE, COORD) -> BOOL
    _SetConsoleCursorPosition = windll.kernel32.SetConsoleCursorPosition
    _SetConsoleCursorPosition.argtypes = [
        wintypes.HANDLE,
        COORD,
    ]
    _SetConsoleCursorPosition.restype = wintypes.BOOL

    # FillConsoleOutputCharacterA(HANDLE, char, DWORD, COORD, DWORD*) -> BOOL
    _FillConsoleOutputCharacterA = windll.kernel32.FillConsoleOutputCharacterA
    _FillConsoleOutputCharacterA.argtypes = [
        wintypes.HANDLE,
        c_char,
        wintypes.DWORD,
        COORD,
        POINTER(wintypes.DWORD),
    ]
    _FillConsoleOutputCharacterA.restype = wintypes.BOOL

    # FillConsoleOutputAttribute(HANDLE, WORD, DWORD, COORD, DWORD*) -> BOOL
    _FillConsoleOutputAttribute = windll.kernel32.FillConsoleOutputAttribute
    _FillConsoleOutputAttribute.argtypes = [
        wintypes.HANDLE,
        wintypes.WORD,
        wintypes.DWORD,
        COORD,
        POINTER(wintypes.DWORD),
    ]
    _FillConsoleOutputAttribute.restype = wintypes.BOOL
 85 | 
 86 |     _SetConsoleTitleW = windll.kernel32.SetConsoleTitleA
 87 |     _SetConsoleTitleW.argtypes = [
 88 |         wintypes.LPCSTR
 89 |     ]
 90 |     _SetConsoleTitleW.restype = wintypes.BOOL
 91 | 
    # Console handles looked up once at import time, keyed by the
    # STDOUT / STDERR constants that callers pass as stream_id.
    handles = {
        STDOUT: _GetStdHandle(STDOUT),
        STDERR: _GetStdHandle(STDERR),
    }
 96 | 
 97 |     def winapi_test():
 98 |         handle = handles[STDOUT]
 99 |         csbi = CONSOLE_SCREEN_BUFFER_INFO()
100 |         success = _GetConsoleScreenBufferInfo(
101 |             handle, byref(csbi))
102 |         return bool(success)
103 | 
104 |     def GetConsoleScreenBufferInfo(stream_id=STDOUT):
105 |         handle = handles[stream_id]
106 |         csbi = CONSOLE_SCREEN_BUFFER_INFO()
107 |         success = _GetConsoleScreenBufferInfo(
108 |             handle, byref(csbi))
109 |         return csbi
110 | 
111 |     def SetConsoleTextAttribute(stream_id, attrs):
112 |         handle = handles[stream_id]
113 |         return _SetConsoleTextAttribute(handle, attrs)
114 | 
115 |     def SetConsoleCursorPosition(stream_id, position, adjust=True):
116 |         position = COORD(*position)
117 |         # If the position is out of range, do nothing.
118 |         if position.Y <= 0 or position.X <= 0:
119 |             return
120 |         # Adjust for Windows' SetConsoleCursorPosition:
121 |         #    1. being 0-based, while ANSI is 1-based.
122 |         #    2. expecting (x,y), while ANSI uses (y,x).
123 |         adjusted_position = COORD(position.Y - 1, position.X - 1)
124 |         if adjust:
125 |             # Adjust for viewport's scroll position
126 |             sr = GetConsoleScreenBufferInfo(STDOUT).srWindow
127 |             adjusted_position.Y += sr.Top
128 |             adjusted_position.X += sr.Left
129 |         # Resume normal processing
130 |         handle = handles[stream_id]
131 |         return _SetConsoleCursorPosition(handle, adjusted_position)
132 | 
133 |     def FillConsoleOutputCharacter(stream_id, char, length, start):
134 |         handle = handles[stream_id]
135 |         char = c_char(char.encode())
136 |         length = wintypes.DWORD(length)
137 |         num_written = wintypes.DWORD(0)
138 |         # Note that this is hard-coded for ANSI (vs wide) bytes.
139 |         success = _FillConsoleOutputCharacterA(
140 |             handle, char, length, start, byref(num_written))
141 |         return num_written.value
142 | 
143 |     def FillConsoleOutputAttribute(stream_id, attr, length, start):
144 |         ''' FillConsoleOutputAttribute( hConsole, csbi.wAttributes, dwConSize, coordScreen, &cCharsWritten )'''
145 |         handle = handles[stream_id]
146 |         attribute = wintypes.WORD(attr)
147 |         length = wintypes.DWORD(length)
148 |         num_written = wintypes.DWORD(0)
149 |         # Note that this is hard-coded for ANSI (vs wide) bytes.
150 |         return _FillConsoleOutputAttribute(
151 |             handle, attribute, length, start, byref(num_written))
152 | 
153 |     def SetConsoleTitle(title):
154 |         return _SetConsoleTitleW(title)
155 | 


--------------------------------------------------------------------------------
/thirdparty_libs/requests/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | #   __
 4 | #  /__)  _  _     _   _ _/   _
 5 | # / (   (- (/ (/ (- _)  /  _)
 6 | #          /
 7 | 
 8 | """
 9 | requests HTTP library
10 | ~~~~~~~~~~~~~~~~~~~~~
11 | 
12 | Requests is an HTTP library, written in Python, for human beings. Basic GET
13 | usage:
14 | 
15 |    >>> import requests
16 |    >>> r = requests.get('http://python.org')
17 |    >>> r.status_code
18 |    200
19 |    >>> 'Python is a programming language' in r.content
20 |    True
21 | 
22 | ... or POST:
23 | 
24 |    >>> payload = dict(key1='value1', key2='value2')
25 |    >>> r = requests.post("http://httpbin.org/post", data=payload)
26 |    >>> print r.text
27 |    {
28 |      ...
29 |      "form": {
30 |        "key2": "value2",
31 |        "key1": "value1"
32 |      },
33 |      ...
34 |    }
35 | 
36 | The other HTTP methods are supported - see `requests.api`. Full documentation
37 | is at <http://python-requests.org>.
38 | 
39 | :copyright: (c) 2013 by Kenneth Reitz.
40 | :license: Apache 2.0, see LICENSE for more details.
41 | 
42 | """
43 | 
# Package metadata.
__title__ = 'requests'
__version__ = '1.2.3'
# Same version packed as hex, one byte per component (0x01 0x02 0x03).
__build__ = 0x010203
__author__ = 'Kenneth Reitz'
__license__ = 'Apache 2.0'
__copyright__ = 'Copyright 2013 Kenneth Reitz'
50 | 
51 | # Attempt to enable urllib3's SNI support, if possible
52 | try:
53 |     from requests.packages.urllib3.contrib import pyopenssl
54 |     pyopenssl.inject_into_urllib3()
55 | except ImportError:
56 |     pass
57 | 
58 | from . import utils
59 | from .models import Request, Response, PreparedRequest
60 | from .api import request, get, head, post, patch, put, delete, options
61 | from .sessions import session, Session
62 | from .status_codes import codes
63 | from .exceptions import (
64 |     RequestException, Timeout, URLRequired,
65 |     TooManyRedirects, HTTPError, ConnectionError
66 | )
67 | 
68 | # Set default logging handler to avoid "No handler found" warnings.
import logging
try:  # Python 2.7+
    from logging import NullHandler
except ImportError:
    # logging.NullHandler is unavailable: define a stand-in handler whose
    # emit() discards every record.
    class NullHandler(logging.Handler):
        def emit(self, record):
            pass

# Attach the no-op handler to this package's logger.
logging.getLogger(__name__).addHandler(NullHandler())
78 | 


--------------------------------------------------------------------------------
/thirdparty_libs/requests/api.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | """
  4 | requests.api
  5 | ~~~~~~~~~~~~
  6 | 
  7 | This module implements the Requests API.
  8 | 
  9 | :copyright: (c) 2012 by Kenneth Reitz.
 10 | :license: Apache2, see LICENSE for more details.
 11 | 
 12 | """
 13 | 
 14 | from . import sessions
 15 | 
 16 | 
 17 | def request(method, url, **kwargs):
 18 |     """Constructs and sends a :class:`Request <Request>`.
 19 |     Returns :class:`Response <Response>` object.
 20 | 
 21 |     :param method: method for the new :class:`Request` object.
 22 |     :param url: URL for the new :class:`Request` object.
 23 |     :param params: (optional) Dictionary or bytes to be sent in the query string for the :class:`Request`.
 24 |     :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`.
 25 |     :param headers: (optional) Dictionary of HTTP Headers to send with the :class:`Request`.
 26 |     :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`.
 27 |     :param files: (optional) Dictionary of 'name': file-like-objects (or {'name': ('filename', fileobj)}) for multipart encoding upload.
 28 |     :param auth: (optional) Auth tuple to enable Basic/Digest/Custom HTTP Auth.
 29 |     :param timeout: (optional) Float describing the timeout of the request.
 30 |     :param allow_redirects: (optional) Boolean. Set to True if POST/PUT/DELETE redirect following is allowed.
 31 |     :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy.
 32 |     :param verify: (optional) if ``True``, the SSL cert will be verified. A CA_BUNDLE path can also be provided.
 33 |     :param stream: (optional) if ``False``, the response content will be immediately downloaded.
 34 |     :param cert: (optional) if String, path to ssl client cert file (.pem). If Tuple, ('cert', 'key') pair.
 35 | 
 36 |     Usage::
 37 | 
 38 |       >>> import requests
 39 |       >>> req = requests.request('GET', 'http://httpbin.org/get')
 40 |       <Response [200]>
 41 |     """
 42 | 
 43 |     session = sessions.Session()
 44 |     return session.request(method=method, url=url, **kwargs)
 45 | 
 46 | 
 47 | def get(url, **kwargs):
 48 |     """Sends a GET request. Returns :class:`Response` object.
 49 | 
 50 |     :param url: URL for the new :class:`Request` object.
 51 |     :param \*\*kwargs: Optional arguments that ``request`` takes.
 52 |     """
 53 | 
 54 |     kwargs.setdefault('allow_redirects', True)
 55 |     return request('get', url, **kwargs)
 56 | 
 57 | 
 58 | def options(url, **kwargs):
 59 |     """Sends a OPTIONS request. Returns :class:`Response` object.
 60 | 
 61 |     :param url: URL for the new :class:`Request` object.
 62 |     :param \*\*kwargs: Optional arguments that ``request`` takes.
 63 |     """
 64 | 
 65 |     kwargs.setdefault('allow_redirects', True)
 66 |     return request('options', url, **kwargs)
 67 | 
 68 | 
 69 | def head(url, **kwargs):
 70 |     """Sends a HEAD request. Returns :class:`Response` object.
 71 | 
 72 |     :param url: URL for the new :class:`Request` object.
 73 |     :param \*\*kwargs: Optional arguments that ``request`` takes.
 74 |     """
 75 | 
 76 |     kwargs.setdefault('allow_redirects', False)
 77 |     return request('head', url, **kwargs)
 78 | 
 79 | 
 80 | def post(url, data=None, **kwargs):
 81 |     """Sends a POST request. Returns :class:`Response` object.
 82 | 
 83 |     :param url: URL for the new :class:`Request` object.
 84 |     :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`.
 85 |     :param \*\*kwargs: Optional arguments that ``request`` takes.
 86 |     """
 87 | 
 88 |     return request('post', url, data=data, **kwargs)
 89 | 
 90 | 
 91 | def put(url, data=None, **kwargs):
 92 |     """Sends a PUT request. Returns :class:`Response` object.
 93 | 
 94 |     :param url: URL for the new :class:`Request` object.
 95 |     :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`.
 96 |     :param \*\*kwargs: Optional arguments that ``request`` takes.
 97 |     """
 98 | 
 99 |     return request('put', url, data=data, **kwargs)
100 | 
101 | 
def patch(url, data=None, **kwargs):
    r"""Sends a PATCH request. Returns :class:`Response` object.

    :param url: URL for the new :class:`Request` object.
    :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    """
    # Raw docstring: ``\*`` is an invalid escape in a normal string literal.

    return request('patch', url, data=data, **kwargs)
111 | 
112 | 
def delete(url, **kwargs):
    r"""Sends a DELETE request. Returns :class:`Response` object.

    :param url: URL for the new :class:`Request` object.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    """
    # Raw docstring: ``\*`` is an invalid escape in a normal string literal.

    return request('delete', url, **kwargs)
121 | 


--------------------------------------------------------------------------------
/thirdparty_libs/requests/certs.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | """
 5 | certs.py
 6 | ~~~~~~~~
 7 | 
 8 | This module returns the preferred default CA certificate bundle.
 9 | 
10 | If you are packaging Requests, e.g., for a Linux distribution or a managed
11 | environment, you can change the definition of where() to return a separately
12 | packaged CA bundle.
13 | """
14 | 
15 | import os.path
16 | 
17 | 
def where():
    """Return the path of ``cacert.pem`` located next to this module."""
    # vendored bundle inside Requests
    package_dir = os.path.dirname(__file__)
    bundle_path = os.path.join(package_dir, 'cacert.pem')
    return bundle_path
22 | 
23 | if __name__ == '__main__':
24 |     print(where())
25 | 


--------------------------------------------------------------------------------
/thirdparty_libs/requests/compat.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | """
  4 | pythoncompat
  5 | """
  6 | 
  7 | from .packages import charade as chardet
  8 | 
  9 | import sys
 10 | 
 11 | # -------
 12 | # Pythons
 13 | # -------
 14 | 
 15 | # Syntax sugar.
 16 | _ver = sys.version_info
 17 | 
 18 | #: Python 2.x?
 19 | is_py2 = (_ver[0] == 2)
 20 | 
 21 | #: Python 3.x?
 22 | is_py3 = (_ver[0] == 3)
 23 | 
 24 | #: Python 3.0.x
 25 | is_py30 = (is_py3 and _ver[1] == 0)
 26 | 
 27 | #: Python 3.1.x
 28 | is_py31 = (is_py3 and _ver[1] == 1)
 29 | 
 30 | #: Python 3.2.x
 31 | is_py32 = (is_py3 and _ver[1] == 2)
 32 | 
 33 | #: Python 3.3.x
 34 | is_py33 = (is_py3 and _ver[1] == 3)
 35 | 
 36 | #: Python 3.4.x
 37 | is_py34 = (is_py3 and _ver[1] == 4)
 38 | 
 39 | #: Python 2.7.x
 40 | is_py27 = (is_py2 and _ver[1] == 7)
 41 | 
 42 | #: Python 2.6.x
 43 | is_py26 = (is_py2 and _ver[1] == 6)
 44 | 
 45 | #: Python 2.5.x
 46 | is_py25 = (is_py2 and _ver[1] == 5)
 47 | 
 48 | #: Python 2.4.x
 49 | is_py24 = (is_py2 and _ver[1] == 4)   # I'm assuming this is not by choice.
 50 | 
 51 | 
 52 | # ---------
 53 | # Platforms
 54 | # ---------
 55 | 
 56 | 
 57 | # Syntax sugar.
 58 | _ver = sys.version.lower()
 59 | 
 60 | is_pypy = ('pypy' in _ver)
 61 | is_jython = ('jython' in _ver)
 62 | is_ironpython = ('iron' in _ver)
 63 | 
 64 | # Assume CPython, if nothing else.
 65 | is_cpython = not any((is_pypy, is_jython, is_ironpython))
 66 | 
 67 | # Windows-based system.
 68 | is_windows = 'win32' in str(sys.platform).lower()
 69 | 
 70 | # Standard Linux 2+ system.
 71 | is_linux = ('linux' in str(sys.platform).lower())
 72 | is_osx = ('darwin' in str(sys.platform).lower())
 73 | is_hpux = ('hpux' in str(sys.platform).lower())   # Complete guess.
 74 | is_solaris = ('solar==' in str(sys.platform).lower())   # Complete guess.
 75 | 
 76 | try:
 77 |     import simplejson as json
 78 | except ImportError:
 79 |     import json
 80 | 
 81 | # ---------
 82 | # Specifics
 83 | # ---------
 84 | 
# Expose one set of names (URL helpers, cookielib, Morsel, StringIO,
# OrderedDict, IncompleteRead and the string/number type aliases) regardless
# of the interpreter's major version.
if is_py2:
    from urllib import quote, unquote, quote_plus, unquote_plus, urlencode, getproxies, proxy_bypass
    from urlparse import urlparse, urlunparse, urljoin, urlsplit, urldefrag
    from urllib2 import parse_http_list
    import cookielib
    from Cookie import Morsel
    from StringIO import StringIO
    from .packages.urllib3.packages.ordered_dict import OrderedDict
    from httplib import IncompleteRead

    # builtin_str keeps the interpreter's native ``str``; ``bytes`` aliases
    # that same type and ``str`` is rebound to ``unicode``.
    builtin_str = str
    bytes = str
    str = unicode
    basestring = basestring
    numeric_types = (int, long, float)


elif is_py3:
    from urllib.parse import urlparse, urlunparse, urljoin, urlsplit, urlencode, quote, unquote, quote_plus, unquote_plus, urldefrag
    from urllib.request import parse_http_list, getproxies, proxy_bypass
    from http import cookiejar as cookielib
    from http.cookies import Morsel
    from io import StringIO
    from collections import OrderedDict
    from http.client import IncompleteRead

    # The self-assignments make ``str`` and ``bytes`` importable from this
    # module; ``basestring`` becomes a tuple usable with isinstance().
    builtin_str = str
    str = str
    bytes = bytes
    basestring = (str, bytes)
    numeric_types = (int, float)
116 | 


--------------------------------------------------------------------------------
/thirdparty_libs/requests/exceptions.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | """
 4 | requests.exceptions
 5 | ~~~~~~~~~~~~~~~~~~~
 6 | 
 7 | This module contains the set of Requests' exceptions.
 8 | 
 9 | """
10 | 
11 | 
class RequestException(RuntimeError):
    """There was an ambiguous exception that occurred while handling your
    request.

    Base class of the other exceptions defined in this module.
    """
15 | 
16 | 
class HTTPError(RequestException):
    """An HTTP error occurred.

    The optional ``response`` keyword argument is stored on the instance as
    ``self.response`` (``None`` when it is not supplied).
    """

    def __init__(self, *args, **kwargs):
        """Store the optional `response` and forward the rest to the base class."""
        # Remove 'response' before delegating so the base initialiser only
        # sees the remaining arguments.
        attached_response = kwargs.pop('response', None)
        self.response = attached_response
        super(HTTPError, self).__init__(*args, **kwargs)
24 | 
25 | 
class ConnectionError(RequestException):
    """A Connection error occurred.

    Note: inside this module the name shadows the Python 3 builtin
    ``ConnectionError``; this class derives from RequestException instead.
    """
28 | 
29 | 
class SSLError(ConnectionError):
    """An SSL error occurred.

    Subclass of this module's ConnectionError, so handlers for that class
    also catch SSLError.
    """
32 | 
33 | 
class Timeout(RequestException):
    """The request timed out.

    Derives directly from RequestException, not from ConnectionError.
    """
36 | 
37 | 
class URLRequired(RequestException):
    """A valid URL is required to make a request.

    Derives directly from RequestException.
    """
40 | 
41 | 
class TooManyRedirects(RequestException):
    """Too many redirects.

    Derives directly from RequestException.
    """
44 | 
45 | 
class MissingSchema(RequestException, ValueError):
    """The URL schema (e.g. http or https) is missing.

    Also a ValueError, so ``except ValueError`` catches it too.
    """
48 | 
49 | 
class InvalidSchema(RequestException, ValueError):
    """See defaults.py for valid schemas.

    Also a ValueError, so ``except ValueError`` catches it too.
    NOTE(review): no defaults.py is visible from here; confirm the reference
    above is still accurate.
    """
52 | 
53 | 
class InvalidURL(RequestException, ValueError):
    """The URL provided was somehow invalid.

    Also a ValueError, so ``except ValueError`` catches it too.
    """
56 | 
57 | 
class ChunkedEncodingError(RequestException):
    """The server declared chunked encoding but sent an invalid chunk.

    Derives directly from RequestException.
    """
60 | 


--------------------------------------------------------------------------------
/thirdparty_libs/requests/hooks.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | """
 4 | requests.hooks
 5 | ~~~~~~~~~~~~~~
 6 | 
 7 | This module provides the capabilities for the Requests hooks system.
 8 | 
 9 | Available hooks:
10 | 
11 | ``response``:
12 |     The response generated from a Request.
13 | 
14 | """
15 | 
16 | 
# Names of the hook events supported by the hooks system.
HOOKS = ['response']


def default_hooks():
    """Return a new dict mapping every event in HOOKS to its own empty list."""
    return dict((event_name, []) for event_name in HOOKS)
25 | 
26 | # TODO: response is the only one
27 | 
28 | 
def dispatch_hook(key, hooks, hook_data, **kwargs):
    """Run the hooks registered under *key* over *hook_data*.

    :param key: event name to look up in *hooks*.
    :param hooks: mapping of event name to a callable or an iterable of
        callables; a falsy value means no hooks.
    :param hook_data: value handed to each hook in turn.
    :param kwargs: extra keyword arguments forwarded to every hook.
    :returns: *hook_data*, replaced by each non-``None`` hook return value.
    """
    registry = hooks or dict()
    if key not in registry:
        return hook_data

    callbacks = registry.get(key)
    # A single callable is treated as a one-element list.
    if hasattr(callbacks, '__call__'):
        callbacks = [callbacks]

    for callback in callbacks:
        replacement = callback(hook_data, **kwargs)
        # A hook returning None leaves the data unchanged.
        if replacement is not None:
            hook_data = replacement

    return hook_data
46 | 


--------------------------------------------------------------------------------
/thirdparty_libs/requests/packages/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | 
3 | from . import urllib3
4 | 


--------------------------------------------------------------------------------
/thirdparty_libs/requests/packages/charade/__init__.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # This library is free software; you can redistribute it and/or
 3 | # modify it under the terms of the GNU Lesser General Public
 4 | # License as published by the Free Software Foundation; either
 5 | # version 2.1 of the License, or (at your option) any later version.
 6 | #
 7 | # This library is distributed in the hope that it will be useful,
 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
10 | # Lesser General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Lesser General Public
13 | # License along with this library; if not, write to the Free Software
14 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
15 | # 02110-1301  USA
16 | ######################### END LICENSE BLOCK #########################
17 | 
18 | __version__ = "1.0.3"
19 | from sys import version_info
20 | 
21 | 
def detect(aBuf):
    """Feed *aBuf* to a UniversalDetector and return its ``result``.

    :param aBuf: the byte string to analyse.
    :raises ValueError: if *aBuf* is a ``unicode`` object on Python 2, or is
        not a ``bytes`` object on Python 3.
    """
    if version_info < (3, 0):
        wrong_type = isinstance(aBuf, unicode)
    else:
        wrong_type = not isinstance(aBuf, bytes)
    if wrong_type:
        raise ValueError('Expected a bytes object, not a unicode object')

    # Imported here, after validation, exactly as the original does.
    from . import universaldetector
    detector = universaldetector.UniversalDetector()
    detector.reset()
    detector.feed(aBuf)
    detector.close()
    return detector.result
33 | 


--------------------------------------------------------------------------------
/thirdparty_libs/requests/packages/charade/big5prober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is Mozilla Communicator client code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 1998
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301  USA
26 | ######################### END LICENSE BLOCK #########################
27 | 
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import Big5DistributionAnalysis
31 | from .mbcssm import Big5SMModel
32 | 
33 | 
class Big5Prober(MultiByteCharSetProber):
    """Multi-byte prober configured with the Big5 state machine model and
    the Big5 distribution analysis."""

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        # The two helpers are independent of each other; reset() runs once
        # both are in place.
        self._mDistributionAnalyzer = Big5DistributionAnalysis()
        self._mCodingSM = CodingStateMachine(Big5SMModel)
        self.reset()

    def get_charset_name(self):
        """Return the name of the charset this prober reports."""
        charset_name = "Big5"
        return charset_name
43 | 


--------------------------------------------------------------------------------
/thirdparty_libs/requests/packages/charade/charsetgroupprober.py:
--------------------------------------------------------------------------------
  1 | ######################## BEGIN LICENSE BLOCK ########################
  2 | # The Original Code is Mozilla Communicator client code.
  3 | # 
  4 | # The Initial Developer of the Original Code is
  5 | # Netscape Communications Corporation.
  6 | # Portions created by the Initial Developer are Copyright (C) 1998
  7 | # the Initial Developer. All Rights Reserved.
  8 | # 
  9 | # Contributor(s):
 10 | #   Mark Pilgrim - port to Python
 11 | #
 12 | # This library is free software; you can redistribute it and/or
 13 | # modify it under the terms of the GNU Lesser General Public
 14 | # License as published by the Free Software Foundation; either
 15 | # version 2.1 of the License, or (at your option) any later version.
 16 | # 
 17 | # This library is distributed in the hope that it will be useful,
 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 20 | # Lesser General Public License for more details.
 21 | # 
 22 | # You should have received a copy of the GNU Lesser General Public
 23 | # License along with this library; if not, write to the Free Software
 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 25 | # 02110-1301  USA
 26 | ######################### END LICENSE BLOCK #########################
 27 | 
 28 | from . import constants
 29 | import sys
 30 | from .charsetprober import CharSetProber
 31 | 
 32 | 
 33 | class CharSetGroupProber(CharSetProber):
 34 |     def __init__(self):
 35 |         CharSetProber.__init__(self)
 36 |         self._mActiveNum = 0
 37 |         self._mProbers = []
 38 |         self._mBestGuessProber = None
 39 | 
 40 |     def reset(self):
 41 |         CharSetProber.reset(self)
 42 |         self._mActiveNum = 0
 43 |         for prober in self._mProbers:
 44 |             if prober:
 45 |                 prober.reset()
 46 |                 prober.active = True
 47 |                 self._mActiveNum += 1
 48 |         self._mBestGuessProber = None
 49 | 
 50 |     def get_charset_name(self):
 51 |         if not self._mBestGuessProber:
 52 |             self.get_confidence()
 53 |             if not self._mBestGuessProber:
 54 |                 return None
 55 | #                self._mBestGuessProber = self._mProbers[0]
 56 |         return self._mBestGuessProber.get_charset_name()
 57 | 
 58 |     def feed(self, aBuf):
 59 |         for prober in self._mProbers:
 60 |             if not prober:
 61 |                 continue
 62 |             if not prober.active:
 63 |                 continue
 64 |             st = prober.feed(aBuf)
 65 |             if not st:
 66 |                 continue
 67 |             if st == constants.eFoundIt:
 68 |                 self._mBestGuessProber = prober
 69 |                 return self.get_state()
 70 |             elif st == constants.eNotMe:
 71 |                 prober.active = False
 72 |                 self._mActiveNum -= 1
 73 |                 if self._mActiveNum <= 0:
 74 |                     self._mState = constants.eNotMe
 75 |                     return self.get_state()
 76 |         return self.get_state()
 77 | 
 78 |     def get_confidence(self):
 79 |         st = self.get_state()
 80 |         if st == constants.eFoundIt:
 81 |             return 0.99
 82 |         elif st == constants.eNotMe:
 83 |             return 0.01
 84 |         bestConf = 0.0
 85 |         self._mBestGuessProber = None
 86 |         for prober in self._mProbers:
 87 |             if not prober:
 88 |                 continue
 89 |             if not prober.active:
 90 |                 if constants._debug:
 91 |                     sys.stderr.write(prober.get_charset_name()
 92 |                                      + ' not active\n')
 93 |                 continue
 94 |             cf = prober.get_confidence()
 95 |             if constants._debug:
 96 |                 sys.stderr.write('%s confidence = %s\n' %
 97 |                                  (prober.get_charset_name(), cf))
 98 |             if bestConf < cf:
 99 |                 bestConf = cf
100 |                 self._mBestGuessProber = prober
101 |         if not self._mBestGuessProber:
102 |             return 0.0
103 |         return bestConf
104 | #        else:
105 | #            self._mBestGuessProber = self._mProbers[0]
106 | #            return self._mBestGuessProber.get_confidence()
107 | 


--------------------------------------------------------------------------------
/thirdparty_libs/requests/packages/charade/charsetprober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is Mozilla Universal charset detector code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 2001
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #   Shy Shalom - original C code
12 | #
13 | # This library is free software; you can redistribute it and/or
14 | # modify it under the terms of the GNU Lesser General Public
15 | # License as published by the Free Software Foundation; either
16 | # version 2.1 of the License, or (at your option) any later version.
17 | #
18 | # This library is distributed in the hope that it will be useful,
19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21 | # Lesser General Public License for more details.
22 | #
23 | # You should have received a copy of the GNU Lesser General Public
24 | # License along with this library; if not, write to the Free Software
25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26 | # 02110-1301  USA
27 | ######################### END LICENSE BLOCK #########################
28 | 
29 | from . import constants
30 | import re
31 | 
32 | 
class CharSetProber:
    """Base class for charset probers.

    Supplies default implementations of the prober interface plus a few
    byte-string filtering helpers.  The state attribute is only created by
    reset(), so get_state() must not be called before reset().
    """

    def __init__(self):
        """Do nothing; state is set up by reset()."""

    def reset(self):
        """Put the prober back into the detecting state."""
        self._mState = constants.eDetecting

    def get_charset_name(self):
        """Return the detected charset name; the base class has none."""
        return None

    def feed(self, aBuf):
        """Consume a chunk of bytes; the base implementation ignores it."""
        return None

    def get_state(self):
        """Return the current probing state."""
        return self._mState

    def get_confidence(self):
        """Return the confidence of the guess; always 0.0 in the base class."""
        return 0.0

    def filter_high_bit_only(self, aBuf):
        """Collapse each run of 7-bit bytes in ``aBuf`` to a single space."""
        return re.sub(b'([\x00-\x7F])+', b' ', aBuf)

    def filter_without_english_letters(self, aBuf):
        """Collapse each run of ASCII letters in ``aBuf`` to a single space."""
        return re.sub(b'([A-Za-z])+', b' ', aBuf)

    def filter_with_english_letters(self, aBuf):
        """Return ``aBuf`` unchanged (filtering is not implemented yet)."""
        # TODO
        return aBuf
63 | 


--------------------------------------------------------------------------------
/thirdparty_libs/requests/packages/charade/codingstatemachine.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is mozilla.org code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 1998
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301  USA
26 | ######################### END LICENSE BLOCK #########################
27 | 
28 | from .constants import eStart
29 | from .compat import wrap_ord
30 | 
31 | 
class CodingStateMachine:
    """Table-driven state machine that validates a byte sequence.

    The model ``sm`` is a mapping with the keys ``classTable``,
    ``charLenTable``, ``classFactor``, ``stateTable`` and ``name``.
    """

    def __init__(self, sm):
        self._mModel = sm
        self._mCurrentBytePos = self._mCurrentCharLen = 0
        self.reset()

    def reset(self):
        """Return the machine to its start state."""
        self._mCurrentState = eStart

    def next_state(self, c):
        """Advance the machine by one byte ``c`` and return the new state."""
        model = self._mModel
        # Every byte maps to a class; PY3K: iterating a byte stream yields
        # ints, so wrap_ord() only has to convert on Python 2.
        byte_class = model['classTable'][wrap_ord(c)]
        if self._mCurrentState == eStart:
            # First byte of a character: restart the position counter and
            # look up how long this character is.
            self._mCurrentBytePos = 0
            self._mCurrentCharLen = model['charLenTable'][byte_class]
        # The state table is flattened: row = current state, column = class.
        table_index = self._mCurrentState * model['classFactor'] + byte_class
        self._mCurrentState = model['stateTable'][table_index]
        self._mCurrentBytePos += 1
        return self._mCurrentState

    def get_current_charlen(self):
        """Return the byte length recorded for the current character."""
        return self._mCurrentCharLen

    def get_coding_state_machine(self):
        """Return the ``name`` entry of the underlying model."""
        return self._mModel['name']
62 | 


--------------------------------------------------------------------------------
/thirdparty_libs/requests/packages/charade/compat.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # Contributor(s):
 3 | #   Ian Cordasco - port to Python
 4 | #
 5 | # This library is free software; you can redistribute it and/or
 6 | # modify it under the terms of the GNU Lesser General Public
 7 | # License as published by the Free Software Foundation; either
 8 | # version 2.1 of the License, or (at your option) any later version.
 9 | #
10 | # This library is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 | # Lesser General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU Lesser General Public
16 | # License along with this library; if not, write to the Free Software
17 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
18 | # 02110-1301  USA
19 | ######################### END LICENSE BLOCK #########################
20 | 
21 | import sys
22 | 
23 | 
# String-like types for the running interpreter: (str, unicode) on Python 2,
# (bytes, str) on Python 3.  ``unicode`` is only evaluated on Python 2.
base_str = (str, unicode) if sys.version_info < (3, 0) else (bytes, str)
28 | 
29 | 
def wrap_ord(a):
    """Return ``ord(a)`` for string input on Python 2, else ``a`` unchanged.

    On Python 3 the argument is always returned as-is.
    """
    running_py2 = sys.version_info < (3, 0)
    if running_py2 and isinstance(a, base_str):
        return ord(a)
    return a
35 | 


--------------------------------------------------------------------------------
/thirdparty_libs/requests/packages/charade/constants.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is Mozilla Universal charset detector code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 2001
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #   Shy Shalom - original C code
12 | #
13 | # This library is free software; you can redistribute it and/or
14 | # modify it under the terms of the GNU Lesser General Public
15 | # License as published by the Free Software Foundation; either
16 | # version 2.1 of the License, or (at your option) any later version.
17 | # 
18 | # This library is distributed in the hope that it will be useful,
19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21 | # Lesser General Public License for more details.
22 | # 
23 | # You should have received a copy of the GNU Lesser General Public
24 | # License along with this library; if not, write to the Free Software
25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26 | # 02110-1301  USA
27 | ######################### END LICENSE BLOCK #########################
28 | 
# Truthy value makes the probers write diagnostics to stderr.
_debug = 0

# Prober states, as stored in a prober's _mState.
eDetecting, eFoundIt, eNotMe = 0, 1, 2

# Coding state machine states, as returned by next_state().
eStart, eError, eItsMe = 0, 1, 2

# Confidence above which a prober may declare its charset found early.
SHORTCUT_THRESHOLD = 0.95
40 | 


--------------------------------------------------------------------------------
/thirdparty_libs/requests/packages/charade/cp949prober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is mozilla.org code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 1998
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301  USA
26 | ######################### END LICENSE BLOCK #########################
27 | 
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import EUCKRDistributionAnalysis
31 | from .mbcssm import CP949SMModel
32 | 
33 | 
class CP949Prober(MultiByteCharSetProber):
    """Multi-byte prober specialised for the CP949 encoding."""

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        # NOTE: CP949 is a superset of EUC-KR, so the character distribution
        #       should not differ; the EUC-KR analysis is reused as-is.
        self._mDistributionAnalyzer = EUCKRDistributionAnalysis()
        self._mCodingSM = CodingStateMachine(CP949SMModel)
        self.reset()

    def get_charset_name(self):
        """Return the name of the charset this prober reports."""
        return "CP949"
45 | 


--------------------------------------------------------------------------------
/thirdparty_libs/requests/packages/charade/escprober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is mozilla.org code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 1998
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301  USA
26 | ######################### END LICENSE BLOCK #########################
27 | 
28 | from . import constants
29 | from .escsm import (HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel,
30 |                     ISO2022KRSMModel)
31 | from .charsetprober import CharSetProber
32 | from .codingstatemachine import CodingStateMachine
33 | from .compat import wrap_ord
34 | 
35 | 
class EscCharSetProber(CharSetProber):
    """Prober that runs the HZ and ISO-2022-CN/JP/KR state machines.

    Every input byte is pushed through each still-active state machine; the
    first machine to report eItsMe decides the detected charset.
    """

    def __init__(self):
        CharSetProber.__init__(self)
        models = (HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel,
                  ISO2022KRSMModel)
        self._mCodingSM = [CodingStateMachine(model) for model in models]
        self.reset()

    def reset(self):
        """Re-activate all state machines and forget any detected charset."""
        CharSetProber.reset(self)
        for machine in self._mCodingSM:
            if machine:
                machine.active = True
                machine.reset()
        self._mActiveSM = len(self._mCodingSM)
        self._mDetectedCharset = None

    def get_charset_name(self):
        """Return the detected charset name, or None if nothing matched."""
        return self._mDetectedCharset

    def get_confidence(self):
        """Return 0.99 once a charset has been detected, otherwise 0.00."""
        return 0.99 if self._mDetectedCharset else 0.00

    def feed(self, aBuf):
        """Run ``aBuf`` through the state machines and return the state."""
        for c in aBuf:
            # PY3K: aBuf is a byte array, so c is an int, not a byte
            for machine in self._mCodingSM:
                if not machine or not machine.active:
                    continue
                outcome = machine.next_state(wrap_ord(c))
                if outcome == constants.eError:
                    # This encoding is ruled out; give up once none is left.
                    machine.active = False
                    self._mActiveSM -= 1
                    if self._mActiveSM <= 0:
                        self._mState = constants.eNotMe
                        return self.get_state()
                elif outcome == constants.eItsMe:
                    self._mState = constants.eFoundIt
                    self._mDetectedCharset = machine.get_coding_state_machine()  # nopep8
                    return self.get_state()

        return self.get_state()
87 | 


--------------------------------------------------------------------------------
/thirdparty_libs/requests/packages/charade/eucjpprober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is mozilla.org code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 1998
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301  USA
26 | ######################### END LICENSE BLOCK #########################
27 | 
28 | import sys
29 | from . import constants
30 | from .mbcharsetprober import MultiByteCharSetProber
31 | from .codingstatemachine import CodingStateMachine
32 | from .chardistribution import EUCJPDistributionAnalysis
33 | from .jpcntx import EUCJPContextAnalysis
34 | from .mbcssm import EUCJPSMModel
35 | 
36 | 
class EUCJPProber(MultiByteCharSetProber):
    """Multi-byte prober for EUC-JP.

    Combines the EUC-JP coding state machine with both a character
    distribution analysis and a context analysis.
    """

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        self._mCodingSM = CodingStateMachine(EUCJPSMModel)
        self._mDistributionAnalyzer = EUCJPDistributionAnalysis()
        self._mContextAnalyzer = EUCJPContextAnalysis()
        self.reset()

    def reset(self):
        """Reset the inherited state and the context analyzer."""
        MultiByteCharSetProber.reset(self)
        self._mContextAnalyzer.reset()

    def get_charset_name(self):
        """Return the name of the charset this prober reports."""
        return "EUC-JP"

    def feed(self, aBuf):
        """Analyse ``aBuf`` and return the resulting prober state.

        An empty buffer leaves the prober untouched and returns the current
        state.
        """
        aLen = len(aBuf)
        if not aLen:
            # Nothing to analyse; without this guard the last-byte
            # bookkeeping below would raise IndexError on aBuf[aLen - 1].
            return self.get_state()

        for i in range(aLen):
            # PY3K: aBuf is a byte array, so aBuf[i] is an int, not a byte
            codingState = self._mCodingSM.next_state(aBuf[i])
            if codingState == constants.eError:
                if constants._debug:
                    sys.stderr.write(self.get_charset_name()
                                     + ' prober hit error at byte ' + str(i)
                                     + '\n')
                self._mState = constants.eNotMe
                break
            elif codingState == constants.eItsMe:
                self._mState = constants.eFoundIt
                break
            elif codingState == constants.eStart:
                # Back in the start state: a character just ended at byte i.
                charLen = self._mCodingSM.get_current_charlen()
                if i == 0:
                    # The preceding byte belongs to the previous buffer and
                    # was saved in _mLastChar[0] at the end of that call.
                    # NOTE(review): _mLastChar is presumably created by
                    # MultiByteCharSetProber; not visible here.
                    self._mLastChar[1] = aBuf[0]
                    self._mContextAnalyzer.feed(self._mLastChar, charLen)
                    self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
                else:
                    self._mContextAnalyzer.feed(aBuf[i - 1:i + 1], charLen)
                    self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
                                                     charLen)

        # Remember the final byte for a character that spans two buffers.
        self._mLastChar[0] = aBuf[aLen - 1]

        if self.get_state() == constants.eDetecting:
            # Declare success early once there is enough data and the
            # confidence exceeds the shortcut threshold.
            if (self._mContextAnalyzer.got_enough_data() and
               (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
                self._mState = constants.eFoundIt

        return self.get_state()

    def get_confidence(self):
        """Return the larger of the context and distribution confidences."""
        contxtCf = self._mContextAnalyzer.get_confidence()
        distribCf = self._mDistributionAnalyzer.get_confidence()
        return max(contxtCf, distribCf)
91 | 


--------------------------------------------------------------------------------
/thirdparty_libs/requests/packages/charade/euckrprober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is mozilla.org code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 1998
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301  USA
26 | ######################### END LICENSE BLOCK #########################
27 | 
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import EUCKRDistributionAnalysis
31 | from .mbcssm import EUCKRSMModel
32 | 
33 | 
class EUCKRProber(MultiByteCharSetProber):
    """Multi-byte prober specialised for the EUC-KR encoding.

    Pairs the EUC-KR state-machine model with the EUC-KR distribution
    analysis; everything else is inherited from MultiByteCharSetProber.
    """

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        # reset() runs last, once both helpers have been assigned.
        self._mDistributionAnalyzer = EUCKRDistributionAnalysis()
        self._mCodingSM = CodingStateMachine(EUCKRSMModel)
        self.reset()

    def get_charset_name(self):
        """Return the name of the charset this prober reports."""
        return "EUC-KR"
43 | 


--------------------------------------------------------------------------------
/thirdparty_libs/requests/packages/charade/euctwprober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is mozilla.org code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 1998
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | # 
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 | # Lesser General Public License for more details.
21 | # 
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301  USA
26 | ######################### END LICENSE BLOCK #########################
27 | 
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import EUCTWDistributionAnalysis
31 | from .mbcssm import EUCTWSMModel
32 | 
class EUCTWProber(MultiByteCharSetProber):
    """Multi-byte prober specialised for the EUC-TW encoding.

    Pairs the EUC-TW state-machine model with the EUC-TW distribution
    analysis; everything else is inherited from MultiByteCharSetProber.
    """

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        # reset() runs last, once both helpers have been assigned.
        self._mDistributionAnalyzer = EUCTWDistributionAnalysis()
        self._mCodingSM = CodingStateMachine(EUCTWSMModel)
        self.reset()

    def get_charset_name(self):
        """Return the name of the charset this prober reports."""
        return "EUC-TW"
42 | 


--------------------------------------------------------------------------------
/thirdparty_libs/requests/packages/charade/gb2312prober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is mozilla.org code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 1998
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | # 
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 | # Lesser General Public License for more details.
21 | # 
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301  USA
26 | ######################### END LICENSE BLOCK #########################
27 | 
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import GB2312DistributionAnalysis
31 | from .mbcssm import GB2312SMModel
32 | 
class GB2312Prober(MultiByteCharSetProber):
    """Multi-byte prober specialised for the GB2312 encoding.

    Pairs the GB2312 state-machine model with the GB2312 distribution
    analysis; everything else is inherited from MultiByteCharSetProber.
    """

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        # reset() runs last, once both helpers have been assigned.
        self._mDistributionAnalyzer = GB2312DistributionAnalysis()
        self._mCodingSM = CodingStateMachine(GB2312SMModel)
        self.reset()

    def get_charset_name(self):
        """Return the name of the charset this prober reports."""
        return "GB2312"
42 | 


--------------------------------------------------------------------------------
/thirdparty_libs/requests/packages/charade/latin1prober.py:
--------------------------------------------------------------------------------
  1 | ######################## BEGIN LICENSE BLOCK ########################
  2 | # The Original Code is Mozilla Universal charset detector code.
  3 | #
  4 | # The Initial Developer of the Original Code is
  5 | # Netscape Communications Corporation.
  6 | # Portions created by the Initial Developer are Copyright (C) 2001
  7 | # the Initial Developer. All Rights Reserved.
  8 | #
  9 | # Contributor(s):
 10 | #   Mark Pilgrim - port to Python
 11 | #   Shy Shalom - original C code
 12 | #
 13 | # This library is free software; you can redistribute it and/or
 14 | # modify it under the terms of the GNU Lesser General Public
 15 | # License as published by the Free Software Foundation; either
 16 | # version 2.1 of the License, or (at your option) any later version.
 17 | #
 18 | # This library is distributed in the hope that it will be useful,
 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 21 | # Lesser General Public License for more details.
 22 | #
 23 | # You should have received a copy of the GNU Lesser General Public
 24 | # License along with this library; if not, write to the Free Software
 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 26 | # 02110-1301  USA
 27 | ######################### END LICENSE BLOCK #########################
 28 | 
 29 | from .charsetprober import CharSetProber
 30 | from .constants import eNotMe
 31 | from .compat import wrap_ord
 32 | 
# Number of likelihood categories (0-3) stored in Latin1ClassModel; the
# prober keeps one counter per category.
FREQ_CAT_NUM = 4

# Character classes assigned to byte values by Latin1_CharToClass.
UDF = 0  # undefined
OTH = 1  # other
ASC = 2  # ascii capital letter
ASS = 3  # ascii small letter
ACV = 4  # accent capital vowel
ACO = 5  # accent capital other
ASV = 6  # accent small vowel
ASO = 7  # accent small other
CLASS_NUM = 8  # total classes
 44 | 
# Maps every byte value (0x00-0xFF) to one of the character classes defined
# above.  Latin1Prober.feed indexes it with wrap_ord(c).
Latin1_CharToClass = (
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 00 - 07
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 08 - 0F
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 10 - 17
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 18 - 1F
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 20 - 27
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 28 - 2F
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 30 - 37
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 38 - 3F
    OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   # 40 - 47
    ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   # 48 - 4F
    ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   # 50 - 57
    ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH,   # 58 - 5F
    OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   # 60 - 67
    ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   # 68 - 6F
    ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   # 70 - 77
    ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH,   # 78 - 7F
    OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH,   # 80 - 87
    OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF,   # 88 - 8F
    UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 90 - 97
    OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO,   # 98 - 9F
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # A0 - A7
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # A8 - AF
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # B0 - B7
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # B8 - BF
    ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO,   # C0 - C7
    ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV,   # C8 - CF
    ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH,   # D0 - D7
    ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO,   # D8 - DF
    ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO,   # E0 - E7
    ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV,   # E8 - EF
    ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH,   # F0 - F7
    ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO,   # F8 - FF
)
 79 | 
# Likelihood of one character class following another, stored as a flattened
# CLASS_NUM x CLASS_NUM table.  Latin1Prober.feed looks it up with
# (previous class * CLASS_NUM) + current class, so each row below is the
# previous class and each column the current one.
# 0 : illegal
# 1 : very unlikely
# 2 : normal
# 3 : very likely
Latin1ClassModel = (
    # UDF OTH ASC ASS ACV ACO ASV ASO
    0,  0,  0,  0,  0,  0,  0,  0,  # UDF
    0,  3,  3,  3,  3,  3,  3,  3,  # OTH
    0,  3,  3,  3,  3,  3,  3,  3,  # ASC
    0,  3,  3,  3,  1,  1,  3,  3,  # ASS
    0,  3,  3,  3,  1,  2,  1,  2,  # ACV
    0,  3,  3,  3,  3,  3,  3,  3,  # ACO
    0,  3,  1,  3,  1,  1,  1,  3,  # ASV
    0,  3,  1,  3,  1,  1,  3,  3,  # ASO
)
 95 | 
 96 | 
 97 | class Latin1Prober(CharSetProber):
 98 |     def __init__(self):
 99 |         CharSetProber.__init__(self)
100 |         self.reset()
101 | 
102 |     def reset(self):
103 |         self._mLastCharClass = OTH
104 |         self._mFreqCounter = [0] * FREQ_CAT_NUM
105 |         CharSetProber.reset(self)
106 | 
107 |     def get_charset_name(self):
108 |         return "windows-1252"
109 | 
110 |     def feed(self, aBuf):
111 |         aBuf = self.filter_with_english_letters(aBuf)
112 |         for c in aBuf:
113 |             charClass = Latin1_CharToClass[wrap_ord(c)]
114 |             freq = Latin1ClassModel[(self._mLastCharClass * CLASS_NUM)
115 |                                     + charClass]
116 |             if freq == 0:
117 |                 self._mState = eNotMe
118 |                 break
119 |             self._mFreqCounter[freq] += 1
120 |             self._mLastCharClass = charClass
121 | 
122 |         return self.get_state()
123 | 
124 |     def get_confidence(self):
125 |         if self.get_state() == eNotMe:
126 |             return 0.01
127 | 
128 |         total = sum(self._mFreqCounter)
129 |         if total < 0.01:
130 |             confidence = 0.0
131 |         else:
132 |             confidence = ((self._mFreqCounter[3] / total)
133 |                           - (self._mFreqCounter[1] * 20.0 / total))
134 |         if confidence < 0.0:
135 |             confidence = 0.0
136 |         # lower the confidence of latin1 so that other more accurate
137 |         # detector can take priority.
138 |         confidence = confidence * 0.5
139 |         return confidence
140 | 


--------------------------------------------------------------------------------
/thirdparty_libs/requests/packages/charade/mbcharsetprober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is Mozilla Universal charset detector code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 2001
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #   Shy Shalom - original C code
12 | #   Proofpoint, Inc.
13 | #
14 | # This library is free software; you can redistribute it and/or
15 | # modify it under the terms of the GNU Lesser General Public
16 | # License as published by the Free Software Foundation; either
17 | # version 2.1 of the License, or (at your option) any later version.
18 | #
19 | # This library is distributed in the hope that it will be useful,
20 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
21 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22 | # Lesser General Public License for more details.
23 | #
24 | # You should have received a copy of the GNU Lesser General Public
25 | # License along with this library; if not, write to the Free Software
26 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
27 | # 02110-1301  USA
28 | ######################### END LICENSE BLOCK #########################
29 | 
30 | import sys
31 | from . import constants
32 | from .charsetprober import CharSetProber
33 | 
34 | 
class MultiByteCharSetProber(CharSetProber):
    """Shared feed/confidence logic for multi-byte charset probers.

    ``_mCodingSM`` and ``_mDistributionAnalyzer`` start out as ``None`` here;
    ``feed`` and ``get_confidence`` use them unconditionally, so subclasses
    are expected to assign both before data is fed.
    """

    def __init__(self):
        CharSetProber.__init__(self)
        self._mDistributionAnalyzer = None
        self._mCodingSM = None
        # [last byte of the previous buffer, first byte of the current one]
        self._mLastChar = [0, 0]

    def reset(self):
        """Reset the prober together with whichever helpers are set."""
        CharSetProber.reset(self)
        if self._mCodingSM:
            self._mCodingSM.reset()
        if self._mDistributionAnalyzer:
            self._mDistributionAnalyzer.reset()
        self._mLastChar = [0, 0]

    def get_charset_name(self):
        """Placeholder that returns ``None``; subclasses override it."""
        pass

    def feed(self, aBuf):
        """Run ``aBuf`` through the state machine and return the state.

        An empty buffer leaves the prober untouched and simply returns the
        current state.
        """
        aLen = len(aBuf)
        if not aLen:
            # Nothing to analyse.  Returning here also keeps the
            # ``aBuf[aLen - 1]`` lookup below from raising IndexError.
            return self.get_state()
        for i in range(0, aLen):
            codingState = self._mCodingSM.next_state(aBuf[i])
            if codingState == constants.eError:
                if constants._debug:
                    sys.stderr.write(self.get_charset_name()
                                     + ' prober hit error at byte ' + str(i)
                                     + '\n')
                self._mState = constants.eNotMe
                break
            elif codingState == constants.eItsMe:
                self._mState = constants.eFoundIt
                break
            elif codingState == constants.eStart:
                charLen = self._mCodingSM.get_current_charlen()
                if i == 0:
                    # Pair the byte saved from the previous buffer with the
                    # first byte of this one.
                    self._mLastChar[1] = aBuf[0]
                    self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
                else:
                    self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
                                                     charLen)

        # Remember the final byte for the next call.
        self._mLastChar[0] = aBuf[aLen - 1]

        if self.get_state() == constants.eDetecting:
            if (self._mDistributionAnalyzer.got_enough_data() and
                    (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
                self._mState = constants.eFoundIt

        return self.get_state()

    def get_confidence(self):
        """Return the distribution analyzer's confidence."""
        return self._mDistributionAnalyzer.get_confidence()
87 | 


--------------------------------------------------------------------------------
/thirdparty_libs/requests/packages/charade/mbcsgroupprober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is Mozilla Universal charset detector code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 2001
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #   Shy Shalom - original C code
12 | #   Proofpoint, Inc.
13 | #
14 | # This library is free software; you can redistribute it and/or
15 | # modify it under the terms of the GNU Lesser General Public
16 | # License as published by the Free Software Foundation; either
17 | # version 2.1 of the License, or (at your option) any later version.
18 | #
19 | # This library is distributed in the hope that it will be useful,
20 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
21 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22 | # Lesser General Public License for more details.
23 | #
24 | # You should have received a copy of the GNU Lesser General Public
25 | # License along with this library; if not, write to the Free Software
26 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
27 | # 02110-1301  USA
28 | ######################### END LICENSE BLOCK #########################
29 | 
30 | from .charsetgroupprober import CharSetGroupProber
31 | from .utf8prober import UTF8Prober
32 | from .sjisprober import SJISProber
33 | from .eucjpprober import EUCJPProber
34 | from .gb2312prober import GB2312Prober
35 | from .euckrprober import EUCKRProber
36 | from .cp949prober import CP949Prober
37 | from .big5prober import Big5Prober
38 | from .euctwprober import EUCTWProber
39 | 
40 | 
class MBCSGroupProber(CharSetGroupProber):
    """Group prober that bundles the multi-byte charset probers."""

    def __init__(self):
        CharSetGroupProber.__init__(self)
        # One fresh instance of each prober class, created in this order.
        prober_classes = (
            UTF8Prober,
            SJISProber,
            EUCJPProber,
            GB2312Prober,
            EUCKRProber,
            CP949Prober,
            Big5Prober,
            EUCTWProber,
        )
        self._mProbers = [make_prober() for make_prober in prober_classes]
        self.reset()
55 | 


--------------------------------------------------------------------------------
/thirdparty_libs/requests/packages/charade/sbcharsetprober.py:
--------------------------------------------------------------------------------
  1 | ######################## BEGIN LICENSE BLOCK ########################
  2 | # The Original Code is Mozilla Universal charset detector code.
  3 | #
  4 | # The Initial Developer of the Original Code is
  5 | # Netscape Communications Corporation.
  6 | # Portions created by the Initial Developer are Copyright (C) 2001
  7 | # the Initial Developer. All Rights Reserved.
  8 | #
  9 | # Contributor(s):
 10 | #   Mark Pilgrim - port to Python
 11 | #   Shy Shalom - original C code
 12 | #
 13 | # This library is free software; you can redistribute it and/or
 14 | # modify it under the terms of the GNU Lesser General Public
 15 | # License as published by the Free Software Foundation; either
 16 | # version 2.1 of the License, or (at your option) any later version.
 17 | #
 18 | # This library is distributed in the hope that it will be useful,
 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 21 | # Lesser General Public License for more details.
 22 | #
 23 | # You should have received a copy of the GNU Lesser General Public
 24 | # License along with this library; if not, write to the Free Software
 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 26 | # 02110-1301  USA
 27 | ######################### END LICENSE BLOCK #########################
 28 | 
 29 | import sys
 30 | from . import constants
 31 | from .charsetprober import CharSetProber
 32 | from .compat import wrap_ord
 33 | 
# Character orders below SAMPLE_SIZE are counted as frequent characters and
# take part in sequence lookups; it is also the row width used to index the
# model's flattened 'precedenceMatrix'.
SAMPLE_SIZE = 64
# Number of counted sequences that must be exceeded before feed() makes a
# shortcut decision.
SB_ENOUGH_REL_THRESHOLD = 1024
# Confidence above which feed() declares eFoundIt / below which it declares
# eNotMe once enough sequences were seen.
POSITIVE_SHORTCUT_THRESHOLD = 0.95
NEGATIVE_SHORTCUT_THRESHOLD = 0.05
# Only character orders below this value are counted in the character total.
SYMBOL_CAT_ORDER = 250
# Number of sequence categories, i.e. the number of sequence counters kept.
NUMBER_OF_SEQ_CAT = 4
# Index of the sequence counter that get_confidence() reads.
POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1
#NEGATIVE_CAT = 0
 42 | 
 43 | 
 44 | class SingleByteCharSetProber(CharSetProber):
 45 |     def __init__(self, model, reversed=False, nameProber=None):
 46 |         CharSetProber.__init__(self)
 47 |         self._mModel = model
 48 |         # TRUE if we need to reverse every pair in the model lookup
 49 |         self._mReversed = reversed
 50 |         # Optional auxiliary prober for name decision
 51 |         self._mNameProber = nameProber
 52 |         self.reset()
 53 | 
 54 |     def reset(self):
 55 |         CharSetProber.reset(self)
 56 |         # char order of last character
 57 |         self._mLastOrder = 255
 58 |         self._mSeqCounters = [0] * NUMBER_OF_SEQ_CAT
 59 |         self._mTotalSeqs = 0
 60 |         self._mTotalChar = 0
 61 |         # characters that fall in our sampling range
 62 |         self._mFreqChar = 0
 63 | 
 64 |     def get_charset_name(self):
 65 |         if self._mNameProber:
 66 |             return self._mNameProber.get_charset_name()
 67 |         else:
 68 |             return self._mModel['charsetName']
 69 | 
 70 |     def feed(self, aBuf):
 71 |         if not self._mModel['keepEnglishLetter']:
 72 |             aBuf = self.filter_without_english_letters(aBuf)
 73 |         aLen = len(aBuf)
 74 |         if not aLen:
 75 |             return self.get_state()
 76 |         for c in aBuf:
 77 |             order = self._mModel['charToOrderMap'][wrap_ord(c)]
 78 |             if order < SYMBOL_CAT_ORDER:
 79 |                 self._mTotalChar += 1
 80 |             if order < SAMPLE_SIZE:
 81 |                 self._mFreqChar += 1
 82 |                 if self._mLastOrder < SAMPLE_SIZE:
 83 |                     self._mTotalSeqs += 1
 84 |                     if not self._mReversed:
 85 |                         i = (self._mLastOrder * SAMPLE_SIZE) + order
 86 |                         model = self._mModel['precedenceMatrix'][i]
 87 |                     else:  # reverse the order of the letters in the lookup
 88 |                         i = (order * SAMPLE_SIZE) + self._mLastOrder
 89 |                         model = self._mModel['precedenceMatrix'][i]
 90 |                     self._mSeqCounters[model] += 1
 91 |             self._mLastOrder = order
 92 | 
 93 |         if self.get_state() == constants.eDetecting:
 94 |             if self._mTotalSeqs > SB_ENOUGH_REL_THRESHOLD:
 95 |                 cf = self.get_confidence()
 96 |                 if cf > POSITIVE_SHORTCUT_THRESHOLD:
 97 |                     if constants._debug:
 98 |                         sys.stderr.write('%s confidence = %s, we have a'
 99 |                                          'winner\n' %
100 |                                          (self._mModel['charsetName'], cf))
101 |                     self._mState = constants.eFoundIt
102 |                 elif cf < NEGATIVE_SHORTCUT_THRESHOLD:
103 |                     if constants._debug:
104 |                         sys.stderr.write('%s confidence = %s, below negative'
105 |                                          'shortcut threshhold %s\n' %
106 |                                          (self._mModel['charsetName'], cf,
107 |                                           NEGATIVE_SHORTCUT_THRESHOLD))
108 |                     self._mState = constants.eNotMe
109 | 
110 |         return self.get_state()
111 | 
112 |     def get_confidence(self):
113 |         r = 0.01
114 |         if self._mTotalSeqs > 0:
115 |             r = ((1.0 * self._mSeqCounters[POSITIVE_CAT]) / self._mTotalSeqs
116 |                  / self._mModel['mTypicalPositiveRatio'])
117 |             r = r * self._mFreqChar / self._mTotalChar
118 |             if r >= 1.0:
119 |                 r = 0.99
120 |         return r
121 | 


--------------------------------------------------------------------------------
/thirdparty_libs/requests/packages/charade/sbcsgroupprober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is Mozilla Universal charset detector code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 2001
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #   Shy Shalom - original C code
12 | #
13 | # This library is free software; you can redistribute it and/or
14 | # modify it under the terms of the GNU Lesser General Public
15 | # License as published by the Free Software Foundation; either
16 | # version 2.1 of the License, or (at your option) any later version.
17 | #
18 | # This library is distributed in the hope that it will be useful,
19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21 | # Lesser General Public License for more details.
22 | #
23 | # You should have received a copy of the GNU Lesser General Public
24 | # License along with this library; if not, write to the Free Software
25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26 | # 02110-1301  USA
27 | ######################### END LICENSE BLOCK #########################
28 | 
29 | from .charsetgroupprober import CharSetGroupProber
30 | from .sbcharsetprober import SingleByteCharSetProber
31 | from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel,
32 |                                 Latin5CyrillicModel, MacCyrillicModel,
33 |                                 Ibm866Model, Ibm855Model)
34 | from .langgreekmodel import Latin7GreekModel, Win1253GreekModel
35 | from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel
36 | from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel
37 | from .langthaimodel import TIS620ThaiModel
38 | from .langhebrewmodel import Win1255HebrewModel
39 | from .hebrewprober import HebrewProber
40 | 
41 | 
class SBCSGroupProber(CharSetGroupProber):
    """Group prober that bundles the single-byte charset probers."""

    def __init__(self):
        CharSetGroupProber.__init__(self)

        # One plain prober per language model, created in this order.
        language_models = (
            Win1251CyrillicModel,
            Koi8rModel,
            Latin5CyrillicModel,
            MacCyrillicModel,
            Ibm866Model,
            Ibm855Model,
            Latin7GreekModel,
            Win1253GreekModel,
            Latin5BulgarianModel,
            Win1251BulgarianModel,
            Latin2HungarianModel,
            Win1250HungarianModel,
            TIS620ThaiModel,
        )
        self._mProbers = [SingleByteCharSetProber(language_model)
                          for language_model in language_models]

        # Hebrew uses two probers over the same model (the second one with
        # reversed pair lookup); both delegate the name to a HebrewProber.
        name_prober = HebrewProber()
        logical_prober = SingleByteCharSetProber(Win1255HebrewModel, False,
                                                 name_prober)
        visual_prober = SingleByteCharSetProber(Win1255HebrewModel, True,
                                                name_prober)
        name_prober.set_model_probers(logical_prober, visual_prober)
        self._mProbers += [name_prober, logical_prober, visual_prober]

        self.reset()
70 | 


--------------------------------------------------------------------------------
/thirdparty_libs/requests/packages/charade/sjisprober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is mozilla.org code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 1998
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301  USA
26 | ######################### END LICENSE BLOCK #########################
27 | 
28 | import sys
29 | from .mbcharsetprober import MultiByteCharSetProber
30 | from .codingstatemachine import CodingStateMachine
31 | from .chardistribution import SJISDistributionAnalysis
32 | from .jpcntx import SJISContextAnalysis
33 | from .mbcssm import SJISSMModel
34 | from . import constants
35 | 
36 | 
class SJISProber(MultiByteCharSetProber):
    """Prober for SHIFT_JIS combining distribution and context analysis."""

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        self._mCodingSM = CodingStateMachine(SJISSMModel)
        self._mDistributionAnalyzer = SJISDistributionAnalysis()
        self._mContextAnalyzer = SJISContextAnalysis()
        self.reset()

    def reset(self):
        """Reset the base prober state and the context analyzer."""
        MultiByteCharSetProber.reset(self)
        self._mContextAnalyzer.reset()

    def get_charset_name(self):
        """Return the name of the charset this prober reports."""
        return "SHIFT_JIS"

    def feed(self, aBuf):
        """Run ``aBuf`` through the state machine and return the state.

        An empty buffer leaves the prober untouched and simply returns the
        current state.
        """
        aLen = len(aBuf)
        if not aLen:
            # Nothing to analyse.  Returning here also keeps the
            # ``aBuf[aLen - 1]`` lookup below from raising IndexError.
            return self.get_state()
        for i in range(0, aLen):
            codingState = self._mCodingSM.next_state(aBuf[i])
            if codingState == constants.eError:
                if constants._debug:
                    sys.stderr.write(self.get_charset_name()
                                     + ' prober hit error at byte ' + str(i)
                                     + '\n')
                self._mState = constants.eNotMe
                break
            elif codingState == constants.eItsMe:
                self._mState = constants.eFoundIt
                break
            elif codingState == constants.eStart:
                charLen = self._mCodingSM.get_current_charlen()
                if i == 0:
                    # Pair the byte saved from the previous buffer with the
                    # first byte of this one.
                    self._mLastChar[1] = aBuf[0]
                    self._mContextAnalyzer.feed(self._mLastChar[2 - charLen:],
                                                charLen)
                    self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
                else:
                    self._mContextAnalyzer.feed(aBuf[i + 1 - charLen:i + 3
                                                     - charLen], charLen)
                    self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
                                                     charLen)

        # Remember the final byte for the next call.
        self._mLastChar[0] = aBuf[aLen - 1]

        if self.get_state() == constants.eDetecting:
            if (self._mContextAnalyzer.got_enough_data() and
               (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
                self._mState = constants.eFoundIt

        return self.get_state()

    def get_confidence(self):
        """Return the higher of the context and distribution confidences."""
        contxtCf = self._mContextAnalyzer.get_confidence()
        distribCf = self._mDistributionAnalyzer.get_confidence()
        return max(contxtCf, distribCf)
92 | 


--------------------------------------------------------------------------------
/thirdparty_libs/requests/packages/charade/utf8prober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is mozilla.org code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 1998
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301  USA
26 | ######################### END LICENSE BLOCK #########################
27 | 
28 | from . import constants
29 | from .charsetprober import CharSetProber
30 | from .codingstatemachine import CodingStateMachine
31 | from .mbcssm import UTF8SMModel
32 | 
# Factor applied once per multi-byte character seen when
# UTF8Prober.get_confidence() shrinks its "unlike" estimate.
ONE_CHAR_PROB = 0.5
34 | 
35 | 
class UTF8Prober(CharSetProber):
    """Prober that recognises UTF-8 with the ``UTF8SMModel`` state machine."""

    def __init__(self):
        CharSetProber.__init__(self)
        self._mCodingSM = CodingStateMachine(UTF8SMModel)
        self.reset()

    def reset(self):
        """Reset the prober, its state machine and the character count."""
        CharSetProber.reset(self)
        self._mCodingSM.reset()
        self._mNumOfMBChar = 0

    def get_charset_name(self):
        """Return the name of the charset this prober reports."""
        return "utf-8"

    def feed(self, aBuf):
        """Feed ``aBuf`` to the state machine and return the prober state."""
        state_machine = self._mCodingSM
        for byte in aBuf:
            outcome = state_machine.next_state(byte)
            if outcome == constants.eError:
                self._mState = constants.eNotMe
                break
            if outcome == constants.eItsMe:
                self._mState = constants.eFoundIt
                break
            # Count characters that are at least two bytes long.
            if (outcome == constants.eStart
                    and state_machine.get_current_charlen() >= 2):
                self._mNumOfMBChar += 1

        still_detecting = self.get_state() == constants.eDetecting
        if still_detecting and (self.get_confidence()
                                > constants.SHORTCUT_THRESHOLD):
            self._mState = constants.eFoundIt

        return self.get_state()

    def get_confidence(self):
        """Return a confidence that grows with the multi-byte char count.

        From six multi-byte characters on the result is a flat 0.99.
        """
        unlike = 0.99
        if self._mNumOfMBChar >= 6:
            return unlike
        for _ in range(0, self._mNumOfMBChar):
            unlike = unlike * ONE_CHAR_PROB
        return 1.0 - unlike
77 | 


--------------------------------------------------------------------------------
/thirdparty_libs/requests/packages/urllib3/__init__.py:
--------------------------------------------------------------------------------
 1 | # urllib3/__init__.py
 2 | # Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
 3 | #
 4 | # This module is part of urllib3 and is released under
 5 | # the MIT License: http://www.opensource.org/licenses/mit-license.php
 6 | 
 7 | """
 8 | urllib3 - Thread-safe connection pooling and re-using.
 9 | """
10 | 
# Package metadata.
__author__ = 'Andrey Petrov (andrey.petrov@shazow.net)'
__license__ = 'MIT'
__version__ = 'dev'
14 | 
15 | 
16 | from .connectionpool import (
17 |     HTTPConnectionPool,
18 |     HTTPSConnectionPool,
19 |     connection_from_url
20 | )
21 | 
22 | from . import exceptions
23 | from .filepost import encode_multipart_formdata
24 | from .poolmanager import PoolManager, ProxyManager, proxy_from_url
25 | from .response import HTTPResponse
26 | from .util import make_headers, get_host
27 | 
28 | 
# Set default logging handler to avoid "No handler found" warnings.
import logging
try:  # Python 2.7+
    from logging import NullHandler
except ImportError:
    # Fallback when the logging module provides no NullHandler.
    class NullHandler(logging.Handler):
        """Handler that discards every record it is given."""

        def emit(self, record):
            pass

# Attach a do-nothing handler to this package's logger.
logging.getLogger(__name__).addHandler(NullHandler())
39 | 
def add_stderr_logger(level=logging.DEBUG):
    """
    Helper for quickly adding a StreamHandler to the logger. Useful for
    debugging.

    :param level:
        Level the package logger is set to; ``logging.DEBUG`` by default.

    Returns the handler after adding it.
    """
    # This method needs to be in this __init__.py to get the __name__ correct
    # even if urllib3 is vendored within another package.
    logger = logging.getLogger(__name__)
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
    logger.addHandler(handler)
    logger.setLevel(level)
    # Pass the name as a logging argument so that the message is only
    # interpolated when the record is actually emitted.
    logger.debug('Added an stderr logging handler to logger: %s', __name__)
    return handler
56 | 
# Clean up: drop the NullHandler name from the package namespace.  The
# handler instance added to the logger above is unaffected.
del NullHandler
59 | 


--------------------------------------------------------------------------------
/thirdparty_libs/requests/packages/urllib3/_collections.py:
--------------------------------------------------------------------------------
 1 | # urllib3/_collections.py
 2 | # Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
 3 | #
 4 | # This module is part of urllib3 and is released under
 5 | # the MIT License: http://www.opensource.org/licenses/mit-license.php
 6 | 
 7 | from collections import MutableMapping
 8 | from threading import RLock
 9 | 
10 | try: # Python 2.7+
11 |     from collections import OrderedDict
12 | except ImportError:
13 |     from .packages.ordered_dict import OrderedDict
14 | 
15 | 
__all__ = ['RecentlyUsedContainer']


# Unique sentinel that RecentlyUsedContainer.__setitem__ uses to mean "no
# value was evicted", so that any stored value (None included) can be told
# apart from it.
_Null = object()
20 | 
21 | 
22 | class RecentlyUsedContainer(MutableMapping):
23 |     """
24 |     Provides a thread-safe dict-like container which maintains up to
25 |     ``maxsize`` keys while throwing away the least-recently-used keys beyond
26 |     ``maxsize``.
27 | 
28 |     :param maxsize:
29 |         Maximum number of recent elements to retain.
30 | 
31 |     :param dispose_func:
32 |         Every time an item is evicted from the container,
33 |         ``dispose_func(value)`` is called.  Callback which will get called
34 |     """
35 | 
36 |     ContainerCls = OrderedDict
37 | 
38 |     def __init__(self, maxsize=10, dispose_func=None):
39 |         self._maxsize = maxsize
40 |         self.dispose_func = dispose_func
41 | 
42 |         self._container = self.ContainerCls()
43 |         self.lock = RLock()
44 | 
45 |     def __getitem__(self, key):
46 |         # Re-insert the item, moving it to the end of the eviction line.
47 |         with self.lock:
48 |             item = self._container.pop(key)
49 |             self._container[key] = item
50 |             return item
51 | 
52 |     def __setitem__(self, key, value):
53 |         evicted_value = _Null
54 |         with self.lock:
55 |             # Possibly evict the existing value of 'key'
56 |             evicted_value = self._container.get(key, _Null)
57 |             self._container[key] = value
58 | 
59 |             # If we didn't evict an existing value, we might have to evict the
60 |             # least recently used item from the beginning of the container.
61 |             if len(self._container) > self._maxsize:
62 |                 _key, evicted_value = self._container.popitem(last=False)
63 | 
64 |         if self.dispose_func and evicted_value is not _Null:
65 |             self.dispose_func(evicted_value)
66 | 
67 |     def __delitem__(self, key):
68 |         with self.lock:
69 |             value = self._container.pop(key)
70 | 
71 |         if self.dispose_func:
72 |             self.dispose_func(value)
73 | 
74 |     def __len__(self):
75 |         with self.lock:
76 |             return len(self._container)
77 | 
78 |     def __iter__(self):
79 |         raise NotImplementedError('Iteration over this class is unlikely to be threadsafe.')
80 | 
81 |     def clear(self):
82 |         with self.lock:
83 |             # Copy pointers to all values, then wipe the mapping
84 |             # under Python 2, this copies the list of values twice :-|
85 |             values = list(self._container.values())
86 |             self._container.clear()
87 | 
88 |         if self.dispose_func:
89 |             for value in values:
90 |                 self.dispose_func(value)
91 | 
92 |     def keys(self):
93 |         with self.lock:
94 |             return self._container.keys()
95 | 


--------------------------------------------------------------------------------
/thirdparty_libs/requests/packages/urllib3/exceptions.py:
--------------------------------------------------------------------------------
 1 | # urllib3/exceptions.py
 2 | # Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
 3 | #
 4 | # This module is part of urllib3 and is released under
 5 | # the MIT License: http://www.opensource.org/licenses/mit-license.php
 6 | 
 7 | 
 8 | ## Base Exceptions
 9 | 
class HTTPError(Exception):
    """Base exception used by this module."""
13 | 
14 | 
class PoolError(HTTPError):
    """
    Base exception for errors caused within a pool.

    The pool is kept on ``self.pool`` and its string form prefixes the
    exception message.
    """

    def __init__(self, pool, message):
        HTTPError.__init__(self, "%s: %s" % (pool, message))
        self.pool = pool

    def __reduce__(self):
        # For pickling purposes: the instance is rebuilt from its class with
        # placeholder arguments, so neither pool nor message is serialised.
        return (self.__class__, (None, None))
24 | 
25 | 
class RequestError(PoolError):
    """
    Base exception for PoolErrors that have associated URLs.

    The URL is kept on ``self.url``.
    """

    def __init__(self, pool, url, message):
        self.url = url
        PoolError.__init__(self, pool, message)

    def __reduce__(self):
        # For pickling purposes: only the URL survives; pool and message are
        # replaced by placeholders.
        return (self.__class__, (None, self.url, None))
35 | 
36 | 
class SSLError(HTTPError):
    """Raised when SSL certificate fails in an HTTPS connection."""
40 | 
41 | 
class DecodeError(HTTPError):
    """Raised when automatic decoding based on Content-Type fails."""
45 | 
46 | 
47 | ## Leaf Exceptions
48 | 
class MaxRetryError(RequestError):
    """
    Raised when the maximum number of retries is exceeded.

    ``reason`` (kept on ``self.reason``) is the underlying cause, if any;
    when it is falsy the message attributes the failure to a redirect.
    """

    def __init__(self, pool, url, reason=None):
        self.reason = reason

        prefix = "Max retries exceeded with url: %s" % url
        if not reason:
            cause = " (Caused by redirect)"
        else:
            cause = " (Caused by %s: %s)" % (type(reason), reason)

        RequestError.__init__(self, pool, url, prefix + cause)
62 | 
63 | 
class HostChangedError(RequestError):
    """
    Raised when an existing pool gets a request for a foreign host.

    ``retries`` is stored unchanged on ``self.retries``.
    """

    def __init__(self, pool, url, retries=3):
        RequestError.__init__(
            self, pool, url,
            "Tried to open a foreign host with url: %s" % url)
        self.retries = retries
71 | 
72 | 
class TimeoutError(RequestError):
    """Raised when a socket timeout occurs."""
76 | 
77 | 
class EmptyPoolError(PoolError):
    """Raised when a pool runs out of connections and no more are allowed."""
81 | 
82 | 
class ClosedPoolError(PoolError):
    """Raised when a request enters a pool after the pool has been closed."""
86 | 
87 | 
class LocationParseError(ValueError, HTTPError):
    """
    Raised when get_host or similar fails to parse the URL input.

    The offending input is kept on ``self.location``.
    """

    def __init__(self, location):
        message = "Failed to parse: %s" % location
        HTTPError.__init__(self, message)

        self.location = location

    def __reduce__(self):
        # For pickling purposes. The default exception reduce replays
        # ``self.args`` (the already formatted message) into __init__, which
        # would prepend "Failed to parse: " a second time on unpickling.
        # Rebuild from the raw location instead.
        return self.__class__, (self.location,)
96 | 


--------------------------------------------------------------------------------
/thirdparty_libs/requests/packages/urllib3/filepost.py:
--------------------------------------------------------------------------------
 1 | # urllib3/filepost.py
 2 | # Copyright 2008-2013 Andrey Petrov and contributors (see CONTRIBUTORS.txt)
 3 | #
 4 | # This module is part of urllib3 and is released under
 5 | # the MIT License: http://www.opensource.org/licenses/mit-license.php
 6 | 
 7 | import codecs
 8 | import mimetypes
 9 | 
10 | from uuid import uuid4
11 | from io import BytesIO
12 | 
13 | from .packages import six
14 | from .packages.six import b
15 | 
# ``codecs.lookup`` returns a CodecInfo tuple whose index 3 is the codec's
# StreamWriter class. ``writer(stream)`` therefore wraps a byte stream so that
# text written to it is encoded as UTF-8.
writer = codecs.lookup('utf-8')[3]
17 | 
18 | 
def choose_boundary():
    """
    Our embarrassingly-simple replacement for mimetools.choose_boundary.

    Returns the hexadecimal form of a freshly generated random UUID.
    """
    boundary_id = uuid4()
    return boundary_id.hex
24 | 
25 | 
def get_content_type(filename):
    """
    Guess the MIME type for ``filename`` from its name.

    Falls back to ``'application/octet-stream'`` when ``mimetypes`` cannot
    guess a type.
    """
    guessed, _encoding = mimetypes.guess_type(filename)
    if guessed:
        return guessed
    return 'application/octet-stream'
28 | 
29 | 
def iter_fields(fields):
    """
    Return a generator over the ``(key, value)`` pairs of ``fields``.

    ``fields`` may be a dict or an iterable of two-element ``(k, v)`` items.
    """
    pairs = six.iteritems(fields) if isinstance(fields, dict) else fields
    return ((name, value) for name, value in pairs)
40 | 
41 | 
def encode_multipart_formdata(fields, boundary=None):
    """
    Encode ``fields`` using the multipart/form-data MIME format.

    :param fields:
        Dictionary of fields or list of (key, value) pairs. The key is used
        as the field name. A non-tuple value is written as the body of the
        form-data section. A tuple value is unpacked as ``(filename, data)``,
        with the MIME type guessed from the filename, or as
        ``(filename, data, content_type)`` with an explicit MIME type.

        Field names and filenames must be unicode.

    :param boundary:
        If not specified, then a random boundary will be generated using
        :func:`choose_boundary`.

    Returns a ``(body, content_type)`` pair: the encoded body bytes and the
    matching ``multipart/form-data`` Content-Type header value.
    """
    if boundary is None:
        boundary = choose_boundary()

    body = BytesIO()
    # One UTF-8 stream writer layered over ``body``: text goes through it,
    # already-encoded bytes are written to ``body`` directly.
    text_out = writer(body)

    for fieldname, value in iter_fields(fields):
        body.write(b('--%s\r\n' % (boundary)))

        if not isinstance(value, tuple):
            data = value
            text_out.write('Content-Disposition: form-data; name="%s"\r\n'
                           % (fieldname))
            body.write(b'\r\n')
        else:
            if len(value) == 3:
                filename, data, content_type = value
            else:
                filename, data = value
                content_type = get_content_type(filename)
            text_out.write('Content-Disposition: form-data; name="%s"; '
                           'filename="%s"\r\n' % (fieldname, filename))
            body.write(b('Content-Type: %s\r\n\r\n' %
                       (content_type,)))

        if isinstance(data, int):
            data = str(data)  # Backwards compatibility

        if isinstance(data, six.text_type):
            text_out.write(data)
        else:
            body.write(data)

        body.write(b'\r\n')

    body.write(b('--%s--\r\n' % (boundary)))

    return body.getvalue(), str('multipart/form-data; boundary=%s' % boundary)
99 | 


--------------------------------------------------------------------------------
/thirdparty_libs/requests/packages/urllib3/packages/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | 
3 | from . import ssl_match_hostname
4 | 
5 | 


--------------------------------------------------------------------------------
/thirdparty_libs/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py:
--------------------------------------------------------------------------------
 1 | """The match_hostname() function from Python 3.2, essential when using SSL."""
 2 | 
 3 | import re
 4 | 
 5 | __version__ = '3.2.2'
 6 | 
 7 | class CertificateError(ValueError):
 8 |     pass
 9 | 
10 | def _dnsname_to_pat(dn):
11 |     pats = []
12 |     for frag in dn.split(r'.'):
13 |         if frag == '*':
14 |             # When '*' is a fragment by itself, it matches a non-empty dotless
15 |             # fragment.
16 |             pats.append('[^.]+')
17 |         else:
18 |             # Otherwise, '*' matches any dotless fragment.
19 |             frag = re.escape(frag)
20 |             pats.append(frag.replace(r'\*', '[^.]*'))
21 |     return re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
22 | 
def match_hostname(cert, hostname):
    """Check that *cert* (in decoded format as returned by
    SSLSocket.getpeercert()) matches the *hostname*.  RFC 2818 rules
    are mostly followed, but IP addresses are not accepted for *hostname*.

    ``DNS`` entries of ``subjectAltName`` are tried first; the subject's
    ``commonName`` values are consulted only when no such entry exists.

    Raises ValueError for an empty or missing certificate and
    CertificateError when nothing matches. Returns nothing on success.
    """
    if not cert:
        raise ValueError("empty or no certificate")

    candidates = []
    for kind, name in cert.get('subjectAltName', ()):
        if kind != 'DNS':
            continue
        if _dnsname_to_pat(name).match(hostname):
            return
        candidates.append(name)

    if not candidates:
        # The subject is only checked when there is no dNSName entry
        # in subjectAltName
        for rdn in cert.get('subject', ()):
            for kind, name in rdn:
                # XXX according to RFC 2818, the most specific Common Name
                # must be used.
                if kind != 'commonName':
                    continue
                if _dnsname_to_pat(name).match(hostname):
                    return
                candidates.append(name)

    found = len(candidates)
    if found == 0:
        raise CertificateError("no appropriate commonName or "
            "subjectAltName fields were found")
    if found == 1:
        raise CertificateError("hostname %r "
            "doesn't match %r"
            % (hostname, candidates[0]))
    raise CertificateError("hostname %r "
        "doesn't match either of %s"
        % (hostname, ', '.join(map(repr, candidates))))
62 | 


--------------------------------------------------------------------------------
/thirdparty_libs/requests/status_codes.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | from .structures import LookupDict
 4 | 
# Maps each numeric HTTP status code to a tuple of alias names for it.
_codes = {

    # Informational.
    100: ('continue',),
    101: ('switching_protocols',),
    102: ('processing',),
    103: ('checkpoint',),
    122: ('uri_too_long', 'request_uri_too_long'),

    # Success.
    200: ('ok', 'okay', 'all_ok', 'all_okay', 'all_good', '\\o/', '✓'),
    201: ('created',),
    202: ('accepted',),
    203: ('non_authoritative_info', 'non_authoritative_information'),
    204: ('no_content',),
    205: ('reset_content', 'reset'),
    206: ('partial_content', 'partial'),
    207: ('multi_status', 'multiple_status', 'multi_stati', 'multiple_stati'),
    208: ('already_reported',),
    226: ('im_used',),

    # Redirection.
    300: ('multiple_choices',),
    301: ('moved_permanently', 'moved', '\\o-'),
    302: ('found',),
    303: ('see_other', 'other'),
    304: ('not_modified',),
    305: ('use_proxy',),
    306: ('switch_proxy',),
    307: ('temporary_redirect', 'temporary_moved', 'temporary'),
    308: ('resume_incomplete', 'resume'),

    # Client Error.
    400: ('bad_request', 'bad'),
    401: ('unauthorized',),
    402: ('payment_required', 'payment'),
    403: ('forbidden',),
    404: ('not_found', '-o-'),
    405: ('method_not_allowed', 'not_allowed'),
    406: ('not_acceptable',),
    407: ('proxy_authentication_required', 'proxy_auth', 'proxy_authentication'),
    408: ('request_timeout', 'timeout'),
    409: ('conflict',),
    410: ('gone',),
    411: ('length_required',),
    412: ('precondition_failed', 'precondition'),
    413: ('request_entity_too_large',),
    414: ('request_uri_too_large',),
    415: ('unsupported_media_type', 'unsupported_media', 'media_type'),
    416: ('requested_range_not_satisfiable', 'requested_range', 'range_not_satisfiable'),
    417: ('expectation_failed',),
    418: ('im_a_teapot', 'teapot', 'i_am_a_teapot'),
    422: ('unprocessable_entity', 'unprocessable'),
    423: ('locked',),
    424: ('failed_dependency', 'dependency'),
    425: ('unordered_collection', 'unordered'),
    426: ('upgrade_required', 'upgrade'),
    # NOTE(review): 'precondition' is also an alias of 412 above, so the two
    # entries compete for the same name.
    428: ('precondition_required', 'precondition'),
    429: ('too_many_requests', 'too_many'),
    431: ('header_fields_too_large', 'fields_too_large'),
    444: ('no_response', 'none'),
    449: ('retry_with', 'retry'),
    450: ('blocked_by_windows_parental_controls', 'parental_controls'),
    451: ('unavailable_for_legal_reasons', 'legal_reasons'),
    499: ('client_closed_request',),

    # Server Error.
    500: ('internal_server_error', 'server_error', '/o\\', '✗'),
    501: ('not_implemented',),
    502: ('bad_gateway',),
    503: ('service_unavailable', 'unavailable'),
    504: ('gateway_timeout',),
    505: ('http_version_not_supported', 'http_version'),
    506: ('variant_also_negotiates',),
    507: ('insufficient_storage',),
    509: ('bandwidth_limit_exceeded', 'bandwidth'),
    510: ('not_extended',),
}
81 | 
# Expose every alias in ``_codes`` as an attribute of ``codes`` holding the
# numeric status code. Aliases that do not start with a backslash also get an
# upper-case twin.
codes = LookupDict(name='status_codes')

for (status, aliases) in list(_codes.items()):
    for alias in aliases:
        setattr(codes, alias, status)
        if alias.startswith('\\'):
            continue
        setattr(codes, alias.upper(), status)
89 | 


--------------------------------------------------------------------------------
/thirdparty_libs/requests/structures.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | """
  4 | requests.structures
  5 | ~~~~~~~~~~~~~~~~~~~
  6 | 
  7 | Data structures that power Requests.
  8 | 
  9 | """
 10 | 
 11 | import os
 12 | import collections
 13 | from itertools import islice
 14 | 
 15 | 
 16 | class IteratorProxy(object):
 17 |     """docstring for IteratorProxy"""
 18 |     def __init__(self, i):
 19 |         self.i = i
 20 |         # self.i = chain.from_iterable(i)
 21 | 
 22 |     def __iter__(self):
 23 |         return self.i
 24 | 
 25 |     def __len__(self):
 26 |         if hasattr(self.i, '__len__'):
 27 |             return len(self.i)
 28 |         if hasattr(self.i, 'len'):
 29 |             return self.i.len
 30 |         if hasattr(self.i, 'fileno'):
 31 |             return os.fstat(self.i.fileno()).st_size
 32 | 
 33 |     def read(self, n):
 34 |         return "".join(islice(self.i, None, n))
 35 | 
 36 | 
 37 | class CaseInsensitiveDict(collections.MutableMapping):
 38 |     """
 39 |     A case-insensitive ``dict``-like object.
 40 | 
 41 |     Implements all methods and operations of
 42 |     ``collections.MutableMapping`` as well as dict's ``copy``. Also
 43 |     provides ``lower_items``.
 44 | 
 45 |     All keys are expected to be strings. The structure remembers the
 46 |     case of the last key to be set, and ``iter(instance)``,
 47 |     ``keys()``, ``items()``, ``iterkeys()``, and ``iteritems()``
 48 |     will contain case-sensitive keys. However, querying and contains
 49 |     testing is case insensitive:
 50 | 
 51 |         cid = CaseInsensitiveDict()
 52 |         cid['Accept'] = 'application/json'
 53 |         cid['aCCEPT'] == 'application/json'  # True
 54 |         list(cid) == ['Accept']  # True
 55 | 
 56 |     For example, ``headers['content-encoding']`` will return the
 57 |     value of a ``'Content-Encoding'`` response header, regardless
 58 |     of how the header name was originally stored.
 59 | 
 60 |     If the constructor, ``.update``, or equality comparison
 61 |     operations are given keys that have equal ``.lower()``s, the
 62 |     behavior is undefined.
 63 | 
 64 |     """
 65 |     def __init__(self, data=None, **kwargs):
 66 |         self._store = dict()
 67 |         if data is None:
 68 |             data = {}
 69 |         self.update(data, **kwargs)
 70 | 
 71 |     def __setitem__(self, key, value):
 72 |         # Use the lowercased key for lookups, but store the actual
 73 |         # key alongside the value.
 74 |         self._store[key.lower()] = (key, value)
 75 | 
 76 |     def __getitem__(self, key):
 77 |         return self._store[key.lower()][1]
 78 | 
 79 |     def __delitem__(self, key):
 80 |         del self._store[key.lower()]
 81 | 
 82 |     def __iter__(self):
 83 |         return (casedkey for casedkey, mappedvalue in self._store.values())
 84 | 
 85 |     def __len__(self):
 86 |         return len(self._store)
 87 | 
 88 |     def lower_items(self):
 89 |         """Like iteritems(), but with all lowercase keys."""
 90 |         return (
 91 |             (lowerkey, keyval[1])
 92 |             for (lowerkey, keyval)
 93 |             in self._store.items()
 94 |         )
 95 | 
 96 |     def __eq__(self, other):
 97 |         if isinstance(other, collections.Mapping):
 98 |             other = CaseInsensitiveDict(other)
 99 |         else:
100 |             return NotImplemented
101 |         # Compare insensitively
102 |         return dict(self.lower_items()) == dict(other.lower_items())
103 | 
104 |     # Copy is required
105 |     def copy(self):
106 |         return CaseInsensitiveDict(self._store.values())
107 | 
108 |     def __repr__(self):
109 |         return '%s(%r)' % (self.__class__.__name__, dict(self.items()))
110 | 
111 | 
class LookupDict(dict):
    """
    Dictionary lookup object.

    Item access and ``get`` read the instance's attributes (its
    ``__dict__``) rather than the dict contents; subscripting an unknown
    key yields ``None`` instead of raising.
    """

    def __init__(self, name=None):
        super(LookupDict, self).__init__()
        self.name = name

    def __repr__(self):
        return "<lookup '%s'>" % (self.name)

    def __getitem__(self, key):
        # We allow fall-through here, so values default to None
        return vars(self).get(key)

    def get(self, key, default=None):
        return vars(self).get(key, default)
129 | 


--------------------------------------------------------------------------------
/unittest/__init__.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
2 | #-*- coding:utf-8 -*-
3 | 
4 | __author__ = 'BlackYe.'
5 | 
import os
import sys

# Make the project root (the parent of this directory) importable. It is
# derived from this file's location so that it works wherever the project is
# checked out, rather than only under one hard-coded absolute path.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))


--------------------------------------------------------------------------------
/unittest/webscan_test.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
 2 | #-*- coding:utf-8 -*-
 3 | 
 4 | __author__ = 'BlackYe.'
 5 | 
 6 | import sys
 7 | sys.path.append("/data/project/webdirdig")
 8 | 
 9 | from lib.core.webscan import WebScan
10 | from lib.net.myrequests import Requester
11 | 
def f():
    """Print the string ``haha`` to standard output."""
    # Call form: prints the same text on Python 2 and also parses on Python 3.
    print("haha")
14 | 
def g():
    """Print the number ``111`` to standard output."""
    # Call form: prints the same text on Python 2 and also parses on Python 3.
    print(111)
17 | 
18 | 
def test(t_queue):
    """Put three fixed sample strings onto ``t_queue``, in order."""
    for sample in ("hahah", "bbbb", "cccc"):
        t_queue.put(sample)
23 | 
# Scratch driver: fills a gevent queue via test() and prints its size and
# head item. The two string blocks below are inert leftover experiments.
if __name__ == '__main__':

    from gevent import queue
    s = queue.Queue()
    print(s.qsize())
    test(s)
    print(s.qsize())

    # NOTE(review): peek() presumably leaves the item on the queue, so this
    # prints the head item once per queued element rather than each element
    # in turn -- confirm that is the intent.
    i = 0
    while i < s.qsize():
        print(s.peek())
        i = i+1

    '''
    url = 'http://tx3.cbg.163.com/'
    try:
        requester = Requester(url)
        requester.request("/help/")

    except Exception as e:
        print (e.args[0]['message'])

    webscan = WebScan(requester, test_path = '/help/', suffix= None, bdir = True)
    print webscan.scan("/help/1/")

    for bakdir_ext in ['.tar.gz', '.zip']:
        exist_dir_suffix = '/help//'
        ox = exist_dir_suffix.split('/')
        print ox
        ox.remove('')
        ooxx = ''

        ooxx = ''.join(('/' + _) if _ != '' else (_ + bakdir_ext) for _ in ox)


        print ooxx
    '''
    """

    from gevent import queue
    from copy import deepcopy
    s = queue.PriorityQueue()
    p = queue.Queue()
    s.put("a")
    s.put("b")
    p.queue = deepcopy(s.queue)
    print p

    s.get()
    print p.qsize()
    p.put("test")
    print p.queue
    """


--------------------------------------------------------------------------------
/webdirdig.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
 2 | #-*- coding:utf-8 -*-
 3 | 
 4 | __author__ = 'BlackYe.'
 5 | 
 6 | 
 7 | from lib.controller.controller import Controller
 8 | import sys
 9 | 
def main():
    """
    Command-line entry point: pass the first argument to ``Controller``.

    When no argument is given, a usage line is written to stderr and the
    process exits with status 2 instead of failing with an IndexError.
    """
    if len(sys.argv) < 2:
        sys.stderr.write('usage: %s <argument>\n' % sys.argv[0])
        sys.exit(2)
    Controller(sys.argv[1])


if __name__ == '__main__':
    main()


--------------------------------------------------------------------------------