├── IPlugin.py ├── Main.py ├── ProbeTool.py ├── README.md ├── TaskCenter.py ├── __init__.py ├── bin ├── masscan └── masscan.exe ├── common ├── IPy.py ├── __init__.py ├── db │ ├── __init__.py │ └── sqlite3_db.py ├── initsql.py ├── logger │ ├── __init__.py │ ├── log_config.py │ └── log_util.py ├── qqwry.py └── utils.py ├── constants.py ├── create_c_net_file.py ├── datas ├── ports.db └── qqwry.dat ├── fuzzdir ├── __init__.py ├── dict │ ├── directory.lst │ └── directory.test.lst └── dirfuzz.py ├── hosts.txt ├── http_banner.py ├── pool ├── __init__.py └── thread_pool.py ├── report ├── ReportCenter.py ├── __init__.py └── template │ ├── index.html │ ├── inspector.css │ ├── package.json │ └── utils.js ├── reportrs.jpg ├── result.txt ├── scanrs.jpg └── thirdparty ├── __init__.py ├── chardet ├── __init__.py ├── big5freq.py ├── big5prober.py ├── chardistribution.py ├── charsetgroupprober.py ├── charsetprober.py ├── cli │ ├── __init__.py │ └── chardetect.py ├── codingstatemachine.py ├── compat.py ├── cp949prober.py ├── enums.py ├── escprober.py ├── escsm.py ├── eucjpprober.py ├── euckrfreq.py ├── euckrprober.py ├── euctwfreq.py ├── euctwprober.py ├── gb2312freq.py ├── gb2312prober.py ├── hebrewprober.py ├── jisfreq.py ├── jpcntx.py ├── langbulgarianmodel.py ├── langcyrillicmodel.py ├── langgreekmodel.py ├── langhebrewmodel.py ├── langhungarianmodel.py ├── langthaimodel.py ├── langturkishmodel.py ├── latin1prober.py ├── mbcharsetprober.py ├── mbcsgroupprober.py ├── mbcssm.py ├── sbcharsetprober.py ├── sbcsgroupprober.py ├── sjisprober.py ├── universaldetector.py ├── utf8prober.py └── version.py └── connection ├── __init__.py ├── compat.py ├── exceptions.py ├── http_urllib3.py └── urllib3 ├── __init__.py ├── _collections.py ├── connection.py ├── connectionpool.py ├── contrib ├── __init__.py ├── _securetransport │ ├── __init__.py │ ├── bindings.py │ └── low_level.py ├── appengine.py ├── ntlmpool.py ├── pyopenssl.py ├── securetransport.py └── socks.py ├── exceptions.py ├── fields.py ├── filepost.py ├── packages ├── __init__.py ├── backports │ ├── __init__.py │ └── makefile.py ├── ordered_dict.py ├── six.py └── ssl_match_hostname │ ├── __init__.py │ └── _implementation.py ├── poolmanager.py ├── request.py ├── response.py └── util ├── __init__.py ├── connection.py ├── request.py ├── response.py ├── retry.py ├── selectors.py ├── ssl_.py ├── timeout.py ├── url.py └── wait.py /IPlugin.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import threading 3 | from common.logger.log_util import LogUtil as logging 4 | from common.utils import md5_string 5 | 6 | logger = logging.getLogger(__name__) 7 | mu = threading.Lock() 8 | 9 | class PLGSTATE: 10 | NORUN = 0x01 11 | RUNNING = 0x10 12 | FINISHED = 0x11 13 | 14 | class ReportManage(object): 15 | 16 | def __init__(self): 17 | self._results = None 18 | self._output_queue = None 19 | self._unique_hash = None 20 | 21 | @property 22 | def results(self): 23 | return self._results 24 | 25 | @property 26 | def unique_hash(self): 27 | return self._unique_hash 28 | 29 | @property 30 | def output_queue(self): 31 | return self._output_queue 32 | 33 | @classmethod 34 | def instance(self): 35 | rpt = ReportManage() 36 | rpt._results = [] 37 | rpt._unique_hash = [] 38 | rpt._output_queue = [] 39 | return rpt 40 | 41 | 42 | class IPlugin(object): 43 | 44 | def __init__(self, knowledgebase=None): 45 | self._id = None 46 | self._name = None 47 | self._level = 1 48 | self._cookies = "" 49 | self._proxies = {} 50 | 
self._result_manage = ReportManage.instance() 51 | self._state = PLGSTATE.NORUN 52 | 53 | 54 | @property 55 | def state(self): 56 | return self._state 57 | 58 | @property 59 | def id(self): 60 | if self._id is None: 61 | raise Exception("must be set plugin's id ") 62 | return self._id 63 | 64 | @property 65 | def name(self): 66 | if self._name is None: 67 | raise Exception("must be set plugin's name ") 68 | return self._name 69 | 70 | @property 71 | def cookies(self): 72 | return self._cookies 73 | 74 | @cookies.setter 75 | def cookies(self, value): 76 | self._cookies = value 77 | 78 | @property 79 | def proxies(self,type=1): 80 | return self._proxies 81 | 82 | #('http', ('127.0.0.1', 9050), ('username', 'password')),('socks5', ('127.0.0.1', 1080)) 83 | """ 84 | 'socks5': pycurl.PROXYTYPE_SOCKS5 85 | 'socks4': pycurl.PROXYTYPE_SOCKS4 86 | 'http': pycurl.PROXYTYPE_HTTP 87 | 'https': pycurl.PROXYTYPE_HTTP 88 | """ 89 | @proxies.setter 90 | def proxies(self, value): 91 | self._proxies = value 92 | 93 | @property 94 | def result_manage(self): 95 | return self._result_manage 96 | 97 | @result_manage.setter 98 | def result_manage(self,value): 99 | self._result_manage = value 100 | 101 | @property 102 | def level(self): 103 | return self._level 104 | 105 | def _report(self,package,unique=[]): 106 | if isinstance(package,list) or isinstance(package,tuple): 107 | if unique: 108 | rsdiff = list(set(unique) - set(range(len(package)))) 109 | if rsdiff: 110 | raise Exception('Unknown filter index({0})'.format(rsdiff)) 111 | else: 112 | unique = range(len(package)) 113 | filters = [str(package[x]) for x in unique] 114 | unique_hash = md5_string(",".join(filters)) 115 | elif isinstance(package,dict): 116 | filters = {} 117 | if unique: 118 | rsdiff = list(set(unique)-set(package.keys())) 119 | if rsdiff: 120 | raise Exception('Unknown filter fields({0})'.format(rsdiff)) 121 | else: 122 | unique = range(len(package.keys())) 123 | for x in unique: 124 | filters.update({x:package.get(x)}) 125 | 126 | unique_hash = md5_string(",".join([ str(x) for x in filters.values()])) 127 | else: 128 | raise Exception('unique Only support list,tuple,dict') 129 | 130 | if self.result_manage is None: 131 | self.result_manage = ReportManage.instance() 132 | 133 | if mu.acquire(True): 134 | if not unique_hash in self.result_manage.unique_hash: 135 | self.result_manage.unique_hash.append(unique_hash) 136 | self.result_manage.results.append(filters) 137 | self.result_manage.output_queue.append(package) 138 | mu.release() 139 | 140 | def _run(self, *args,**kwargs): 141 | raise Exception('unimplemented method') 142 | 143 | def _store(self, *args,**kwargs): 144 | raise Exception('unimplemented method') 145 | 146 | def _create_report(self, *args,**kwargs): 147 | raise Exception('unimplemented method') 148 | 149 | def cmd_run(self, *args,**kwargs): 150 | self._state = PLGSTATE.RUNNING 151 | try: 152 | t = threading.Thread(target=self._store) 153 | t.start() 154 | self._run(*args,**kwargs) 155 | except: 156 | import traceback 157 | msg = traceback.format_exc() 158 | logger.warn(msg) 159 | self._state = PLGSTATE.FINISHED 160 | 161 | def __str__(self): 162 | return 'plugin(plg_id=%d, name=%s)' % (self.id, self._name) 163 | 164 | -------------------------------------------------------------------------------- /Main.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import base64 3 | import os 4 | import socket 5 | import sys 6 | import uuid 7 | import subprocess 8 | 
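# Illustrative usage sketch for the IPlugin base class shown above (not part of
# Main.py). EchoPlugin and its sample data are hypothetical; the only assumed API
# is what IPlugin.py defines: the _run()/_store() hooks, _report() with its optional
# `unique` index filter, and cmd_run(), which starts _store() in a background thread
# before invoking _run().
from IPlugin import IPlugin

class EchoPlugin(IPlugin):
    def __init__(self):
        super(EchoPlugin, self).__init__()
        self._id = 99999      # IPlugin.id raises if this is left unset
        self._name = "echo"   # IPlugin.name raises if this is left unset

    def _run(self, *args, **kwargs):
        # unique=[0] hashes only the first field, so the second call is treated
        # as a duplicate of the first and silently dropped by _report().
        self._report(("127.0.0.1", 80, "tcp"), unique=[0])
        self._report(("127.0.0.1", 443, "tcp"), unique=[0])

    def _store(self, *args, **kwargs):
        pass  # results simply accumulate in self.result_manage for this sketch

plg = EchoPlugin()
plg.cmd_run()
print(plg.result_manage.results)       # -> [['127.0.0.1']]
print(plg.result_manage.output_queue)  # only the first tuple survives the dedup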
import time 9 | import re 10 | import cgi 11 | from optparse import OptionParser 12 | from multiprocessing import Process, Queue 13 | from TaskCenter import TaskCenter, TaskStatus 14 | from common.initsql import SQL1,SQL2 15 | from common.db.sqlite3_db import sqlite3_db 16 | from common.utils import query_service_and_banner, get_socket_banner, char_convert, computing_ports, CommonUtils, md5_string 17 | from ProbeTool import HttpWeb 18 | from constants import default_ports 19 | from fuzzdir.dirfuzz import DirFuzz 20 | from pool.thread_pool import ThreadPool 21 | from IPlugin import IPlugin 22 | 23 | from common.logger.log_util import LogUtil as logging 24 | from report.ReportCenter import Report 25 | from thirdparty.connection.http_urllib3 import HttpUtil 26 | 27 | logger = logging.getLogger(__name__) 28 | class PortScan(IPlugin): 29 | def __init__(self,msgqueue=None,taskstatus=None,statusqueue=None): 30 | super(PortScan, self).__init__() 31 | self.msgqueue = msgqueue 32 | self.statusqueue = statusqueue 33 | self.taskstatus = taskstatus 34 | self._id = 10000 35 | self._name = "portscan" 36 | self._level = 10 37 | self.rate = 500 38 | self.uuid_hash = md5_string(str(uuid.uuid4())) 39 | self.finished = False 40 | self.db = None 41 | self.taskid = 0 42 | self.portdb = os.path.join(os.path.dirname(__file__), 'repertory',format(time.strftime("%Y-%m-%d", time.localtime())),"{0}.port.db".format(time.strftime("%H_%M_%S", time.localtime()))) 43 | if not os.path.exists(os.path.dirname(self.portdb)): 44 | os.makedirs(os.path.dirname(self.portdb)) 45 | 46 | def init_db(self): 47 | self.db = sqlite3_db(self.portdb) 48 | self.db.create_table(SQL1) 49 | self.db.create_table(SQL2) 50 | logger.info("database (port.db) initialization completed") 51 | name = "assetscan_task_{0}".format(self.uuid_hash) 52 | self.db.insert('porttask', {"name":name,"status":1}, filter=False) 53 | rs = self.db.query_row("select id from porttask where name='{0}'".format(name)) 54 | self.taskid = rs[0] 55 | 56 | def report(self,ip,port,protocol): 57 | package = (ip,port,protocol,) 58 | self._report(package) 59 | 60 | def start_scanning(self,scanmode,command): 61 | if scanmode == "fast": 62 | preg = re.compile(r".*Discovered open port (?P<port>\d+)/(?P<protocol>\w+) on (?P<ip>((25[0-5]|2[0-4]\d|[01]?\d\d?)($|(?!\.$)\.)){4}).*",re.I) 63 | cmddir = os.path.abspath(os.path.join(os.path.dirname(__file__), 'bin')) 64 | process = subprocess.Popen(command, cwd=cmddir, shell=False, stdout=subprocess.PIPE, 65 | stderr=subprocess.STDOUT) 66 | while True: 67 | time.sleep(0.1) 68 | returncode = process.poll() 69 | line = process.stdout.readline() 70 | line = line.strip() 71 | if line: 72 | rs = re.match(preg, line) 73 | if rs: 74 | self.report(rs.group("ip"), rs.group("port"), rs.group("protocol")) 75 | pid = process.pid 76 | if returncode is None: 77 | continue 78 | else: 79 | break 80 | else: 81 | pool = ThreadPool(50) 82 | iplist,portlist = command 83 | for ip in iplist: 84 | for port in portlist: 85 | pool.add_task(self.port_scan,ip,port) 86 | pool.wait_all_complete() 87 | 88 | def port_scan(self,ip,port): 89 | try: 90 | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 91 | s.settimeout(0.2) 92 | port = int(port) 93 | if s.connect_ex((ip, port)) == 0: 94 | self.report(ip, port, "tcp") 95 | except Exception as e: 96 | pass 97 | finally: 98 | s.close() 99 | 100 | def _run(self, *args,**kwargs): 101 | self.init_db() 102 | logger.info("tasks start running") 103 | if any([not kwargs.get("ipscope",None),not kwargs.get("ports",None)]): 104 | return 105 | ipscope
= CommonUtils.package_ipscope(kwargs.get("ipscope")) 106 | ports = computing_ports(kwargs.get("ports")) 107 | scanmode = kwargs.get("scanmode","fast") 108 | pseudo_ip = kwargs.get("pseudo_ip","") 109 | pseudo_port = kwargs.get("pseudo_port","") 110 | sps = len(ports) / 1000 111 | if (sps <= 1): 112 | ports_list = [ports] 113 | else: 114 | ports_list = CommonUtils.div_list(ports, sps) 115 | 116 | if len(ports_list) <= 1: 117 | for plist in ports_list: 118 | pl = ",".join([str(x) for x in plist]) 119 | command = CommonUtils.create_command(scanmode,ipscope=ipscope,ports=pl,pseudo_ip=pseudo_ip,pseudo_port=pseudo_port,rate=self.rate) 120 | self.start_scanning(scanmode,command) 121 | else: 122 | pool = ThreadPool(5) 123 | for plist in ports_list: 124 | pl = ",".join([str(x) for x in plist]) 125 | command = CommonUtils.create_command(scanmode,ipscope=ipscope,ports=pl,pseudo_ip=pseudo_ip,pseudo_port=pseudo_port,rate=self.rate) 126 | pool.add_task(self.start_scanning,scanmode,command) 127 | pool.wait_all_complete() 128 | self.finished = True 129 | TaskCenter.update_task_status(self.statusqueue,"portscan",TaskStatus.FINISHED) if self.statusqueue else None 130 | 131 | def _store(self): 132 | logger.info("start collecting results information.........") 133 | self.product = 0 134 | httpclient = HttpUtil() 135 | while not self.finished: 136 | time.sleep(0.2) 137 | if not self.result_manage.output_queue: 138 | continue 139 | else: 140 | ip, port, protocol = self.result_manage.output_queue.pop() 141 | ref_service, ref_banner = query_service_and_banner(port, protocol) 142 | web_banner, web_service, ostype, assettype, domain, position, proext = HttpWeb.detect(ip, port,httpclient) 143 | banner = web_banner if web_banner else get_socket_banner(ip, port, ref_banner) 144 | banner = banner.replace("\n", "").replace("\r", "") 145 | banner = char_convert(banner) 146 | banner = base64.b64encode(banner) 147 | service = web_service if web_service else ref_service 148 | banner = cgi.escape(banner) 149 | rs_one = {"ip": ip,"taskid":self.taskid,"domain": domain,"port": str(port), "service": service, "banner": banner, "protocol": protocol,"assettype": assettype, "position": position, "proext": proext} 150 | self.db.insert('asset',rs_one,filter=False) 151 | if self.msgqueue: 152 | rs = self.db.query_row("select id from asset where ip='{0}' and port='{1}' and taskid='{2}'".format(ip, port,self.taskid)) 153 | rs_one.update({"assetid":rs[0]}) 154 | self.product = self.product + 1 155 | self.msgqueue.put(rs_one) 156 | 157 | def cmdLineParser(): 158 | optparser = OptionParser() 159 | optparser.add_option("-i", "--ipscope", dest="ipscope", type="string", help="Specify IP scan range,eg: 127.0.0.1/24 or 10.65.10.3-10.65.10.255") 160 | optparser.add_option("-p", "--portscope", dest="portscope", type="string",default="web_ports",help="Specify Port scan range,eg: 80,443,8080 or web_ports or top_100 or top_1000") 161 | optparser.add_option("-m", "--scanmode", dest="scanmode", type="string", default="fast", help="Scan mode[fast,low],default:fast") 162 | optparser.add_option("-f", "--file", dest="file", type="string",default="",help="asset's file") 163 | optparser.add_option("-t", "--task-run",action="store_true", dest="taskstart", default=False,help="Start in task mode,default cmd run") 164 | try: 165 | (options, args) = optparser.parse_args() 166 | except Exception, err: 167 | sys.exit(0) 168 | 169 | if len(sys.argv) < 2: 170 | optparser.print_help() 171 | sys.exit(0) 172 | 173 | ipscope = options.ipscope 174 | portscope = 
options.portscope 175 | assetfile = options.file 176 | scanmode = options.scanmode 177 | taskstart = options.taskstart 178 | if assetfile: 179 | with open(assetfile,"rb+") as file: 180 | ipscope = file.read() 181 | portscope = default_ports.get(portscope,portscope) 182 | if taskstart: 183 | msgqueue = Queue() 184 | statusqueue = Queue() 185 | mainscan = PortScan(msgqueue,statusqueue=statusqueue) 186 | dirfuzz = DirFuzz(statusqueue=statusqueue) 187 | TaskCenter.register(statusqueue,[mainscan.name,dirfuzz.name]) 188 | dirdb = dirfuzz.fuzzdb 189 | portdb = mainscan.portdb 190 | rpt_tools = Report(portdb, dirdb) 191 | mainprocess = Process(target=mainscan.cmd_run, kwargs={"ipscope":ipscope,"ports":portscope,"scanmode":scanmode}) 192 | dirfuzzprocess = Process(target=dirfuzz.funzz,args=(msgqueue,)) 193 | taskcenterprocess = Process(target=TaskCenter.run,args=(statusqueue,)) 194 | mainprocess.start() 195 | dirfuzzprocess.start() 196 | taskcenterprocess.start() 197 | mainprocess.join() 198 | dirfuzzprocess.join() 199 | taskcenterprocess.join() 200 | mainprocess.terminate() 201 | dirfuzzprocess.terminate() 202 | taskcenterprocess.terminate() 203 | rpt_tools.report_html() 204 | 205 | else: 206 | test = PortScan() 207 | test.cmd_run(ipscope=ipscope, ports=portscope,scanmode=scanmode) 208 | rpt_tools = Report(test.portdb) 209 | rpt_tools.report_html() 210 | 211 | if __name__ == "__main__": 212 | cmdLineParser() -------------------------------------------------------------------------------- /ProbeTool.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | from __future__ import division 3 | import re 4 | from common.utils import get_server_profile, get_banner_by_content 5 | from constants import finger2https 6 | from thirdparty.connection.http_urllib3 import HttpUtil 7 | 8 | class HttpWeb(object): 9 | NOT_DETECT_PORTS = [22,21,3389] 10 | 11 | @classmethod 12 | def is_ssl_request(cls,content): 13 | for x in finger2https: 14 | if len(content) < 2000 and x.lower() in content.lower(): 15 | return True 16 | return False 17 | 18 | @classmethod 19 | def detect(cls,ip,port,httpclient=None): 20 | assettype = 0 21 | port = int(port) 22 | if httpclient is None: 23 | httpclient = HttpUtil() 24 | banner,service,ostype,proext,domain,position= "","","","","","" 25 | if not port in cls.NOT_DETECT_PORTS: 26 | if port == 80: 27 | schemas = ["http"] 28 | elif port == 443 or port == 8443: 29 | schemas = ["https"] 30 | else: 31 | schemas = ["http", "https"] 32 | pregx = re.compile(r"<title>(.*?)</title>", re.I) 33 | for schema in schemas: 34 | url = "{schema}://{ip}:{port}".format(schema=schema, ip=ip if not domain else domain, port=port) 35 | try: 36 | res = httpclient.request(url,timeout=1,redirect=True) 37 | content = res.content 38 | if cls.is_ssl_request(content): 39 | continue 40 | headers = res.headers 41 | ostype, server, server_app = get_server_profile(headers) 42 | ostype = OsType.get_ostype(port=port,server=server,server_app=server_app,res=res) 43 | service = "{0} web application ".format(schema) 44 | if server: 45 | service = service + " server: {0}".format(server) 46 | if server_app: 47 | service = service + " application: {0}".format(server_app) 48 | 49 | if content: 50 | rs = re.findall(pregx, content) 51 | if rs and len(rs) > 0: 52 | banner = rs[0] if not get_banner_by_content(res) else "["+get_banner_by_content(res)+"] ==" + rs[0] 53 | else: 54 | banner = content[0:100] if not get_banner_by_content(res) else "["+get_banner_by_content(res)+"] ==" +
res.content[0:100] 55 | assettype = 1 56 | proext = schema 57 | break 58 | except: 59 | banner = "" 60 | assettype = 0 61 | return banner,service,ostype,assettype,domain,position,proext 62 | 63 | class OsType(object): 64 | WINDOWS_PORTS = [3389] 65 | LINUX_PORTS = [] 66 | LINUX = "unix" 67 | WINDOWS = "windows" 68 | @classmethod 69 | def get_ostype(cls, port=None, server=None, server_app=None, res=None): 70 | ostype = "unknown" 71 | only_windows_ports = list(set(cls.WINDOWS_PORTS)-set(cls.LINUX_PORTS)) 72 | only_linux_ports = list(set(cls.LINUX_PORTS)-set(cls.WINDOWS_PORTS)) 73 | if port: 74 | if isinstance(port,int): 75 | if port and (port in only_windows_ports): 76 | ostype = cls.WINDOWS 77 | elif port in only_linux_ports: 78 | ostype = cls.LINUX 79 | 80 | elif isinstance(port,list): 81 | counts = len(set(cls.WINDOWS_PORTS+cls.LINUX_PORTS)) 82 | win_num = len(set(port) & set(cls.WINDOWS_PORTS)) 83 | lin_num = len(set(port) & set(cls.LINUX_PORTS)) 84 | diff = abs(win_num-lin_num)/counts 85 | if diff > 0.7: 86 | if win_num > lin_num: 87 | ostype = cls.WINDOWS 88 | else: 89 | ostype = cls.LINUX 90 | if server: 91 | if isinstance(server,list): 92 | server = ",".join(server) 93 | regx = re.compile(r"Microsof|iis",re.I) 94 | if regx.findall(server): 95 | ostype = cls.WINDOWS 96 | 97 | if server_app: 98 | if any(["asp" in server_app,"aspx" in server_app]): 99 | ostype = cls.WINDOWS 100 | 101 | if res and res.status_code == 500: 102 | regx = re.compile(r"[a-zA-Z]:(?:\\(?:[a-zA-Z0-9_]+.[a-zA-Z0-9_]{1,16}))+", re.I) 103 | if regx.findall(res.content): 104 | ostype = "windows" 105 | 106 | return ostype 107 | 108 | if __name__ == "__main__": 109 | banner, service, ostype, assettype, domain,position,proext = HttpWeb.detect('42.48.85.15',8082) 110 | print banner 111 | print service 112 | print ostype 113 | print assettype 114 | print position 115 | print domain 116 | print proext 117 | 118 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # assetscan 2 | 资产扫描工具 3 | ``` 4 | Usage: Main.py [options] 5 | 6 | Options: 7 | -h, --help show this help message and exit 8 | -i IPSCOPE, --ipscope=IPSCOPE 9 | Specify IP scan range,eg: 127.0.0.1/24 or 10 | 10.65.10.3-10.65.10.255 11 | -p PORTSCOPE, --portscope=PORTSCOPE 12 | Specify Port scan range,eg: 80,443,8080 or web_ports 13 | or top_100 or top_1000 14 | -m SCANMODE, --scanmode=SCANMODE 15 | Scan mode[fast,low],default:fast 16 | -f FILE, --file=FILE asset's file 17 | -t, --task-run Start in task mode,default cmd run 18 | ``` 19 | ###参数介绍 20 | ``` 21 | ipscope 为指定扫描范围,格式如下 10.0.0.1/24 或者 10.0.0.1-255 或者 10.0.0.1-10.0.0.255 22 | portscope 为指定扫描端口,系统提供web_ports,top_100,top_1000三大类,形式如下: 23 | 1.python Main.py -i 10.0.0.1/24 -p 80,443,8080 24 | 2.python Main.py -i 10.0.0.1/24 -p 80,8080-10000 25 | 3.python Main.py -i 10.0.0.1/24 -p web_ports 26 | scanmode 为指定扫描模式,默认情况下是fast模式,由于有些内网权限非root,所以此时建议采用慢模式扫描 27 | file 为指定文件进行批量扫描 28 | task-run 为任务模式扫描,此时任务模式采用多进程多线程模式,会一并扫描目录模块 29 | ``` 30 | ###扫描console信息如下 31 | 32 | ![avatar](scanrs.jpg) 33 | 34 | ###生成的报表信息如下 35 | ![avatar](reportrs.jpg) 36 | 37 | -------------------------------------------------------------------------------- /TaskCenter.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import time 3 | from common.logger.log_util import LogUtil as logging 4 | logger = logging.getLogger(__name__) 5 | class TaskStatus: 6 | 
START = 0x01 7 | RUNNING = 0x02 8 | ERROR = 0x03 9 | FINISHED = 0x04 10 | 11 | class TaskCenter(object): 12 | 13 | @classmethod 14 | def register(cls,statusqueue,tskname): 15 | taskinfo = {} 16 | for tname in tskname: 17 | taskinfo.update({tname:TaskStatus.START}) 18 | if not statusqueue.empty(): 19 | sts = statusqueue.get(True) 20 | sts.update(taskinfo) 21 | statusqueue.put(sts) 22 | else: 23 | statusqueue.put(taskinfo) 24 | 25 | @classmethod 26 | def run(cls,statusqueue): 27 | finished = False 28 | while not finished: 29 | time.sleep(0.2) 30 | if not statusqueue.empty(): 31 | status = statusqueue.get(True) 32 | rs = [x for x in status.values() if x == TaskStatus.FINISHED] 33 | if len(rs) == len(status): 34 | finished = True 35 | logger.info("All tasks({0}) completed".format(",".join(status.keys()))) 36 | statusqueue.put(status) 37 | 38 | @classmethod 39 | def update_task_status(cls,statusqueue,taskname,taskstatus): 40 | if not statusqueue.empty(): 41 | status = statusqueue.get(True) 42 | if taskname in status.keys(): 43 | status.update({taskname:taskstatus}) 44 | statusqueue.put(status) 45 | 46 | @classmethod 47 | def task_is_finished(cls,statusqueue,taskname): 48 | if not statusqueue.empty(): 49 | status = statusqueue.get(True) 50 | statusqueue.put(status) 51 | else: 52 | status = {} 53 | if status.get(taskname,None) == TaskStatus.FINISHED: 54 | return True 55 | else: 56 | return False 57 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkgh006/assetscan/12492689a4ec214fb68019ffa2b1903f9ae4c836/__init__.py -------------------------------------------------------------------------------- /bin/masscan: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkgh006/assetscan/12492689a4ec214fb68019ffa2b1903f9ae4c836/bin/masscan -------------------------------------------------------------------------------- /bin/masscan.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkgh006/assetscan/12492689a4ec214fb68019ffa2b1903f9ae4c836/bin/masscan.exe -------------------------------------------------------------------------------- /common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkgh006/assetscan/12492689a4ec214fb68019ffa2b1903f9ae4c836/common/__init__.py -------------------------------------------------------------------------------- /common/db/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkgh006/assetscan/12492689a4ec214fb68019ffa2b1903f9ae4c836/common/db/__init__.py -------------------------------------------------------------------------------- /common/initsql.py: -------------------------------------------------------------------------------- 1 | SQL1=""" 2 | CREATE TABLE `porttask` ( 3 | `id` INTEGER PRIMARY KEY AUTOINCREMENT, 4 | `name` varchar(100) DEFAULT NULL, 5 | `status` INTEGER DEFAULT 0 6 | ) 7 | """ 8 | SQL2=""" 9 | CREATE TABLE `asset` ( 10 | `id` INTEGER PRIMARY KEY AUTOINCREMENT, 11 | `taskid` INTEGER DEFAULT NULL, 12 | `ip` varchar(100) NOT NULL, 13 | `port` varchar(100) DEFAULT NULL, 14 | `domain` varchar(100) DEFAULT NULL, 15 | `banner` varchar(500) DEFAULT NULL, 16 | `protocol` varchar(100) DEFAULT NULL, 17 | `service` 
varchar(200) DEFAULT NULL, 18 | `assettype` int(10) DEFAULT NULL, 19 | `position` varchar(200) DEFAULT NULL, 20 | `proext` varchar(50) DEFAULT NULL 21 | ) 22 | """ 23 | SQL3=""" 24 | CREATE TABLE `fuzztask` ( 25 | `id` INTEGER PRIMARY KEY AUTOINCREMENT, 26 | `taskid` INTEGER DEFAULT NULL, 27 | `assetid` INTEGER DEFAULT NULL, 28 | `url` varchar(500) DEFAULT NULL, 29 | `path` varchar(500) DEFAULT NULL, 30 | `reqcode` INTEGER DEFAULT 0, 31 | `banner` varchar(500) DEFAULT NULL, 32 | `reslength` INTEGER DEFAULT 0, 33 | `status` INTEGER DEFAULT 0 34 | ) 35 | """ 36 | -------------------------------------------------------------------------------- /common/logger/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkgh006/assetscan/12492689a4ec214fb68019ffa2b1903f9ae4c836/common/logger/__init__.py -------------------------------------------------------------------------------- /common/logger/log_config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | import os 4 | import logging, logging.config 5 | import time 6 | 7 | SYSLOG_NAME = format(time.strftime("%y-%m-%d_%H_%M_%S", time.localtime()))+".log" 8 | LOGGERPATH = os.path.join(os.path.dirname(__file__), "../../log/") 9 | if not os.path.exists(LOGGERPATH): 10 | os.makedirs(LOGGERPATH) 11 | 12 | LOGGING = { 13 | 'version': 1, 14 | 'disable_existing_loggers': True, 15 | 'formatters': { 16 | 'verbose': { 17 | 'format': '%(asctime)s %(levelname)s {%(process)d-%(thread)d %(module)s.%(funcName)s:%(lineno)d} %(message)s', 18 | 'datefmt': '%y-%m-%d %H:%M:%S' 19 | }, 20 | 'simple': {'format': '%(levelname)s %(message)s'}, 21 | 'default': { 22 | 'format': '%(asctime)s %(message)s', 23 | 'datefmt': '%Y-%m-%d %H:%M:%S' 24 | } 25 | }, 26 | 'handlers': { 27 | 'null': { 28 | 'level': 'DEBUG', 29 | 'class': 'logging.NullHandler', 30 | }, 31 | 'console': { 32 | 'level': 'DEBUG', 33 | 'class': 'logging.StreamHandler', 34 | 'formatter': 'default' 35 | }, 36 | 37 | 'file': { 38 | 'level': 'DEBUG', 39 | 'class': 'logging.handlers.TimedRotatingFileHandler', 40 | 'filename': os.path.join(os.environ.get('LOG_HOME', ''), 41 | os.environ.get('LOG_FILE_NAME', os.path.join(LOGGERPATH, SYSLOG_NAME))), 42 | 'formatter': 'verbose', 43 | 'encoding': 'utf8' 44 | 45 | }, 46 | }, 47 | 'loggers': { 48 | 'mylogger': { 49 | 'level': 'DEBUG', 50 | 'handlers': ['file', 'console'], 51 | 'propagate': True 52 | } 53 | } 54 | } 55 | 56 | if __name__ == '__main__': 57 | logging.config.dictConfig(LOGGING) 58 | logger = logging.getLogger('mylogger') 59 | logger.info('Hello') 60 | -------------------------------------------------------------------------------- /common/logger/log_util.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | import threading,sys,os 4 | import log_config 5 | LEGEND = 70 6 | class LogUtil(object): 7 | import logging 8 | from logging import config 9 | 10 | logging.addLevelName(LEGEND, 'LEGEND') 11 | config.dictConfig(log_config.LOGGING) 12 | 13 | _log_server = None 14 | _mutex = threading.Condition() 15 | 16 | ERROR = logging.ERROR 17 | WARN = logging.WARN 18 | INFO = logging.INFO 19 | DEBUG = logging.DEBUG 20 | 21 | @staticmethod 22 | def getLogger(name=None, via_socket=True): 23 | import logging 24 | logger = logging.getLogger('mylogger') 25 | 26 | def findCaller(): 27 | f = sys._getframe(2) 28 | rv = "(unknown 
file)", 0, "(unknown function)" 29 | while hasattr(f, "f_code"): 30 | co = f.f_code 31 | filename = os.path.normcase(co.co_filename) 32 | if filename == __file__: 33 | f = f.f_back 34 | continue 35 | rv = (co.co_filename, f.f_lineno, co.co_name) 36 | break 37 | return rv 38 | 39 | def legend(msg, *args, **kwargs): 40 | try: 41 | fn, lno, func = findCaller() 42 | except ValueError: 43 | fn, lno, func = "(unknown file)", 0, "(unknown function)" 44 | fn = os.path.splitext(os.path.basename(fn))[0] 45 | logger.log(LEGEND, '{%s.%s:%d} %s', fn, func, lno, msg, *args, **kwargs) 46 | logger.__setattr__('legend', legend) 47 | 48 | return logger -------------------------------------------------------------------------------- /common/qqwry.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | '''用Python脚本查询纯真IP库 4 | 5 | QQWry.Dat的格式如下: 6 | 7 | +----------+ 8 | | 文件头 | (8字节) 9 | +----------+ 10 | | 记录区 | (不定长) 11 | +----------+ 12 | | 索引区 | (大小由文件头决定) 13 | +----------+ 14 | 15 | 文件头:4字节开始索引偏移值+4字节结尾索引偏移值 16 | 17 | 记录区: 每条IP记录格式 ==> IP地址[国家信息][地区信息] 18 | 19 | 对于国家记录,可以有三种表示方式: 20 | 21 | 字符串形式(IP记录第5字节不等于0x01和0x02的情况), 22 | 重定向模式1(第5字节为0x01),则接下来3字节为国家信息存储地的偏移值 23 | 重定向模式(第5字节为0x02), 24 | 25 | 对于地区记录,可以有两种表示方式: 字符串形式和重定向 26 | 27 | 最后一条规则:重定向模式1的国家记录后不能跟地区记录 28 | 29 | 索引区: 每条索引记录格式 ==> 4字节起始IP地址 + 3字节指向IP记录的偏移值 30 | 31 | 索引区的IP和它指向的记录区一条记录中的IP构成一个IP范围。查询信息是这个 32 | 范围内IP的信息 33 | 34 | ''' 35 | 36 | import sys 37 | import socket 38 | from struct import pack, unpack 39 | 40 | import os 41 | 42 | 43 | class IPInfo(object): 44 | '''QQWry.Dat数据库查询功能集合 45 | ''' 46 | 47 | def __init__(self, dbname=None): 48 | ''' 初始化类,读取数据库内容为一个字符串, 49 | 通过开始8字节确定数据库的索引信息''' 50 | self.dbname = dbname = os.path.join(os.path.dirname(os.path.abspath(__file__)),"../datas/qqwry.dat") if not dbname else dbname 51 | # f = file(dbname, 'r') 52 | 53 | # Demon注:在Windows下用'r'会有问题,会把\r\n转换成\n 54 | # 详见http://demon.tw/programming/python-open-mode.html 55 | # 还有Python文档中不提倡用file函数来打开文件,推荐用open 56 | f = open(dbname, 'rb') 57 | 58 | self.img = f.read() 59 | f.close() 60 | 61 | # QQWry.Dat文件的开始8字节是索引信息,前4字节是开始索引的偏移值, 62 | # 后4字节是结束索引的偏移值。 63 | # (self.firstIndex, self.lastIndex) = unpack('II', self.img[:8]) 64 | 65 | # Demon注:unpack默认使用的endian是和机器有关的 66 | # Intel x86和AMD64(x86-64)是little-endian 67 | # Motorola 68000和PowerPC G5是big-endian 68 | # 而纯真数据库全部采用了little-endian字节序 69 | # 所以在某些big-endian的机器上原代码会出错 70 | (self.firstIndex, self.lastIndex) = unpack('= 400 and res.status_code < 500) or (res.status_code > 500) or (res.status_code < 200)) 78 | if condition2: 79 | pass 80 | else: 81 | if not condition1: 82 | if mu.acquire(): 83 | content = res.content[0:100] if not get_banner_by_content(res) else "["+get_banner_by_content(res)+"] ==" + res.content[0:100] 84 | content = content.replace("\n","").replace("\r","") 85 | rs_one = {"taskid":taskid,"assetid":assetid,"url":newurl,"path":filename,"reqcode":res.status_code,"banner":cgi.escape(base64.b64encode(char_convert(content))),"reslength":len(res.content),"status":1} 86 | self.fuzzdb.insert('fuzztask', rs_one, filter=False) 87 | mu.release() 88 | except: 89 | pass 90 | 91 | def result_unique(self): 92 | sql = "select * from (select *,count(reslength) as flag from fuzztask where taskid={0} group by reslength)".format(self.taskid) 93 | rs = self.fuzzdb.queryall(sql) 94 | sql_1 = "delete from fuzztask" 95 | sql_2 = "update sqlite_sequence SET seq = 0 where name ='fuzztask'" 96 | self.fuzzdb.query(sql_1) 97 | self.fuzzdb.query(sql_2) 98 | for 
id,taskid,assetid,url,path,reqcode,banner,reslength,status,count in rs: 99 | rs_one = {"taskid": taskid, "assetid": assetid, "url": url,"path":path,"reqcode":reqcode,"banner": banner, "reslength": reslength, "status": 1} 100 | self.fuzzdb.insert('fuzztask', rs_one, filter=False) 101 | logger.info("url:{0} ".format(url)) 102 | 103 | def funzz(self,msgqueue=None): 104 | if msgqueue: 105 | self.taskrun = True 106 | self.init_db() 107 | self.init_dir_dict() 108 | tp = ThreadPool(10) 109 | self.httpclient = HttpUtil() 110 | if msgqueue is None: 111 | if not self.single: 112 | rs = self.assetdb.query_all("select * from asset") 113 | for id, taskid,ip, port, domain, banner, protocol, service, assettype, position, schema in rs: 114 | if self.taskid is None: 115 | self.taskid = taskid 116 | web_banner, web_service, ostype, assettype, domain, position, proext = HttpWeb.detect(ip, port,self.httpclient) 117 | if proext: 118 | url = "{schema}://{ip}:{port}".format(schema=proext,ip=ip,port=port) 119 | rs = self.cache_content(taskid,id,url) 120 | if rs: 121 | for x in self.filename: 122 | tp.add_task(self.req_ad_file,taskid,id,url,x,rs) 123 | else: 124 | self.taskid = -100 125 | rs = self.cache_content(self.taskid,-100,self.url) 126 | for x in self.filename: 127 | tp.add_task(self.req_ad_file, self.taskid,-100,self.url, x, rs) 128 | else: 129 | task_null_count = 0 130 | while not self.finished: 131 | time.sleep(0.2) 132 | if task_null_count >= 5: 133 | TaskCenter.update_task_status(self.statusqueue, "dirscan", TaskStatus.FINISHED) 134 | self.finished = True 135 | continue 136 | if not msgqueue.empty(): 137 | rs_one = msgqueue.get(True) 138 | self.taskid = rs_one.get("taskid") 139 | web_banner, web_service, ostype, assettype, domain, position, proext = HttpWeb.detect(rs_one.get("ip"), rs_one.get("port"),self.httpclient) 140 | if proext: 141 | url = "{schema}://{ip}:{port}".format(schema=proext, ip=rs_one.get("ip"), port=rs_one.get("port")) 142 | rs = self.cache_content(self.taskid,rs_one.get("assetid"),url) 143 | if rs: 144 | for x in self.filename: 145 | tp.add_task(self.req_ad_file,self.taskid,rs_one.get("assetid"), url, x, rs) 146 | else: 147 | if TaskCenter.task_is_finished(self.statusqueue,"portscan"): 148 | task_null_count = task_null_count+1 149 | time.sleep(0.5) 150 | 151 | tp.wait_all_complete() 152 | self.result_unique() 153 | 154 | if __name__ == "__main__": 155 | optparser = OptionParser() 156 | optparser.add_option("-d", "--dbname", dest="dbname", type="string", default="", help="port scan result's db") 157 | optparser.add_option("-u", "--url", dest="url", type="string", default="", help="url cues") 158 | try: 159 | (options, args) = optparser.parse_args() 160 | except Exception, err: 161 | sys.exit(0) 162 | if len(sys.argv) < 2: 163 | optparser.print_help() 164 | sys.exit(0) 165 | dbname = options.dbname 166 | url = options.url 167 | test = DirFuzz(dbname=dbname,url=url) 168 | test.funzz() 169 | -------------------------------------------------------------------------------- /hosts.txt: -------------------------------------------------------------------------------- 1 | wx.cctaa-wx.cn 2 | cdn.cctaa-wx.cn 3 | bbs.cctaa-wx.cn 4 | test.cctaa-wx.cn 5 | 2015.cctaa-wx.cn 6 | www.cctaa-wx.cn 7 | mp.cctaa-wx.cn 8 | js.cctaa-wx.cn 9 | book.cctaa-wx.cn 10 | -------------------------------------------------------------------------------- /http_banner.py: -------------------------------------------------------------------------------- 1 | import threading 2 | from ProbeTool import HttpWeb 3 | from 
common.utils import query_service_and_banner, get_socket_banner, CommonUtils 4 | from constants import default_ports 5 | from pool.thread_pool import ThreadPool 6 | mu = threading.Lock() 7 | ports = default_ports.get("web_ports") 8 | with open("hosts.txt", "rb+") as file: 9 | ipscope = file.read() 10 | 11 | domains = CommonUtils.package_ipscope(ipscope,handle_ip=False,retType="list") 12 | def scanner(ip,port): 13 | ref_service, ref_banner = query_service_and_banner(port, "tcp") 14 | web_banner, web_service, ostype, assettype, domain, position, proext = HttpWeb.detect(ip, port) 15 | banner = web_banner if web_banner else get_socket_banner(domain, port, ref_banner) 16 | if mu.acquire(True): 17 | if proext: 18 | msg = "{proext}://{domain}:{port} {banner}\n".format(proext=proext,domain=ip,port=port,banner=banner) 19 | f = open("result.txt","ab+") 20 | f.write(msg) 21 | f.close() 22 | mu.release() 23 | 24 | f = open("result.txt","wb+") 25 | f.truncate() 26 | f.close() 27 | pool = ThreadPool(30) 28 | for domain in domains: 29 | for port in ports: 30 | pool.add_task(scanner,domain,port) 31 | pool.wait_all_complete() -------------------------------------------------------------------------------- /pool/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkgh006/assetscan/12492689a4ec214fb68019ffa2b1903f9ae4c836/pool/__init__.py -------------------------------------------------------------------------------- /pool/thread_pool.py: -------------------------------------------------------------------------------- 1 | import Queue 2 | import threading 3 | 4 | 5 | class ThreadPool(object): 6 | def __init__(self, thread_num): 7 | self.task_queue = Queue.Queue() 8 | self.threads = [] 9 | self.__init_thread_pool(thread_num) 10 | 11 | def __init_thread_pool(self,thread_num): 12 | for i in range(thread_num): 13 | worker = Worker(self.task_queue) 14 | worker.setDaemon(True) 15 | worker.start() 16 | self.threads.append(worker) 17 | 18 | def add_task(self, func, *args): 19 | self.task_queue.put((func, args)) 20 | 21 | def wait_all_complete(self): 22 | self.task_queue.join() 23 | self._terminate_workers() 24 | 25 | def force_complete(self): 26 | self.clear_tasks() 27 | self._terminate_workers() 28 | 29 | def clear_tasks(self): 30 | while not self.task_queue.empty(): 31 | self.task_queue.get_nowait() 32 | self.task_queue.task_done() 33 | def _terminate_workers(self): 34 | for worker in self.threads: 35 | worker.terminate() 36 | class Worker(threading.Thread): 37 | def __init__(self, task_queue): 38 | super(Worker, self).__init__() 39 | self.task_queue = task_queue 40 | self.stop = False 41 | 42 | def run(self): 43 | max_len = 64 44 | while not self.stop: 45 | try: 46 | do, args = self.task_queue.get(timeout=1) 47 | args_desc = str(args) 48 | if len(args_desc) > max_len: 49 | pass 50 | try: 51 | do(*args) 52 | except: 53 | pass 54 | if self.stop: 55 | pass 56 | self.task_queue.task_done() 57 | except: 58 | pass 59 | def terminate(self): 60 | self.stop = True -------------------------------------------------------------------------------- /report/ReportCenter.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import base64 3 | import json 4 | import os 5 | import shutil 6 | import time 7 | 8 | from common.db.sqlite3_db import sqlite3_db 9 | from common.utils import update_file_content 10 | from common.logger.log_util import LogUtil as logging 11 | logger = 
logging.getLogger(__name__) 12 | 13 | class Report(object): 14 | def __init__(self,portdb,dirdb=None): 15 | self.portdb = portdb 16 | self.dirdb = dirdb 17 | self.report_dir = os.path.dirname(self.portdb) 18 | self.report_tpl_dir = os.path.join(os.path.dirname(__file__),"template") 19 | self.port_db = sqlite3_db(self.portdb) 20 | self.dir_db = sqlite3_db(self.dirdb) if self.dirdb else None 21 | 22 | 23 | def create_scan_datajson(self): 24 | port_rs = self.port_db.queryall("select * from asset") 25 | ip_port_map = {} 26 | ip_children = [] 27 | for id, taskid, ip, port, domain, banner, protocol, service, assettype, position, schema in port_rs: 28 | if self.dir_db: 29 | dir_rs = self.dir_db.queryall("select * from fuzztask where taskid={0} and assetid={1}".format(taskid,id)) 30 | if dir_rs: 31 | path_children = [] 32 | for id,taskid,assetid,url,path,reqcode,banner,reslength,status in dir_rs: 33 | path_children.append({"name": "/{0} (code:{1})".format(path, reqcode),"children": [{"name": banner, "type": "path", "value": url}]}) 34 | else: 35 | if schema: 36 | path_children = [{"name": "/","children": [{"name": banner, "type": "path", "value":"{schema}://{ip}:{port}".format(schema=schema, ip=ip, port=port)}]}] 37 | else: 38 | path_children = [{"name": "Unknown", "children": [{"name": base64.b64decode(banner)}]}] 39 | else: 40 | if schema: 41 | path_children = [{"name": "/", "children": [{"name": banner, "type": "path","value": "{schema}://{ip}:{port}".format(schema=schema,ip=ip,port=port)}]}] 42 | else: 43 | path_children = [{"name": "Unknown", "children": [{"name": base64.b64decode(banner)}]}] 44 | 45 | if ip_port_map.has_key(ip): 46 | if port not in ip_port_map.get(ip): 47 | ip_port_map.get(ip).append(port) 48 | for x in ip_children: 49 | if x.get("name") == ip: 50 | x.get("children").append({"name": port, "children": path_children}) 51 | break 52 | else: 53 | ip_port_map.update({ip:[port]}) 54 | ip_children.append({"name": ip, "children": [{"name": port, "children": path_children}]}) 55 | datajson = json.dumps({"name":u"结果","children":ip_children}) 56 | return datajson 57 | 58 | def report_html(self): 59 | files = ["index.html","inspector.css","package.json","utils.js"] 60 | report_files = os.path.join(self.report_dir,"{0}_files".format(time.strftime("%H_%M_%S", time.localtime()))) 61 | if not os.path.exists(report_files): 62 | os.makedirs(report_files) 63 | for f in files: 64 | shutil.copy(os.path.join(self.report_tpl_dir,f),report_files) 65 | 66 | jsondata = self.create_scan_datajson() 67 | update_file_content(os.path.join(report_files,"index.html"),"$$$JSONDATA$$$",jsondata) 68 | logger.info("scan result: {0}".format(os.path.join(report_files,"index.html"))) 69 | 70 | if __name__ == "__main__": 71 | dirdb = r"D:\gitproject\assetscan\repertory\2020-06-01\13_49_47.fuzz.db" 72 | portdb = r"D:\gitproject\assetscan\repertory\2020-06-01\13_49_47.port.db" 73 | test = Report(portdb,dirdb) 74 | print test.create_scan_datajson() -------------------------------------------------------------------------------- /report/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkgh006/assetscan/12492689a4ec214fb68019ffa2b1903f9ae4c836/report/__init__.py -------------------------------------------------------------------------------- /report/template/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Collapsible Tree 4 | 5 | 6 | 186 | 
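ReportCenter.create_scan_datajson() above builds a nested name/children tree (IP → port → path → banner), and report_html() substitutes it for the $$$JSONDATA$$$ placeholder in this index.html template so the collapsible-tree page can render it. A minimal, hand-built sample of that shape (all values invented) looks like this:

# -*- coding:utf-8 -*-
# Hypothetical sample of the tree create_scan_datajson() emits; values are made up.
import json

datajson = json.dumps({
    "name": u"结果",                         # root node name used in ReportCenter.py
    "children": [{
        "name": "10.0.0.1",                  # one node per IP
        "children": [{
            "name": "80",                    # one node per open port
            "children": [{
                "name": "/ (code:200)",      # one node per discovered path
                "children": [{"name": "bmdpbng=",   # base64-encoded banner ("nginx")
                              "type": "path",
                              "value": "http://10.0.0.1:80"}]
            }]
        }]
    }]
})
print(datajson)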
-------------------------------------------------------------------------------- /report/template/inspector.css: -------------------------------------------------------------------------------- 1 | :root{--syntax_normal:#1b1e23;--syntax_comment:#a9b0bc;--syntax_number:#20a5ba;--syntax_keyword:#c30771;--syntax_atom:#10a778;--syntax_string:#008ec4;--syntax_error:#ffbedc;--syntax_unknown_variable:#838383;--syntax_known_variable:#005f87;--syntax_matchbracket:#20bbfc;--syntax_key:#6636b4;--mono_fonts:82%/1.5 Menlo,Consolas,monospace}.observablehq--collapsed,.observablehq--expanded,.observablehq--function,.observablehq--gray,.observablehq--import,.observablehq--string:after,.observablehq--string:before{color:var(--syntax_normal)}.observablehq--collapsed,.observablehq--inspect a{cursor:pointer}.observablehq--field{text-indent:-1em;margin-left:1em}.observablehq--empty{color:var(--syntax_comment)}.observablehq--blue,.observablehq--keyword{color:#3182bd}.observablehq--forbidden,.observablehq--pink{color:#e377c2}.observablehq--orange{color:#e6550d}.observablehq--boolean,.observablehq--null,.observablehq--undefined{color:var(--syntax_atom)}.observablehq--bigint,.observablehq--date,.observablehq--green,.observablehq--number,.observablehq--regexp,.observablehq--symbol{color:var(--syntax_number)}.observablehq--index,.observablehq--key{color:var(--syntax_key)}.observablehq--prototype-key{color:#aaa}.observablehq--empty{font-style:oblique}.observablehq--purple,.observablehq--string{color:var(--syntax_string)}.observablehq--error,.observablehq--red{color:#e7040f}.observablehq--inspect{font:var(--mono_fonts);overflow-x:auto;display:block;white-space:pre}.observablehq--error .observablehq--inspect{word-break:break-all;white-space:pre-wrap} -------------------------------------------------------------------------------- /report/template/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@d3/collapsible-tree", 3 | "main": "main.js", 4 | "version": "353.0.0", 5 | "homepage": "https://observablehq.com/@d3/collapsible-tree", 6 | "author": { 7 | "name": "D3", 8 | "url": "https://observablehq.com/@d3" 9 | }, 10 | "type": "module", 11 | "peerDependencies": { 12 | "@observablehq/runtime": "4" 13 | } 14 | } -------------------------------------------------------------------------------- /reportrs.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkgh006/assetscan/12492689a4ec214fb68019ffa2b1903f9ae4c836/reportrs.jpg -------------------------------------------------------------------------------- /result.txt: -------------------------------------------------------------------------------- 1 | https://222.175.107.35:443 Welcome to OpenResty! 
2 | https://www.bobdirectbank.com:443 北京银行直销银行 3 | http://www.bobdirectbank.com:80 北京银行直销银行 4 | -------------------------------------------------------------------------------- /scanrs.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkgh006/assetscan/12492689a4ec214fb68019ffa2b1903f9ae4c836/scanrs.jpg -------------------------------------------------------------------------------- /thirdparty/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkgh006/assetscan/12492689a4ec214fb68019ffa2b1903f9ae4c836/thirdparty/__init__.py -------------------------------------------------------------------------------- /thirdparty/chardet/__init__.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # This library is free software; you can redistribute it and/or 3 | # modify it under the terms of the GNU Lesser General Public 4 | # License as published by the Free Software Foundation; either 5 | # version 2.1 of the License, or (at your option) any later version. 6 | # 7 | # This library is distributed in the hope that it will be useful, 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 10 | # Lesser General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU Lesser General Public 13 | # License along with this library; if not, write to the Free Software 14 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 15 | # 02110-1301 USA 16 | ######################### END LICENSE BLOCK ######################### 17 | 18 | 19 | from .compat import PY2, PY3 20 | from .universaldetector import UniversalDetector 21 | from .version import __version__, VERSION 22 | 23 | 24 | def detect(byte_str): 25 | """ 26 | Detect the encoding of the given byte string. 27 | 28 | :param byte_str: The byte sequence to examine. 29 | :type byte_str: ``bytes`` or ``bytearray`` 30 | """ 31 | if not isinstance(byte_str, bytearray): 32 | if not isinstance(byte_str, bytes): 33 | raise TypeError('Expected object of type bytes or bytearray, got: ' 34 | '{0}'.format(type(byte_str))) 35 | else: 36 | byte_str = bytearray(byte_str) 37 | detector = UniversalDetector() 38 | detector.feed(byte_str) 39 | return detector.close() 40 | -------------------------------------------------------------------------------- /thirdparty/chardet/big5prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Communicator client code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 
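# Usage sketch for the detect() helper defined in thirdparty/chardet/__init__.py
# above. Illustrative only: any byte payload works, the README.md input is just a
# convenient file from this repo, and the exact result depends on the data; it
# assumes the repo root is on sys.path so the `thirdparty` package imports.
from thirdparty.chardet import detect

with open("README.md", "rb") as fp:
    result = detect(fp.read())
# `result` is a dict along the lines of
# {'encoding': 'utf-8', 'confidence': 0.99, 'language': ''}
print(result)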
16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import Big5DistributionAnalysis 31 | from .mbcssm import BIG5_SM_MODEL 32 | 33 | 34 | class Big5Prober(MultiByteCharSetProber): 35 | def __init__(self): 36 | super(Big5Prober, self).__init__() 37 | self.coding_sm = CodingStateMachine(BIG5_SM_MODEL) 38 | self.distribution_analyzer = Big5DistributionAnalysis() 39 | self.reset() 40 | 41 | @property 42 | def charset_name(self): 43 | return "Big5" 44 | 45 | @property 46 | def language(self): 47 | return "Chinese" 48 | -------------------------------------------------------------------------------- /thirdparty/chardet/charsetgroupprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Communicator client code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 
21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .enums import ProbingState 29 | from .charsetprober import CharSetProber 30 | 31 | 32 | class CharSetGroupProber(CharSetProber): 33 | def __init__(self, lang_filter=None): 34 | super(CharSetGroupProber, self).__init__(lang_filter=lang_filter) 35 | self._active_num = 0 36 | self.probers = [] 37 | self._best_guess_prober = None 38 | 39 | def reset(self): 40 | super(CharSetGroupProber, self).reset() 41 | self._active_num = 0 42 | for prober in self.probers: 43 | if prober: 44 | prober.reset() 45 | prober.active = True 46 | self._active_num += 1 47 | self._best_guess_prober = None 48 | 49 | @property 50 | def charset_name(self): 51 | if not self._best_guess_prober: 52 | self.get_confidence() 53 | if not self._best_guess_prober: 54 | return None 55 | return self._best_guess_prober.charset_name 56 | 57 | @property 58 | def language(self): 59 | if not self._best_guess_prober: 60 | self.get_confidence() 61 | if not self._best_guess_prober: 62 | return None 63 | return self._best_guess_prober.language 64 | 65 | def feed(self, byte_str): 66 | for prober in self.probers: 67 | if not prober: 68 | continue 69 | if not prober.active: 70 | continue 71 | state = prober.feed(byte_str) 72 | if not state: 73 | continue 74 | if state == ProbingState.FOUND_IT: 75 | self._best_guess_prober = prober 76 | return self.state 77 | elif state == ProbingState.NOT_ME: 78 | prober.active = False 79 | self._active_num -= 1 80 | if self._active_num <= 0: 81 | self._state = ProbingState.NOT_ME 82 | return self.state 83 | return self.state 84 | 85 | def get_confidence(self): 86 | state = self.state 87 | if state == ProbingState.FOUND_IT: 88 | return 0.99 89 | elif state == ProbingState.NOT_ME: 90 | return 0.01 91 | best_conf = 0.0 92 | self._best_guess_prober = None 93 | for prober in self.probers: 94 | if not prober: 95 | continue 96 | if not prober.active: 97 | self.logger.debug('%s not active', prober.charset_name) 98 | continue 99 | conf = prober.get_confidence() 100 | self.logger.debug('%s %s confidence = %s', prober.charset_name, prober.language, conf) 101 | if best_conf < conf: 102 | best_conf = conf 103 | self._best_guess_prober = prober 104 | if not self._best_guess_prober: 105 | return 0.0 106 | return best_conf 107 | -------------------------------------------------------------------------------- /thirdparty/chardet/charsetprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # 13 | # This library is free software; you can redistribute it and/or 14 | # modify it under the terms of the GNU Lesser General Public 15 | # License as published by the Free Software Foundation; either 16 | # version 2.1 of the License, or (at your option) any later version. 
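# Incremental-detection sketch: UniversalDetector is what drives prober groups
# like the CharSetGroupProber above, feeding data chunk by chunk until a prober
# reaches ProbingState.FOUND_IT (the same feed/done/close/result API the bundled
# chardetect CLI uses). Illustrative only; hosts.txt is just a handy byte source
# from this repo, and the import assumes the repo root is on sys.path.
from thirdparty.chardet.universaldetector import UniversalDetector

detector = UniversalDetector()
with open("hosts.txt", "rb") as fp:
    for chunk in iter(lambda: fp.read(4096), b""):
        detector.feed(chunk)        # each active prober consumes the bytes
        if detector.done:           # stop early once the detector is confident
            break
detector.close()                    # finalizes and fills detector.result
print(detector.result)              # {'encoding': ..., 'confidence': ..., 'language': ...}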
17 | # 18 | # This library is distributed in the hope that it will be useful, 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 | # Lesser General Public License for more details. 22 | # 23 | # You should have received a copy of the GNU Lesser General Public 24 | # License along with this library; if not, write to the Free Software 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 26 | # 02110-1301 USA 27 | ######################### END LICENSE BLOCK ######################### 28 | 29 | import logging 30 | import re 31 | 32 | from .enums import ProbingState 33 | 34 | 35 | class CharSetProber(object): 36 | 37 | SHORTCUT_THRESHOLD = 0.95 38 | 39 | def __init__(self, lang_filter=None): 40 | self._state = None 41 | self.lang_filter = lang_filter 42 | self.logger = logging.getLogger(__name__) 43 | 44 | def reset(self): 45 | self._state = ProbingState.DETECTING 46 | 47 | @property 48 | def charset_name(self): 49 | return None 50 | 51 | def feed(self, buf): 52 | pass 53 | 54 | @property 55 | def state(self): 56 | return self._state 57 | 58 | def get_confidence(self): 59 | return 0.0 60 | 61 | @staticmethod 62 | def filter_high_byte_only(buf): 63 | buf = re.sub(b'([\x00-\x7F])+', b' ', buf) 64 | return buf 65 | 66 | @staticmethod 67 | def filter_international_words(buf): 68 | """ 69 | We define three types of bytes: 70 | alphabet: english alphabets [a-zA-Z] 71 | international: international characters [\x80-\xFF] 72 | marker: everything else [^a-zA-Z\x80-\xFF] 73 | 74 | The input buffer can be thought to contain a series of words delimited 75 | by markers. This function works to filter all words that contain at 76 | least one international character. All contiguous sequences of markers 77 | are replaced by a single space ascii character. 78 | 79 | This filter applies to all scripts which do not use English characters. 80 | """ 81 | filtered = bytearray() 82 | 83 | # This regex expression filters out only words that have at-least one 84 | # international character. The word may include one marker character at 85 | # the end. 86 | words = re.findall(b'[a-zA-Z]*[\x80-\xFF]+[a-zA-Z]*[^a-zA-Z\x80-\xFF]?', 87 | buf) 88 | 89 | for word in words: 90 | filtered.extend(word[:-1]) 91 | 92 | # If the last character in the word is a marker, replace it with a 93 | # space as markers shouldn't affect our analysis (they are used 94 | # similarly across all languages and may thus have similar 95 | # frequencies). 96 | last_char = word[-1:] 97 | if not last_char.isalpha() and last_char < b'\x80': 98 | last_char = b' ' 99 | filtered.extend(last_char) 100 | 101 | return filtered 102 | 103 | @staticmethod 104 | def filter_with_english_letters(buf): 105 | """ 106 | Returns a copy of ``buf`` that retains only the sequences of English 107 | alphabet and high byte characters that are not between <> characters. 108 | Also retains English alphabet and high byte characters immediately 109 | before occurrences of >. 110 | 111 | This filter can be applied to all scripts which contain both English 112 | characters and extended ASCII characters, but is currently only used by 113 | ``Latin1Prober``. 
114 | """ 115 | filtered = bytearray() 116 | in_tag = False 117 | prev = 0 118 | 119 | for curr in range(len(buf)): 120 | # Slice here to get bytes instead of an int with Python 3 121 | buf_char = buf[curr:curr + 1] 122 | # Check if we're coming out of or entering an HTML tag 123 | if buf_char == b'>': 124 | in_tag = False 125 | elif buf_char == b'<': 126 | in_tag = True 127 | 128 | # If current character is not extended-ASCII and not alphabetic... 129 | if buf_char < b'\x80' and not buf_char.isalpha(): 130 | # ...and we're not in a tag 131 | if curr > prev and not in_tag: 132 | # Keep everything after last non-extended-ASCII, 133 | # non-alphabetic character 134 | filtered.extend(buf[prev:curr]) 135 | # Output a space to delimit stretch we kept 136 | filtered.extend(b' ') 137 | prev = curr + 1 138 | 139 | # If we're not in a tag... 140 | if not in_tag: 141 | # Keep everything after last non-extended-ASCII, non-alphabetic 142 | # character 143 | filtered.extend(buf[prev:]) 144 | 145 | return filtered 146 | -------------------------------------------------------------------------------- /thirdparty/chardet/cli/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /thirdparty/chardet/cli/chardetect.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Script which takes one or more file paths and reports on their detected 4 | encodings 5 | 6 | Example:: 7 | 8 | % chardetect somefile someotherfile 9 | somefile: windows-1252 with confidence 0.5 10 | someotherfile: ascii with confidence 1.0 11 | 12 | If no paths are provided, it takes its input from stdin. 13 | 14 | """ 15 | 16 | from __future__ import absolute_import, print_function, unicode_literals 17 | 18 | import argparse 19 | import sys 20 | 21 | from thirdparty.chardet import __version__ 22 | from thirdparty.chardet import PY2 23 | from thirdparty.chardet import UniversalDetector 24 | 25 | 26 | def description_of(lines, name='stdin'): 27 | """ 28 | Return a string describing the probable encoding of a file or 29 | list of strings. 30 | 31 | :param lines: The lines to get the encoding of. 32 | :type lines: Iterable of bytes 33 | :param name: Name of file or collection of lines 34 | :type name: str 35 | """ 36 | u = UniversalDetector() 37 | for line in lines: 38 | line = bytearray(line) 39 | u.feed(line) 40 | # shortcut out of the loop to save reading further - particularly useful if we read a BOM. 41 | if u.done: 42 | break 43 | u.close() 44 | result = u.result 45 | if PY2: 46 | name = name.decode(sys.getfilesystemencoding(), 'ignore') 47 | if result['encoding']: 48 | return '{0}: {1} with confidence {2}'.format(name, result['encoding'], 49 | result['confidence']) 50 | else: 51 | return '{0}: no result'.format(name) 52 | 53 | 54 | def main(argv=None): 55 | """ 56 | Handles command line arguments and gets things started. 57 | 58 | :param argv: List of arguments, as if specified on the command-line. 59 | If None, ``sys.argv[1:]`` is used instead. 60 | :type argv: list of str 61 | """ 62 | # Get command line arguments 63 | parser = argparse.ArgumentParser( 64 | description="Takes one or more file paths and reports their detected \ 65 | encodings") 66 | parser.add_argument('input', 67 | help='File whose encoding we would like to determine. 
\ 68 | (default: stdin)', 69 | type=argparse.FileType('rb'), nargs='*', 70 | default=[sys.stdin if PY2 else sys.stdin.buffer]) 71 | parser.add_argument('--version', action='version', 72 | version='%(prog)s {0}'.format(__version__)) 73 | args = parser.parse_args(argv) 74 | 75 | for f in args.input: 76 | if f.isatty(): 77 | print("You are running chardetect interactively. Press " + 78 | "CTRL-D twice at the start of a blank line to signal the " + 79 | "end of your input. If you want help, run chardetect " + 80 | "--help\n", file=sys.stderr) 81 | print(description_of(f, f.name)) 82 | 83 | 84 | if __name__ == '__main__': 85 | main() 86 | -------------------------------------------------------------------------------- /thirdparty/chardet/codingstatemachine.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | import logging 29 | 30 | from .enums import MachineState 31 | 32 | 33 | class CodingStateMachine(object): 34 | """ 35 | A state machine to verify a byte sequence for a particular encoding. For 36 | each byte the detector receives, it will feed that byte to every active 37 | state machine available, one byte at a time. The state machine changes its 38 | state based on its previous state and the byte it receives. There are 3 39 | states in a state machine that are of interest to an auto-detector: 40 | 41 | START state: This is the state to start with, or a legal byte sequence 42 | (i.e. a valid code point) for character has been identified. 43 | 44 | ME state: This indicates that the state machine identified a byte sequence 45 | that is specific to the charset it is designed for and that 46 | there is no other possible encoding which can contain this byte 47 | sequence. This will to lead to an immediate positive answer for 48 | the detector. 49 | 50 | ERROR state: This indicates the state machine identified an illegal byte 51 | sequence for that encoding. This will lead to an immediate 52 | negative answer for this encoding. Detector will exclude this 53 | encoding from consideration from here on. 
54 | """ 55 | def __init__(self, sm): 56 | self._model = sm 57 | self._curr_byte_pos = 0 58 | self._curr_char_len = 0 59 | self._curr_state = None 60 | self.logger = logging.getLogger(__name__) 61 | self.reset() 62 | 63 | def reset(self): 64 | self._curr_state = MachineState.START 65 | 66 | def next_state(self, c): 67 | # for each byte we get its class 68 | # if it is first byte, we also get byte length 69 | byte_class = self._model['class_table'][c] 70 | if self._curr_state == MachineState.START: 71 | self._curr_byte_pos = 0 72 | self._curr_char_len = self._model['char_len_table'][byte_class] 73 | # from byte's class and state_table, we get its next state 74 | curr_state = (self._curr_state * self._model['class_factor'] 75 | + byte_class) 76 | self._curr_state = self._model['state_table'][curr_state] 77 | self._curr_byte_pos += 1 78 | return self._curr_state 79 | 80 | def get_current_charlen(self): 81 | return self._curr_char_len 82 | 83 | def get_coding_state_machine(self): 84 | return self._model['name'] 85 | 86 | @property 87 | def language(self): 88 | return self._model['language'] 89 | -------------------------------------------------------------------------------- /thirdparty/chardet/compat.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # Contributor(s): 3 | # Dan Blanchard 4 | # Ian Cordasco 5 | # 6 | # This library is free software; you can redistribute it and/or 7 | # modify it under the terms of the GNU Lesser General Public 8 | # License as published by the Free Software Foundation; either 9 | # version 2.1 of the License, or (at your option) any later version. 10 | # 11 | # This library is distributed in the hope that it will be useful, 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 | # Lesser General Public License for more details. 15 | # 16 | # You should have received a copy of the GNU Lesser General Public 17 | # License along with this library; if not, write to the Free Software 18 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 19 | # 02110-1301 USA 20 | ######################### END LICENSE BLOCK ######################### 21 | 22 | import sys 23 | 24 | 25 | if sys.version_info < (3, 0): 26 | PY2 = True 27 | PY3 = False 28 | base_str = (str, unicode) 29 | text_type = unicode 30 | else: 31 | PY2 = False 32 | PY3 = True 33 | base_str = (bytes, str) 34 | text_type = str 35 | -------------------------------------------------------------------------------- /thirdparty/chardet/cp949prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 
16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .chardistribution import EUCKRDistributionAnalysis 29 | from .codingstatemachine import CodingStateMachine 30 | from .mbcharsetprober import MultiByteCharSetProber 31 | from .mbcssm import CP949_SM_MODEL 32 | 33 | 34 | class CP949Prober(MultiByteCharSetProber): 35 | def __init__(self): 36 | super(CP949Prober, self).__init__() 37 | self.coding_sm = CodingStateMachine(CP949_SM_MODEL) 38 | # NOTE: CP949 is a superset of EUC-KR, so the distribution should be 39 | # not different. 40 | self.distribution_analyzer = EUCKRDistributionAnalysis() 41 | self.reset() 42 | 43 | @property 44 | def charset_name(self): 45 | return "CP949" 46 | 47 | @property 48 | def language(self): 49 | return "Korean" 50 | -------------------------------------------------------------------------------- /thirdparty/chardet/enums.py: -------------------------------------------------------------------------------- 1 | """ 2 | All of the Enums that are used throughout the chardet package. 3 | 4 | :author: Dan Blanchard (dan.blanchard@gmail.com) 5 | """ 6 | 7 | 8 | class InputState(object): 9 | """ 10 | This enum represents the different states a universal detector can be in. 11 | """ 12 | PURE_ASCII = 0 13 | ESC_ASCII = 1 14 | HIGH_BYTE = 2 15 | 16 | 17 | class LanguageFilter(object): 18 | """ 19 | This enum represents the different language filters we can apply to a 20 | ``UniversalDetector``. 21 | """ 22 | CHINESE_SIMPLIFIED = 0x01 23 | CHINESE_TRADITIONAL = 0x02 24 | JAPANESE = 0x04 25 | KOREAN = 0x08 26 | NON_CJK = 0x10 27 | ALL = 0x1F 28 | CHINESE = CHINESE_SIMPLIFIED | CHINESE_TRADITIONAL 29 | CJK = CHINESE | JAPANESE | KOREAN 30 | 31 | 32 | class ProbingState(object): 33 | """ 34 | This enum represents the different states a prober can be in. 35 | """ 36 | DETECTING = 0 37 | FOUND_IT = 1 38 | NOT_ME = 2 39 | 40 | 41 | class MachineState(object): 42 | """ 43 | This enum represents the different states a state machine can be in. 44 | """ 45 | START = 0 46 | ERROR = 1 47 | ITS_ME = 2 48 | 49 | 50 | class SequenceLikelihood(object): 51 | """ 52 | This enum represents the likelihood of a character following the previous one. 53 | """ 54 | NEGATIVE = 0 55 | UNLIKELY = 1 56 | LIKELY = 2 57 | POSITIVE = 3 58 | 59 | @classmethod 60 | def get_num_categories(cls): 61 | """:returns: The number of likelihood categories in the enum.""" 62 | return 4 63 | 64 | 65 | class CharacterCategory(object): 66 | """ 67 | This enum represents the different categories language models for 68 | ``SingleByteCharsetProber`` put characters into. 69 | 70 | Anything less than CONTROL is considered a letter. 
71 | """ 72 | UNDEFINED = 255 73 | LINE_BREAK = 254 74 | SYMBOL = 253 75 | DIGIT = 252 76 | CONTROL = 251 77 | -------------------------------------------------------------------------------- /thirdparty/chardet/escprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .charsetprober import CharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .enums import LanguageFilter, ProbingState, MachineState 31 | from .escsm import (HZ_SM_MODEL, ISO2022CN_SM_MODEL, ISO2022JP_SM_MODEL, 32 | ISO2022KR_SM_MODEL) 33 | 34 | 35 | class EscCharSetProber(CharSetProber): 36 | """ 37 | This CharSetProber uses a "code scheme" approach for detecting encodings, 38 | whereby easily recognizable escape or shift sequences are relied on to 39 | identify these encodings. 
40 | """ 41 | 42 | def __init__(self, lang_filter=None): 43 | super(EscCharSetProber, self).__init__(lang_filter=lang_filter) 44 | self.coding_sm = [] 45 | if self.lang_filter & LanguageFilter.CHINESE_SIMPLIFIED: 46 | self.coding_sm.append(CodingStateMachine(HZ_SM_MODEL)) 47 | self.coding_sm.append(CodingStateMachine(ISO2022CN_SM_MODEL)) 48 | if self.lang_filter & LanguageFilter.JAPANESE: 49 | self.coding_sm.append(CodingStateMachine(ISO2022JP_SM_MODEL)) 50 | if self.lang_filter & LanguageFilter.KOREAN: 51 | self.coding_sm.append(CodingStateMachine(ISO2022KR_SM_MODEL)) 52 | self.active_sm_count = None 53 | self._detected_charset = None 54 | self._detected_language = None 55 | self._state = None 56 | self.reset() 57 | 58 | def reset(self): 59 | super(EscCharSetProber, self).reset() 60 | for coding_sm in self.coding_sm: 61 | if not coding_sm: 62 | continue 63 | coding_sm.active = True 64 | coding_sm.reset() 65 | self.active_sm_count = len(self.coding_sm) 66 | self._detected_charset = None 67 | self._detected_language = None 68 | 69 | @property 70 | def charset_name(self): 71 | return self._detected_charset 72 | 73 | @property 74 | def language(self): 75 | return self._detected_language 76 | 77 | def get_confidence(self): 78 | if self._detected_charset: 79 | return 0.99 80 | else: 81 | return 0.00 82 | 83 | def feed(self, byte_str): 84 | for c in byte_str: 85 | for coding_sm in self.coding_sm: 86 | if not coding_sm or not coding_sm.active: 87 | continue 88 | coding_state = coding_sm.next_state(c) 89 | if coding_state == MachineState.ERROR: 90 | coding_sm.active = False 91 | self.active_sm_count -= 1 92 | if self.active_sm_count <= 0: 93 | self._state = ProbingState.NOT_ME 94 | return self.state 95 | elif coding_state == MachineState.ITS_ME: 96 | self._state = ProbingState.FOUND_IT 97 | self._detected_charset = coding_sm.get_coding_state_machine() 98 | self._detected_language = coding_sm.language 99 | return self.state 100 | 101 | return self.state 102 | -------------------------------------------------------------------------------- /thirdparty/chardet/eucjpprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 
21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .enums import ProbingState, MachineState 29 | from .mbcharsetprober import MultiByteCharSetProber 30 | from .codingstatemachine import CodingStateMachine 31 | from .chardistribution import EUCJPDistributionAnalysis 32 | from .jpcntx import EUCJPContextAnalysis 33 | from .mbcssm import EUCJP_SM_MODEL 34 | 35 | 36 | class EUCJPProber(MultiByteCharSetProber): 37 | def __init__(self): 38 | super(EUCJPProber, self).__init__() 39 | self.coding_sm = CodingStateMachine(EUCJP_SM_MODEL) 40 | self.distribution_analyzer = EUCJPDistributionAnalysis() 41 | self.context_analyzer = EUCJPContextAnalysis() 42 | self.reset() 43 | 44 | def reset(self): 45 | super(EUCJPProber, self).reset() 46 | self.context_analyzer.reset() 47 | 48 | @property 49 | def charset_name(self): 50 | return "EUC-JP" 51 | 52 | @property 53 | def language(self): 54 | return "Japanese" 55 | 56 | def feed(self, byte_str): 57 | for i in range(len(byte_str)): 58 | # PY3K: byte_str is a byte array, so byte_str[i] is an int, not a byte 59 | coding_state = self.coding_sm.next_state(byte_str[i]) 60 | if coding_state == MachineState.ERROR: 61 | self.logger.debug('%s %s prober hit error at byte %s', 62 | self.charset_name, self.language, i) 63 | self._state = ProbingState.NOT_ME 64 | break 65 | elif coding_state == MachineState.ITS_ME: 66 | self._state = ProbingState.FOUND_IT 67 | break 68 | elif coding_state == MachineState.START: 69 | char_len = self.coding_sm.get_current_charlen() 70 | if i == 0: 71 | self._last_char[1] = byte_str[0] 72 | self.context_analyzer.feed(self._last_char, char_len) 73 | self.distribution_analyzer.feed(self._last_char, char_len) 74 | else: 75 | self.context_analyzer.feed(byte_str[i - 1:i + 1], 76 | char_len) 77 | self.distribution_analyzer.feed(byte_str[i - 1:i + 1], 78 | char_len) 79 | 80 | self._last_char[0] = byte_str[-1] 81 | 82 | if self.state == ProbingState.DETECTING: 83 | if (self.context_analyzer.got_enough_data() and 84 | (self.get_confidence() > self.SHORTCUT_THRESHOLD)): 85 | self._state = ProbingState.FOUND_IT 86 | 87 | return self.state 88 | 89 | def get_confidence(self): 90 | context_conf = self.context_analyzer.get_confidence() 91 | distrib_conf = self.distribution_analyzer.get_confidence() 92 | return max(context_conf, distrib_conf) 93 | -------------------------------------------------------------------------------- /thirdparty/chardet/euckrprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 
16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import EUCKRDistributionAnalysis 31 | from .mbcssm import EUCKR_SM_MODEL 32 | 33 | 34 | class EUCKRProber(MultiByteCharSetProber): 35 | def __init__(self): 36 | super(EUCKRProber, self).__init__() 37 | self.coding_sm = CodingStateMachine(EUCKR_SM_MODEL) 38 | self.distribution_analyzer = EUCKRDistributionAnalysis() 39 | self.reset() 40 | 41 | @property 42 | def charset_name(self): 43 | return "EUC-KR" 44 | 45 | @property 46 | def language(self): 47 | return "Korean" 48 | -------------------------------------------------------------------------------- /thirdparty/chardet/euctwprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 
21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import EUCTWDistributionAnalysis 31 | from .mbcssm import EUCTW_SM_MODEL 32 | 33 | class EUCTWProber(MultiByteCharSetProber): 34 | def __init__(self): 35 | super(EUCTWProber, self).__init__() 36 | self.coding_sm = CodingStateMachine(EUCTW_SM_MODEL) 37 | self.distribution_analyzer = EUCTWDistributionAnalysis() 38 | self.reset() 39 | 40 | @property 41 | def charset_name(self): 42 | return "EUC-TW" 43 | 44 | @property 45 | def language(self): 46 | return "Taiwan" 47 | -------------------------------------------------------------------------------- /thirdparty/chardet/gb2312prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import GB2312DistributionAnalysis 31 | from .mbcssm import GB2312_SM_MODEL 32 | 33 | class GB2312Prober(MultiByteCharSetProber): 34 | def __init__(self): 35 | super(GB2312Prober, self).__init__() 36 | self.coding_sm = CodingStateMachine(GB2312_SM_MODEL) 37 | self.distribution_analyzer = GB2312DistributionAnalysis() 38 | self.reset() 39 | 40 | @property 41 | def charset_name(self): 42 | return "GB2312" 43 | 44 | @property 45 | def language(self): 46 | return "Chinese" 47 | -------------------------------------------------------------------------------- /thirdparty/chardet/latin1prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 
6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # 13 | # This library is free software; you can redistribute it and/or 14 | # modify it under the terms of the GNU Lesser General Public 15 | # License as published by the Free Software Foundation; either 16 | # version 2.1 of the License, or (at your option) any later version. 17 | # 18 | # This library is distributed in the hope that it will be useful, 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 | # Lesser General Public License for more details. 22 | # 23 | # You should have received a copy of the GNU Lesser General Public 24 | # License along with this library; if not, write to the Free Software 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 26 | # 02110-1301 USA 27 | ######################### END LICENSE BLOCK ######################### 28 | 29 | from .charsetprober import CharSetProber 30 | from .enums import ProbingState 31 | 32 | FREQ_CAT_NUM = 4 33 | 34 | UDF = 0 # undefined 35 | OTH = 1 # other 36 | ASC = 2 # ascii capital letter 37 | ASS = 3 # ascii small letter 38 | ACV = 4 # accent capital vowel 39 | ACO = 5 # accent capital other 40 | ASV = 6 # accent small vowel 41 | ASO = 7 # accent small other 42 | CLASS_NUM = 8 # total classes 43 | 44 | Latin1_CharToClass = ( 45 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 00 - 07 46 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 08 - 0F 47 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 10 - 17 48 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 18 - 1F 49 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 20 - 27 50 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 28 - 2F 51 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 30 - 37 52 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 38 - 3F 53 | OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 40 - 47 54 | ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 48 - 4F 55 | ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 50 - 57 56 | ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, # 58 - 5F 57 | OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 60 - 67 58 | ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 68 - 6F 59 | ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 70 - 77 60 | ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, # 78 - 7F 61 | OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH, # 80 - 87 62 | OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF, # 88 - 8F 63 | UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 90 - 97 64 | OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO, # 98 - 9F 65 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A0 - A7 66 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # A8 - AF 67 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B0 - B7 68 | OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B8 - BF 69 | ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO, # C0 - C7 70 | ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, # C8 - CF 71 | ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH, # D0 - D7 72 | ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO, # D8 - DF 73 | ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO, # E0 - E7 74 | ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, # E8 - EF 75 | ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH, # F0 - F7 76 | ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, # F8 - FF 77 | ) 78 | 79 | # 0 : illegal 80 | # 1 : very unlikely 81 | # 2 : normal 82 | # 3 : very likely 83 | Latin1ClassModel = ( 84 | # UDF OTH ASC ASS ACV ACO ASV ASO 85 | 0, 0, 0, 0, 0, 0, 0, 0, # UDF 86 | 0, 3, 3, 3, 3, 3, 3, 3, # OTH 87 | 0, 3, 3, 3, 3, 
3, 3, 3, # ASC 88 | 0, 3, 3, 3, 1, 1, 3, 3, # ASS 89 | 0, 3, 3, 3, 1, 2, 1, 2, # ACV 90 | 0, 3, 3, 3, 3, 3, 3, 3, # ACO 91 | 0, 3, 1, 3, 1, 1, 1, 3, # ASV 92 | 0, 3, 1, 3, 1, 1, 3, 3, # ASO 93 | ) 94 | 95 | 96 | class Latin1Prober(CharSetProber): 97 | def __init__(self): 98 | super(Latin1Prober, self).__init__() 99 | self._last_char_class = None 100 | self._freq_counter = None 101 | self.reset() 102 | 103 | def reset(self): 104 | self._last_char_class = OTH 105 | self._freq_counter = [0] * FREQ_CAT_NUM 106 | CharSetProber.reset(self) 107 | 108 | @property 109 | def charset_name(self): 110 | return "ISO-8859-1" 111 | 112 | @property 113 | def language(self): 114 | return "" 115 | 116 | def feed(self, byte_str): 117 | byte_str = self.filter_with_english_letters(byte_str) 118 | for c in byte_str: 119 | char_class = Latin1_CharToClass[c] 120 | freq = Latin1ClassModel[(self._last_char_class * CLASS_NUM) 121 | + char_class] 122 | if freq == 0: 123 | self._state = ProbingState.NOT_ME 124 | break 125 | self._freq_counter[freq] += 1 126 | self._last_char_class = char_class 127 | 128 | return self.state 129 | 130 | def get_confidence(self): 131 | if self.state == ProbingState.NOT_ME: 132 | return 0.01 133 | 134 | total = sum(self._freq_counter) 135 | if total < 0.01: 136 | confidence = 0.0 137 | else: 138 | confidence = ((self._freq_counter[3] - self._freq_counter[1] * 20.0) 139 | / total) 140 | if confidence < 0.0: 141 | confidence = 0.0 142 | # lower the confidence of latin1 so that other more accurate 143 | # detector can take priority. 144 | confidence = confidence * 0.73 145 | return confidence 146 | -------------------------------------------------------------------------------- /thirdparty/chardet/mbcharsetprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # Proofpoint, Inc. 13 | # 14 | # This library is free software; you can redistribute it and/or 15 | # modify it under the terms of the GNU Lesser General Public 16 | # License as published by the Free Software Foundation; either 17 | # version 2.1 of the License, or (at your option) any later version. 18 | # 19 | # This library is distributed in the hope that it will be useful, 20 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 21 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 | # Lesser General Public License for more details. 
23 | # 24 | # You should have received a copy of the GNU Lesser General Public 25 | # License along with this library; if not, write to the Free Software 26 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 27 | # 02110-1301 USA 28 | ######################### END LICENSE BLOCK ######################### 29 | 30 | from .charsetprober import CharSetProber 31 | from .enums import ProbingState, MachineState 32 | 33 | 34 | class MultiByteCharSetProber(CharSetProber): 35 | """ 36 | MultiByteCharSetProber 37 | """ 38 | 39 | def __init__(self, lang_filter=None): 40 | super(MultiByteCharSetProber, self).__init__(lang_filter=lang_filter) 41 | self.distribution_analyzer = None 42 | self.coding_sm = None 43 | self._last_char = [0, 0] 44 | 45 | def reset(self): 46 | super(MultiByteCharSetProber, self).reset() 47 | if self.coding_sm: 48 | self.coding_sm.reset() 49 | if self.distribution_analyzer: 50 | self.distribution_analyzer.reset() 51 | self._last_char = [0, 0] 52 | 53 | @property 54 | def charset_name(self): 55 | raise NotImplementedError 56 | 57 | @property 58 | def language(self): 59 | raise NotImplementedError 60 | 61 | def feed(self, byte_str): 62 | for i in range(len(byte_str)): 63 | coding_state = self.coding_sm.next_state(byte_str[i]) 64 | if coding_state == MachineState.ERROR: 65 | self.logger.debug('%s %s prober hit error at byte %s', 66 | self.charset_name, self.language, i) 67 | self._state = ProbingState.NOT_ME 68 | break 69 | elif coding_state == MachineState.ITS_ME: 70 | self._state = ProbingState.FOUND_IT 71 | break 72 | elif coding_state == MachineState.START: 73 | char_len = self.coding_sm.get_current_charlen() 74 | if i == 0: 75 | self._last_char[1] = byte_str[0] 76 | self.distribution_analyzer.feed(self._last_char, char_len) 77 | else: 78 | self.distribution_analyzer.feed(byte_str[i - 1:i + 1], 79 | char_len) 80 | 81 | self._last_char[0] = byte_str[-1] 82 | 83 | if self.state == ProbingState.DETECTING: 84 | if (self.distribution_analyzer.got_enough_data() and 85 | (self.get_confidence() > self.SHORTCUT_THRESHOLD)): 86 | self._state = ProbingState.FOUND_IT 87 | 88 | return self.state 89 | 90 | def get_confidence(self): 91 | return self.distribution_analyzer.get_confidence() 92 | -------------------------------------------------------------------------------- /thirdparty/chardet/mbcsgroupprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # Proofpoint, Inc. 13 | # 14 | # This library is free software; you can redistribute it and/or 15 | # modify it under the terms of the GNU Lesser General Public 16 | # License as published by the Free Software Foundation; either 17 | # version 2.1 of the License, or (at your option) any later version. 18 | # 19 | # This library is distributed in the hope that it will be useful, 20 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 21 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 | # Lesser General Public License for more details. 
23 | # 24 | # You should have received a copy of the GNU Lesser General Public 25 | # License along with this library; if not, write to the Free Software 26 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 27 | # 02110-1301 USA 28 | ######################### END LICENSE BLOCK ######################### 29 | 30 | from .charsetgroupprober import CharSetGroupProber 31 | from .utf8prober import UTF8Prober 32 | from .sjisprober import SJISProber 33 | from .eucjpprober import EUCJPProber 34 | from .gb2312prober import GB2312Prober 35 | from .euckrprober import EUCKRProber 36 | from .cp949prober import CP949Prober 37 | from .big5prober import Big5Prober 38 | from .euctwprober import EUCTWProber 39 | 40 | 41 | class MBCSGroupProber(CharSetGroupProber): 42 | def __init__(self, lang_filter=None): 43 | super(MBCSGroupProber, self).__init__(lang_filter=lang_filter) 44 | self.probers = [ 45 | UTF8Prober(), 46 | SJISProber(), 47 | EUCJPProber(), 48 | GB2312Prober(), 49 | EUCKRProber(), 50 | CP949Prober(), 51 | Big5Prober(), 52 | EUCTWProber() 53 | ] 54 | self.reset() 55 | -------------------------------------------------------------------------------- /thirdparty/chardet/sbcharsetprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # 13 | # This library is free software; you can redistribute it and/or 14 | # modify it under the terms of the GNU Lesser General Public 15 | # License as published by the Free Software Foundation; either 16 | # version 2.1 of the License, or (at your option) any later version. 17 | # 18 | # This library is distributed in the hope that it will be useful, 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 | # Lesser General Public License for more details. 
22 | # 23 | # You should have received a copy of the GNU Lesser General Public 24 | # License along with this library; if not, write to the Free Software 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 26 | # 02110-1301 USA 27 | ######################### END LICENSE BLOCK ######################### 28 | 29 | from .charsetprober import CharSetProber 30 | from .enums import CharacterCategory, ProbingState, SequenceLikelihood 31 | 32 | 33 | class SingleByteCharSetProber(CharSetProber): 34 | SAMPLE_SIZE = 64 35 | SB_ENOUGH_REL_THRESHOLD = 1024 # 0.25 * SAMPLE_SIZE^2 36 | POSITIVE_SHORTCUT_THRESHOLD = 0.95 37 | NEGATIVE_SHORTCUT_THRESHOLD = 0.05 38 | 39 | def __init__(self, model, reversed=False, name_prober=None): 40 | super(SingleByteCharSetProber, self).__init__() 41 | self._model = model 42 | # TRUE if we need to reverse every pair in the model lookup 43 | self._reversed = reversed 44 | # Optional auxiliary prober for name decision 45 | self._name_prober = name_prober 46 | self._last_order = None 47 | self._seq_counters = None 48 | self._total_seqs = None 49 | self._total_char = None 50 | self._freq_char = None 51 | self.reset() 52 | 53 | def reset(self): 54 | super(SingleByteCharSetProber, self).reset() 55 | # char order of last character 56 | self._last_order = 255 57 | self._seq_counters = [0] * SequenceLikelihood.get_num_categories() 58 | self._total_seqs = 0 59 | self._total_char = 0 60 | # characters that fall in our sampling range 61 | self._freq_char = 0 62 | 63 | @property 64 | def charset_name(self): 65 | if self._name_prober: 66 | return self._name_prober.charset_name 67 | else: 68 | return self._model['charset_name'] 69 | 70 | @property 71 | def language(self): 72 | if self._name_prober: 73 | return self._name_prober.language 74 | else: 75 | return self._model.get('language') 76 | 77 | def feed(self, byte_str): 78 | if not self._model['keep_english_letter']: 79 | byte_str = self.filter_international_words(byte_str) 80 | if not byte_str: 81 | return self.state 82 | char_to_order_map = self._model['char_to_order_map'] 83 | for i, c in enumerate(byte_str): 84 | # XXX: Order is in range 1-64, so one would think we want 0-63 here, 85 | # but that leads to 27 more test failures than before. 86 | order = char_to_order_map[c] 87 | # XXX: This was SYMBOL_CAT_ORDER before, with a value of 250, but 88 | # CharacterCategory.SYMBOL is actually 253, so we use CONTROL 89 | # to make it closer to the original intent. The only difference 90 | # is whether or not we count digits and control characters for 91 | # _total_char purposes. 
92 | if order < CharacterCategory.CONTROL: 93 | self._total_char += 1 94 | if order < self.SAMPLE_SIZE: 95 | self._freq_char += 1 96 | if self._last_order < self.SAMPLE_SIZE: 97 | self._total_seqs += 1 98 | if not self._reversed: 99 | i = (self._last_order * self.SAMPLE_SIZE) + order 100 | model = self._model['precedence_matrix'][i] 101 | else: # reverse the order of the letters in the lookup 102 | i = (order * self.SAMPLE_SIZE) + self._last_order 103 | model = self._model['precedence_matrix'][i] 104 | self._seq_counters[model] += 1 105 | self._last_order = order 106 | 107 | charset_name = self._model['charset_name'] 108 | if self.state == ProbingState.DETECTING: 109 | if self._total_seqs > self.SB_ENOUGH_REL_THRESHOLD: 110 | confidence = self.get_confidence() 111 | if confidence > self.POSITIVE_SHORTCUT_THRESHOLD: 112 | self.logger.debug('%s confidence = %s, we have a winner', 113 | charset_name, confidence) 114 | self._state = ProbingState.FOUND_IT 115 | elif confidence < self.NEGATIVE_SHORTCUT_THRESHOLD: 116 | self.logger.debug('%s confidence = %s, below negative ' 117 | 'shortcut threshhold %s', charset_name, 118 | confidence, 119 | self.NEGATIVE_SHORTCUT_THRESHOLD) 120 | self._state = ProbingState.NOT_ME 121 | 122 | return self.state 123 | 124 | def get_confidence(self): 125 | r = 0.01 126 | if self._total_seqs > 0: 127 | r = ((1.0 * self._seq_counters[SequenceLikelihood.POSITIVE]) / 128 | self._total_seqs / self._model['typical_positive_ratio']) 129 | r = r * self._freq_char / self._total_char 130 | if r >= 1.0: 131 | r = 0.99 132 | return r 133 | -------------------------------------------------------------------------------- /thirdparty/chardet/sbcsgroupprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is Mozilla Universal charset detector code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 2001 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # 13 | # This library is free software; you can redistribute it and/or 14 | # modify it under the terms of the GNU Lesser General Public 15 | # License as published by the Free Software Foundation; either 16 | # version 2.1 of the License, or (at your option) any later version. 17 | # 18 | # This library is distributed in the hope that it will be useful, 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 | # Lesser General Public License for more details. 
22 | # 23 | # You should have received a copy of the GNU Lesser General Public 24 | # License along with this library; if not, write to the Free Software 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 26 | # 02110-1301 USA 27 | ######################### END LICENSE BLOCK ######################### 28 | 29 | from .charsetgroupprober import CharSetGroupProber 30 | from .sbcharsetprober import SingleByteCharSetProber 31 | from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel, 32 | Latin5CyrillicModel, MacCyrillicModel, 33 | Ibm866Model, Ibm855Model) 34 | from .langgreekmodel import Latin7GreekModel, Win1253GreekModel 35 | from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel 36 | # from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel 37 | from .langthaimodel import TIS620ThaiModel 38 | from .langhebrewmodel import Win1255HebrewModel 39 | from .hebrewprober import HebrewProber 40 | from .langturkishmodel import Latin5TurkishModel 41 | 42 | 43 | class SBCSGroupProber(CharSetGroupProber): 44 | def __init__(self): 45 | super(SBCSGroupProber, self).__init__() 46 | self.probers = [ 47 | SingleByteCharSetProber(Win1251CyrillicModel), 48 | SingleByteCharSetProber(Koi8rModel), 49 | SingleByteCharSetProber(Latin5CyrillicModel), 50 | SingleByteCharSetProber(MacCyrillicModel), 51 | SingleByteCharSetProber(Ibm866Model), 52 | SingleByteCharSetProber(Ibm855Model), 53 | SingleByteCharSetProber(Latin7GreekModel), 54 | SingleByteCharSetProber(Win1253GreekModel), 55 | SingleByteCharSetProber(Latin5BulgarianModel), 56 | SingleByteCharSetProber(Win1251BulgarianModel), 57 | # TODO: Restore Hungarian encodings (iso-8859-2 and windows-1250) 58 | # after we retrain model. 59 | # SingleByteCharSetProber(Latin2HungarianModel), 60 | # SingleByteCharSetProber(Win1250HungarianModel), 61 | SingleByteCharSetProber(TIS620ThaiModel), 62 | SingleByteCharSetProber(Latin5TurkishModel), 63 | ] 64 | hebrew_prober = HebrewProber() 65 | logical_hebrew_prober = SingleByteCharSetProber(Win1255HebrewModel, 66 | False, hebrew_prober) 67 | visual_hebrew_prober = SingleByteCharSetProber(Win1255HebrewModel, True, 68 | hebrew_prober) 69 | hebrew_prober.set_model_probers(logical_hebrew_prober, visual_hebrew_prober) 70 | self.probers.extend([hebrew_prober, logical_hebrew_prober, 71 | visual_hebrew_prober]) 72 | 73 | self.reset() 74 | -------------------------------------------------------------------------------- /thirdparty/chardet/sjisprober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .mbcharsetprober import MultiByteCharSetProber 29 | from .codingstatemachine import CodingStateMachine 30 | from .chardistribution import SJISDistributionAnalysis 31 | from .jpcntx import SJISContextAnalysis 32 | from .mbcssm import SJIS_SM_MODEL 33 | from .enums import ProbingState, MachineState 34 | 35 | 36 | class SJISProber(MultiByteCharSetProber): 37 | def __init__(self): 38 | super(SJISProber, self).__init__() 39 | self.coding_sm = CodingStateMachine(SJIS_SM_MODEL) 40 | self.distribution_analyzer = SJISDistributionAnalysis() 41 | self.context_analyzer = SJISContextAnalysis() 42 | self.reset() 43 | 44 | def reset(self): 45 | super(SJISProber, self).reset() 46 | self.context_analyzer.reset() 47 | 48 | @property 49 | def charset_name(self): 50 | return self.context_analyzer.charset_name 51 | 52 | @property 53 | def language(self): 54 | return "Japanese" 55 | 56 | def feed(self, byte_str): 57 | for i in range(len(byte_str)): 58 | coding_state = self.coding_sm.next_state(byte_str[i]) 59 | if coding_state == MachineState.ERROR: 60 | self.logger.debug('%s %s prober hit error at byte %s', 61 | self.charset_name, self.language, i) 62 | self._state = ProbingState.NOT_ME 63 | break 64 | elif coding_state == MachineState.ITS_ME: 65 | self._state = ProbingState.FOUND_IT 66 | break 67 | elif coding_state == MachineState.START: 68 | char_len = self.coding_sm.get_current_charlen() 69 | if i == 0: 70 | self._last_char[1] = byte_str[0] 71 | self.context_analyzer.feed(self._last_char[2 - char_len:], 72 | char_len) 73 | self.distribution_analyzer.feed(self._last_char, char_len) 74 | else: 75 | self.context_analyzer.feed(byte_str[i + 1 - char_len:i + 3 76 | - char_len], char_len) 77 | self.distribution_analyzer.feed(byte_str[i - 1:i + 1], 78 | char_len) 79 | 80 | self._last_char[0] = byte_str[-1] 81 | 82 | if self.state == ProbingState.DETECTING: 83 | if (self.context_analyzer.got_enough_data() and 84 | (self.get_confidence() > self.SHORTCUT_THRESHOLD)): 85 | self._state = ProbingState.FOUND_IT 86 | 87 | return self.state 88 | 89 | def get_confidence(self): 90 | context_conf = self.context_analyzer.get_confidence() 91 | distrib_conf = self.distribution_analyzer.get_confidence() 92 | return max(context_conf, distrib_conf) 93 | -------------------------------------------------------------------------------- /thirdparty/chardet/utf8prober.py: -------------------------------------------------------------------------------- 1 | ######################## BEGIN LICENSE BLOCK ######################## 2 | # The Original Code is mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 
16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from .charsetprober import CharSetProber 29 | from .enums import ProbingState, MachineState 30 | from .codingstatemachine import CodingStateMachine 31 | from .mbcssm import UTF8_SM_MODEL 32 | 33 | 34 | 35 | class UTF8Prober(CharSetProber): 36 | ONE_CHAR_PROB = 0.5 37 | 38 | def __init__(self): 39 | super(UTF8Prober, self).__init__() 40 | self.coding_sm = CodingStateMachine(UTF8_SM_MODEL) 41 | self._num_mb_chars = None 42 | self.reset() 43 | 44 | def reset(self): 45 | super(UTF8Prober, self).reset() 46 | self.coding_sm.reset() 47 | self._num_mb_chars = 0 48 | 49 | @property 50 | def charset_name(self): 51 | return "utf-8" 52 | 53 | @property 54 | def language(self): 55 | return "" 56 | 57 | def feed(self, byte_str): 58 | for c in byte_str: 59 | coding_state = self.coding_sm.next_state(c) 60 | if coding_state == MachineState.ERROR: 61 | self._state = ProbingState.NOT_ME 62 | break 63 | elif coding_state == MachineState.ITS_ME: 64 | self._state = ProbingState.FOUND_IT 65 | break 66 | elif coding_state == MachineState.START: 67 | if self.coding_sm.get_current_charlen() >= 2: 68 | self._num_mb_chars += 1 69 | 70 | if self.state == ProbingState.DETECTING: 71 | if self.get_confidence() > self.SHORTCUT_THRESHOLD: 72 | self._state = ProbingState.FOUND_IT 73 | 74 | return self.state 75 | 76 | def get_confidence(self): 77 | unlike = 0.99 78 | if self._num_mb_chars < 6: 79 | unlike *= self.ONE_CHAR_PROB ** self._num_mb_chars 80 | return 1.0 - unlike 81 | else: 82 | return unlike 83 | -------------------------------------------------------------------------------- /thirdparty/chardet/version.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module exists only to simplify retrieving the version number of chardet 3 | from within setup.py and from chardet subpackages. 
4 | 5 | :author: Dan Blanchard (dan.blanchard@gmail.com) 6 | """ 7 | 8 | __version__ = "3.0.4" 9 | VERSION = __version__.split('.') 10 | -------------------------------------------------------------------------------- /thirdparty/connection/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkgh006/assetscan/12492689a4ec214fb68019ffa2b1903f9ae4c836/thirdparty/connection/__init__.py -------------------------------------------------------------------------------- /thirdparty/connection/compat.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | PY2 = sys.version_info[0] == 2 4 | 5 | if PY2: 6 | string_types = basestring, 7 | from urllib import quote_plus, urlencode, unquote 8 | from urlparse import urlparse 9 | from itertools import imap as map 10 | else: 11 | string_types = str, bytes 12 | from urllib.parse import quote_plus, urlencode, urlparse, unquote 13 | map = map 14 | -------------------------------------------------------------------------------- /thirdparty/connection/exceptions.py: -------------------------------------------------------------------------------- 1 | __all__ = [ 2 | 'ImproperlyConfigured', 'ElasticsearchException', 'SerializationError', 3 | 'TransportError', 'NotFoundError', 'ConflictError', 'RequestError', 'ConnectionError', 4 | 'SSLError', 'ConnectionTimeout' 5 | ] 6 | 7 | class ImproperlyConfigured(Exception): 8 | """ 9 | Exception raised when the config passed to the client is inconsistent or invalid. 10 | """ 11 | 12 | 13 | class ElasticsearchException(Exception): 14 | """ 15 | Base class for all exceptions raised by this package's operations (doesn't 16 | apply to :class:`~elasticsearch.ImproperlyConfigured`). 17 | """ 18 | 19 | 20 | class SerializationError(ElasticsearchException): 21 | """ 22 | Data passed in failed to serialize properly in the ``Serializer`` being 23 | used. 24 | """ 25 | 26 | 27 | class TransportError(ElasticsearchException): 28 | """ 29 | Exception raised when ES returns a non-OK (>=400) HTTP status code. Or when 30 | an actual connection error happens; in that case the ``status_code`` will 31 | be set to ``'N/A'``. 32 | """ 33 | @property 34 | def status_code(self): 35 | """ 36 | The HTTP status code of the response that precipitated the error or 37 | ``'N/A'`` if not applicable. 38 | """ 39 | return self.args[0] 40 | 41 | @property 42 | def error(self): 43 | """ A string error message. """ 44 | return self.args[1] 45 | 46 | @property 47 | def info(self): 48 | """ Dict of returned error info from ES, where available. """ 49 | return self.args[2] 50 | 51 | def __str__(self): 52 | cause = '' 53 | try: 54 | if self.info: 55 | cause = ', %r' % self.info['error']['root_cause'][0]['reason'] 56 | except LookupError: 57 | pass 58 | return 'TransportError(%s, %r%s)' % (self.status_code, self.error, cause) 59 | 60 | 61 | class ConnectionError(TransportError): 62 | """ 63 | Error raised when there was an exception while talking to ES. Original 64 | exception from the underlying :class:`~elasticsearch.Connection` 65 | implementation is available as ``.info.`` 66 | """ 67 | def __str__(self): 68 | return 'ConnectionError(%s) caused by: %s(%s)' % ( 69 | self.error, self.info.__class__.__name__, self.info) 70 | 71 | 72 | class SSLError(ConnectionError): 73 | """ Error raised when encountering SSL errors. """ 74 | 75 | 76 | class ConnectionTimeout(ConnectionError): 77 | """ A network timeout. 
Doesn't cause a node retry by default. """ 78 | def __str__(self): 79 | return 'ConnectionTimeout caused by - %s(%s)' % ( 80 | self.info.__class__.__name__, self.info) 81 | 82 | 83 | class NotFoundError(TransportError): 84 | """ Exception representing a 404 status code. """ 85 | 86 | 87 | class ConflictError(TransportError): 88 | """ Exception representing a 409 status code. """ 89 | 90 | 91 | class RequestError(TransportError): 92 | """ Exception representing a 400 status code. """ 93 | 94 | 95 | class AuthenticationException(TransportError): 96 | """ Exception representing a 401 status code. """ 97 | 98 | 99 | class AuthorizationException(TransportError): 100 | """ Exception representing a 403 status code. """ 101 | 102 | # more generic mappings from status_code to python exceptions 103 | HTTP_EXCEPTIONS = { 104 | 400: RequestError, 105 | 401: AuthenticationException, 106 | 403: AuthorizationException, 107 | 404: NotFoundError, 108 | 409: ConflictError, 109 | } 110 | -------------------------------------------------------------------------------- /thirdparty/connection/http_urllib3.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | from thirdparty.connection.compat import urlencode 3 | import urllib3 4 | from thirdparty.connection.exceptions import ConnectionError 5 | urllib3.disable_warnings() 6 | 7 | class HttpUtil(object): 8 | def __init__(self): 9 | self.pool = urllib3.PoolManager() 10 | self.default_headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36', "Cookie": "rememberMe=xx"} 11 | def request(self, url, params=None, body=None, timeout=None, headers=None, redirect=False, **kwargs): 12 | if params: 13 | url = '%s?%s' % (url, urlencode(params)) 14 | if body: 15 | method = "POST" 16 | else: 17 | method = "GET" 18 | try: 19 | if headers is None: 20 | headers = {} 21 | headers = dict(headers)  # work on a copy so the caller's dict is never mutated 22 | if not isinstance(url, str): 23 | url = url.encode('utf-8') 24 | if not isinstance(method, str): 25 | method = method.encode('utf-8') 26 | if redirect: 27 | retries = 3 28 | else: 29 | retries = False 30 | headers.update(self.default_headers) 31 | response = self.pool.request(method, url, body, retries=retries, redirect=redirect, headers=headers, timeout=urllib3.Timeout(connect=timeout, read=2.0), **kwargs) 32 | response.content = response.data 33 | response.status_code = response.status 34 | response.resp_headers = response.getheaders() 35 | except Exception as e: 36 | raise ConnectionError('N/A', str(e), e) 37 | return response -------------------------------------------------------------------------------- /thirdparty/connection/urllib3/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | urllib3 - Thread-safe connection pooling and re-using. 3 | """ 4 | 5 | from __future__ import absolute_import 6 | import warnings 7 | 8 | from .connectionpool import ( 9 | HTTPConnectionPool, 10 | HTTPSConnectionPool, 11 | connection_from_url 12 | ) 13 | 14 | from . import exceptions 15 | from .filepost import encode_multipart_formdata 16 | from .poolmanager import PoolManager, ProxyManager, proxy_from_url 17 | from .response import HTTPResponse 18 | from .util.request import make_headers 19 | from .util.url import get_host 20 | from .util.timeout import Timeout 21 | from .util.retry import Retry 22 | 23 | 24 | # Set default logging handler to avoid "No handler found" warnings.
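# Editor's sketch (added, placeholder URL): minimal use of the HttpUtil wrapper from
# thirdparty/connection/http_urllib3.py above. The verb is inferred from the presence of
# a body (POST) or its absence (GET), params are appended to the query string, and the
# default User-Agent/Cookie headers are merged in. Failures surface as the ConnectionError
# defined in exceptions.py, whose (status_code, error, info) slots come from TransportError.
from thirdparty.connection.http_urllib3 import HttpUtil

http = HttpUtil()
resp = http.request("http://127.0.0.1:8080/login", params={"user": "admin"}, timeout=3.0)
print(resp.status_code)
print(len(resp.content))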
25 | import logging 26 | try: # Python 2.7+ 27 | from logging import NullHandler 28 | except ImportError: 29 | class NullHandler(logging.Handler): 30 | def emit(self, record): 31 | pass 32 | 33 | __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)' 34 | __license__ = 'MIT' 35 | __version__ = '1.22' 36 | 37 | __all__ = ( 38 | 'HTTPConnectionPool', 39 | 'HTTPSConnectionPool', 40 | 'PoolManager', 41 | 'ProxyManager', 42 | 'HTTPResponse', 43 | 'Retry', 44 | 'Timeout', 45 | 'add_stderr_logger', 46 | 'connection_from_url', 47 | 'disable_warnings', 48 | 'encode_multipart_formdata', 49 | 'get_host', 50 | 'make_headers', 51 | 'proxy_from_url', 52 | ) 53 | 54 | logging.getLogger(__name__).addHandler(NullHandler()) 55 | 56 | 57 | def add_stderr_logger(level=logging.DEBUG): 58 | """ 59 | Helper for quickly adding a StreamHandler to the logger. Useful for 60 | debugging. 61 | 62 | Returns the handler after adding it. 63 | """ 64 | # This method needs to be in this __init__.py to get the __name__ correct 65 | # even if urllib3 is vendored within another package. 66 | logger = logging.getLogger(__name__) 67 | handler = logging.StreamHandler() 68 | handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s')) 69 | logger.addHandler(handler) 70 | logger.setLevel(level) 71 | logger.debug('Added a stderr logging handler to logger: %s', __name__) 72 | return handler 73 | 74 | 75 | # ... Clean up. 76 | del NullHandler 77 | 78 | 79 | # All warning filters *must* be appended unless you're really certain that they 80 | # shouldn't be: otherwise, it's very hard for users to use most Python 81 | # mechanisms to silence them. 82 | # SecurityWarning's always go off by default. 83 | warnings.simplefilter('always', exceptions.SecurityWarning, append=True) 84 | # SubjectAltNameWarning's should go off once per host 85 | warnings.simplefilter('default', exceptions.SubjectAltNameWarning, append=True) 86 | # InsecurePlatformWarning's don't vary between requests, so we keep it default. 87 | warnings.simplefilter('default', exceptions.InsecurePlatformWarning, 88 | append=True) 89 | # SNIMissingWarnings should go off only once. 90 | warnings.simplefilter('default', exceptions.SNIMissingWarning, append=True) 91 | 92 | 93 | def disable_warnings(category=exceptions.HTTPWarning): 94 | """ 95 | Helper for quickly disabling all urllib3 warnings. 
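Editor's note (added): the HttpUtil wrapper in thirdparty/connection/http_urllib3.py calls urllib3.disable_warnings() at import time; with the default HTTPWarning category that silences InsecureRequestWarning and the other HTTPWarning subclasses defined in exceptions.py. add_stderr_logger() above is the counterpart for turning verbose logging back on while debugging.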
96 | """ 97 | warnings.simplefilter('ignore', category) 98 | -------------------------------------------------------------------------------- /thirdparty/connection/urllib3/contrib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkgh006/assetscan/12492689a4ec214fb68019ffa2b1903f9ae4c836/thirdparty/connection/urllib3/contrib/__init__.py -------------------------------------------------------------------------------- /thirdparty/connection/urllib3/contrib/_securetransport/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkgh006/assetscan/12492689a4ec214fb68019ffa2b1903f9ae4c836/thirdparty/connection/urllib3/contrib/_securetransport/__init__.py -------------------------------------------------------------------------------- /thirdparty/connection/urllib3/contrib/ntlmpool.py: -------------------------------------------------------------------------------- 1 | """ 2 | NTLM authenticating pool, contributed by erikcederstran 3 | 4 | Issue #10, see: http://code.google.com/p/urllib3/issues/detail?id=10 5 | """ 6 | from __future__ import absolute_import 7 | 8 | from logging import getLogger 9 | from ntlm import ntlm 10 | 11 | from .. import HTTPSConnectionPool 12 | from ..packages.six.moves.http_client import HTTPSConnection 13 | 14 | 15 | log = getLogger(__name__) 16 | 17 | 18 | class NTLMConnectionPool(HTTPSConnectionPool): 19 | """ 20 | Implements an NTLM authentication version of an urllib3 connection pool 21 | """ 22 | 23 | scheme = 'https' 24 | 25 | def __init__(self, user, pw, authurl, *args, **kwargs): 26 | """ 27 | authurl is a random URL on the server that is protected by NTLM. 28 | user is the Windows user, probably in the DOMAIN\\username format. 29 | pw is the password for the user. 30 | """ 31 | super(NTLMConnectionPool, self).__init__(*args, **kwargs) 32 | self.authurl = authurl 33 | self.rawuser = user 34 | user_parts = user.split('\\', 1) 35 | self.domain = user_parts[0].upper() 36 | self.user = user_parts[1] 37 | self.pw = pw 38 | 39 | def _new_conn(self): 40 | # Performs the NTLM handshake that secures the connection. The socket 41 | # must be kept open while requests are performed. 42 | self.num_connections += 1 43 | log.debug('Starting NTLM HTTPS connection no. 
%d: https://%s%s', 44 | self.num_connections, self.host, self.authurl) 45 | 46 | headers = {} 47 | headers['Connection'] = 'Keep-Alive' 48 | req_header = 'Authorization' 49 | resp_header = 'www-authenticate' 50 | 51 | conn = HTTPSConnection(host=self.host, port=self.port) 52 | 53 | # Send negotiation message 54 | headers[req_header] = ( 55 | 'NTLM %s' % ntlm.create_NTLM_NEGOTIATE_MESSAGE(self.rawuser)) 56 | log.debug('Request headers: %s', headers) 57 | conn.request('GET', self.authurl, None, headers) 58 | res = conn.getresponse() 59 | reshdr = dict(res.getheaders()) 60 | log.debug('Response status: %s %s', res.status, res.reason) 61 | log.debug('Response headers: %s', reshdr) 62 | log.debug('Response data: %s [...]', res.read(100)) 63 | 64 | # Remove the reference to the socket, so that it can not be closed by 65 | # the response object (we want to keep the socket open) 66 | res.fp = None 67 | 68 | # Server should respond with a challenge message 69 | auth_header_values = reshdr[resp_header].split(', ') 70 | auth_header_value = None 71 | for s in auth_header_values: 72 | if s[:5] == 'NTLM ': 73 | auth_header_value = s[5:] 74 | if auth_header_value is None: 75 | raise Exception('Unexpected %s response header: %s' % 76 | (resp_header, reshdr[resp_header])) 77 | 78 | # Send authentication message 79 | ServerChallenge, NegotiateFlags = \ 80 | ntlm.parse_NTLM_CHALLENGE_MESSAGE(auth_header_value) 81 | auth_msg = ntlm.create_NTLM_AUTHENTICATE_MESSAGE(ServerChallenge, 82 | self.user, 83 | self.domain, 84 | self.pw, 85 | NegotiateFlags) 86 | headers[req_header] = 'NTLM %s' % auth_msg 87 | log.debug('Request headers: %s', headers) 88 | conn.request('GET', self.authurl, None, headers) 89 | res = conn.getresponse() 90 | log.debug('Response status: %s %s', res.status, res.reason) 91 | log.debug('Response headers: %s', dict(res.getheaders())) 92 | log.debug('Response data: %s [...]', res.read()[:100]) 93 | if res.status != 200: 94 | if res.status == 401: 95 | raise Exception('Server rejected request: wrong ' 96 | 'username or password') 97 | raise Exception('Wrong server response: %s %s' % 98 | (res.status, res.reason)) 99 | 100 | res.fp = None 101 | log.debug('Connection established') 102 | return conn 103 | 104 | def urlopen(self, method, url, body=None, headers=None, retries=3, 105 | redirect=True, assert_same_host=True): 106 | if headers is None: 107 | headers = {} 108 | headers['Connection'] = 'Keep-Alive' 109 | return super(NTLMConnectionPool, self).urlopen(method, url, body, 110 | headers, retries, 111 | redirect, 112 | assert_same_host) 113 | -------------------------------------------------------------------------------- /thirdparty/connection/urllib3/contrib/socks.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This module contains provisional support for SOCKS proxies from within 4 | urllib3. This module supports SOCKS4 (specifically the SOCKS4A variant) and 5 | SOCKS5. To enable its functionality, either install PySocks or install this 6 | module with the ``socks`` extra. 7 | 8 | The SOCKS implementation supports the full range of urllib3 features. It also 9 | supports the following SOCKS features: 10 | 11 | - SOCKS4 12 | - SOCKS4a 13 | - SOCKS5 14 | - Usernames and passwords for the SOCKS proxy 15 | 16 | Known Limitations: 17 | 18 | - Currently PySocks does not support contacting remote websites via literal 19 | IPv6 addresses. Any such connection attempt will fail. You must use a domain 20 | name. 
21 | - Currently PySocks does not support IPv6 connections to the SOCKS proxy. Any 22 | such connection attempt will fail. 23 | """ 24 | from __future__ import absolute_import 25 | 26 | try: 27 | import socks 28 | except ImportError: 29 | import warnings 30 | from ..exceptions import DependencyWarning 31 | 32 | warnings.warn(( 33 | 'SOCKS support in urllib3 requires the installation of optional ' 34 | 'dependencies: specifically, PySocks. For more information, see ' 35 | 'https://urllib3.readthedocs.io/en/latest/contrib.html#socks-proxies' 36 | ), 37 | DependencyWarning 38 | ) 39 | raise 40 | 41 | from socket import error as SocketError, timeout as SocketTimeout 42 | 43 | from ..connection import ( 44 | HTTPConnection, HTTPSConnection 45 | ) 46 | from ..connectionpool import ( 47 | HTTPConnectionPool, HTTPSConnectionPool 48 | ) 49 | from ..exceptions import ConnectTimeoutError, NewConnectionError 50 | from ..poolmanager import PoolManager 51 | from ..util.url import parse_url 52 | 53 | try: 54 | import ssl 55 | except ImportError: 56 | ssl = None 57 | 58 | 59 | class SOCKSConnection(HTTPConnection): 60 | """ 61 | A plain-text HTTP connection that connects via a SOCKS proxy. 62 | """ 63 | def __init__(self, *args, **kwargs): 64 | self._socks_options = kwargs.pop('_socks_options') 65 | super(SOCKSConnection, self).__init__(*args, **kwargs) 66 | 67 | def _new_conn(self): 68 | """ 69 | Establish a new connection via the SOCKS proxy. 70 | """ 71 | extra_kw = {} 72 | if self.source_address: 73 | extra_kw['source_address'] = self.source_address 74 | 75 | if self.socket_options: 76 | extra_kw['socket_options'] = self.socket_options 77 | 78 | try: 79 | conn = socks.create_connection( 80 | (self.host, self.port), 81 | proxy_type=self._socks_options['socks_version'], 82 | proxy_addr=self._socks_options['proxy_host'], 83 | proxy_port=self._socks_options['proxy_port'], 84 | proxy_username=self._socks_options['username'], 85 | proxy_password=self._socks_options['password'], 86 | proxy_rdns=self._socks_options['rdns'], 87 | timeout=self.timeout, 88 | **extra_kw 89 | ) 90 | 91 | except SocketTimeout as e: 92 | raise ConnectTimeoutError( 93 | self, "Connection to %s timed out. (connect timeout=%s)" % 94 | (self.host, self.timeout)) 95 | 96 | except socks.ProxyError as e: 97 | # This is fragile as hell, but it seems to be the only way to raise 98 | # useful errors here. 99 | if e.socket_err: 100 | error = e.socket_err 101 | if isinstance(error, SocketTimeout): 102 | raise ConnectTimeoutError( 103 | self, 104 | "Connection to %s timed out. (connect timeout=%s)" % 105 | (self.host, self.timeout) 106 | ) 107 | else: 108 | raise NewConnectionError( 109 | self, 110 | "Failed to establish a new connection: %s" % error 111 | ) 112 | else: 113 | raise NewConnectionError( 114 | self, 115 | "Failed to establish a new connection: %s" % e 116 | ) 117 | 118 | except SocketError as e: # Defensive: PySocks should catch all these. 119 | raise NewConnectionError( 120 | self, "Failed to establish a new connection: %s" % e) 121 | 122 | return conn 123 | 124 | 125 | # We don't need to duplicate the Verified/Unverified distinction from 126 | # urllib3/connection.py here because the HTTPSConnection will already have been 127 | # correctly set to either the Verified or Unverified form by that module. This 128 | # means the SOCKSHTTPSConnection will automatically be the correct type. 
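# Editor's sketch (added, placeholder proxy address): the SOCKSProxyManager defined at
# the bottom of this module is used like any other PoolManager. A 'socks5h://' URL makes
# the proxy resolve DNS (rdns=True); PySocks must be installed for the import at the top
# of this file to succeed.
from thirdparty.connection.urllib3.contrib.socks import SOCKSProxyManager

proxy = SOCKSProxyManager('socks5h://127.0.0.1:1080/', username='user', password='pass')
resp = proxy.request('GET', 'http://example.com/')
print(resp.status)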
129 | class SOCKSHTTPSConnection(SOCKSConnection, HTTPSConnection): 130 | pass 131 | 132 | 133 | class SOCKSHTTPConnectionPool(HTTPConnectionPool): 134 | ConnectionCls = SOCKSConnection 135 | 136 | 137 | class SOCKSHTTPSConnectionPool(HTTPSConnectionPool): 138 | ConnectionCls = SOCKSHTTPSConnection 139 | 140 | 141 | class SOCKSProxyManager(PoolManager): 142 | """ 143 | A version of the urllib3 ProxyManager that routes connections via the 144 | defined SOCKS proxy. 145 | """ 146 | pool_classes_by_scheme = { 147 | 'http': SOCKSHTTPConnectionPool, 148 | 'https': SOCKSHTTPSConnectionPool, 149 | } 150 | 151 | def __init__(self, proxy_url, username=None, password=None, 152 | num_pools=10, headers=None, **connection_pool_kw): 153 | parsed = parse_url(proxy_url) 154 | 155 | if parsed.scheme == 'socks5': 156 | socks_version = socks.PROXY_TYPE_SOCKS5 157 | rdns = False 158 | elif parsed.scheme == 'socks5h': 159 | socks_version = socks.PROXY_TYPE_SOCKS5 160 | rdns = True 161 | elif parsed.scheme == 'socks4': 162 | socks_version = socks.PROXY_TYPE_SOCKS4 163 | rdns = False 164 | elif parsed.scheme == 'socks4a': 165 | socks_version = socks.PROXY_TYPE_SOCKS4 166 | rdns = True 167 | else: 168 | raise ValueError( 169 | "Unable to determine SOCKS version from %s" % proxy_url 170 | ) 171 | 172 | self.proxy_url = proxy_url 173 | 174 | socks_options = { 175 | 'socks_version': socks_version, 176 | 'proxy_host': parsed.host, 177 | 'proxy_port': parsed.port, 178 | 'username': username, 179 | 'password': password, 180 | 'rdns': rdns 181 | } 182 | connection_pool_kw['_socks_options'] = socks_options 183 | 184 | super(SOCKSProxyManager, self).__init__( 185 | num_pools, headers, **connection_pool_kw 186 | ) 187 | 188 | self.pool_classes_by_scheme = SOCKSProxyManager.pool_classes_by_scheme 189 | -------------------------------------------------------------------------------- /thirdparty/connection/urllib3/exceptions.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .packages.six.moves.http_client import ( 3 | IncompleteRead as httplib_IncompleteRead 4 | ) 5 | # Base Exceptions 6 | 7 | 8 | class HTTPError(Exception): 9 | "Base exception used by this module." 10 | pass 11 | 12 | 13 | class HTTPWarning(Warning): 14 | "Base warning used by this module." 15 | pass 16 | 17 | 18 | class PoolError(HTTPError): 19 | "Base exception for errors caused within a pool." 20 | def __init__(self, pool, message): 21 | self.pool = pool 22 | HTTPError.__init__(self, "%s: %s" % (pool, message)) 23 | 24 | def __reduce__(self): 25 | # For pickling purposes. 26 | return self.__class__, (None, None) 27 | 28 | 29 | class RequestError(PoolError): 30 | "Base exception for PoolErrors that have associated URLs." 31 | def __init__(self, pool, url, message): 32 | self.url = url 33 | PoolError.__init__(self, pool, message) 34 | 35 | def __reduce__(self): 36 | # For pickling purposes. 37 | return self.__class__, (None, self.url, None) 38 | 39 | 40 | class SSLError(HTTPError): 41 | "Raised when SSL certificate fails in an HTTPS connection." 42 | pass 43 | 44 | 45 | class ProxyError(HTTPError): 46 | "Raised when the connection to a proxy fails." 47 | pass 48 | 49 | 50 | class DecodeError(HTTPError): 51 | "Raised when automatic decoding based on Content-Type fails." 52 | pass 53 | 54 | 55 | class ProtocolError(HTTPError): 56 | "Raised when something unexpected happens mid-request/response." 
57 | pass 58 | 59 | 60 | #: Renamed to ProtocolError but aliased for backwards compatibility. 61 | ConnectionError = ProtocolError 62 | 63 | 64 | # Leaf Exceptions 65 | 66 | class MaxRetryError(RequestError): 67 | """Raised when the maximum number of retries is exceeded. 68 | 69 | :param pool: The connection pool 70 | :type pool: :class:`~urllib3.connectionpool.HTTPConnectionPool` 71 | :param string url: The requested Url 72 | :param exceptions.Exception reason: The underlying error 73 | 74 | """ 75 | 76 | def __init__(self, pool, url, reason=None): 77 | self.reason = reason 78 | 79 | message = "Max retries exceeded with url: %s (Caused by %r)" % ( 80 | url, reason) 81 | 82 | RequestError.__init__(self, pool, url, message) 83 | 84 | 85 | class HostChangedError(RequestError): 86 | "Raised when an existing pool gets a request for a foreign host." 87 | 88 | def __init__(self, pool, url, retries=3): 89 | message = "Tried to open a foreign host with url: %s" % url 90 | RequestError.__init__(self, pool, url, message) 91 | self.retries = retries 92 | 93 | 94 | class TimeoutStateError(HTTPError): 95 | """ Raised when passing an invalid state to a timeout """ 96 | pass 97 | 98 | 99 | class TimeoutError(HTTPError): 100 | """ Raised when a socket timeout error occurs. 101 | 102 | Catching this error will catch both :exc:`ReadTimeoutErrors 103 | ` and :exc:`ConnectTimeoutErrors `. 104 | """ 105 | pass 106 | 107 | 108 | class ReadTimeoutError(TimeoutError, RequestError): 109 | "Raised when a socket timeout occurs while receiving data from a server" 110 | pass 111 | 112 | 113 | # This timeout error does not have a URL attached and needs to inherit from the 114 | # base HTTPError 115 | class ConnectTimeoutError(TimeoutError): 116 | "Raised when a socket timeout occurs while connecting to a server" 117 | pass 118 | 119 | 120 | class NewConnectionError(ConnectTimeoutError, PoolError): 121 | "Raised when we fail to establish a new connection. Usually ECONNREFUSED." 122 | pass 123 | 124 | 125 | class EmptyPoolError(PoolError): 126 | "Raised when a pool runs out of connections and no more are allowed." 127 | pass 128 | 129 | 130 | class ClosedPoolError(PoolError): 131 | "Raised when a request enters a pool after the pool has been closed." 132 | pass 133 | 134 | 135 | class LocationValueError(ValueError, HTTPError): 136 | "Raised when there is something wrong with a given URL input." 137 | pass 138 | 139 | 140 | class LocationParseError(LocationValueError): 141 | "Raised when get_host or similar fails to parse the URL input." 142 | 143 | def __init__(self, location): 144 | message = "Failed to parse: %s" % location 145 | HTTPError.__init__(self, message) 146 | 147 | self.location = location 148 | 149 | 150 | class ResponseError(HTTPError): 151 | "Used as a container for an error reason supplied in a MaxRetryError." 152 | GENERIC_ERROR = 'too many error responses' 153 | SPECIFIC_ERROR = 'too many {status_code} error responses' 154 | 155 | 156 | class SecurityWarning(HTTPWarning): 157 | "Warned when perfoming security reducing actions" 158 | pass 159 | 160 | 161 | class SubjectAltNameWarning(SecurityWarning): 162 | "Warned when connecting to a host with a certificate missing a SAN." 163 | pass 164 | 165 | 166 | class InsecureRequestWarning(SecurityWarning): 167 | "Warned when making an unverified HTTPS request." 
168 | pass 169 | 170 | 171 | class SystemTimeWarning(SecurityWarning): 172 | "Warned when system time is suspected to be wrong" 173 | pass 174 | 175 | 176 | class InsecurePlatformWarning(SecurityWarning): 177 | "Warned when certain SSL configuration is not available on a platform." 178 | pass 179 | 180 | 181 | class SNIMissingWarning(HTTPWarning): 182 | "Warned when making a HTTPS request without SNI available." 183 | pass 184 | 185 | 186 | class DependencyWarning(HTTPWarning): 187 | """ 188 | Warned when an attempt is made to import a module with missing optional 189 | dependencies. 190 | """ 191 | pass 192 | 193 | 194 | class ResponseNotChunked(ProtocolError, ValueError): 195 | "Response needs to be chunked in order to read it as chunks." 196 | pass 197 | 198 | 199 | class BodyNotHttplibCompatible(HTTPError): 200 | """ 201 | Body should be httplib.HTTPResponse like (have an fp attribute which 202 | returns raw chunks) for read_chunked(). 203 | """ 204 | pass 205 | 206 | 207 | class IncompleteRead(HTTPError, httplib_IncompleteRead): 208 | """ 209 | Response length doesn't match expected Content-Length 210 | 211 | Subclass of http_client.IncompleteRead to allow int value 212 | for `partial` to avoid creating large objects on streamed 213 | reads. 214 | """ 215 | def __init__(self, partial, expected): 216 | super(IncompleteRead, self).__init__(partial, expected) 217 | 218 | def __repr__(self): 219 | return ('IncompleteRead(%i bytes read, ' 220 | '%i more expected)' % (self.partial, self.expected)) 221 | 222 | 223 | class InvalidHeader(HTTPError): 224 | "The header provided was somehow invalid." 225 | pass 226 | 227 | 228 | class ProxySchemeUnknown(AssertionError, ValueError): 229 | "ProxyManager does not support the supplied scheme" 230 | # TODO(t-8ch): Stop inheriting from AssertionError in v2.0. 231 | 232 | def __init__(self, scheme): 233 | message = "Not supported proxy scheme %s" % scheme 234 | super(ProxySchemeUnknown, self).__init__(message) 235 | 236 | 237 | class HeaderParsingError(HTTPError): 238 | "Raised by assert_header_parsing, but we convert it to a log.warning statement." 239 | def __init__(self, defects, unparsed_data): 240 | message = '%s, unparsed data: %r' % (defects or 'Unknown', unparsed_data) 241 | super(HeaderParsingError, self).__init__(message) 242 | 243 | 244 | class UnrewindableBodyError(HTTPError): 245 | "urllib3 encountered an error when trying to rewind a body" 246 | pass 247 | -------------------------------------------------------------------------------- /thirdparty/connection/urllib3/fields.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import email.utils 3 | import mimetypes 4 | 5 | from .packages import six 6 | 7 | 8 | def guess_content_type(filename, default='application/octet-stream'): 9 | """ 10 | Guess the "Content-Type" of a file. 11 | 12 | :param filename: 13 | The filename to guess the "Content-Type" of using :mod:`mimetypes`. 14 | :param default: 15 | If no "Content-Type" can be guessed, default to `default`. 16 | """ 17 | if filename: 18 | return mimetypes.guess_type(filename)[0] or default 19 | return default 20 | 21 | 22 | def format_header_param(name, value): 23 | """ 24 | Helper function to format and quote a single header parameter. 25 | 26 | Particularly useful for header parameters which might contain 27 | non-ASCII values, like file names. This follows RFC 2231, as 28 | suggested by RFC 2388 Section 4.4. 
29 | 30 | :param name: 31 | The name of the parameter, a string expected to be ASCII only. 32 | :param value: 33 | The value of the parameter, provided as a unicode string. 34 | """ 35 | if not any(ch in value for ch in '"\\\r\n'): 36 | result = '%s="%s"' % (name, value) 37 | try: 38 | result.encode('ascii') 39 | except (UnicodeEncodeError, UnicodeDecodeError): 40 | pass 41 | else: 42 | return result 43 | if not six.PY3 and isinstance(value, six.text_type): # Python 2: 44 | value = value.encode('utf-8') 45 | value = email.utils.encode_rfc2231(value, 'utf-8') 46 | value = '%s*=%s' % (name, value) 47 | return value 48 | 49 | 50 | class RequestField(object): 51 | """ 52 | A data container for request body parameters. 53 | 54 | :param name: 55 | The name of this request field. 56 | :param data: 57 | The data/value body. 58 | :param filename: 59 | An optional filename of the request field. 60 | :param headers: 61 | An optional dict-like object of headers to initially use for the field. 62 | """ 63 | def __init__(self, name, data, filename=None, headers=None): 64 | self._name = name 65 | self._filename = filename 66 | self.data = data 67 | self.headers = {} 68 | if headers: 69 | self.headers = dict(headers) 70 | 71 | @classmethod 72 | def from_tuples(cls, fieldname, value): 73 | """ 74 | A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters. 75 | 76 | Supports constructing :class:`~urllib3.fields.RequestField` from 77 | parameter of key/value strings AND key/filetuple. A filetuple is a 78 | (filename, data, MIME type) tuple where the MIME type is optional. 79 | For example:: 80 | 81 | 'foo': 'bar', 82 | 'fakefile': ('foofile.txt', 'contents of foofile'), 83 | 'realfile': ('barfile.txt', open('realfile').read()), 84 | 'typedfile': ('bazfile.bin', open('bazfile').read(), 'image/jpeg'), 85 | 'nonamefile': 'contents of nonamefile field', 86 | 87 | Field names and filenames must be unicode. 88 | """ 89 | if isinstance(value, tuple): 90 | if len(value) == 3: 91 | filename, data, content_type = value 92 | else: 93 | filename, data = value 94 | content_type = guess_content_type(filename) 95 | else: 96 | filename = None 97 | content_type = None 98 | data = value 99 | 100 | request_param = cls(fieldname, data, filename=filename) 101 | request_param.make_multipart(content_type=content_type) 102 | 103 | return request_param 104 | 105 | def _render_part(self, name, value): 106 | """ 107 | Overridable helper function to format a single header parameter. 108 | 109 | :param name: 110 | The name of the parameter, a string expected to be ASCII only. 111 | :param value: 112 | The value of the parameter, provided as a unicode string. 113 | """ 114 | return format_header_param(name, value) 115 | 116 | def _render_parts(self, header_parts): 117 | """ 118 | Helper function to format and quote a single header. 119 | 120 | Useful for single headers that are composed of multiple items. E.g., 121 | 'Content-Disposition' fields. 122 | 123 | :param header_parts: 124 | A sequence of (k, v) typles or a :class:`dict` of (k, v) to format 125 | as `k1="v1"; k2="v2"; ...`. 126 | """ 127 | parts = [] 128 | iterable = header_parts 129 | if isinstance(header_parts, dict): 130 | iterable = header_parts.items() 131 | 132 | for name, value in iterable: 133 | if value is not None: 134 | parts.append(self._render_part(name, value)) 135 | 136 | return '; '.join(parts) 137 | 138 | def render_headers(self): 139 | """ 140 | Renders the headers for this request field. 
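Editor's example (added), tying the RequestField helpers in this module together::

    field = RequestField.from_tuples('fakefile', ('foofile.txt', 'contents of foofile'))
    field.render_headers()
    # -> 'Content-Disposition: form-data; name="fakefile"; filename="foofile.txt"\r\n'
    #    'Content-Type: text/plain\r\n\r\n'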
141 | """ 142 | lines = [] 143 | 144 | sort_keys = ['Content-Disposition', 'Content-Type', 'Content-Location'] 145 | for sort_key in sort_keys: 146 | if self.headers.get(sort_key, False): 147 | lines.append('%s: %s' % (sort_key, self.headers[sort_key])) 148 | 149 | for header_name, header_value in self.headers.items(): 150 | if header_name not in sort_keys: 151 | if header_value: 152 | lines.append('%s: %s' % (header_name, header_value)) 153 | 154 | lines.append('\r\n') 155 | return '\r\n'.join(lines) 156 | 157 | def make_multipart(self, content_disposition=None, content_type=None, 158 | content_location=None): 159 | """ 160 | Makes this request field into a multipart request field. 161 | 162 | This method overrides "Content-Disposition", "Content-Type" and 163 | "Content-Location" headers to the request parameter. 164 | 165 | :param content_type: 166 | The 'Content-Type' of the request body. 167 | :param content_location: 168 | The 'Content-Location' of the request body. 169 | 170 | """ 171 | self.headers['Content-Disposition'] = content_disposition or 'form-data' 172 | self.headers['Content-Disposition'] += '; '.join([ 173 | '', self._render_parts( 174 | (('name', self._name), ('filename', self._filename)) 175 | ) 176 | ]) 177 | self.headers['Content-Type'] = content_type 178 | self.headers['Content-Location'] = content_location 179 | -------------------------------------------------------------------------------- /thirdparty/connection/urllib3/filepost.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import codecs 3 | 4 | from uuid import uuid4 5 | from io import BytesIO 6 | 7 | from .packages import six 8 | from .packages.six import b 9 | from .fields import RequestField 10 | 11 | writer = codecs.lookup('utf-8')[3] 12 | 13 | 14 | def choose_boundary(): 15 | """ 16 | Our embarrassingly-simple replacement for mimetools.choose_boundary. 17 | """ 18 | return uuid4().hex 19 | 20 | 21 | def iter_field_objects(fields): 22 | """ 23 | Iterate over fields. 24 | 25 | Supports list of (k, v) tuples and dicts, and lists of 26 | :class:`~urllib3.fields.RequestField`. 27 | 28 | """ 29 | if isinstance(fields, dict): 30 | i = six.iteritems(fields) 31 | else: 32 | i = iter(fields) 33 | 34 | for field in i: 35 | if isinstance(field, RequestField): 36 | yield field 37 | else: 38 | yield RequestField.from_tuples(*field) 39 | 40 | 41 | def iter_fields(fields): 42 | """ 43 | .. deprecated:: 1.6 44 | 45 | Iterate over fields. 46 | 47 | The addition of :class:`~urllib3.fields.RequestField` makes this function 48 | obsolete. Instead, use :func:`iter_field_objects`, which returns 49 | :class:`~urllib3.fields.RequestField` objects. 50 | 51 | Supports list of (k, v) tuples and dicts. 52 | """ 53 | if isinstance(fields, dict): 54 | return ((k, v) for k, v in six.iteritems(fields)) 55 | 56 | return ((k, v) for k, v in fields) 57 | 58 | 59 | def encode_multipart_formdata(fields, boundary=None): 60 | """ 61 | Encode a dictionary of ``fields`` using the multipart/form-data MIME format. 62 | 63 | :param fields: 64 | Dictionary of fields or list of (key, :class:`~urllib3.fields.RequestField`). 65 | 66 | :param boundary: 67 | If not specified, then a random boundary will be generated using 68 | :func:`mimetools.choose_boundary`. 
69 | """ 70 | body = BytesIO() 71 | if boundary is None: 72 | boundary = choose_boundary() 73 | 74 | for field in iter_field_objects(fields): 75 | body.write(b('--%s\r\n' % (boundary))) 76 | 77 | writer(body).write(field.render_headers()) 78 | data = field.data 79 | 80 | if isinstance(data, int): 81 | data = str(data) # Backwards compatibility 82 | 83 | if isinstance(data, six.text_type): 84 | writer(body).write(data) 85 | else: 86 | body.write(data) 87 | 88 | body.write(b'\r\n') 89 | 90 | body.write(b('--%s--\r\n' % (boundary))) 91 | 92 | content_type = str('multipart/form-data; boundary=%s' % boundary) 93 | 94 | return body.getvalue(), content_type 95 | -------------------------------------------------------------------------------- /thirdparty/connection/urllib3/packages/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from . import ssl_match_hostname 4 | 5 | __all__ = ('ssl_match_hostname', ) 6 | -------------------------------------------------------------------------------- /thirdparty/connection/urllib3/packages/backports/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkgh006/assetscan/12492689a4ec214fb68019ffa2b1903f9ae4c836/thirdparty/connection/urllib3/packages/backports/__init__.py -------------------------------------------------------------------------------- /thirdparty/connection/urllib3/packages/backports/makefile.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | backports.makefile 4 | ~~~~~~~~~~~~~~~~~~ 5 | 6 | Backports the Python 3 ``socket.makefile`` method for use with anything that 7 | wants to create a "fake" socket object. 8 | """ 9 | import io 10 | 11 | from socket import SocketIO 12 | 13 | 14 | def backport_makefile(self, mode="r", buffering=None, encoding=None, 15 | errors=None, newline=None): 16 | """ 17 | Backport of ``socket.makefile`` from Python 3.5. 18 | """ 19 | if not set(mode) <= set(["r", "w", "b"]): 20 | raise ValueError( 21 | "invalid mode %r (only r, w, b allowed)" % (mode,) 22 | ) 23 | writing = "w" in mode 24 | reading = "r" in mode or not writing 25 | assert reading or writing 26 | binary = "b" in mode 27 | rawmode = "" 28 | if reading: 29 | rawmode += "r" 30 | if writing: 31 | rawmode += "w" 32 | raw = SocketIO(self, rawmode) 33 | self._makefile_refs += 1 34 | if buffering is None: 35 | buffering = -1 36 | if buffering < 0: 37 | buffering = io.DEFAULT_BUFFER_SIZE 38 | if buffering == 0: 39 | if not binary: 40 | raise ValueError("unbuffered streams must be binary") 41 | return raw 42 | if reading and writing: 43 | buffer = io.BufferedRWPair(raw, raw, buffering) 44 | elif reading: 45 | buffer = io.BufferedReader(raw, buffering) 46 | else: 47 | assert writing 48 | buffer = io.BufferedWriter(raw, buffering) 49 | if binary: 50 | return buffer 51 | text = io.TextIOWrapper(buffer, encoding, errors, newline) 52 | text.mode = mode 53 | return text 54 | -------------------------------------------------------------------------------- /thirdparty/connection/urllib3/packages/ssl_match_hostname/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | try: 4 | # Our match_hostname function is the same as 3.5's, so we only want to 5 | # import the match_hostname function if it's at least that good. 
6 | if sys.version_info < (3, 5): 7 | raise ImportError("Fallback to vendored code") 8 | 9 | from ssl import CertificateError, match_hostname 10 | except ImportError: 11 | try: 12 | # Backport of the function from a pypi module 13 | from backports.ssl_match_hostname import CertificateError, match_hostname 14 | except ImportError: 15 | # Our vendored copy 16 | from ._implementation import CertificateError, match_hostname 17 | 18 | # Not needed, but documenting what we provide. 19 | __all__ = ('CertificateError', 'match_hostname') 20 | -------------------------------------------------------------------------------- /thirdparty/connection/urllib3/packages/ssl_match_hostname/_implementation.py: -------------------------------------------------------------------------------- 1 | """The match_hostname() function from Python 3.3.3, essential when using SSL.""" 2 | 3 | # Note: This file is under the PSF license as the code comes from the python 4 | # stdlib. http://docs.python.org/3/license.html 5 | 6 | import re 7 | import sys 8 | 9 | # ipaddress has been backported to 2.6+ in pypi. If it is installed on the 10 | # system, use it to handle IPAddress ServerAltnames (this was added in 11 | # python-3.5) otherwise only do DNS matching. This allows 12 | # backports.ssl_match_hostname to continue to be used all the way back to 13 | # python-2.4. 14 | try: 15 | import ipaddress 16 | except ImportError: 17 | ipaddress = None 18 | 19 | __version__ = '3.5.0.1' 20 | 21 | 22 | class CertificateError(ValueError): 23 | pass 24 | 25 | 26 | def _dnsname_match(dn, hostname, max_wildcards=1): 27 | """Matching according to RFC 6125, section 6.4.3 28 | 29 | http://tools.ietf.org/html/rfc6125#section-6.4.3 30 | """ 31 | pats = [] 32 | if not dn: 33 | return False 34 | 35 | # Ported from python3-syntax: 36 | # leftmost, *remainder = dn.split(r'.') 37 | parts = dn.split(r'.') 38 | leftmost = parts[0] 39 | remainder = parts[1:] 40 | 41 | wildcards = leftmost.count('*') 42 | if wildcards > max_wildcards: 43 | # Issue #17980: avoid denials of service by refusing more 44 | # than one wildcard per fragment. A survey of established 45 | # policy among SSL implementations showed it to be a 46 | # reasonable choice. 47 | raise CertificateError( 48 | "too many wildcards in certificate DNS name: " + repr(dn)) 49 | 50 | # speed up common case w/o wildcards 51 | if not wildcards: 52 | return dn.lower() == hostname.lower() 53 | 54 | # RFC 6125, section 6.4.3, subitem 1. 55 | # The client SHOULD NOT attempt to match a presented identifier in which 56 | # the wildcard character comprises a label other than the left-most label. 57 | if leftmost == '*': 58 | # When '*' is a fragment by itself, it matches a non-empty dotless 59 | # fragment. 60 | pats.append('[^.]+') 61 | elif leftmost.startswith('xn--') or hostname.startswith('xn--'): 62 | # RFC 6125, section 6.4.3, subitem 3. 63 | # The client SHOULD NOT attempt to match a presented identifier 64 | # where the wildcard character is embedded within an A-label or 65 | # U-label of an internationalized domain name. 66 | pats.append(re.escape(leftmost)) 67 | else: 68 | # Otherwise, '*' matches any dotless string, e.g. 
www* 69 | pats.append(re.escape(leftmost).replace(r'\*', '[^.]*')) 70 | 71 | # add the remaining fragments, ignore any wildcards 72 | for frag in remainder: 73 | pats.append(re.escape(frag)) 74 | 75 | pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) 76 | return pat.match(hostname) 77 | 78 | 79 | def _to_unicode(obj): 80 | if isinstance(obj, str) and sys.version_info < (3,): 81 | obj = unicode(obj, encoding='ascii', errors='strict') 82 | return obj 83 | 84 | def _ipaddress_match(ipname, host_ip): 85 | """Exact matching of IP addresses. 86 | 87 | RFC 6125 explicitly doesn't define an algorithm for this 88 | (section 1.7.2 - "Out of Scope"). 89 | """ 90 | # OpenSSL may add a trailing newline to a subjectAltName's IP address 91 | # Divergence from upstream: ipaddress can't handle byte str 92 | ip = ipaddress.ip_address(_to_unicode(ipname).rstrip()) 93 | return ip == host_ip 94 | 95 | 96 | def match_hostname(cert, hostname): 97 | """Verify that *cert* (in decoded format as returned by 98 | SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125 99 | rules are followed, but IP addresses are not accepted for *hostname*. 100 | 101 | CertificateError is raised on failure. On success, the function 102 | returns nothing. 103 | """ 104 | if not cert: 105 | raise ValueError("empty or no certificate, match_hostname needs a " 106 | "SSL socket or SSL context with either " 107 | "CERT_OPTIONAL or CERT_REQUIRED") 108 | try: 109 | # Divergence from upstream: ipaddress can't handle byte str 110 | host_ip = ipaddress.ip_address(_to_unicode(hostname)) 111 | except ValueError: 112 | # Not an IP address (common case) 113 | host_ip = None 114 | except UnicodeError: 115 | # Divergence from upstream: Have to deal with ipaddress not taking 116 | # byte strings. addresses should be all ascii, so we consider it not 117 | # an ipaddress in this case 118 | host_ip = None 119 | except AttributeError: 120 | # Divergence from upstream: Make ipaddress library optional 121 | if ipaddress is None: 122 | host_ip = None 123 | else: 124 | raise 125 | dnsnames = [] 126 | san = cert.get('subjectAltName', ()) 127 | for key, value in san: 128 | if key == 'DNS': 129 | if host_ip is None and _dnsname_match(value, hostname): 130 | return 131 | dnsnames.append(value) 132 | elif key == 'IP Address': 133 | if host_ip is not None and _ipaddress_match(value, host_ip): 134 | return 135 | dnsnames.append(value) 136 | if not dnsnames: 137 | # The subject is only checked when there is no dNSName entry 138 | # in subjectAltName 139 | for sub in cert.get('subject', ()): 140 | for key, value in sub: 141 | # XXX according to RFC 2818, the most specific Common Name 142 | # must be used. 
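# Editor's note (added): examples of the wildcard handling in _dnsname_match() above:
#   _dnsname_match('*.example.com', 'www.example.com')     -> matches
#   _dnsname_match('*.example.com', 'a.b.example.com')     -> no match ('*' never crosses a dot)
#   _dnsname_match('a*b.example.com', 'axyzb.example.com') -> matches (partial-label wildcard)
# More than one '*' in the left-most label raises CertificateError (issue #17980 above).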
143 | if key == 'commonName': 144 | if _dnsname_match(value, hostname): 145 | return 146 | dnsnames.append(value) 147 | if len(dnsnames) > 1: 148 | raise CertificateError("hostname %r " 149 | "doesn't match either of %s" 150 | % (hostname, ', '.join(map(repr, dnsnames)))) 151 | elif len(dnsnames) == 1: 152 | raise CertificateError("hostname %r " 153 | "doesn't match %r" 154 | % (hostname, dnsnames[0])) 155 | else: 156 | raise CertificateError("no appropriate commonName or " 157 | "subjectAltName fields were found") 158 | -------------------------------------------------------------------------------- /thirdparty/connection/urllib3/request.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from .filepost import encode_multipart_formdata 4 | from .packages.six.moves.urllib.parse import urlencode 5 | 6 | 7 | __all__ = ['RequestMethods'] 8 | 9 | 10 | class RequestMethods(object): 11 | """ 12 | Convenience mixin for classes who implement a :meth:`urlopen` method, such 13 | as :class:`~urllib3.connectionpool.HTTPConnectionPool` and 14 | :class:`~urllib3.poolmanager.PoolManager`. 15 | 16 | Provides behavior for making common types of HTTP request methods and 17 | decides which type of request field encoding to use. 18 | 19 | Specifically, 20 | 21 | :meth:`.request_encode_url` is for sending requests whose fields are 22 | encoded in the URL (such as GET, HEAD, DELETE). 23 | 24 | :meth:`.request_encode_body` is for sending requests whose fields are 25 | encoded in the *body* of the request using multipart or www-form-urlencoded 26 | (such as for POST, PUT, PATCH). 27 | 28 | :meth:`.request` is for making any kind of request, it will look up the 29 | appropriate encoding format and use one of the above two methods to make 30 | the request. 31 | 32 | Initializer parameters: 33 | 34 | :param headers: 35 | Headers to include with all requests, unless other headers are given 36 | explicitly. 37 | """ 38 | 39 | _encode_url_methods = set(['DELETE', 'GET', 'HEAD', 'OPTIONS']) 40 | 41 | def __init__(self, headers=None): 42 | self.headers = headers or {} 43 | 44 | def urlopen(self, method, url, body=None, headers=None, 45 | encode_multipart=True, multipart_boundary=None, 46 | **kw): # Abstract 47 | raise NotImplemented("Classes extending RequestMethods must implement " 48 | "their own ``urlopen`` method.") 49 | 50 | def request(self, method, url, fields=None, headers=None, **urlopen_kw): 51 | """ 52 | Make a request using :meth:`urlopen` with the appropriate encoding of 53 | ``fields`` based on the ``method`` used. 54 | 55 | This is a convenience method that requires the least amount of manual 56 | effort. It can be used in most situations, while still having the 57 | option to drop down to more specific methods when necessary, such as 58 | :meth:`request_encode_url`, :meth:`request_encode_body`, 59 | or even the lowest level :meth:`urlopen`. 60 | """ 61 | method = method.upper() 62 | 63 | if method in self._encode_url_methods: 64 | return self.request_encode_url(method, url, fields=fields, 65 | headers=headers, 66 | **urlopen_kw) 67 | else: 68 | return self.request_encode_body(method, url, fields=fields, 69 | headers=headers, 70 | **urlopen_kw) 71 | 72 | def request_encode_url(self, method, url, fields=None, headers=None, 73 | **urlopen_kw): 74 | """ 75 | Make a request using :meth:`urlopen` with the ``fields`` encoded in 76 | the url. This is useful for request methods like GET, HEAD, DELETE, etc. 
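Editor's example (added): through a PoolManager (which mixes in RequestMethods), .request() above routes GET-style verbs here and POST-style verbs to request_encode_body::

    from thirdparty.connection.urllib3 import PoolManager
    pm = PoolManager()
    pm.request('GET', 'http://example.com/search', fields={'q': 'scan'})   # -> /search?q=scan
    pm.request('POST', 'http://example.com/search', fields={'q': 'scan'})  # -> multipart body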
77 | """ 78 | if headers is None: 79 | headers = self.headers 80 | 81 | extra_kw = {'headers': headers} 82 | extra_kw.update(urlopen_kw) 83 | 84 | if fields: 85 | url += '?' + urlencode(fields) 86 | 87 | return self.urlopen(method, url, **extra_kw) 88 | 89 | def request_encode_body(self, method, url, fields=None, headers=None, 90 | encode_multipart=True, multipart_boundary=None, 91 | **urlopen_kw): 92 | """ 93 | Make a request using :meth:`urlopen` with the ``fields`` encoded in 94 | the body. This is useful for request methods like POST, PUT, PATCH, etc. 95 | 96 | When ``encode_multipart=True`` (default), then 97 | :meth:`urllib3.filepost.encode_multipart_formdata` is used to encode 98 | the payload with the appropriate content type. Otherwise 99 | :meth:`urllib.urlencode` is used with the 100 | 'application/x-www-form-urlencoded' content type. 101 | 102 | Multipart encoding must be used when posting files, and it's reasonably 103 | safe to use it in other times too. However, it may break request 104 | signing, such as with OAuth. 105 | 106 | Supports an optional ``fields`` parameter of key/value strings AND 107 | key/filetuple. A filetuple is a (filename, data, MIME type) tuple where 108 | the MIME type is optional. For example:: 109 | 110 | fields = { 111 | 'foo': 'bar', 112 | 'fakefile': ('foofile.txt', 'contents of foofile'), 113 | 'realfile': ('barfile.txt', open('realfile').read()), 114 | 'typedfile': ('bazfile.bin', open('bazfile').read(), 115 | 'image/jpeg'), 116 | 'nonamefile': 'contents of nonamefile field', 117 | } 118 | 119 | When uploading a file, providing a filename (the first parameter of the 120 | tuple) is optional but recommended to best mimick behavior of browsers. 121 | 122 | Note that if ``headers`` are supplied, the 'Content-Type' header will 123 | be overwritten because it depends on the dynamic random boundary string 124 | which is used to compose the body of the request. The random boundary 125 | string can be explicitly set with the ``multipart_boundary`` parameter. 126 | """ 127 | if headers is None: 128 | headers = self.headers 129 | 130 | extra_kw = {'headers': {}} 131 | 132 | if fields: 133 | if 'body' in urlopen_kw: 134 | raise TypeError( 135 | "request got values for both 'fields' and 'body', can only specify one.") 136 | 137 | if encode_multipart: 138 | body, content_type = encode_multipart_formdata(fields, boundary=multipart_boundary) 139 | else: 140 | body, content_type = urlencode(fields), 'application/x-www-form-urlencoded' 141 | 142 | extra_kw['body'] = body 143 | extra_kw['headers'] = {'Content-Type': content_type} 144 | 145 | extra_kw['headers'].update(headers) 146 | extra_kw.update(urlopen_kw) 147 | 148 | return self.urlopen(method, url, **extra_kw) 149 | -------------------------------------------------------------------------------- /thirdparty/connection/urllib3/util/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | # For backwards compatibility, provide imports that used to be here. 
3 | from .connection import is_connection_dropped 4 | from .request import make_headers 5 | from .response import is_fp_closed 6 | from .ssl_ import ( 7 | SSLContext, 8 | HAS_SNI, 9 | IS_PYOPENSSL, 10 | IS_SECURETRANSPORT, 11 | assert_fingerprint, 12 | resolve_cert_reqs, 13 | resolve_ssl_version, 14 | ssl_wrap_socket, 15 | ) 16 | from .timeout import ( 17 | current_time, 18 | Timeout, 19 | ) 20 | 21 | from .retry import Retry 22 | from .url import ( 23 | get_host, 24 | parse_url, 25 | split_first, 26 | Url, 27 | ) 28 | from .wait import ( 29 | wait_for_read, 30 | wait_for_write 31 | ) 32 | 33 | __all__ = ( 34 | 'HAS_SNI', 35 | 'IS_PYOPENSSL', 36 | 'IS_SECURETRANSPORT', 37 | 'SSLContext', 38 | 'Retry', 39 | 'Timeout', 40 | 'Url', 41 | 'assert_fingerprint', 42 | 'current_time', 43 | 'is_connection_dropped', 44 | 'is_fp_closed', 45 | 'get_host', 46 | 'parse_url', 47 | 'make_headers', 48 | 'resolve_cert_reqs', 49 | 'resolve_ssl_version', 50 | 'split_first', 51 | 'ssl_wrap_socket', 52 | 'wait_for_read', 53 | 'wait_for_write' 54 | ) 55 | -------------------------------------------------------------------------------- /thirdparty/connection/urllib3/util/connection.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import socket 3 | from .wait import wait_for_read 4 | from .selectors import HAS_SELECT, SelectorError 5 | 6 | 7 | def is_connection_dropped(conn): # Platform-specific 8 | """ 9 | Returns True if the connection is dropped and should be closed. 10 | 11 | :param conn: 12 | :class:`httplib.HTTPConnection` object. 13 | 14 | Note: For platforms like AppEngine, this will always return ``False`` to 15 | let the platform handle connection recycling transparently for us. 16 | """ 17 | sock = getattr(conn, 'sock', False) 18 | if sock is False: # Platform-specific: AppEngine 19 | return False 20 | if sock is None: # Connection already closed (such as by httplib). 21 | return True 22 | 23 | if not HAS_SELECT: 24 | return False 25 | 26 | try: 27 | return bool(wait_for_read(sock, timeout=0.0)) 28 | except SelectorError: 29 | return True 30 | 31 | 32 | # This function is copied from socket.py in the Python 2.7 standard 33 | # library test suite. Added to its signature is only `socket_options`. 34 | # One additional modification is that we avoid binding to IPv6 servers 35 | # discovered in DNS if the system doesn't have IPv6 functionality. 36 | def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, 37 | source_address=None, socket_options=None): 38 | """Connect to *address* and return the socket object. 39 | 40 | Convenience function. Connect to *address* (a 2-tuple ``(host, 41 | port)``) and return the socket object. Passing the optional 42 | *timeout* parameter will set the timeout on the socket instance 43 | before attempting to connect. If no *timeout* is supplied, the 44 | global default timeout setting returned by :func:`getdefaulttimeout` 45 | is used. If *source_address* is set it must be a tuple of (host, port) 46 | for the socket to bind as a source address before making the connection. 47 | An host of '' or port 0 tells the OS to use the default. 48 | """ 49 | 50 | host, port = address 51 | if host.startswith('['): 52 | host = host.strip('[]') 53 | err = None 54 | 55 | # Using the value from allowed_gai_family() in the context of getaddrinfo lets 56 | # us select whether to work with IPv4 DNS records, IPv6 records, or both. 
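# Editor's note (added): compared with socket.create_connection, the version above
# also accepts socket_options, e.g.
#   create_connection(('example.com', 80), timeout=3.0,
#                     socket_options=[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)])
# _set_socket_options() below applies each tuple via sock.setsockopt(*opt) before connecting.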
57 | # The original create_connection function always returns all records. 58 | family = allowed_gai_family() 59 | 60 | for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM): 61 | af, socktype, proto, canonname, sa = res 62 | sock = None 63 | try: 64 | sock = socket.socket(af, socktype, proto) 65 | 66 | # If provided, set socket level options before connecting. 67 | _set_socket_options(sock, socket_options) 68 | 69 | if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT: 70 | sock.settimeout(timeout) 71 | if source_address: 72 | sock.bind(source_address) 73 | sock.connect(sa) 74 | return sock 75 | 76 | except socket.error as e: 77 | err = e 78 | if sock is not None: 79 | sock.close() 80 | sock = None 81 | 82 | if err is not None: 83 | raise err 84 | 85 | raise socket.error("getaddrinfo returns an empty list") 86 | 87 | 88 | def _set_socket_options(sock, options): 89 | if options is None: 90 | return 91 | 92 | for opt in options: 93 | sock.setsockopt(*opt) 94 | 95 | 96 | def allowed_gai_family(): 97 | """This function is designed to work in the context of 98 | getaddrinfo, where family=socket.AF_UNSPEC is the default and 99 | will perform a DNS search for both IPv6 and IPv4 records.""" 100 | 101 | family = socket.AF_INET 102 | if HAS_IPV6: 103 | family = socket.AF_UNSPEC 104 | return family 105 | 106 | 107 | def _has_ipv6(host): 108 | """ Returns True if the system can bind an IPv6 address. """ 109 | sock = None 110 | has_ipv6 = False 111 | 112 | if socket.has_ipv6: 113 | # has_ipv6 returns true if cPython was compiled with IPv6 support. 114 | # It does not tell us if the system has IPv6 support enabled. To 115 | # determine that we must bind to an IPv6 address. 116 | # https://github.com/shazow/urllib3/pull/611 117 | # https://bugs.python.org/issue658327 118 | try: 119 | sock = socket.socket(socket.AF_INET6) 120 | sock.bind((host, 0)) 121 | has_ipv6 = True 122 | except Exception: 123 | pass 124 | 125 | if sock: 126 | sock.close() 127 | return has_ipv6 128 | 129 | 130 | HAS_IPV6 = _has_ipv6('::1') 131 | -------------------------------------------------------------------------------- /thirdparty/connection/urllib3/util/request.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from base64 import b64encode 3 | 4 | from ..packages.six import b, integer_types 5 | from ..exceptions import UnrewindableBodyError 6 | 7 | ACCEPT_ENCODING = 'gzip,deflate' 8 | _FAILEDTELL = object() 9 | 10 | 11 | def make_headers(keep_alive=None, accept_encoding=None, user_agent=None, 12 | basic_auth=None, proxy_basic_auth=None, disable_cache=None): 13 | """ 14 | Shortcuts for generating request headers. 15 | 16 | :param keep_alive: 17 | If ``True``, adds 'connection: keep-alive' header. 18 | 19 | :param accept_encoding: 20 | Can be a boolean, list, or string. 21 | ``True`` translates to 'gzip,deflate'. 22 | List will get joined by comma. 23 | String will be used as provided. 24 | 25 | :param user_agent: 26 | String representing the user-agent you want, such as 27 | "python-urllib3/0.6" 28 | 29 | :param basic_auth: 30 | Colon-separated username:password string for 'authorization: basic ...' 31 | auth header. 32 | 33 | :param proxy_basic_auth: 34 | Colon-separated username:password string for 'proxy-authorization: basic ...' 35 | auth header. 36 | 37 | :param disable_cache: 38 | If ``True``, adds 'cache-control: no-cache' header. 
39 | 40 | Example:: 41 | 42 | >>> make_headers(keep_alive=True, user_agent="Batman/1.0") 43 | {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'} 44 | >>> make_headers(accept_encoding=True) 45 | {'accept-encoding': 'gzip,deflate'} 46 | """ 47 | headers = {} 48 | if accept_encoding: 49 | if isinstance(accept_encoding, str): 50 | pass 51 | elif isinstance(accept_encoding, list): 52 | accept_encoding = ','.join(accept_encoding) 53 | else: 54 | accept_encoding = ACCEPT_ENCODING 55 | headers['accept-encoding'] = accept_encoding 56 | 57 | if user_agent: 58 | headers['user-agent'] = user_agent 59 | 60 | if keep_alive: 61 | headers['connection'] = 'keep-alive' 62 | 63 | if basic_auth: 64 | headers['authorization'] = 'Basic ' + \ 65 | b64encode(b(basic_auth)).decode('utf-8') 66 | 67 | if proxy_basic_auth: 68 | headers['proxy-authorization'] = 'Basic ' + \ 69 | b64encode(b(proxy_basic_auth)).decode('utf-8') 70 | 71 | if disable_cache: 72 | headers['cache-control'] = 'no-cache' 73 | 74 | return headers 75 | 76 | 77 | def set_file_position(body, pos): 78 | """ 79 | If a position is provided, move file to that point. 80 | Otherwise, we'll attempt to record a position for future use. 81 | """ 82 | if pos is not None: 83 | rewind_body(body, pos) 84 | elif getattr(body, 'tell', None) is not None: 85 | try: 86 | pos = body.tell() 87 | except (IOError, OSError): 88 | # This differentiates from None, allowing us to catch 89 | # a failed `tell()` later when trying to rewind the body. 90 | pos = _FAILEDTELL 91 | 92 | return pos 93 | 94 | 95 | def rewind_body(body, body_pos): 96 | """ 97 | Attempt to rewind body to a certain position. 98 | Primarily used for request redirects and retries. 99 | 100 | :param body: 101 | File-like object that supports seek. 102 | 103 | :param int pos: 104 | Position to seek to in file. 105 | """ 106 | body_seek = getattr(body, 'seek', None) 107 | if body_seek is not None and isinstance(body_pos, integer_types): 108 | try: 109 | body_seek(body_pos) 110 | except (IOError, OSError): 111 | raise UnrewindableBodyError("An error occurred when rewinding request " 112 | "body for redirect/retry.") 113 | elif body_pos is _FAILEDTELL: 114 | raise UnrewindableBodyError("Unable to record file position for rewinding " 115 | "request body during a redirect/retry.") 116 | else: 117 | raise ValueError("body_pos must be of type integer, " 118 | "instead it was %s." % type(body_pos)) 119 | -------------------------------------------------------------------------------- /thirdparty/connection/urllib3/util/response.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from ..packages.six.moves import http_client as httplib 3 | 4 | from ..exceptions import HeaderParsingError 5 | 6 | 7 | def is_fp_closed(obj): 8 | """ 9 | Checks whether a given file-like object is closed. 10 | 11 | :param obj: 12 | The file-like object to check. 13 | """ 14 | 15 | try: 16 | # Check `isclosed()` first, in case Python3 doesn't set `closed`. 17 | # GH Issue #928 18 | return obj.isclosed() 19 | except AttributeError: 20 | pass 21 | 22 | try: 23 | # Check via the official file-like-object way. 24 | return obj.closed 25 | except AttributeError: 26 | pass 27 | 28 | try: 29 | # Check if the object is a container for another file-like object that 30 | # gets released on exhaustion (e.g. HTTPResponse). 
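# Editor's note (added): set_file_position() and rewind_body() in util/request.py above
# are used as a pair to replay a file-like request body on redirect or retry:
#   pos = set_file_position(body, None)   # records body.tell(), or _FAILEDTELL on failure
#   rewind_body(body, pos)                # seeks back so the same body can be re-sent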
31 | return obj.fp is None 32 | except AttributeError: 33 | pass 34 | 35 | raise ValueError("Unable to determine whether fp is closed.") 36 | 37 | 38 | def assert_header_parsing(headers): 39 | """ 40 | Asserts whether all headers have been successfully parsed. 41 | Extracts encountered errors from the result of parsing headers. 42 | 43 | Only works on Python 3. 44 | 45 | :param headers: Headers to verify. 46 | :type headers: `httplib.HTTPMessage`. 47 | 48 | :raises urllib3.exceptions.HeaderParsingError: 49 | If parsing errors are found. 50 | """ 51 | 52 | # This will fail silently if we pass in the wrong kind of parameter. 53 | # To make debugging easier add an explicit check. 54 | if not isinstance(headers, httplib.HTTPMessage): 55 | raise TypeError('expected httplib.Message, got {0}.'.format( 56 | type(headers))) 57 | 58 | defects = getattr(headers, 'defects', None) 59 | get_payload = getattr(headers, 'get_payload', None) 60 | 61 | unparsed_data = None 62 | if get_payload: # Platform-specific: Python 3. 63 | unparsed_data = get_payload() 64 | 65 | if defects or unparsed_data: 66 | raise HeaderParsingError(defects=defects, unparsed_data=unparsed_data) 67 | 68 | 69 | def is_response_to_head(response): 70 | """ 71 | Checks whether the request of a response has been a HEAD-request. 72 | Handles the quirks of AppEngine. 73 | 74 | :param conn: 75 | :type conn: :class:`httplib.HTTPResponse` 76 | """ 77 | # FIXME: Can we do this somehow without accessing private httplib _method? 78 | method = response._method 79 | if isinstance(method, int): # Platform-specific: Appengine 80 | return method == 3 81 | return method.upper() == 'HEAD' 82 | -------------------------------------------------------------------------------- /thirdparty/connection/urllib3/util/url.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from collections import namedtuple 3 | 4 | from ..exceptions import LocationParseError 5 | 6 | 7 | url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment'] 8 | 9 | # We only want to normalize urls with an HTTP(S) scheme. 10 | # urllib3 infers URLs without a scheme (None) to be http. 11 | NORMALIZABLE_SCHEMES = ('http', 'https', None) 12 | 13 | 14 | class Url(namedtuple('Url', url_attrs)): 15 | """ 16 | Datastructure for representing an HTTP URL. Used as a return value for 17 | :func:`parse_url`. Both the scheme and host are normalized as they are 18 | both case-insensitive according to RFC 3986. 19 | """ 20 | __slots__ = () 21 | 22 | def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None, 23 | query=None, fragment=None): 24 | if path and not path.startswith('/'): 25 | path = '/' + path 26 | if scheme: 27 | scheme = scheme.lower() 28 | if host and scheme in NORMALIZABLE_SCHEMES: 29 | host = host.lower() 30 | return super(Url, cls).__new__(cls, scheme, auth, host, port, path, 31 | query, fragment) 32 | 33 | @property 34 | def hostname(self): 35 | """For backwards-compatibility with urlparse. We're nice like that.""" 36 | return self.host 37 | 38 | @property 39 | def request_uri(self): 40 | """Absolute path including the query string.""" 41 | uri = self.path or '/' 42 | 43 | if self.query is not None: 44 | uri += '?' 
+ self.query 45 | 46 | return uri 47 | 48 | @property 49 | def netloc(self): 50 | """Network location including host and port""" 51 | if self.port: 52 | return '%s:%d' % (self.host, self.port) 53 | return self.host 54 | 55 | @property 56 | def url(self): 57 | """ 58 | Convert self into a url 59 | 60 | This function should more or less round-trip with :func:`.parse_url`. The 61 | returned url may not be exactly the same as the url inputted to 62 | :func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls 63 | with a blank port will have : removed). 64 | 65 | Example: :: 66 | 67 | >>> U = parse_url('http://google.com/mail/') 68 | >>> U.url 69 | 'http://google.com/mail/' 70 | >>> Url('http', 'username:password', 'host.com', 80, 71 | ... '/path', 'query', 'fragment').url 72 | 'http://username:password@host.com:80/path?query#fragment' 73 | """ 74 | scheme, auth, host, port, path, query, fragment = self 75 | url = '' 76 | 77 | # We use "is not None" because we want things to happen with empty strings (or 0 port) 78 | if scheme is not None: 79 | url += scheme + '://' 80 | if auth is not None: 81 | url += auth + '@' 82 | if host is not None: 83 | url += host 84 | if port is not None: 85 | url += ':' + str(port) 86 | if path is not None: 87 | url += path 88 | if query is not None: 89 | url += '?' + query 90 | if fragment is not None: 91 | url += '#' + fragment 92 | 93 | return url 94 | 95 | def __str__(self): 96 | return self.url 97 | 98 | 99 | def split_first(s, delims): 100 | """ 101 | Given a string and an iterable of delimiters, split on the first found 102 | delimiter. Return two split parts and the matched delimiter. 103 | 104 | If not found, then the first part is the full input string. 105 | 106 | Example:: 107 | 108 | >>> split_first('foo/bar?baz', '?/=') 109 | ('foo', 'bar?baz', '/') 110 | >>> split_first('foo/bar?baz', '123') 111 | ('foo/bar?baz', '', None) 112 | 113 | Scales linearly with number of delims. Not ideal for large number of delims. 114 | """ 115 | min_idx = None 116 | min_delim = None 117 | for d in delims: 118 | idx = s.find(d) 119 | if idx < 0: 120 | continue 121 | 122 | if min_idx is None or idx < min_idx: 123 | min_idx = idx 124 | min_delim = d 125 | 126 | if min_idx is None or min_idx < 0: 127 | return s, '', None 128 | 129 | return s[:min_idx], s[min_idx + 1:], min_delim 130 | 131 | 132 | def parse_url(url): 133 | """ 134 | Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is 135 | performed to parse incomplete urls. Fields not provided will be None. 136 | 137 | Partly backwards-compatible with :mod:`urlparse`. 138 | 139 | Example:: 140 | 141 | >>> parse_url('http://google.com/mail/') 142 | Url(scheme='http', host='google.com', port=None, path='/mail/', ...) 143 | >>> parse_url('google.com:80') 144 | Url(scheme=None, host='google.com', port=80, path=None, ...) 145 | >>> parse_url('/foo?bar') 146 | Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...) 147 | """ 148 | 149 | # While this code has overlap with stdlib's urlparse, it is much 150 | # simplified for our needs and less annoying. 151 | # Additionally, this implementation does silly things to be optimal 152 | # on CPython.
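    # The parsing below works authority-first: strip the scheme, cut the URL at
    # the first '/', '?' or '#' to isolate the authority, then peel off the auth
    # part, an optional bracketed IPv6 literal and the port before handling the
    # path, fragment and query.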
153 | 154 | if not url: 155 | # Empty 156 | return Url() 157 | 158 | scheme = None 159 | auth = None 160 | host = None 161 | port = None 162 | path = None 163 | fragment = None 164 | query = None 165 | 166 | # Scheme 167 | if '://' in url: 168 | scheme, url = url.split('://', 1) 169 | 170 | # Find the earliest Authority Terminator 171 | # (http://tools.ietf.org/html/rfc3986#section-3.2) 172 | url, path_, delim = split_first(url, ['/', '?', '#']) 173 | 174 | if delim: 175 | # Reassemble the path 176 | path = delim + path_ 177 | 178 | # Auth 179 | if '@' in url: 180 | # Last '@' denotes end of auth part 181 | auth, url = url.rsplit('@', 1) 182 | 183 | # IPv6 184 | if url and url[0] == '[': 185 | host, url = url.split(']', 1) 186 | host += ']' 187 | 188 | # Port 189 | if ':' in url: 190 | _host, port = url.split(':', 1) 191 | 192 | if not host: 193 | host = _host 194 | 195 | if port: 196 | # If given, ports must be integers. No whitespace, no plus or 197 | # minus prefixes, no non-integer digits such as ^2 (superscript). 198 | if not port.isdigit(): 199 | raise LocationParseError(url) 200 | try: 201 | port = int(port) 202 | except ValueError: 203 | raise LocationParseError(url) 204 | else: 205 | # Blank ports are cool, too. (rfc3986#section-3.2.3) 206 | port = None 207 | 208 | elif not host and url: 209 | host = url 210 | 211 | if not path: 212 | return Url(scheme, auth, host, port, path, query, fragment) 213 | 214 | # Fragment 215 | if '#' in path: 216 | path, fragment = path.split('#', 1) 217 | 218 | # Query 219 | if '?' in path: 220 | path, query = path.split('?', 1) 221 | 222 | return Url(scheme, auth, host, port, path, query, fragment) 223 | 224 | 225 | def get_host(url): 226 | """ 227 | Deprecated. Use :func:`parse_url` instead. 228 | """ 229 | p = parse_url(url) 230 | return p.scheme or 'http', p.hostname, p.port 231 | -------------------------------------------------------------------------------- /thirdparty/connection/urllib3/util/wait.py: -------------------------------------------------------------------------------- 1 | from .selectors import ( 2 | HAS_SELECT, 3 | DefaultSelector, 4 | EVENT_READ, 5 | EVENT_WRITE 6 | ) 7 | 8 | 9 | def _wait_for_io_events(socks, events, timeout=None): 10 | """ Waits for IO events to be available from a list of sockets 11 | or optionally a single socket if passed in. Returns a list of 12 | sockets that can be interacted with immediately. """ 13 | if not HAS_SELECT: 14 | raise ValueError('Platform does not have a selector') 15 | if not isinstance(socks, list): 16 | # Probably just a single socket. 17 | if hasattr(socks, "fileno"): 18 | socks = [socks] 19 | # Otherwise it might be a non-list iterable. 20 | else: 21 | socks = list(socks) 22 | with DefaultSelector() as selector: 23 | for sock in socks: 24 | selector.register(sock, events) 25 | return [key[0].fileobj for key in 26 | selector.select(timeout) if key[1] & events] 27 | 28 | 29 | def wait_for_read(socks, timeout=None): 30 | """ Waits for reading to be available from a list of sockets 31 | or optionally a single socket if passed in. Returns a list of 32 | sockets that can be read from immediately. """ 33 | return _wait_for_io_events(socks, EVENT_READ, timeout) 34 | 35 | 36 | def wait_for_write(socks, timeout=None): 37 | """ Waits for writing to be available from a list of sockets 38 | or optionally a single socket if passed in. Returns a list of 39 | sockets that can be written to immediately. 
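    A ``timeout`` of ``None`` blocks until at least one socket is writable.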
""" 40 | return _wait_for_io_events(socks, EVENT_WRITE, timeout) 41 | --------------------------------------------------------------------------------