├── IPlugin.py
├── Main.py
├── ProbeTool.py
├── README.md
├── TaskCenter.py
├── __init__.py
├── bin
    ├── masscan
    └── masscan.exe
├── common
    ├── IPy.py
    ├── __init__.py
    ├── db
    │   ├── __init__.py
    │   └── sqlite3_db.py
    ├── initsql.py
    ├── logger
    │   ├── __init__.py
    │   ├── log_config.py
    │   └── log_util.py
    ├── qqwry.py
    └── utils.py
├── constants.py
├── create_c_net_file.py
├── datas
    ├── ports.db
    └── qqwry.dat
├── fuzzdir
    ├── __init__.py
    ├── dict
    │   ├── directory.lst
    │   └── directory.test.lst
    └── dirfuzz.py
├── hosts.txt
├── http_banner.py
├── pool
    ├── __init__.py
    └── thread_pool.py
├── report
    ├── ReportCenter.py
    ├── __init__.py
    └── template
    │   ├── index.html
    │   ├── inspector.css
    │   ├── package.json
    │   └── utils.js
├── reportrs.jpg
├── result.txt
├── scanrs.jpg
└── thirdparty
    ├── __init__.py
    ├── chardet
        ├── __init__.py
        ├── big5freq.py
        ├── big5prober.py
        ├── chardistribution.py
        ├── charsetgroupprober.py
        ├── charsetprober.py
        ├── cli
        │   ├── __init__.py
        │   └── chardetect.py
        ├── codingstatemachine.py
        ├── compat.py
        ├── cp949prober.py
        ├── enums.py
        ├── escprober.py
        ├── escsm.py
        ├── eucjpprober.py
        ├── euckrfreq.py
        ├── euckrprober.py
        ├── euctwfreq.py
        ├── euctwprober.py
        ├── gb2312freq.py
        ├── gb2312prober.py
        ├── hebrewprober.py
        ├── jisfreq.py
        ├── jpcntx.py
        ├── langbulgarianmodel.py
        ├── langcyrillicmodel.py
        ├── langgreekmodel.py
        ├── langhebrewmodel.py
        ├── langhungarianmodel.py
        ├── langthaimodel.py
        ├── langturkishmodel.py
        ├── latin1prober.py
        ├── mbcharsetprober.py
        ├── mbcsgroupprober.py
        ├── mbcssm.py
        ├── sbcharsetprober.py
        ├── sbcsgroupprober.py
        ├── sjisprober.py
        ├── universaldetector.py
        ├── utf8prober.py
        └── version.py
    └── connection
        ├── __init__.py
        ├── compat.py
        ├── exceptions.py
        ├── http_urllib3.py
        └── urllib3
            ├── __init__.py
            ├── _collections.py
            ├── connection.py
            ├── connectionpool.py
            ├── contrib
                ├── __init__.py
                ├── _securetransport
                │   ├── __init__.py
                │   ├── bindings.py
                │   └── low_level.py
                ├── appengine.py
                ├── ntlmpool.py
                ├── pyopenssl.py
                ├── securetransport.py
                └── socks.py
            ├── exceptions.py
            ├── fields.py
            ├── filepost.py
            ├── packages
                ├── __init__.py
                ├── backports
                │   ├── __init__.py
                │   └── makefile.py
                ├── ordered_dict.py
                ├── six.py
                └── ssl_match_hostname
                │   ├── __init__.py
                │   └── _implementation.py
            ├── poolmanager.py
            ├── request.py
            ├── response.py
            └── util
                ├── __init__.py
                ├── connection.py
                ├── request.py
                ├── response.py
                ├── retry.py
                ├── selectors.py
                ├── ssl_.py
                ├── timeout.py
                ├── url.py
                └── wait.py


/IPlugin.py:
--------------------------------------------------------------------------------
  1 | # -*- coding:utf-8 -*-
  2 | import threading
  3 | from common.logger.log_util import LogUtil as logging
  4 | from common.utils import md5_string
  5 | 
  6 | logger = logging.getLogger(__name__)
  7 | mu = threading.Lock()
  8 | 
  9 | class PLGSTATE:
 10 |     NORUN = 0x01
 11 |     RUNNING = 0x10
 12 |     FINISHED = 0x11
 13 | 
 14 | class ReportManage(object):
 15 | 
 16 |     def __init__(self):
 17 |         self._results = None
 18 |         self._output_queue = None
 19 |         self._unique_hash = None
 20 | 
 21 |     @property
 22 |     def results(self):
 23 |         return self._results
 24 | 
 25 |     @property
 26 |     def unique_hash(self):
 27 |         return self._unique_hash
 28 | 
 29 |     @property
 30 |     def output_queue(self):
 31 |         return self._output_queue
 32 | 
 33 |     @classmethod
 34 |     def instance(self):
 35 |         rpt = ReportManage()
 36 |         rpt._results = []
 37 |         rpt._unique_hash = []
 38 |         rpt._output_queue = []
 39 |         return rpt
 40 | 
 41 | 
 42 | class IPlugin(object):
 43 | 
 44 |     def __init__(self, knowledgebase=None):
 45 |         self._id = None
 46 |         self._name = None
 47 |         self._level = 1
 48 |         self._cookies = ""
 49 |         self._proxies = {}
 50 |         self._result_manage = ReportManage.instance()
 51 |         self._state = PLGSTATE.NORUN
 52 | 
 53 | 
 54 |     @property
 55 |     def state(self):
 56 |         return self._state
 57 | 
 58 |     @property
 59 |     def id(self):
 60 |         if self._id is None:
 61 |             raise Exception("must be set plugin's id ")
 62 |         return self._id
 63 | 
 64 |     @property
 65 |     def name(self):
 66 |         if self._name is None:
 67 |             raise Exception("must be set plugin's name ")
 68 |         return self._name
 69 | 
 70 |     @property
 71 |     def cookies(self):
 72 |         return self._cookies
 73 | 
 74 |     @cookies.setter
 75 |     def cookies(self, value):
 76 |         self._cookies = value
 77 | 
 78 |     @property
 79 |     def proxies(self,type=1):
 80 |         return self._proxies
 81 | 
 82 |     #('http', ('127.0.0.1', 9050), ('username', 'password')),('socks5', ('127.0.0.1', 1080))
 83 |     """
 84 |     'socks5': pycurl.PROXYTYPE_SOCKS5
 85 |     'socks4': pycurl.PROXYTYPE_SOCKS4
 86 |     'http': pycurl.PROXYTYPE_HTTP
 87 |     'https': pycurl.PROXYTYPE_HTTP
 88 |     """
 89 |     @proxies.setter
 90 |     def proxies(self, value):
 91 |         self._proxies = value
 92 | 
 93 |     @property
 94 |     def result_manage(self):
 95 |         return self._result_manage
 96 | 
 97 |     @result_manage.setter
 98 |     def result_manage(self,value):
 99 |         self._result_manage = value
100 | 
101 |     @property
102 |     def level(self):
103 |         return self._level
104 | 
105 |     def _report(self,package,unique=[]):
106 |         if isinstance(package,list) or isinstance(package,tuple):
107 |             if unique:
108 |                 rsdiff = list(set(unique) - set(range(len(package))))
109 |                 if rsdiff:
110 |                     raise Exception('Unknown filter index({0})'.format(rsdiff))
111 |             else:
112 |                 unique = range(len(package))
113 |             filters = [str(package[x]) for x in unique]
114 |             unique_hash = md5_string(",".join(filters))
115 |         elif isinstance(package,dict):
116 |             filters = {}
117 |             if unique:
118 |                 rsdiff = list(set(unique)-set(package.keys()))
119 |                 if rsdiff:
120 |                     raise Exception('Unknown filter fields({0})'.format(rsdiff))
121 |             else:
122 |                 unique = range(len(package.keys()))
123 |             for x in unique:
124 |                 filters.update({x:package.get(x)})
125 | 
126 |             unique_hash = md5_string(",".join([ str(x) for x in filters.values()]))
127 |         else:
128 |             raise Exception('unique Only support list，tuple，dict')
129 | 
130 |         if self.result_manage is None:
131 |             self.result_manage = ReportManage.instance()
132 | 
133 |         if mu.acquire(True):
134 |             if not unique_hash in self.result_manage.unique_hash:
135 |                 self.result_manage.unique_hash.append(unique_hash)
136 |                 self.result_manage.results.append(filters)
137 |                 self.result_manage.output_queue.append(package)
138 |             mu.release()
139 | 
140 |     def _run(self, *args,**kwargs):
141 |         raise Exception('unimplemented method')
142 | 
143 |     def _store(self, *args,**kwargs):
144 |         raise Exception('unimplemented method')
145 | 
146 |     def _create_report(self, *args,**kwargs):
147 |         raise Exception('unimplemented method')
148 | 
149 |     def cmd_run(self, *args,**kwargs):
150 |         self._state = PLGSTATE.RUNNING
151 |         try:
152 |             t = threading.Thread(target=self._store)
153 |             t.start()
154 |             self._run(*args,**kwargs)
155 |         except:
156 |             import traceback
157 |             msg = traceback.format_exc()
158 |             logger.warn(msg)
159 |         self._state = PLGSTATE.FINISHED
160 | 
161 |     def __str__(self):
162 |         return 'plugin(plg_id=%d, name=%s)' % (self.id, self._name)
163 | 
164 | 


--------------------------------------------------------------------------------
/Main.py:
--------------------------------------------------------------------------------
  1 | # -*- coding:utf-8 -*-
  2 | import base64
  3 | import os
  4 | import socket
  5 | import sys
  6 | import uuid
  7 | import subprocess
  8 | import time
  9 | import re
 10 | import cgi
 11 | from optparse import OptionParser
 12 | from multiprocessing import Process, Queue
 13 | from TaskCenter import TaskCenter, TaskStatus
 14 | from common.initsql import SQL1,SQL2
 15 | from common.db.sqlite3_db import sqlite3_db
 16 | from common.utils import query_service_and_banner, get_socket_banner, char_convert, computing_ports, CommonUtils, md5_string
 17 | from ProbeTool import HttpWeb
 18 | from constants import default_ports
 19 | from fuzzdir.dirfuzz import DirFuzz
 20 | from pool.thread_pool import ThreadPool
 21 | from IPlugin import IPlugin
 22 | 
 23 | from common.logger.log_util import LogUtil as logging
 24 | from report.ReportCenter import Report
 25 | from thirdparty.connection.http_urllib3 import HttpUtil
 26 | 
 27 | logger = logging.getLogger(__name__)
 28 | class PortScan(IPlugin):
 29 |     def __init__(self,msgqueue=None,taskstatus=None,statusqueue=None):
 30 |         super(PortScan, self).__init__()
 31 |         self.msgqueue = msgqueue
 32 |         self.statusqueue = statusqueue
 33 |         self.taskstatus = taskstatus
 34 |         self._id = 10000
 35 |         self._name = "portscan"
 36 |         self._level = 10
 37 |         self.rate = 500
 38 |         self.uuid_hash = md5_string(str(uuid.uuid4()))
 39 |         self.finished = False
 40 |         self.db = None
 41 |         self.taskid = 0
 42 |         self.portdb = os.path.join(os.path.dirname(__file__), 'repertory',format(time.strftime("%Y-%m-%d", time.localtime())),"{0}.port.db".format(time.strftime("%H_%M_%S", time.localtime())))
 43 |         if not os.path.exists(os.path.dirname(self.portdb)):
 44 |             os.makedirs(os.path.dirname(self.portdb))
 45 | 
 46 |     def init_db(self):
 47 |         self.db = sqlite3_db(self.portdb)
 48 |         self.db.create_table(SQL1)
 49 |         self.db.create_table(SQL2)
 50 |         logger.info("database (port.db) initialization completed")
 51 |         name = "assetscan_task_{0}".format(self.uuid_hash)
 52 |         self.db.insert('porttask', {"name":name,"status":1}, filter=False)
 53 |         rs = self.db.query_row("select id from porttask where name='{0}'".format(name))
 54 |         self.taskid = rs[0]
 55 | 
 56 |     def report(self,ip,port,protocol):
 57 |         package = (ip,port,protocol,)
 58 |         self._report(package)
 59 | 
 60 |     def start_scanning(self,scanmode,command):
 61 |         if scanmode == "fast":
 62 |             preg = re.compile(r".*Discovered open port (?P<port>\d+)/(?P<protocol>\w+) on (?P<ip>((25[0-5]|2[0-4]\d|[01]?\d\d?)($|(?!\.$)\.)){4}).*",re.I)
 63 |             cmddir = os.path.abspath(os.path.join(os.path.dirname(__file__), 'bin'))
 64 |             process = subprocess.Popen(command, cwd=cmddir, shell=False, stdout=subprocess.PIPE,
 65 |                                        stderr=subprocess.STDOUT)
 66 |             while True:
 67 |                 time.sleep(0.1)
 68 |                 returncode = process.poll()
 69 |                 line = process.stdout.readline()
 70 |                 line = line.strip()
 71 |                 if line:
 72 |                     rs = re.match(preg, line)
 73 |                     if rs:
 74 |                         self.report(rs.group("ip"), rs.group("port"), rs.group("protocol"))
 75 |                 pid = process.pid
 76 |                 if returncode is None:
 77 |                     continue
 78 |                 else:
 79 |                     break
 80 |         else:
 81 |             pool = ThreadPool(50)
 82 |             iplist,portlist = command
 83 |             for ip in iplist:
 84 |                 for port in portlist:
 85 |                     pool.add_task(self.port_scan,ip,port)
 86 |             pool.wait_all_complete()
 87 | 
 88 |     def port_scan(self,ip,port):
 89 |         try:
 90 |             s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
 91 |             s.settimeout(0.2)
 92 |             port = int(port)
 93 |             if s.connect_ex((ip, port)) == 0:
 94 |                 self.report(ip, port, "tcp")
 95 |         except Exception as e:
 96 |             pass
 97 |         finally:
 98 |             s.close()
 99 | 
100 |     def _run(self, *args,**kwargs):
101 |         self.init_db()
102 |         logger.info("tasks start running")
103 |         if any([not kwargs.get("ipscope",None),not kwargs.get("ports",None)]):
104 |             return
105 |         ipscope = CommonUtils.package_ipscope(kwargs.get("ipscope"))
106 |         ports = computing_ports(kwargs.get("ports"))
107 |         scanmode = kwargs.get("scanmode","fast")
108 |         pseudo_ip = kwargs.get("pseudo_ip","")
109 |         pseudo_port = kwargs.get("pseudo_port","")
110 |         sps = len(ports) / 1000
111 |         if (sps <= 1):
112 |             ports_list = [ports]
113 |         else:
114 |             ports_list = CommonUtils.div_list(ports, sps)
115 | 
116 |         if len(ports_list) <= 1:
117 |             for plist in ports_list:
118 |                 pl = ",".join([str(x) for x in plist])
119 |                 command = CommonUtils.create_command(scanmode,ipscope=ipscope,ports=pl,pseudo_ip=pseudo_ip,pseudo_port=pseudo_port,rate=self.rate)
120 |                 self.start_scanning(scanmode,command)
121 |         else:
122 |             pool = ThreadPool(5)
123 |             for plist in ports_list:
124 |                 pl = ",".join([str(x) for x in plist])
125 |                 command = CommonUtils.create_command(scanmode,ipscope=ipscope,ports=pl,pseudo_ip=pseudo_ip,pseudo_port=pseudo_port,rate=self.rate)
126 |                 pool.add_task(self.start_scanning,scanmode,command)
127 |             pool.wait_all_complete()
128 |         self.finished = True
129 |         TaskCenter.update_task_status(self.statusqueue,"portscan",TaskStatus.FINISHED) if self.statusqueue else None
130 | 
131 |     def _store(self):
132 |         logger.info("start collecting results information.........")
133 |         self.product = 0
134 |         httpclient = HttpUtil()
135 |         while not self.finished:
136 |             time.sleep(0.2)
137 |             if not self.result_manage.output_queue:
138 |                 continue
139 |             else:
140 |                 ip, port, protocol = self.result_manage.output_queue.pop()
141 |                 ref_service, ref_banner = query_service_and_banner(port, protocol)
142 |                 web_banner, web_service, ostype, assettype, domain, position, proext = HttpWeb.detect(ip, port,httpclient)
143 |                 banner = web_banner if web_banner else get_socket_banner(ip, port, ref_banner)
144 |                 banner = banner.replace("\n", "").replace("\r", "")
145 |                 banner = char_convert(banner)
146 |                 banner = base64.b64encode(banner)
147 |                 service = web_service if web_service else ref_service
148 |                 banner = cgi.escape(banner)
149 |                 rs_one = {"ip": ip,"taskid":self.taskid,"domain": domain,"port": str(port), "service": service, "banner": banner, "protocol": protocol,"assettype": assettype, "position": position, "proext": proext}
150 |                 self.db.insert('asset',rs_one,filter=False)
151 |                 if self.msgqueue:
152 |                     rs = self.db.query_row("select id from asset where ip='{0}' and port='{1}' and taskid='{2}'".format(ip, port,self.taskid))
153 |                     rs_one.update({"assetid":rs[0]})
154 |                     self.product = self.product + 1
155 |                     self.msgqueue.put(rs_one)
156 | 
157 | def cmdLineParser():
158 |     optparser = OptionParser()
159 |     optparser.add_option("-i", "--ipscope", dest="ipscope", type="string", help="Specify IP scan range,eg: 127.0.0.1/24 or 10.65.10.3-10.65.10.255")
160 |     optparser.add_option("-p", "--portscope", dest="portscope", type="string",default="web_ports",help="Specify Port scan range,eg: 80,443,8080 or web_ports or top_100 or top_1000")
161 |     optparser.add_option("-m", "--scanmode", dest="scanmode", type="string", default="fast", help="Scan mode[fast,low],default:fast")
162 |     optparser.add_option("-f", "--file", dest="file", type="string",default="",help="asset's file")
163 |     optparser.add_option("-t", "--task-run",action="store_true", dest="taskstart", default=False,help="Start in task mode,default cmd run")
164 |     try:
165 |         (options, args) = optparser.parse_args()
166 |     except Exception, err:
167 |         sys.exit(0)
168 | 
169 |     if len(sys.argv) < 2:
170 |         optparser.print_help()
171 |         sys.exit(0)
172 | 
173 |     ipscope = options.ipscope
174 |     portscope = options.portscope
175 |     assetfile = options.file
176 |     scanmode = options.scanmode
177 |     taskstart = options.taskstart
178 |     if assetfile:
179 |         with open(assetfile,"rb+") as file:
180 |             ipscope = file.read()
181 |     portscope = default_ports.get(portscope,portscope)
182 |     if taskstart:
183 |         msgqueue = Queue()
184 |         statusqueue = Queue()
185 |         mainscan = PortScan(msgqueue,statusqueue=statusqueue)
186 |         dirfuzz = DirFuzz(statusqueue=statusqueue)
187 |         TaskCenter.register(statusqueue,[mainscan.name,dirfuzz.name])
188 |         dirdb = dirfuzz.fuzzdb
189 |         portdb = mainscan.portdb
190 |         rpt_tools = Report(portdb, dirdb)
191 |         mainprocess = Process(target=mainscan.cmd_run, kwargs={"ipscope":ipscope,"ports":portscope,"scanmode":scanmode})
192 |         dirfuzzprocess = Process(target=dirfuzz.funzz,args=(msgqueue,))
193 |         taskcenterprocess = Process(target=TaskCenter.run,args=(statusqueue,))
194 |         mainprocess.start()
195 |         dirfuzzprocess.start()
196 |         taskcenterprocess.start()
197 |         mainprocess.join()
198 |         dirfuzzprocess.join()
199 |         taskcenterprocess.join()
200 |         mainprocess.terminate()
201 |         dirfuzzprocess.terminate()
202 |         taskcenterprocess.terminate()
203 |         rpt_tools.report_html()
204 | 
205 |     else:
206 |         test = PortScan()
207 |         test.cmd_run(ipscope=ipscope, ports=portscope,scanmode=scanmode)
208 |         rpt_tools = Report(test.portdb)
209 |         rpt_tools.report_html()
210 | 
211 | if __name__ == "__main__":
212 |     cmdLineParser()


--------------------------------------------------------------------------------
/ProbeTool.py:
--------------------------------------------------------------------------------
  1 | # -*- coding:utf-8 -*-
  2 | from __future__ import division
  3 | import re
  4 | from common.utils import get_server_profile, get_banner_by_content
  5 | from constants import finger2https
  6 | from thirdparty.connection.http_urllib3 import HttpUtil
  7 | 
  8 | class HttpWeb(object):
  9 |     NOT_DETECT_PORTS = [22,21,3389]
 10 | 
 11 |     @classmethod
 12 |     def is_ssl_request(cls,content):
 13 |         for x in finger2https:
 14 |             if len(content) < 2000 and x.lower() in content.lower():
 15 |                 return True
 16 |         return False
 17 | 
 18 |     @classmethod
 19 |     def detect(cls,ip,port,httpclient=None):
 20 |         assettype = 0
 21 |         port = int(port)
 22 |         if httpclient is None:
 23 |             httpclient = HttpUtil()
 24 |         banner,service,ostype,proext,domain,position= "","","","","",""
 25 |         if not port in cls.NOT_DETECT_PORTS:
 26 |             if port == 80:
 27 |                 schemas = ["http"]
 28 |             elif port == 443 or port == 8443:
 29 |                 schemas = ["https"]
 30 |             else:
 31 |                 schemas = ["http", "https"]
 32 |             pregx = re.compile(r"<title>(.*?)</title>", re.I)
 33 |             for schema in schemas:
 34 |                 url = "{schema}://{ip}:{port}".format(schema=schema, ip=ip if not domain else domain, port=port)
 35 |                 try:
 36 |                     res = httpclient.request(url,timeout=1,redirect=True)
 37 |                     content = res.content
 38 |                     if cls.is_ssl_request(content):
 39 |                         continue
 40 |                     headers = res.headers
 41 |                     ostype, server, server_app = get_server_profile(headers)
 42 |                     ostype = OsType.get_ostype(port=port,server=server,server_app=server_app,res=res)
 43 |                     service = "{0} web application ".format(schema)
 44 |                     if server:
 45 |                         service = service + " server: {0}".format(server)
 46 |                     if server_app:
 47 |                         service = service + " application: {0}".format(server_app)
 48 | 
 49 |                     if content:
 50 |                         rs = re.findall(pregx, content)
 51 |                         if rs and len(rs) > 0:
 52 |                             banner = rs[0] if not get_banner_by_content(res) else "["+get_banner_by_content(res)+"] ==" + rs[0]
 53 |                         else:
 54 |                             banner = content[0:100] if not get_banner_by_content(res) else "["+get_banner_by_content(res)+"] ==" + res.content[0:100]
 55 |                     assettype = 1
 56 |                     proext = schema
 57 |                     break
 58 |                 except:
 59 |                     banner = ""
 60 |                     assettype = 0
 61 |         return banner,service,ostype,assettype,domain,position,proext
 62 | 
 63 | class OsType(object):
 64 |     WINDOWS_PORTS = [3389]
 65 |     LINUX_PORTS = []
 66 |     LINUX = "unix"
 67 |     WINDOWS = "windows"
 68 |     @classmethod
 69 |     def get_ostype(cls, port=None, server=None, server_app=None, res=None):
 70 |         ostype = "unknown"
 71 |         only_windows_ports = list(set(cls.WINDOWS_PORTS)-set(cls.LINUX_PORTS))
 72 |         only_linux_ports = list(set(cls.LINUX_PORTS)-set(cls.WINDOWS_PORTS))
 73 |         if port:
 74 |             if isinstance(port,int):
 75 |                 if port and (port in only_windows_ports):
 76 |                     ostype = cls.WINDOWS
 77 |                 elif port in only_linux_ports:
 78 |                     ostype = cls.LINUX
 79 | 
 80 |             elif isinstance(port,list):
 81 |                 counts = len(set(cls.WINDOWS_PORTS+cls.LINUX_PORTS))
 82 |                 win_num = len(set(port) & set(cls.WINDOWS_PORTS))
 83 |                 lin_num = len(set(port) & set(cls.LINUX_PORTS))
 84 |                 diff = abs(win_num-lin_num)/counts
 85 |                 if diff > 0.7:
 86 |                     if win_num > lin_num:
 87 |                         ostype = cls.WINDOWS
 88 |                     else:
 89 |                         ostype = cls.LINUX
 90 |         if server:
 91 |             if isinstance(server,list):
 92 |                 server = ",".join(server)
 93 |             regx = re.compile(r"Microsof|iis",re.I)
 94 |             if regx.findall(server):
 95 |                 ostype = cls.WINDOWS
 96 | 
 97 |         if server_app:
 98 |             if any(["asp" in server_app,"aspx" in server_app]):
 99 |                 ostype = cls.WINDOWS
100 | 
101 |         if res and res.status_code == 500:
102 |             regx = re.compile(r"[a-zA-Z]:(?:\\(?:[a-zA-Z0-9_]+.[a-zA-Z0-9_]{1,16}))+", re.I)
103 |             if regx.findall(res.content):
104 |                 ostype = "windows"
105 | 
106 |         return ostype
107 | 
108 | if __name__ == "__main__":
109 |     banner, service, ostype, assettype, domain,position,proext =  HttpWeb.detect('42.48.85.15',8082)
110 |     print banner
111 |     print service
112 |     print ostype
113 |     print assettype
114 |     print position
115 |     print domain
116 |     print proext
117 | 
118 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # assetscan
 2 | 资产扫描工具
 3 | ```
 4 | Usage: Main.py [options]
 5 | 
 6 | Options:
 7 |   -h, --help            show this help message and exit
 8 |   -i IPSCOPE, --ipscope=IPSCOPE
 9 |                         Specify IP scan range,eg: 127.0.0.1/24 or
10 |                         10.65.10.3-10.65.10.255
11 |   -p PORTSCOPE, --portscope=PORTSCOPE
12 |                         Specify Port scan range,eg: 80,443,8080 or web_ports
13 |                         or top_100 or top_1000
14 |   -m SCANMODE, --scanmode=SCANMODE
15 |                         Scan mode[fast,low],default:fast
16 |   -f FILE, --file=FILE  asset's file
17 |   -t, --task-run        Start in task mode,default cmd run
18 | ```
19 | ###参数介绍
20 | ```
21 | ipscope   为指定扫描范围，格式如下 10.0.0.1/24 或者 10.0.0.1-255 或者 10.0.0.1-10.0.0.255
22 | portscope 为指定扫描端口，系统提供web_ports,top_100,top_1000三大类，形式如下：
23 |           1.python Main.py -i 10.0.0.1/24 -p 80,443,8080
24 |           2.python Main.py -i 10.0.0.1/24 -p 80,8080-10000
25 |           3.python Main.py -i 10.0.0.1/24 -p web_ports
26 | scanmode  为指定扫描模式，默认情况下是fast模式，由于有些内网权限非root，所以此时建议采用慢模式扫描
27 | file      为指定文件进行批量扫描
28 | task-run  为任务模式扫描，此时任务模式采用多进程多线程模式，会一并扫描目录模块
29 | ```
30 | ###扫描console信息如下
31 | 
32 | ![avatar](scanrs.jpg)
33 | 
34 | ###生成的报表信息如下
35 | ![avatar](reportrs.jpg)
36 | 
37 | 


--------------------------------------------------------------------------------
/TaskCenter.py:
--------------------------------------------------------------------------------
 1 | # -*- coding:utf-8 -*-
 2 | import time
 3 | from common.logger.log_util import LogUtil as logging
 4 | logger = logging.getLogger(__name__)
 5 | class TaskStatus:
 6 |     START = 0x01
 7 |     RUNNING = 0x02
 8 |     ERROR = 0x03
 9 |     FINISHED = 0x04
10 | 
11 | class TaskCenter(object):
12 | 
13 |     @classmethod
14 |     def register(cls,statusqueue,tskname):
15 |         taskinfo = {}
16 |         for tname in tskname:
17 |             taskinfo.update({tname:TaskStatus.START})
18 |         if not statusqueue.empty():
19 |             sts = statusqueue.get(True)
20 |             sts.update(taskinfo)
21 |             statusqueue.put(sts)
22 |         else:
23 |             statusqueue.put(taskinfo)
24 | 
25 |     @classmethod
26 |     def run(cls,statusqueue):
27 |         finished = False
28 |         while not finished:
29 |             time.sleep(0.2)
30 |             if not statusqueue.empty():
31 |                 status = statusqueue.get(True)
32 |                 rs = [x for x in status.values() if x == TaskStatus.FINISHED]
33 |                 if len(rs) == len(status):
34 |                     finished = True
35 |                     logger.info("All tasks({0}) completed".format(",".join(status.keys())))
36 |                 statusqueue.put(status)
37 | 
38 |     @classmethod
39 |     def update_task_status(cls,statusqueue,taskname,taskstatus):
40 |         if not statusqueue.empty():
41 |             status = statusqueue.get(True)
42 |             if taskname in status.keys():
43 |                 status.update({taskname:taskstatus})
44 |             statusqueue.put(status)
45 | 
46 |     @classmethod
47 |     def task_is_finished(cls,statusqueue,taskname):
48 |         if not statusqueue.empty():
49 |             status = statusqueue.get(True)
50 |             statusqueue.put(status)
51 |         else:
52 |             status = {}
53 |         if status.get(taskname,None) ==  TaskStatus.FINISHED:
54 |             return True
55 |         else:
56 |             return False
57 | 


--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jkgh006/assetscan/12492689a4ec214fb68019ffa2b1903f9ae4c836/__init__.py


--------------------------------------------------------------------------------
/bin/masscan:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jkgh006/assetscan/12492689a4ec214fb68019ffa2b1903f9ae4c836/bin/masscan


--------------------------------------------------------------------------------
/bin/masscan.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jkgh006/assetscan/12492689a4ec214fb68019ffa2b1903f9ae4c836/bin/masscan.exe


--------------------------------------------------------------------------------
/common/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jkgh006/assetscan/12492689a4ec214fb68019ffa2b1903f9ae4c836/common/__init__.py


--------------------------------------------------------------------------------
/common/db/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jkgh006/assetscan/12492689a4ec214fb68019ffa2b1903f9ae4c836/common/db/__init__.py


--------------------------------------------------------------------------------
/common/initsql.py:
--------------------------------------------------------------------------------
 1 | SQL1="""
 2 | CREATE TABLE `porttask` (
 3 |     `id` INTEGER PRIMARY KEY AUTOINCREMENT,
 4 |     `name` varchar(100) DEFAULT NULL,
 5 |    `status` INTEGER DEFAULT 0
 6 | )
 7 | """
 8 | SQL2="""
 9 | CREATE TABLE `asset` (
10 |     `id` INTEGER PRIMARY KEY AUTOINCREMENT,
11 |     `taskid` INTEGER DEFAULT NULL,
12 |     `ip` varchar(100) NOT NULL,
13 |     `port` varchar(100) DEFAULT NULL,
14 |     `domain` varchar(100) DEFAULT NULL,
15 |     `banner` varchar(500) DEFAULT NULL,
16 |     `protocol` varchar(100) DEFAULT NULL,
17 |     `service` varchar(200) DEFAULT NULL,
18 |     `assettype` int(10) DEFAULT NULL,
19 |     `position` varchar(200) DEFAULT NULL,
20 |     `proext` varchar(50) DEFAULT NULL
21 | )
22 | """
23 | SQL3="""
24 | CREATE TABLE `fuzztask` (
25 |     `id` INTEGER PRIMARY KEY AUTOINCREMENT,
26 |     `taskid` INTEGER DEFAULT NULL,
27 |     `assetid` INTEGER DEFAULT NULL,
28 |     `url` varchar(500) DEFAULT NULL,
29 |     `path` varchar(500) DEFAULT NULL,
30 |     `reqcode` INTEGER DEFAULT 0,
31 |     `banner` varchar(500) DEFAULT NULL,
32 |     `reslength` INTEGER DEFAULT 0,
33 |     `status` INTEGER DEFAULT 0
34 | )
35 | """
36 | 


--------------------------------------------------------------------------------
/common/logger/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jkgh006/assetscan/12492689a4ec214fb68019ffa2b1903f9ae4c836/common/logger/__init__.py


--------------------------------------------------------------------------------
/common/logger/log_config.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding:utf-8 -*-
 3 | import os
 4 | import logging, logging.config
 5 | import time
 6 | 
 7 | SYSLOG_NAME = format(time.strftime("%y-%m-%d_%H_%M_%S", time.localtime()))+".log"
 8 | LOGGERPATH = os.path.join(os.path.dirname(__file__), "../../log/")
 9 | if not os.path.exists(LOGGERPATH):
10 |     os.makedirs(LOGGERPATH)
11 | 
12 | LOGGING = {
13 |     'version': 1,
14 |     'disable_existing_loggers': True,
15 |     'formatters': {
16 |         'verbose': {
17 |             'format': '%(asctime)s %(levelname)s {%(process)d-%(thread)d %(module)s.%(funcName)s:%(lineno)d} %(message)s',
18 |             'datefmt': '%y-%m-%d %H:%M:%S'
19 |         },
20 |         'simple': {'format': '%(levelname)s %(message)s'},
21 |         'default': {
22 |             'format': '%(asctime)s %(message)s',
23 |             'datefmt': '%Y-%m-%d %H:%M:%S'
24 |         }
25 |     },
26 |     'handlers': {
27 |         'null': {
28 |             'level': 'DEBUG',
29 |             'class': 'logging.NullHandler',
30 |         },
31 |         'console': {
32 |             'level': 'DEBUG',
33 |             'class': 'logging.StreamHandler',
34 |             'formatter': 'default'
35 |         },
36 | 
37 |         'file': {
38 |             'level': 'DEBUG',
39 |             'class': 'logging.handlers.TimedRotatingFileHandler',
40 |             'filename': os.path.join(os.environ.get('LOG_HOME', ''),
41 |                                      os.environ.get('LOG_FILE_NAME', os.path.join(LOGGERPATH, SYSLOG_NAME))),
42 |             'formatter': 'verbose',
43 |             'encoding': 'utf8'
44 | 
45 |         },
46 |     },
47 |     'loggers': {
48 |         'mylogger': {
49 |             'level': 'DEBUG',
50 |             'handlers': ['file', 'console'],
51 |             'propagate': True
52 |         }
53 |     }
54 | }
55 | 
56 | if __name__ == '__main__':
57 |     logging.config.dictConfig(LOGGING)
58 |     logger = logging.getLogger('mylogger')
59 |     logger.info('Hello')
60 | 


--------------------------------------------------------------------------------
/common/logger/log_util.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding:utf-8 -*-
 3 | import threading,sys,os
 4 | import log_config
 5 | LEGEND = 70
 6 | class LogUtil(object):
 7 |     import logging
 8 |     from logging import config
 9 | 
10 |     logging.addLevelName(LEGEND, 'LEGEND')
11 |     config.dictConfig(log_config.LOGGING)
12 | 
13 |     _log_server = None
14 |     _mutex = threading.Condition()
15 | 
16 |     ERROR = logging.ERROR
17 |     WARN = logging.WARN
18 |     INFO = logging.INFO
19 |     DEBUG = logging.DEBUG
20 | 
21 |     @staticmethod
22 |     def getLogger(name=None, via_socket=True):
23 |         import logging
24 |         logger = logging.getLogger('mylogger')
25 | 
26 |         def findCaller():
27 |             f = sys._getframe(2)
28 |             rv = "(unknown file)", 0, "(unknown function)"
29 |             while hasattr(f, "f_code"):
30 |                 co = f.f_code
31 |                 filename = os.path.normcase(co.co_filename)
32 |                 if filename == __file__:
33 |                     f = f.f_back
34 |                     continue
35 |                 rv = (co.co_filename, f.f_lineno, co.co_name)
36 |                 break
37 |             return rv
38 | 
39 |         def legend(msg, *args, **kwargs):
40 |             try:
41 |                 fn, lno, func = findCaller()
42 |             except ValueError:
43 |                 fn, lno, func = "(unknown file)", 0, "(unknown function)"
44 |             fn = os.path.splitext(os.path.basename(fn))[0]
45 |             logger.log(LEGEND, '{%s.%s:%d} %s', fn, func, lno, msg, *args, **kwargs)
46 |         logger.__setattr__('legend', legend)
47 | 
48 |         return logger


--------------------------------------------------------------------------------
/common/qqwry.py:
--------------------------------------------------------------------------------
  1 | # -*- coding:utf-8 -*-
  2 | 
  3 | '''用Python脚本查询纯真IP库
  4 | 
  5 | QQWry.Dat的格式如下:
  6 | 
  7 | +----------+
  8 | |  文件头  |  (8字节)
  9 | +----------+
 10 | |  记录区  | （不定长）
 11 | +----------+
 12 | |  索引区  | （大小由文件头决定）
 13 | +----------+
 14 | 
 15 | 文件头：4字节开始索引偏移值+4字节结尾索引偏移值
 16 | 
 17 | 记录区： 每条IP记录格式 ==> IP地址[国家信息][地区信息]
 18 | 
 19 |    对于国家记录，可以有三种表示方式：
 20 | 
 21 |        字符串形式(IP记录第5字节不等于0x01和0x02的情况)，
 22 |        重定向模式1(第5字节为0x01),则接下来3字节为国家信息存储地的偏移值
 23 |        重定向模式(第5字节为0x02),
 24 | 
 25 |    对于地区记录，可以有两种表示方式： 字符串形式和重定向
 26 | 
 27 |    最后一条规则：重定向模式1的国家记录后不能跟地区记录
 28 | 
 29 | 索引区： 每条索引记录格式 ==> 4字节起始IP地址 + 3字节指向IP记录的偏移值
 30 | 
 31 |    索引区的IP和它指向的记录区一条记录中的IP构成一个IP范围。查询信息是这个
 32 |    范围内IP的信息
 33 | 
 34 | '''
 35 | 
 36 | import sys
 37 | import socket
 38 | from struct import pack, unpack
 39 | 
 40 | import os
 41 | 
 42 | 
 43 | class IPInfo(object):
 44 |     '''QQWry.Dat数据库查询功能集合
 45 |     '''
 46 | 
 47 |     def __init__(self, dbname=None):
 48 |         ''' 初始化类，读取数据库内容为一个字符串，
 49 |         通过开始8字节确定数据库的索引信息'''
 50 |         self.dbname = dbname = os.path.join(os.path.dirname(os.path.abspath(__file__)),"../datas/qqwry.dat") if not dbname else dbname
 51 |         # f = file(dbname, 'r')
 52 | 
 53 |         # Demon注：在Windows下用'r'会有问题，会把\r\n转换成\n
 54 |         # 详见http://demon.tw/programming/python-open-mode.html
 55 |         # 还有Python文档中不提倡用file函数来打开文件，推荐用open
 56 |         f = open(dbname, 'rb')
 57 | 
 58 |         self.img = f.read()
 59 |         f.close()
 60 | 
 61 |         # QQWry.Dat文件的开始8字节是索引信息,前4字节是开始索引的偏移值，
 62 |         # 后4字节是结束索引的偏移值。
 63 |         # (self.firstIndex, self.lastIndex) = unpack('II', self.img[:8])
 64 | 
 65 |         # Demon注：unpack默认使用的endian是和机器有关的
 66 |         # Intel x86和AMD64(x86-64)是little-endian
 67 |         # Motorola 68000和PowerPC G5是big-endian
 68 |         # 而纯真数据库全部采用了little-endian字节序
 69 |         # 所以在某些big-endian的机器上原代码会出错
 70 |         (self.firstIndex, self.lastIndex) = unpack('<II', self.img[:8])
 71 | 
 72 |         # 每条索引长7字节，这里得到索引总个数
 73 |         self.indexCount = (self.lastIndex - self.firstIndex) / 7 + 1
 74 | 
 75 |     def getString(self, offset=0):
 76 |         ''' 读取字符串信息，包括"国家"信息和"地区"信息
 77 | 
 78 |         QQWry.Dat的记录区每条信息都是一个以'\0'结尾的字符串'''
 79 | 
 80 |         o2 = self.img.find('\0', offset)
 81 |         # return self.img[offset:o2]
 82 |         # 有可能只有国家信息没有地区信息，
 83 |         gb2312_str = self.img[offset:o2]
 84 |         try:
 85 |             utf8_str = unicode(gb2312_str, 'gb2312').encode('utf-8')
 86 |         except:
 87 |             return '未知'
 88 |         return utf8_str
 89 | 
 90 |     def getLong3(self, offset=0):
 91 |         '''QQWry.Dat中的偏移记录都是3字节，本函数取得3字节的偏移量的常规表示
 92 |         QQWry.Dat使用“字符串“存储这些值'''
 93 |         s = self.img[offset: offset + 3]
 94 |         s += '\0'
 95 |         # unpack用一个'I'作为format，后面的字符串必须是4字节
 96 |         # return unpack('I', s)[0]
 97 | 
 98 |         # Demon注：和上面一样，强制使用little-endian
 99 |         return unpack('<I', s)[0]
100 | 
101 |     def getAreaAddr(self, offset=0):
102 |         ''' 通过给出偏移值，取得区域信息字符串，'''
103 | 
104 |         byte = ord(self.img[offset])
105 |         if byte == 1 or byte == 2:
106 |             # 第一个字节为1或者2时，取得2-4字节作为一个偏移量调用自己
107 |             p = self.getLong3(offset + 1)
108 |             return self.getAreaAddr(p)
109 |         else:
110 |             return self.getString(offset)
111 | 
112 |     def getAddr(self, offset, ip=0):
113 |         img = self.img
114 |         o = offset
115 |         byte = ord(img[o])
116 | 
117 |         if byte == 1:
118 |             # 重定向模式1
119 |             # [IP][0x01][国家和地区信息的绝对偏移地址]
120 |             # 使用接下来的3字节作为偏移量调用字节取得信息
121 |             return self.getAddr(self.getLong3(o + 1))
122 | 
123 |         if byte == 2:
124 |             # 重定向模式2
125 |             # [IP][0x02][国家信息的绝对偏移][地区信息字符串]
126 |             # 使用国家信息偏移量调用自己取得字符串信息
127 |             cArea = self.getAreaAddr(self.getLong3(o + 1))
128 |             o += 4
129 |             # 跳过前4字节取字符串作为地区信息
130 |             aArea = self.getAreaAddr(o)
131 |             return (cArea, aArea)
132 | 
133 |         if byte != 1 and byte != 2:
134 |             # 最简单的IP记录形式，[IP][国家信息][地区信息]
135 |             # 重定向模式1有种情况就是偏移量指向包含国家和地区信息两个字符串
136 |             # 即偏移量指向的第一个字节不是1或2,就使用这里的分支
137 |             # 简单地说：取连续取两个字符串！
138 | 
139 |             cArea = self.getString(o)
140 |             # o += len(cArea) + 1
141 |             # 我们已经修改cArea为utf-8字符编码了，len取得的长度会有变，
142 |             # 用下面方法得到offset
143 |             o = self.img.find('\0', o) + 1
144 |             aArea = self.getString(o)
145 |             return (cArea, aArea)
146 | 
147 |     def find(self, ip, l, r):
148 |         ''' 使用二分法查找网络字节编码的IP地址的索引记录'''
149 |         if r - l <= 1:
150 |             return l
151 | 
152 |         m = (l + r) / 2
153 |         o = self.firstIndex + m * 7
154 |         # new_ip = unpack('I', self.img[o: o+4])[0]
155 | 
156 |         # Demon注：和上面一样，强制使用little-endian
157 |         new_ip = unpack('<I', self.img[o: o + 4])[0]
158 | 
159 |         if ip <= new_ip:
160 |             return self.find(ip, l, m)
161 |         else:
162 |             return self.find(ip, m, r)
163 | 
164 |     def getIPAddr(self, ip):
165 |         ''' 调用其他函数，取得信息！'''
166 |         # 使用网络字节编码IP地址
167 |         ip = unpack('!I', socket.inet_aton(ip))[0]
168 |         # 使用 self.find 函数查找ip的索引偏移
169 |         i = self.find(ip, 0, self.indexCount - 1)
170 |         # 得到索引记录
171 |         o = self.firstIndex + i * 7
172 |         # 索引记录格式是： 前4字节IP信息+3字节指向IP记录信息的偏移量
173 |         # 这里就是使用后3字节作为偏移量得到其常规表示（QQWry.Dat用字符串表示值）
174 |         o2 = self.getLong3(o + 4)
175 |         # IP记录偏移值+4可以丢弃前4字节的IP地址信息。
176 |         (c, a) = self.getAddr(o2 + 4)
177 |         return (c, a)
178 | 
179 |     def output(self, first, last):
180 |         for i in range(first, last):
181 |             o = self.firstIndex + i * 7
182 |             ip = socket.inet_ntoa(pack('!I', unpack('I', self.img[o:o + 4])[0]))
183 |             offset = self.getLong3(o + 4)
184 |             (c, a) = self.getAddr(offset + 4)
185 |             print "%s %d %s/%s" % (ip, offset, c, a)
186 | 
187 |     @classmethod
188 |     def position_info(cls,ip):
189 |         i = IPInfo()
190 |         (c, a) = i.getIPAddr(ip)
191 |         if 'u' == a:
192 |             a = ""
193 |         return '%s%s' % (c, a)
194 | 
195 | if __name__ == "__main__":
196 |     print IPInfo.position_info("220.191.208.40").decode(encoding='UTF-8',errors='strict')
197 |     print type(IPInfo.position_info("220.191.208.40").decode(encoding='UTF-8',errors='strict'))


--------------------------------------------------------------------------------
/constants.py:
--------------------------------------------------------------------------------
  1 | fingerprint = {
  2 |     "weblogic":[{"type":"regex","content":"From RFC.*Hypertext Transfer Protocol"}],
  3 |     "shiro":[{"type":"string","resheader":"rememberMe=deleteMe"}]
  4 | }
  5 | 
  6 | finger2https = ["The plain HTTP request was sent to HTTPS port"]
  7 | 
  8 | default_ports = {
  9 |     'web_ports': [80, 443,1000,1111,1433,2100,2222,3333,4001,4444,4321,5001,5432,5555,6001,6666,7001, 7002, 7003, 7004, 7005, 7006, 7007, 7008,7777,8000,8090,8001, 8002, 8003, 8004, 8005, 8006, 8007, 8008, 8080, 8081, 8082, 8083, 8084, 8099, 8443,8888,9001,
 10 |  9002, 9003, 9004, 9005, 9006, 9007, 9008,9020,9080,9090,9200,9999,15672,50030,50060,50070,50075,50090],
 11 |     'top_50': [21, 22, 25, 53, 80, 110, 113, 135, 139, 143, 179, 199, 443, 445, 465, 514, 548, 554, 587, 646,993, 995,
 12 |                     1025, 1026, 1433, 1720, 1723, 2000, 3306, 3389, 5060, 5666, 5900, 6001, 8000, 8008,8080, 8443,
 13 |                     8888,10000, 32768, 49152, 49154],
 14 |     'top_100': [7, 9, 13, 21, 22, 25, 37, 53, 79, 80, 88, 106, 110, 113, 119, 135, 139, 143, 179, 199, 389,
 15 |                      427, 443, 444, 465, 513, 514, 543, 548, 554, 587, 631, 646, 873, 990, 993, 995, 1025, 1026,
 16 |                      1027, 1028, 1110, 1433, 1720, 1723, 1755, 1900, 2000, 2049, 2121, 2717, 3000, 3128, 3306,
 17 |                      3389, 3986, 4899, 5000, 5009, 5051, 5060, 5101, 5190, 5357, 5432, 5631, 5666, 5800, 5900,
 18 |                      6000, 6646, 7070, 8000, 8008, 8080, 8443, 8888, 9100, 9999, 32768, 49152, 49153, 49154,
 19 |                      49155, 49156],
 20 |     'top_1000': [1, 3, 6, 9, 13, 17, 19, 20, 21, 22, 23, 24, 25, 30, 32, 37, 42, 49, 53, 70, 79, 80, 81, 82, 83,
 21 |                       84,
 22 |                       88, 89, 99, 106, 109, 110, 113, 119, 125, 135, 139, 143, 146, 161, 163, 179, 199, 211, 222, 254,
 23 |                       255,
 24 |                       259, 264, 280, 301, 306, 311, 340, 366, 389, 406, 416, 425, 427, 443, 444, 458, 464, 481, 497,
 25 |                       500,
 26 |                       512, 513, 514, 524, 541, 543, 544, 548, 554, 563, 587, 593, 616, 625, 631, 636, 646, 648, 666,
 27 |                       667,
 28 |                       683, 687, 691, 700, 705, 711, 714, 720, 722, 726, 749, 765, 777, 783, 787, 800, 808, 843, 873,
 29 |                       880,
 30 |                       888, 898, 900, 901, 902, 911, 981, 987, 990, 992, 995, 999, 1000, 1001, 1007, 1009, 1010, 1021,
 31 |                       1022,
 32 |                       1023, 1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031, 1032, 1033, 1034, 1035, 1036, 1037, 1038,
 33 |                       1039,
 34 |                       1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055,
 35 |                       1056,
 36 |                       1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071, 1072,
 37 |                       1073,
 38 |                       1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087, 1088, 1089,
 39 |                       1090,
 40 |                       1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099, 1102, 1104, 1105, 1106, 1107, 1110, 1111,
 41 |                       1112,
 42 |                       1113, 1117, 1119, 1121, 1122, 1123, 1126, 1130, 1131, 1137, 1141, 1145, 1147, 1148, 1151, 1154,
 43 |                       1163,
 44 |                       1164, 1165, 1169, 1174, 1183, 1185, 1186, 1192, 1198, 1201, 1213, 1216, 1217, 1233, 1236, 1244,
 45 |                       1247,
 46 |                       1259, 1271, 1277, 1287, 1296, 1300, 1309, 1310, 1322, 1328, 1334, 1352, 1417, 1433, 1443, 1455,
 47 |                       1461,
 48 |                       1494, 1500, 1503, 1521, 1524, 1533, 1556, 1580, 1583, 1594, 1600, 1641, 1658, 1666, 1687, 1700,
 49 |                       1717,
 50 |                       1718, 1719, 1720, 1723, 1755, 1761, 1782, 1801, 1805, 1812, 1839, 1862, 1863, 1875, 1900, 1914,
 51 |                       1935,
 52 |                       1947, 1971, 1974, 1984, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,
 53 |                       2013,
 54 |                       2020, 2021, 2030, 2033, 2034, 2038, 2040, 2041, 2042, 2045, 2046, 2047, 2048, 2065, 2068, 2099,
 55 |                       2103,
 56 |                       2105, 2106, 2111, 2119, 2121, 2126, 2135, 2144, 2160, 2170, 2179, 2190, 2196, 2200, 2222, 2251,
 57 |                       2260,
 58 |                       2288, 2301, 2323, 2366, 2381, 2382, 2393, 2399, 2401, 2492, 2500, 2522, 2525, 2557, 2601, 2604,
 59 |                       2607,
 60 |                       2638, 2701, 2710, 2717, 2725, 2800, 2809, 2811, 2869, 2875, 2909, 2920, 2967, 2998, 3000, 3003,
 61 |                       3005,
 62 |                       3006, 3011, 3013, 3017, 3030, 3052, 3071, 3077, 3128, 3168, 3211, 3221, 3260, 3268, 3283, 3300,
 63 |                       3306,
 64 |                       3322, 3323, 3324, 3333, 3351, 3367, 3369, 3370, 3371, 3389, 3404, 3476, 3493, 3517, 3527, 3546,
 65 |                       3551,
 66 |                       3580, 3659, 3689, 3703, 3737, 3766, 3784, 3800, 3809, 3814, 3826, 3827, 3851, 3869, 3871, 3878,
 67 |                       3880,
 68 |                       3889, 3905, 3914, 3918, 3920, 3945, 3971, 3986, 3995, 3998, 4000, 4001, 4002, 4003, 4004, 4005,
 69 |                       4045,
 70 |                       4111, 4125, 4129, 4224, 4242, 4279, 4321, 4343, 4443, 4444, 4445, 4449, 4550, 4567, 4662, 4848,
 71 |                       4899,
 72 |                       4998, 5000, 5001, 5002, 5003, 5009, 5030, 5033, 5050, 5054, 5060, 5080, 5087, 5100, 5101, 5120,
 73 |                       5190,
 74 |                       5200, 5214, 5221, 5225, 5269, 5280, 5298, 5357, 5405, 5414, 5431, 5440, 5500, 5510, 5544, 5550,
 75 |                       5555,
 76 |                       5560, 5566, 5631, 5633, 5666, 5678, 5718, 5730, 5800, 5801, 5810, 5815, 5822, 5825, 5850, 5859,
 77 |                       5862,
 78 |                       5877, 5900, 5901, 5902, 5903, 5906, 5910, 5915, 5922, 5925, 5950, 5952, 5959, 5960, 5961, 5962,
 79 |                       5987,
 80 |                       5988, 5998, 5999, 6000, 6001, 6002, 6003, 6004, 6005, 6006, 6009, 6025, 6059, 6100, 6106, 6112,
 81 |                       6123,
 82 |                       6129, 6156, 6346, 6389, 6502, 6510, 6543, 6547, 6565, 6566, 6580, 6646, 6666, 6667, 6668, 6689,
 83 |                       6692,
 84 |                       6699, 6779, 6788, 6792, 6839, 6881, 6901, 6969, 7000, 7001, 7004, 7007, 7019, 7025, 7070, 7100,
 85 |                       7103,
 86 |                       7106, 7200, 7402, 7435, 7443, 7496, 7512, 7625, 7627, 7676, 7741, 7777, 7800, 7911, 7920, 7937,
 87 |                       7999,
 88 |                       8000, 8001, 8007, 8008, 8009, 8010, 8021, 8031, 8042, 8045, 8080, 8081, 8082, 8083, 8084, 8085,
 89 |                       8086,
 90 |                       8087, 8088, 8089, 8093, 8099, 8180, 8192, 8193, 8200, 8222, 8254, 8290, 8291, 8300, 8333, 8383,
 91 |                       8400,
 92 |                       8402, 8443, 8500, 8600, 8649, 8651, 8654, 8701, 8800, 8873, 8888, 8899, 8994, 9000, 9001, 9002,
 93 |                       9009,
 94 |                       9010, 9040, 9050, 9071, 9080, 9090, 9099, 9100, 9101, 9102, 9110, 9200, 9207, 9220, 9290, 9415,
 95 |                       9418,
 96 |                       9485, 9500, 9502, 9535, 9575, 9593, 9594, 9618, 9666, 9876, 9877, 9898, 9900, 9917, 9929, 9943,
 97 |                       9968,
 98 |                       9998, 9999, 10000, 10001, 10002, 10003, 10009, 10012, 10024, 10082, 10180, 10215, 10243, 10566,
 99 |                       10616, 10621, 10626, 10628, 10778, 11110, 11967, 12000, 12174, 12265, 12345, 13456, 13722,
100 |                       13782,
101 |                       14000, 14238, 14441, 15000, 15002, 15003, 15660, 15742, 16000, 16012, 16016, 16018, 16080,
102 |                       16113,
103 |                       16992, 17877, 17988, 18040, 18101, 18988, 19101, 19283, 19315, 19350, 19780, 19801, 19842,
104 |                       20000,
105 |                       20005, 20031, 20221, 20828, 21571, 22939, 23502, 24444, 24800, 25734, 26214, 27000, 27352,
106 |                       27355,
107 |                       27715, 28201, 30000, 30718, 30951, 31038, 31337, 32768, 32769, 32770, 32771, 32772, 32773,
108 |                       32774,
109 |                       32775, 32776, 32777, 32778, 32779, 32780, 32781, 32782, 32783, 32784, 33354, 33899, 34571,
110 |                       34572,
111 |                       35500, 38292, 40193, 40911, 41511, 42510, 44176, 44442, 44501, 45100, 48080, 49152, 49153,
112 |                       49154,
113 |                       49155, 49156, 49157, 49158, 49159, 49160, 49163, 49165, 49167, 49175, 49400, 49999, 50000,
114 |                       50001,
115 |                       50002, 50006, 50300, 50389, 50500, 50636, 50800, 51103, 51493, 52673, 52822, 52848, 52869,
116 |                       54045,
117 |                       54328, 55055, 55555, 55600, 56737, 57294, 57797, 58080, 60020, 60443, 61532, 61900, 62078,
118 |                       63331,
119 |                       64623, 64680, 65000, 65129, 65389]
120 | }


--------------------------------------------------------------------------------
/create_c_net_file.py:
--------------------------------------------------------------------------------
 1 | from common.utils import CommonUtils
 2 | 
 3 | with open("hosts.txt", "rb+") as file:
 4 |     ipscope = file.read()
 5 | 
 6 | domains = CommonUtils.package_ipscope_c_net(ipscope)
 7 | 
 8 | flag = 0
 9 | for ipc in domains:
10 |     with open("rhosts.txt","ab+") as file1:
11 |         if flag == 0:
12 |             file1.truncate()
13 |             flag=1
14 |         file1.write(ipc+"\n")


--------------------------------------------------------------------------------
/datas/ports.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jkgh006/assetscan/12492689a4ec214fb68019ffa2b1903f9ae4c836/datas/ports.db


--------------------------------------------------------------------------------
/datas/qqwry.dat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jkgh006/assetscan/12492689a4ec214fb68019ffa2b1903f9ae4c836/datas/qqwry.dat


--------------------------------------------------------------------------------
/fuzzdir/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jkgh006/assetscan/12492689a4ec214fb68019ffa2b1903f9ae4c836/fuzzdir/__init__.py


--------------------------------------------------------------------------------
/fuzzdir/dict/directory.lst:
--------------------------------------------------------------------------------
  1 | /WarehouseEJB/services
  2 | /WarehouseWeb
  3 | /WarehouseWebservlet
  4 | /WEB-INF/web.xml
  5 | /WebAdmin
  6 | /Webalizer
  7 | /WebServicesSamples
  8 | /WebSphere
  9 | /WebSphereBank
 10 | /WebSphereBankDeposit
 11 | /WebSphereBankDepositservlet
 12 | /WebSphereBankservlet
 13 | /WebSphereSamples
 14 | /wp-admin
 15 | /services
 16 | /webservice
 17 | /webservices
 18 | /phpsso_server/caches/configs
 19 | /include
 20 | /.git
 21 | /.svn
 22 | /druid
 23 | /editor
 24 | /fckeditor
 25 | /admin/publish
 26 | /admin
 27 | /publish
 28 | /dwr
 29 | /a/login
 30 | /editor/filemanager/browser/default/connectors/jsp
 31 | /editor/filemanager/upload
 32 | /resource/fckeditor/editor/filemanager/browser/default
 33 | /fckeditor/editor/filemanager/connectors
 34 | /wls-wsat/RegistrationRequesterPortType11
 35 | /wls-wsat/ParticipantPortType11
 36 | /wls-wsat/RegistrationPortTypeRPC11
 37 | /wls-wsat/CoordinatorPortType11
 38 | /wls-wsat/RegistrationRequesterPortType
 39 | /wls-wsat/ParticipantPortType
 40 | /wls-wsat/RegistrationPortTypeRPC
 41 | /wls-wsat/CoordinatorPortType
 42 | /wls_utc/CallbackHandler
 43 | /_async/AsyncResponseService
 44 | /jolokia
 45 | /actuator
 46 | /actuator/jolokia
 47 | /trace
 48 | /health
 49 | /loggers
 50 | /metrics
 51 | /autoconfig
 52 | /heapdump
 53 | /env
 54 | /info
 55 | /dump
 56 | /configprops
 57 | /mappings
 58 | /auditevents
 59 | /beans
 60 | /option/v2/api-docs
 61 | /swagger-ui.html
 62 | /api/swagger-ui.html
 63 | /apidoc
 64 | /server-info
 65 | /server-status
 66 | /druid/basic.json
 67 | /api/console/api_server
 68 | /runningpods/
 69 | /solr/admin/info/system?wt=json
 70 | /admin/info/system?wt=json
 71 | /api/beans
 72 | /beans
 73 | /swagger-ui.html
 74 | /swagger/index.html
 75 | /swagger/ui/index
 76 | /swagger/
 77 | /Swagger/ui/index
 78 | /docs/index
 79 | /api_index.html
 80 | /doc/swagger-ui/
 81 | /swagger-ui/
 82 | /apidoc-ui/index.html
 83 | /v2/api-docs
 84 | /service/rest/swagger.json
 85 | /rest/default/schema
 86 | /topic/list
 87 | /topic/detail
 88 | /services/AdminService
 89 | /services/FreeMarkerService
 90 | /axis/services/AdminService
 91 | /axis/services/FreeMarkerService
 92 | /services/WsInfo
 93 | /v2/keys/?recursive=true
 94 | /web/test-call
 95 | /version
 96 | /all_docs
 97 | /application.wadl
 98 | /xampp/lang.tmp
 99 | /ws_utc/config.do
100 | /j_security_check
101 | /app/timelion
102 | /invoker/readonly
103 | /web-console
104 | /api/systeminfo
105 | /user/login
106 | /cms/web/test.txt
107 | /app/jspx.jspx
108 | /sysinfo.html
109 | /system.html
110 | /sysinfo/envs
111 | /webdav/test
112 | /manager/html
113 | /actuator/beans
114 | /sysadmin
115 | /supperadmin


--------------------------------------------------------------------------------
/fuzzdir/dict/directory.test.lst:
--------------------------------------------------------------------------------
1 | /sysadmin


--------------------------------------------------------------------------------
/fuzzdir/dirfuzz.py:
--------------------------------------------------------------------------------
  1 | # -*- coding:utf-8 -*-
  2 | import base64
  3 | import cgi
  4 | import os
  5 | import random
  6 | import sys
  7 | import threading
  8 | import time
  9 | from optparse import OptionParser
 10 | from thirdparty.connection.http_urllib3 import HttpUtil
 11 | 
 12 | from TaskCenter import TaskStatus, TaskCenter
 13 | from common.initsql import SQL3
 14 | from common.utils import char_convert, get_banner_by_content
 15 | from pool.thread_pool import ThreadPool
 16 | from ProbeTool import HttpWeb
 17 | from common.db.sqlite3_db import sqlite3_db
 18 | from common.logger.log_util import LogUtil as logging
 19 | from urlparse import urljoin
 20 | logger = logging.getLogger(__name__)
 21 | mu = threading.Lock()
 22 | 
 23 | class DirFuzz(object):
 24 | 
 25 |     def __init__(self,dbname=None,url=None,statusqueue=None):
 26 |         self.dbname = dbname
 27 |         self.filename = []
 28 |         self.taskid = None
 29 |         self.fuzzdb = None
 30 |         self.url = url
 31 |         self.statusqueue = statusqueue
 32 |         self.taskrun = False
 33 |         self.finished = False
 34 |         self.single = False if not self.url else True
 35 |         self.name = "dirscan"
 36 |         if not self.taskrun:
 37 |             if self.single:
 38 |                 self.fuzzdb = os.path.join(os.path.dirname(__file__),'..','repertory','tmp',"{0}.fuzz.db".format(time.strftime("%Y-%m-%d_%H_%M_%S", time.localtime())))
 39 |             else:
 40 |                 self.fuzzdb = os.path.join(os.path.dirname(__file__),'..','repertory',format(time.strftime("%Y-%m-%d", time.localtime())),"{0}.fuzz.db".format(time.strftime("%H_%M_%S", time.localtime())))
 41 |         else:
 42 |             self.fuzzdb = os.path.join(os.path.dirname(__file__), '..', 'repertory',format(time.strftime("%Y-%m-%d", time.localtime())),"{0}.fuzz.db".format(time.strftime("%H_%M_%S", time.localtime())))
 43 | 
 44 |         if not os.path.exists(os.path.dirname(self.fuzzdb)):
 45 |             os.makedirs(os.path.dirname(self.fuzzdb))
 46 | 
 47 |     def init_db(self):
 48 |         self.fuzzdb = sqlite3_db(self.fuzzdb)
 49 |         self.fuzzdb.create_table(SQL3)
 50 |         logger.info("database (fuzz.db) initialization completed")
 51 | 
 52 |     def init_dir_dict(self):
 53 |         filename = os.path.join(os.path.join(os.path.dirname(__file__), 'dict'),"directory.lst")
 54 |         with open(filename,"rb+") as file:
 55 |             self.filename = [x.strip() for x in file.readlines()]
 56 | 
 57 |     def cache_content(self,taskid,assetid,url):
 58 |         try:
 59 |             filename = "".join(random.sample('abcdefghijklmnopqrstuvwxyz0123456789', 6)) + ".css"
 60 |             dirname = "".join(random.sample('abcdefghijklmnopqrstuvwxyz0123456789', 6)) + "/"
 61 |             res1 = self.httpclient.request(urljoin(url,filename), timeout=3,redirect=True)
 62 |             res2 = self.httpclient.request(urljoin(url, dirname), timeout=3,redirect=True)
 63 |             res3 = self.httpclient.request(url,timeout=3,redirect=True)
 64 |             content = res3.content
 65 |             rs_one = {"taskid": taskid, "assetid": assetid, "url": url,"banner": base64.b64encode(content[0:100]), "reslength": len(content), "status": 1}
 66 |             self.fuzzdb.insert('fuzztask', rs_one, filter=False)
 67 |             rs = [res1.content,res2.content,res3.content]
 68 |         except:
 69 |             rs = None
 70 |         return rs
 71 | 
 72 |     def req_ad_file(self,taskid,assetid,url,filename,cache):
 73 |         newurl = urljoin(url,filename)
 74 |         try:
 75 |             res = self.httpclient.request(newurl,timeout=3,redirect=True)
 76 |             condition1 = (abs(len(res.content)-len(cache[0])) <=20) or (abs(len(res.content)-len(cache[1])) <= 20) or (abs(len(res.content.replace(filename,"").replace(newurl,""))-len(cache[2].replace(filename,"").replace(newurl,""))) <= 20)
 77 |             condition2 = (res.status_code not in [401,405]) and ((res.status_code >= 400 and res.status_code < 500) or (res.status_code > 500) or (res.status_code < 200))
 78 |             if condition2:
 79 |                 pass
 80 |             else:
 81 |                 if not condition1:
 82 |                     if mu.acquire():
 83 |                         content = res.content[0:100] if not get_banner_by_content(res) else "["+get_banner_by_content(res)+"] ==" + res.content[0:100]
 84 |                         content = content.replace("\n","").replace("\r","")
 85 |                         rs_one = {"taskid":taskid,"assetid":assetid,"url":newurl,"path":filename,"reqcode":res.status_code,"banner":cgi.escape(base64.b64encode(char_convert(content))),"reslength":len(res.content),"status":1}
 86 |                         self.fuzzdb.insert('fuzztask', rs_one, filter=False)
 87 |                         mu.release()
 88 |         except:
 89 |             pass
 90 | 
 91 |     def result_unique(self):
 92 |         sql = "select * from (select *,count(reslength) as flag from fuzztask where taskid={0} group by reslength)".format(self.taskid)
 93 |         rs = self.fuzzdb.queryall(sql)
 94 |         sql_1 = "delete from fuzztask"
 95 |         sql_2 = "update sqlite_sequence SET seq = 0 where name ='fuzztask'"
 96 |         self.fuzzdb.query(sql_1)
 97 |         self.fuzzdb.query(sql_2)
 98 |         for id,taskid,assetid,url,path,reqcode,banner,reslength,status,count in rs:
 99 |             rs_one = {"taskid": taskid, "assetid": assetid, "url": url,"path":path,"reqcode":reqcode,"banner": banner, "reslength": reslength, "status": 1}
100 |             self.fuzzdb.insert('fuzztask', rs_one, filter=False)
101 |             logger.info("url:{0} ".format(url))
102 | 
103 |     def funzz(self,msgqueue=None):
104 |         if msgqueue:
105 |             self.taskrun = True
106 |         self.init_db()
107 |         self.init_dir_dict()
108 |         tp = ThreadPool(10)
109 |         self.httpclient = HttpUtil()
110 |         if msgqueue is None:
111 |             if not self.single:
112 |                 rs = self.assetdb.query_all("select * from asset")
113 |                 for id, taskid,ip, port, domain, banner, protocol, service, assettype, position, schema in rs:
114 |                     if self.taskid is None:
115 |                         self.taskid = taskid
116 |                     web_banner, web_service, ostype, assettype, domain, position, proext = HttpWeb.detect(ip, port,self.httpclient)
117 |                     if proext:
118 |                         url = "{schema}://{ip}:{port}".format(schema=proext,ip=ip,port=port)
119 |                         rs = self.cache_content(taskid,id,url)
120 |                         if rs:
121 |                             for x in self.filename:
122 |                                 tp.add_task(self.req_ad_file,taskid,id,url,x,rs)
123 |             else:
124 |                 self.taskid = -100
125 |                 rs = self.cache_content(self.taskid,-100,self.url)
126 |                 for x in self.filename:
127 |                     tp.add_task(self.req_ad_file, self.taskid,-100,self.url, x, rs)
128 |         else:
129 |             task_null_count = 0
130 |             while not self.finished:
131 |                 time.sleep(0.2)
132 |                 if  task_null_count >= 5:
133 |                     TaskCenter.update_task_status(self.statusqueue, "dirscan", TaskStatus.FINISHED)
134 |                     self.finished = True
135 |                     continue
136 |                 if not msgqueue.empty():
137 |                     rs_one = msgqueue.get(True)
138 |                     self.taskid = rs_one.get("taskid")
139 |                     web_banner, web_service, ostype, assettype, domain, position, proext = HttpWeb.detect(rs_one.get("ip"), rs_one.get("port"),self.httpclient)
140 |                     if proext:
141 |                         url = "{schema}://{ip}:{port}".format(schema=proext, ip=rs_one.get("ip"), port=rs_one.get("port"))
142 |                         rs = self.cache_content(self.taskid,rs_one.get("assetid"),url)
143 |                         if rs:
144 |                             for x in self.filename:
145 |                                 tp.add_task(self.req_ad_file,self.taskid,rs_one.get("assetid"), url, x, rs)
146 |                 else:
147 |                     if TaskCenter.task_is_finished(self.statusqueue,"portscan"):
148 |                         task_null_count = task_null_count+1
149 |                         time.sleep(0.5)
150 | 
151 |         tp.wait_all_complete()
152 |         self.result_unique()
153 | 
154 | if __name__ == "__main__":
155 |     optparser = OptionParser()
156 |     optparser.add_option("-d", "--dbname", dest="dbname", type="string", default="", help="port scan result's db")
157 |     optparser.add_option("-u", "--url", dest="url", type="string", default="", help="url cues")
158 |     try:
159 |         (options, args) = optparser.parse_args()
160 |     except Exception, err:
161 |         sys.exit(0)
162 |     if len(sys.argv) < 2:
163 |         optparser.print_help()
164 |         sys.exit(0)
165 |     dbname = options.dbname
166 |     url = options.url
167 |     test = DirFuzz(dbname=dbname,url=url)
168 |     test.funzz()
169 | 


--------------------------------------------------------------------------------
/hosts.txt:
--------------------------------------------------------------------------------
 1 | wx.cctaa-wx.cn
 2 | cdn.cctaa-wx.cn
 3 | bbs.cctaa-wx.cn
 4 | test.cctaa-wx.cn
 5 | 2015.cctaa-wx.cn
 6 | www.cctaa-wx.cn
 7 | mp.cctaa-wx.cn
 8 | js.cctaa-wx.cn
 9 | book.cctaa-wx.cn
10 | 


--------------------------------------------------------------------------------
/http_banner.py:
--------------------------------------------------------------------------------
 1 | import threading
 2 | from ProbeTool import HttpWeb
 3 | from common.utils import query_service_and_banner, get_socket_banner, CommonUtils
 4 | from constants import default_ports
 5 | from pool.thread_pool import ThreadPool
 6 | mu = threading.Lock()
 7 | ports = default_ports.get("web_ports")
 8 | with open("hosts.txt", "rb+") as file:
 9 |     ipscope = file.read()
10 | 
11 | domains = CommonUtils.package_ipscope(ipscope,handle_ip=False,retType="list")
12 | def scanner(ip,port):
13 |     ref_service, ref_banner = query_service_and_banner(port, "tcp")
14 |     web_banner, web_service, ostype, assettype, domain, position, proext = HttpWeb.detect(ip, port)
15 |     banner = web_banner if web_banner else get_socket_banner(domain, port, ref_banner)
16 |     if mu.acquire(True):
17 |         if proext:
18 |             msg = "{proext}://{domain}:{port}      {banner}\n".format(proext=proext,domain=ip,port=port,banner=banner)
19 |             f = open("result.txt","ab+")
20 |             f.write(msg)
21 |             f.close()
22 |         mu.release()
23 | 
24 | f = open("result.txt","wb+")
25 | f.truncate()
26 | f.close()
27 | pool = ThreadPool(30)
28 | for domain in domains:
29 |     for port in ports:
30 |         pool.add_task(scanner,domain,port)
31 | pool.wait_all_complete()


--------------------------------------------------------------------------------
/pool/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jkgh006/assetscan/12492689a4ec214fb68019ffa2b1903f9ae4c836/pool/__init__.py


--------------------------------------------------------------------------------
/pool/thread_pool.py:
--------------------------------------------------------------------------------
 1 | import Queue
 2 | import threading
 3 | 
 4 | 
 5 | class ThreadPool(object):
 6 |     def __init__(self, thread_num):
 7 |         self.task_queue = Queue.Queue()
 8 |         self.threads = []
 9 |         self.__init_thread_pool(thread_num)
10 | 
11 |     def __init_thread_pool(self,thread_num):
12 |         for i in range(thread_num):
13 |             worker = Worker(self.task_queue)
14 |             worker.setDaemon(True)
15 |             worker.start()
16 |             self.threads.append(worker)
17 | 
18 |     def add_task(self, func, *args):
19 |         self.task_queue.put((func, args))
20 | 
21 |     def wait_all_complete(self):
22 |         self.task_queue.join()
23 |         self._terminate_workers()
24 | 
25 |     def force_complete(self):
26 |         self.clear_tasks()
27 |         self._terminate_workers()
28 | 
29 |     def clear_tasks(self):
30 |         while not self.task_queue.empty():
31 |             self.task_queue.get_nowait()
32 |             self.task_queue.task_done()
33 |     def _terminate_workers(self):
34 |         for worker in self.threads:
35 |             worker.terminate()
36 | class Worker(threading.Thread):
37 |     def __init__(self, task_queue):
38 |         super(Worker, self).__init__()
39 |         self.task_queue = task_queue
40 |         self.stop = False
41 | 
42 |     def run(self):
43 |         max_len = 64
44 |         while not self.stop:
45 |             try:
46 |                 do, args = self.task_queue.get(timeout=1)
47 |                 args_desc = str(args)
48 |                 if len(args_desc) > max_len:
49 |                     pass
50 |                 try:
51 |                     do(*args)
52 |                 except:
53 |                     pass
54 |                 if self.stop:
55 |                     pass
56 |                 self.task_queue.task_done()
57 |             except:
58 |                 pass
59 |     def terminate(self):
60 |         self.stop = True


--------------------------------------------------------------------------------
/report/ReportCenter.py:
--------------------------------------------------------------------------------
 1 | # -*- coding:utf-8 -*-
 2 | import base64
 3 | import json
 4 | import os
 5 | import shutil
 6 | import time
 7 | 
 8 | from common.db.sqlite3_db import sqlite3_db
 9 | from common.utils import update_file_content
10 | from common.logger.log_util import LogUtil as logging
11 | logger = logging.getLogger(__name__)
12 | 
13 | class Report(object):
14 |     def __init__(self,portdb,dirdb=None):
15 |         self.portdb = portdb
16 |         self.dirdb = dirdb
17 |         self.report_dir = os.path.dirname(self.portdb)
18 |         self.report_tpl_dir = os.path.join(os.path.dirname(__file__),"template")
19 |         self.port_db = sqlite3_db(self.portdb)
20 |         self.dir_db = sqlite3_db(self.dirdb) if self.dirdb else None
21 | 
22 | 
23 |     def create_scan_datajson(self):
24 |         port_rs = self.port_db.queryall("select * from asset")
25 |         ip_port_map = {}
26 |         ip_children = []
27 |         for id, taskid, ip, port, domain, banner, protocol, service, assettype, position, schema in port_rs:
28 |             if self.dir_db:
29 |                 dir_rs = self.dir_db.queryall("select * from fuzztask where taskid={0} and assetid={1}".format(taskid,id))
30 |                 if dir_rs:
31 |                     path_children = []
32 |                     for id,taskid,assetid,url,path,reqcode,banner,reslength,status in dir_rs:
33 |                         path_children.append({"name": "/{0} (code:{1})".format(path, reqcode),"children": [{"name": banner, "type": "path", "value": url}]})
34 |                 else:
35 |                     if schema:
36 |                         path_children = [{"name": "/","children": [{"name": banner, "type": "path", "value":"{schema}://{ip}:{port}".format(schema=schema, ip=ip, port=port)}]}]
37 |                     else:
38 |                         path_children = [{"name": "Unknown", "children": [{"name": base64.b64decode(banner)}]}]
39 |             else:
40 |                 if schema:
41 |                     path_children = [{"name": "/", "children": [{"name": banner, "type": "path","value": "{schema}://{ip}:{port}".format(schema=schema,ip=ip,port=port)}]}]
42 |                 else:
43 |                     path_children = [{"name": "Unknown", "children": [{"name": base64.b64decode(banner)}]}]
44 | 
45 |             if ip_port_map.has_key(ip):
46 |                 if port not in ip_port_map.get(ip):
47 |                     ip_port_map.get(ip).append(port)
48 |                     for x in ip_children:
49 |                         if x.get("name") == ip:
50 |                             x.get("children").append({"name": port, "children": path_children})
51 |                             break
52 |             else:
53 |                 ip_port_map.update({ip:[port]})
54 |                 ip_children.append({"name": ip, "children": [{"name": port, "children": path_children}]})
55 |         datajson = json.dumps({"name":u"结果","children":ip_children})
56 |         return datajson
57 | 
58 |     def report_html(self):
59 |         files = ["index.html","inspector.css","package.json","utils.js"]
60 |         report_files = os.path.join(self.report_dir,"{0}_files".format(time.strftime("%H_%M_%S", time.localtime())))
61 |         if not os.path.exists(report_files):
62 |             os.makedirs(report_files)
63 |         for f in files:
64 |             shutil.copy(os.path.join(self.report_tpl_dir,f),report_files)
65 | 
66 |         jsondata = self.create_scan_datajson()
67 |         update_file_content(os.path.join(report_files,"index.html"),"$$$JSONDATA$$$",jsondata)
68 |         logger.info("scan result: {0}".format(os.path.join(report_files,"index.html")))
69 | 
70 | if __name__ == "__main__":
71 |     dirdb = r"D:\gitproject\assetscan\repertory\2020-06-01\13_49_47.fuzz.db"
72 |     portdb = r"D:\gitproject\assetscan\repertory\2020-06-01\13_49_47.port.db"
73 |     test = Report(portdb,dirdb)
74 |     print test.create_scan_datajson()


--------------------------------------------------------------------------------
/report/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jkgh006/assetscan/12492689a4ec214fb68019ffa2b1903f9ae4c836/report/__init__.py


--------------------------------------------------------------------------------
/report/template/index.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <meta charset="utf-8">
  3 | <title>Collapsible Tree</title>
  4 | <link rel="stylesheet" type="text/css" href="./inspector.css">
  5 | <script src="utils.js" type="application/javascript"></script>
  6 | <script type="module">
  7 |     function define(runtime, observer) {
  8 | 
  9 |         const main = runtime.module();
 10 | 
 11 |         main.variable(observer()).define(["md"], function(md){return(
 12 |             md`# 资产扫描结果：`
 13 |         )});
 14 |         main.variable(observer("chart")).define("chart", ["d3","data","dy","margin","width","dx","tree","diagonal"], function(d3,data,dy,margin,width,dx,tree,diagonal)
 15 |             {
 16 |                 const jsondata = $$$JSONDATA$$$;
 17 | 
 18 |                 const root = d3.hierarchy(jsondata);
 19 | 
 20 |                 root.x0 = dy / 2;
 21 |                 root.y0 = 0;
 22 |                 root.descendants().forEach((d, i) => {
 23 |                     d.id = i;
 24 |                     d._children = d.children;
 25 |                     // if (d.depth && d.data.name.length !== 7) d.children = null;
 26 |                 });
 27 | 
 28 |                 const svg = d3.create("svg")
 29 |                     .attr("viewBox", [-margin.left, -margin.top, width, dx])
 30 |                     .style("font", "10px sans-serif")
 31 |                     .style("user-select", "none");
 32 | 
 33 |                 const gLink = svg.append("g")
 34 |                     .attr("fill", "none")
 35 |                     .attr("stroke", "#555")
 36 |                     .attr("stroke-opacity", 0.4)
 37 |                     .attr("stroke-width", 1.5);
 38 | 
 39 |                 const gNode = svg.append("g")
 40 |                     .attr("cursor", "pointer")
 41 |                     .attr("pointer-events", "all");
 42 |                 
 43 |                 function modify_mynode(d) {
 44 |                     if (d.data.type === "path"){
 45 |                         return window.atob(d.data.name);
 46 |                     }else{
 47 |                         return d.data.name
 48 |                     }
 49 |                 }
 50 |                 function update(source) {
 51 |                     const duration = d3.event && d3.event.altKey ? 2500 : 250;
 52 |                     const nodes = root.descendants().reverse();
 53 |                     const links = root.links();
 54 | 
 55 |                     // Compute the new tree layout.
 56 |                     tree(root);
 57 | 
 58 |                     let left = root;
 59 |                     let right = root;
 60 |                     root.eachBefore(node => {
 61 |                         if (node.x < left.x) left = node;
 62 |                         if (node.x > right.x) right = node;
 63 |                     });
 64 | 
 65 |                     const height = right.x - left.x + margin.top + margin.bottom;
 66 | 
 67 |                     const transition = svg.transition()
 68 |                         .duration(duration)
 69 |                         .attr("viewBox", [-margin.left, left.x - margin.top, width, height])
 70 |                         .tween("resize", window.ResizeObserver ? null : () => () => svg.dispatch("toggle"));
 71 | 
 72 |                     // Update the nodes…
 73 |                     const node = gNode.selectAll("g")
 74 |                         .data(nodes, d => d.id);
 75 | 
 76 |                     // Enter any new nodes at the parent's previous position.
 77 |                     const nodeEnter = node.enter().append("g")
 78 |                         .attr("transform", d => `translate(${source.y0},${source.x0})`)
 79 |                         .attr("fill-opacity", 0)
 80 |                         .attr("stroke-opacity", 0)
 81 |                         .on("click", d => {
 82 |                             if(d.data.type === "path"){
 83 |                                 window.open(d.data.value);
 84 |                             }
 85 |                             d.children = d.children ? null : d._children;
 86 |                             update(d);
 87 |                         });
 88 | 
 89 |                     nodeEnter.append("circle")
 90 |                         .attr("r", 2.5)
 91 |                         .attr("fill", d => d._children ? "#555" : "#999")
 92 |                         .attr("stroke-width", 10);
 93 | 
 94 |                     nodeEnter.append("text")
 95 |                         .attr("dy", "0.31em")
 96 |                         .attr("x", d => d._children ? -6 : 6)
 97 |                         .attr("text-anchor", d => d._children ? "end" : "start")
 98 |                         .text(d => modify_mynode(d))
 99 |                         .clone(true).lower()
100 |                         .attr("stroke-linejoin", "round")
101 |                         .attr("stroke-width", 3)
102 |                         .attr("stroke", "white");
103 | 
104 |                     // Transition nodes to their new position.
105 |                     const nodeUpdate = node.merge(nodeEnter).transition(transition)
106 |                         .attr("transform", d => `translate(${d.y},${d.x})`)
107 |                         .attr("fill-opacity", 1)
108 |                         .attr("stroke-opacity", 1);
109 | 
110 |                     // Transition exiting nodes to the parent's new position.
111 |                     const nodeExit = node.exit().transition(transition).remove()
112 |                         .attr("transform", d => `translate(${source.y},${source.x})`)
113 |                         .attr("fill-opacity", 0)
114 |                         .attr("stroke-opacity", 0);
115 | 
116 |                     // Update the links…
117 |                     const link = gLink.selectAll("path")
118 |                         .data(links, d => d.target.id);
119 | 
120 |                     // Enter any new links at the parent's previous position.
121 |                     const linkEnter = link.enter().append("path")
122 |                         .attr("d", d => {
123 |                             const o = {x: source.x0, y: source.y0};
124 |                             return diagonal({source: o, target: o});
125 |                         });
126 | 
127 |                     // Transition links to their new position.
128 |                     link.merge(linkEnter).transition(transition)
129 |                         .attr("d", diagonal);
130 | 
131 |                     // Transition exiting nodes to the parent's new position.
132 |                     link.exit().transition(transition).remove()
133 |                         .attr("d", d => {
134 |                             const o = {x: source.x, y: source.y};
135 |                             return diagonal({source: o, target: o});
136 |                         });
137 | 
138 |                     // Stash the old positions for transition.
139 |                     root.eachBefore(d => {
140 |                         d.x0 = d.x;
141 |                         d.y0 = d.y;
142 |                     });
143 |                 }
144 | 
145 |                 update(root);
146 | 
147 |                 return svg.node();
148 |             }
149 |         );
150 |         main.variable(observer("diagonal")).define("diagonal", ["d3"], function(d3){return(
151 |             d3.linkHorizontal().x(d => d.y).y(d => d.x)
152 |         )});
153 |         main.variable(observer("tree")).define("tree", ["d3","dx","dy"], function(d3,dx,dy){return(
154 |             d3.tree().nodeSize([dx, dy])
155 |         )});
156 |         main.variable(observer("data")).define("data", ["FileAttachment"], function(FileAttachment){return(
157 |             FileAttachment("flare-2.json").json()
158 |         )});
159 |         main.variable(observer("dx")).define("dx", function(){return(
160 |             10
161 |         )});
162 |         main.variable(observer("dy")).define("dy", ["width"], function(width){return(
163 |             width / 6
164 |         )});
165 |         main.variable(observer("margin")).define("margin", function(){return(
166 |             {top: 10, right: 120, bottom: 10, left: 40}
167 |         )});
168 |         main.variable(observer("d3")).define("d3", ["require"], function(require){return(
169 |             require("d3@5")
170 |         )});
171 |         return main;
172 |     }
173 | var Inspector = ue;
174 | var Library = rt;
175 | var Runtime = St;
176 | var RuntimeError = it;
177 | const runtime = new Runtime();
178 | const main = runtime.module(define, Inspector.into(document.body));
179 | const arr =  document.getElementsByClassName("observablehq");
180 | for(let i = 0; i < arr.length; i++) {
181 |     if(i>=2){
182 |         arr[i].style.display = "none";
183 |     }
184 | }
185 | </script>
186 | 


--------------------------------------------------------------------------------
/report/template/inspector.css:
--------------------------------------------------------------------------------
1 | :root{--syntax_normal:#1b1e23;--syntax_comment:#a9b0bc;--syntax_number:#20a5ba;--syntax_keyword:#c30771;--syntax_atom:#10a778;--syntax_string:#008ec4;--syntax_error:#ffbedc;--syntax_unknown_variable:#838383;--syntax_known_variable:#005f87;--syntax_matchbracket:#20bbfc;--syntax_key:#6636b4;--mono_fonts:82%/1.5 Menlo,Consolas,monospace}.observablehq--collapsed,.observablehq--expanded,.observablehq--function,.observablehq--gray,.observablehq--import,.observablehq--string:after,.observablehq--string:before{color:var(--syntax_normal)}.observablehq--collapsed,.observablehq--inspect a{cursor:pointer}.observablehq--field{text-indent:-1em;margin-left:1em}.observablehq--empty{color:var(--syntax_comment)}.observablehq--blue,.observablehq--keyword{color:#3182bd}.observablehq--forbidden,.observablehq--pink{color:#e377c2}.observablehq--orange{color:#e6550d}.observablehq--boolean,.observablehq--null,.observablehq--undefined{color:var(--syntax_atom)}.observablehq--bigint,.observablehq--date,.observablehq--green,.observablehq--number,.observablehq--regexp,.observablehq--symbol{color:var(--syntax_number)}.observablehq--index,.observablehq--key{color:var(--syntax_key)}.observablehq--prototype-key{color:#aaa}.observablehq--empty{font-style:oblique}.observablehq--purple,.observablehq--string{color:var(--syntax_string)}.observablehq--error,.observablehq--red{color:#e7040f}.observablehq--inspect{font:var(--mono_fonts);overflow-x:auto;display:block;white-space:pre}.observablehq--error .observablehq--inspect{word-break:break-all;white-space:pre-wrap}


--------------------------------------------------------------------------------
/report/template/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "@d3/collapsible-tree",
 3 |   "main": "main.js",
 4 |   "version": "353.0.0",
 5 |   "homepage": "https://observablehq.com/@d3/collapsible-tree",
 6 |   "author": {
 7 |     "name": "D3",
 8 |     "url": "https://observablehq.com/@d3"
 9 |   },
10 |   "type": "module",
11 |   "peerDependencies": {
12 |     "@observablehq/runtime": "4"
13 |   }
14 | }


--------------------------------------------------------------------------------
/reportrs.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jkgh006/assetscan/12492689a4ec214fb68019ffa2b1903f9ae4c836/reportrs.jpg


--------------------------------------------------------------------------------
/result.txt:
--------------------------------------------------------------------------------
1 | https://222.175.107.35:443      Welcome to OpenResty!
2 | https://www.bobdirectbank.com:443      北京银行直销银行
3 | http://www.bobdirectbank.com:80      北京银行直销银行
4 | 


--------------------------------------------------------------------------------
/scanrs.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jkgh006/assetscan/12492689a4ec214fb68019ffa2b1903f9ae4c836/scanrs.jpg


--------------------------------------------------------------------------------
/thirdparty/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jkgh006/assetscan/12492689a4ec214fb68019ffa2b1903f9ae4c836/thirdparty/__init__.py


--------------------------------------------------------------------------------
/thirdparty/chardet/__init__.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # This library is free software; you can redistribute it and/or
 3 | # modify it under the terms of the GNU Lesser General Public
 4 | # License as published by the Free Software Foundation; either
 5 | # version 2.1 of the License, or (at your option) any later version.
 6 | #
 7 | # This library is distributed in the hope that it will be useful,
 8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
10 | # Lesser General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Lesser General Public
13 | # License along with this library; if not, write to the Free Software
14 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
15 | # 02110-1301  USA
16 | ######################### END LICENSE BLOCK #########################
17 | 
18 | 
19 | from .compat import PY2, PY3
20 | from .universaldetector import UniversalDetector
21 | from .version import __version__, VERSION
22 | 
23 | 
24 | def detect(byte_str):
25 |     """
26 |     Detect the encoding of the given byte string.
27 | 
28 |     :param byte_str:     The byte sequence to examine.
29 |     :type byte_str:      ``bytes`` or ``bytearray``
30 |     """
31 |     if not isinstance(byte_str, bytearray):
32 |         if not isinstance(byte_str, bytes):
33 |             raise TypeError('Expected object of type bytes or bytearray, got: '
34 |                             '{0}'.format(type(byte_str)))
35 |         else:
36 |             byte_str = bytearray(byte_str)
37 |     detector = UniversalDetector()
38 |     detector.feed(byte_str)
39 |     return detector.close()
40 | 


--------------------------------------------------------------------------------
/thirdparty/chardet/big5prober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is Mozilla Communicator client code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 1998
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301  USA
26 | ######################### END LICENSE BLOCK #########################
27 | 
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import Big5DistributionAnalysis
31 | from .mbcssm import BIG5_SM_MODEL
32 | 
33 | 
34 | class Big5Prober(MultiByteCharSetProber):
35 |     def __init__(self):
36 |         super(Big5Prober, self).__init__()
37 |         self.coding_sm = CodingStateMachine(BIG5_SM_MODEL)
38 |         self.distribution_analyzer = Big5DistributionAnalysis()
39 |         self.reset()
40 | 
41 |     @property
42 |     def charset_name(self):
43 |         return "Big5"
44 | 
45 |     @property
46 |     def language(self):
47 |         return "Chinese"
48 | 


--------------------------------------------------------------------------------
/thirdparty/chardet/charsetgroupprober.py:
--------------------------------------------------------------------------------
  1 | ######################## BEGIN LICENSE BLOCK ########################
  2 | # The Original Code is Mozilla Communicator client code.
  3 | #
  4 | # The Initial Developer of the Original Code is
  5 | # Netscape Communications Corporation.
  6 | # Portions created by the Initial Developer are Copyright (C) 1998
  7 | # the Initial Developer. All Rights Reserved.
  8 | #
  9 | # Contributor(s):
 10 | #   Mark Pilgrim - port to Python
 11 | #
 12 | # This library is free software; you can redistribute it and/or
 13 | # modify it under the terms of the GNU Lesser General Public
 14 | # License as published by the Free Software Foundation; either
 15 | # version 2.1 of the License, or (at your option) any later version.
 16 | #
 17 | # This library is distributed in the hope that it will be useful,
 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 20 | # Lesser General Public License for more details.
 21 | #
 22 | # You should have received a copy of the GNU Lesser General Public
 23 | # License along with this library; if not, write to the Free Software
 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 25 | # 02110-1301  USA
 26 | ######################### END LICENSE BLOCK #########################
 27 | 
 28 | from .enums import ProbingState
 29 | from .charsetprober import CharSetProber
 30 | 
 31 | 
 32 | class CharSetGroupProber(CharSetProber):
 33 |     def __init__(self, lang_filter=None):
 34 |         super(CharSetGroupProber, self).__init__(lang_filter=lang_filter)
 35 |         self._active_num = 0
 36 |         self.probers = []
 37 |         self._best_guess_prober = None
 38 | 
 39 |     def reset(self):
 40 |         super(CharSetGroupProber, self).reset()
 41 |         self._active_num = 0
 42 |         for prober in self.probers:
 43 |             if prober:
 44 |                 prober.reset()
 45 |                 prober.active = True
 46 |                 self._active_num += 1
 47 |         self._best_guess_prober = None
 48 | 
 49 |     @property
 50 |     def charset_name(self):
 51 |         if not self._best_guess_prober:
 52 |             self.get_confidence()
 53 |             if not self._best_guess_prober:
 54 |                 return None
 55 |         return self._best_guess_prober.charset_name
 56 | 
 57 |     @property
 58 |     def language(self):
 59 |         if not self._best_guess_prober:
 60 |             self.get_confidence()
 61 |             if not self._best_guess_prober:
 62 |                 return None
 63 |         return self._best_guess_prober.language
 64 | 
 65 |     def feed(self, byte_str):
 66 |         for prober in self.probers:
 67 |             if not prober:
 68 |                 continue
 69 |             if not prober.active:
 70 |                 continue
 71 |             state = prober.feed(byte_str)
 72 |             if not state:
 73 |                 continue
 74 |             if state == ProbingState.FOUND_IT:
 75 |                 self._best_guess_prober = prober
 76 |                 return self.state
 77 |             elif state == ProbingState.NOT_ME:
 78 |                 prober.active = False
 79 |                 self._active_num -= 1
 80 |                 if self._active_num <= 0:
 81 |                     self._state = ProbingState.NOT_ME
 82 |                     return self.state
 83 |         return self.state
 84 | 
 85 |     def get_confidence(self):
 86 |         state = self.state
 87 |         if state == ProbingState.FOUND_IT:
 88 |             return 0.99
 89 |         elif state == ProbingState.NOT_ME:
 90 |             return 0.01
 91 |         best_conf = 0.0
 92 |         self._best_guess_prober = None
 93 |         for prober in self.probers:
 94 |             if not prober:
 95 |                 continue
 96 |             if not prober.active:
 97 |                 self.logger.debug('%s not active', prober.charset_name)
 98 |                 continue
 99 |             conf = prober.get_confidence()
100 |             self.logger.debug('%s %s confidence = %s', prober.charset_name, prober.language, conf)
101 |             if best_conf < conf:
102 |                 best_conf = conf
103 |                 self._best_guess_prober = prober
104 |         if not self._best_guess_prober:
105 |             return 0.0
106 |         return best_conf
107 | 


--------------------------------------------------------------------------------
/thirdparty/chardet/charsetprober.py:
--------------------------------------------------------------------------------
  1 | ######################## BEGIN LICENSE BLOCK ########################
  2 | # The Original Code is Mozilla Universal charset detector code.
  3 | #
  4 | # The Initial Developer of the Original Code is
  5 | # Netscape Communications Corporation.
  6 | # Portions created by the Initial Developer are Copyright (C) 2001
  7 | # the Initial Developer. All Rights Reserved.
  8 | #
  9 | # Contributor(s):
 10 | #   Mark Pilgrim - port to Python
 11 | #   Shy Shalom - original C code
 12 | #
 13 | # This library is free software; you can redistribute it and/or
 14 | # modify it under the terms of the GNU Lesser General Public
 15 | # License as published by the Free Software Foundation; either
 16 | # version 2.1 of the License, or (at your option) any later version.
 17 | #
 18 | # This library is distributed in the hope that it will be useful,
 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 21 | # Lesser General Public License for more details.
 22 | #
 23 | # You should have received a copy of the GNU Lesser General Public
 24 | # License along with this library; if not, write to the Free Software
 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 26 | # 02110-1301  USA
 27 | ######################### END LICENSE BLOCK #########################
 28 | 
 29 | import logging
 30 | import re
 31 | 
 32 | from .enums import ProbingState
 33 | 
 34 | 
 35 | class CharSetProber(object):
 36 | 
 37 |     SHORTCUT_THRESHOLD = 0.95
 38 | 
 39 |     def __init__(self, lang_filter=None):
 40 |         self._state = None
 41 |         self.lang_filter = lang_filter
 42 |         self.logger = logging.getLogger(__name__)
 43 | 
 44 |     def reset(self):
 45 |         self._state = ProbingState.DETECTING
 46 | 
 47 |     @property
 48 |     def charset_name(self):
 49 |         return None
 50 | 
 51 |     def feed(self, buf):
 52 |         pass
 53 | 
 54 |     @property
 55 |     def state(self):
 56 |         return self._state
 57 | 
 58 |     def get_confidence(self):
 59 |         return 0.0
 60 | 
 61 |     @staticmethod
 62 |     def filter_high_byte_only(buf):
 63 |         buf = re.sub(b'([\x00-\x7F])+', b' ', buf)
 64 |         return buf
 65 | 
 66 |     @staticmethod
 67 |     def filter_international_words(buf):
 68 |         """
 69 |         We define three types of bytes:
 70 |         alphabet: english alphabets [a-zA-Z]
 71 |         international: international characters [\x80-\xFF]
 72 |         marker: everything else [^a-zA-Z\x80-\xFF]
 73 | 
 74 |         The input buffer can be thought to contain a series of words delimited
 75 |         by markers. This function works to filter all words that contain at
 76 |         least one international character. All contiguous sequences of markers
 77 |         are replaced by a single space ascii character.
 78 | 
 79 |         This filter applies to all scripts which do not use English characters.
 80 |         """
 81 |         filtered = bytearray()
 82 | 
 83 |         # This regex expression filters out only words that have at-least one
 84 |         # international character. The word may include one marker character at
 85 |         # the end.
 86 |         words = re.findall(b'[a-zA-Z]*[\x80-\xFF]+[a-zA-Z]*[^a-zA-Z\x80-\xFF]?',
 87 |                            buf)
 88 | 
 89 |         for word in words:
 90 |             filtered.extend(word[:-1])
 91 | 
 92 |             # If the last character in the word is a marker, replace it with a
 93 |             # space as markers shouldn't affect our analysis (they are used
 94 |             # similarly across all languages and may thus have similar
 95 |             # frequencies).
 96 |             last_char = word[-1:]
 97 |             if not last_char.isalpha() and last_char < b'\x80':
 98 |                 last_char = b' '
 99 |             filtered.extend(last_char)
100 | 
101 |         return filtered
102 | 
103 |     @staticmethod
104 |     def filter_with_english_letters(buf):
105 |         """
106 |         Returns a copy of ``buf`` that retains only the sequences of English
107 |         alphabet and high byte characters that are not between <> characters.
108 |         Also retains English alphabet and high byte characters immediately
109 |         before occurrences of >.
110 | 
111 |         This filter can be applied to all scripts which contain both English
112 |         characters and extended ASCII characters, but is currently only used by
113 |         ``Latin1Prober``.
114 |         """
115 |         filtered = bytearray()
116 |         in_tag = False
117 |         prev = 0
118 | 
119 |         for curr in range(len(buf)):
120 |             # Slice here to get bytes instead of an int with Python 3
121 |             buf_char = buf[curr:curr + 1]
122 |             # Check if we're coming out of or entering an HTML tag
123 |             if buf_char == b'>':
124 |                 in_tag = False
125 |             elif buf_char == b'<':
126 |                 in_tag = True
127 | 
128 |             # If current character is not extended-ASCII and not alphabetic...
129 |             if buf_char < b'\x80' and not buf_char.isalpha():
130 |                 # ...and we're not in a tag
131 |                 if curr > prev and not in_tag:
132 |                     # Keep everything after last non-extended-ASCII,
133 |                     # non-alphabetic character
134 |                     filtered.extend(buf[prev:curr])
135 |                     # Output a space to delimit stretch we kept
136 |                     filtered.extend(b' ')
137 |                 prev = curr + 1
138 | 
139 |         # If we're not in a tag...
140 |         if not in_tag:
141 |             # Keep everything after last non-extended-ASCII, non-alphabetic
142 |             # character
143 |             filtered.extend(buf[prev:])
144 | 
145 |         return filtered
146 | 


--------------------------------------------------------------------------------
/thirdparty/chardet/cli/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/thirdparty/chardet/cli/chardetect.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | """
 3 | Script which takes one or more file paths and reports on their detected
 4 | encodings
 5 | 
 6 | Example::
 7 | 
 8 |     % chardetect somefile someotherfile
 9 |     somefile: windows-1252 with confidence 0.5
10 |     someotherfile: ascii with confidence 1.0
11 | 
12 | If no paths are provided, it takes its input from stdin.
13 | 
14 | """
15 | 
16 | from __future__ import absolute_import, print_function, unicode_literals
17 | 
18 | import argparse
19 | import sys
20 | 
21 | from thirdparty.chardet import __version__
22 | from thirdparty.chardet import PY2
23 | from thirdparty.chardet import UniversalDetector
24 | 
25 | 
26 | def description_of(lines, name='stdin'):
27 |     """
28 |     Return a string describing the probable encoding of a file or
29 |     list of strings.
30 | 
31 |     :param lines: The lines to get the encoding of.
32 |     :type lines: Iterable of bytes
33 |     :param name: Name of file or collection of lines
34 |     :type name: str
35 |     """
36 |     u = UniversalDetector()
37 |     for line in lines:
38 |         line = bytearray(line)
39 |         u.feed(line)
40 |         # shortcut out of the loop to save reading further - particularly useful if we read a BOM.
41 |         if u.done:
42 |             break
43 |     u.close()
44 |     result = u.result
45 |     if PY2:
46 |         name = name.decode(sys.getfilesystemencoding(), 'ignore')
47 |     if result['encoding']:
48 |         return '{0}: {1} with confidence {2}'.format(name, result['encoding'],
49 |                                                      result['confidence'])
50 |     else:
51 |         return '{0}: no result'.format(name)
52 | 
53 | 
54 | def main(argv=None):
55 |     """
56 |     Handles command line arguments and gets things started.
57 | 
58 |     :param argv: List of arguments, as if specified on the command-line.
59 |                  If None, ``sys.argv[1:]`` is used instead.
60 |     :type argv: list of str
61 |     """
62 |     # Get command line arguments
63 |     parser = argparse.ArgumentParser(
64 |         description="Takes one or more file paths and reports their detected \
65 |                      encodings")
66 |     parser.add_argument('input',
67 |                         help='File whose encoding we would like to determine. \
68 |                               (default: stdin)',
69 |                         type=argparse.FileType('rb'), nargs='*',
70 |                         default=[sys.stdin if PY2 else sys.stdin.buffer])
71 |     parser.add_argument('--version', action='version',
72 |                         version='%(prog)s {0}'.format(__version__))
73 |     args = parser.parse_args(argv)
74 | 
75 |     for f in args.input:
76 |         if f.isatty():
77 |             print("You are running chardetect interactively. Press " +
78 |                   "CTRL-D twice at the start of a blank line to signal the " +
79 |                   "end of your input. If you want help, run chardetect " +
80 |                   "--help\n", file=sys.stderr)
81 |         print(description_of(f, f.name))
82 | 
83 | 
84 | if __name__ == '__main__':
85 |     main()
86 | 


--------------------------------------------------------------------------------
/thirdparty/chardet/codingstatemachine.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is mozilla.org code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 1998
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301  USA
26 | ######################### END LICENSE BLOCK #########################
27 | 
28 | import logging
29 | 
30 | from .enums import MachineState
31 | 
32 | 
33 | class CodingStateMachine(object):
34 |     """
35 |     A state machine to verify a byte sequence for a particular encoding. For
36 |     each byte the detector receives, it will feed that byte to every active
37 |     state machine available, one byte at a time. The state machine changes its
38 |     state based on its previous state and the byte it receives. There are 3
39 |     states in a state machine that are of interest to an auto-detector:
40 | 
41 |     START state: This is the state to start with, or a legal byte sequence
42 |                  (i.e. a valid code point) for character has been identified.
43 | 
44 |     ME state:  This indicates that the state machine identified a byte sequence
45 |                that is specific to the charset it is designed for and that
46 |                there is no other possible encoding which can contain this byte
47 |                sequence. This will to lead to an immediate positive answer for
48 |                the detector.
49 | 
50 |     ERROR state: This indicates the state machine identified an illegal byte
51 |                  sequence for that encoding. This will lead to an immediate
52 |                  negative answer for this encoding. Detector will exclude this
53 |                  encoding from consideration from here on.
54 |     """
55 |     def __init__(self, sm):
56 |         self._model = sm
57 |         self._curr_byte_pos = 0
58 |         self._curr_char_len = 0
59 |         self._curr_state = None
60 |         self.logger = logging.getLogger(__name__)
61 |         self.reset()
62 | 
63 |     def reset(self):
64 |         self._curr_state = MachineState.START
65 | 
66 |     def next_state(self, c):
67 |         # for each byte we get its class
68 |         # if it is first byte, we also get byte length
69 |         byte_class = self._model['class_table'][c]
70 |         if self._curr_state == MachineState.START:
71 |             self._curr_byte_pos = 0
72 |             self._curr_char_len = self._model['char_len_table'][byte_class]
73 |         # from byte's class and state_table, we get its next state
74 |         curr_state = (self._curr_state * self._model['class_factor']
75 |                       + byte_class)
76 |         self._curr_state = self._model['state_table'][curr_state]
77 |         self._curr_byte_pos += 1
78 |         return self._curr_state
79 | 
80 |     def get_current_charlen(self):
81 |         return self._curr_char_len
82 | 
83 |     def get_coding_state_machine(self):
84 |         return self._model['name']
85 | 
86 |     @property
87 |     def language(self):
88 |         return self._model['language']
89 | 


--------------------------------------------------------------------------------
/thirdparty/chardet/compat.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # Contributor(s):
 3 | #   Dan Blanchard
 4 | #   Ian Cordasco
 5 | #
 6 | # This library is free software; you can redistribute it and/or
 7 | # modify it under the terms of the GNU Lesser General Public
 8 | # License as published by the Free Software Foundation; either
 9 | # version 2.1 of the License, or (at your option) any later version.
10 | #
11 | # This library is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 | # Lesser General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU Lesser General Public
17 | # License along with this library; if not, write to the Free Software
18 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
19 | # 02110-1301  USA
20 | ######################### END LICENSE BLOCK #########################
21 | 
22 | import sys
23 | 
24 | 
25 | if sys.version_info < (3, 0):
26 |     PY2 = True
27 |     PY3 = False
28 |     base_str = (str, unicode)
29 |     text_type = unicode
30 | else:
31 |     PY2 = False
32 |     PY3 = True
33 |     base_str = (bytes, str)
34 |     text_type = str
35 | 


--------------------------------------------------------------------------------
/thirdparty/chardet/cp949prober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is mozilla.org code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 1998
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301  USA
26 | ######################### END LICENSE BLOCK #########################
27 | 
28 | from .chardistribution import EUCKRDistributionAnalysis
29 | from .codingstatemachine import CodingStateMachine
30 | from .mbcharsetprober import MultiByteCharSetProber
31 | from .mbcssm import CP949_SM_MODEL
32 | 
33 | 
34 | class CP949Prober(MultiByteCharSetProber):
35 |     def __init__(self):
36 |         super(CP949Prober, self).__init__()
37 |         self.coding_sm = CodingStateMachine(CP949_SM_MODEL)
38 |         # NOTE: CP949 is a superset of EUC-KR, so the distribution should be
39 |         #       not different.
40 |         self.distribution_analyzer = EUCKRDistributionAnalysis()
41 |         self.reset()
42 | 
43 |     @property
44 |     def charset_name(self):
45 |         return "CP949"
46 | 
47 |     @property
48 |     def language(self):
49 |         return "Korean"
50 | 


--------------------------------------------------------------------------------
/thirdparty/chardet/enums.py:
--------------------------------------------------------------------------------
 1 | """
 2 | All of the Enums that are used throughout the chardet package.
 3 | 
 4 | :author: Dan Blanchard (dan.blanchard@gmail.com)
 5 | """
 6 | 
 7 | 
 8 | class InputState(object):
 9 |     """
10 |     This enum represents the different states a universal detector can be in.
11 |     """
12 |     PURE_ASCII = 0
13 |     ESC_ASCII = 1
14 |     HIGH_BYTE = 2
15 | 
16 | 
17 | class LanguageFilter(object):
18 |     """
19 |     This enum represents the different language filters we can apply to a
20 |     ``UniversalDetector``.
21 |     """
22 |     CHINESE_SIMPLIFIED = 0x01
23 |     CHINESE_TRADITIONAL = 0x02
24 |     JAPANESE = 0x04
25 |     KOREAN = 0x08
26 |     NON_CJK = 0x10
27 |     ALL = 0x1F
28 |     CHINESE = CHINESE_SIMPLIFIED | CHINESE_TRADITIONAL
29 |     CJK = CHINESE | JAPANESE | KOREAN
30 | 
31 | 
32 | class ProbingState(object):
33 |     """
34 |     This enum represents the different states a prober can be in.
35 |     """
36 |     DETECTING = 0
37 |     FOUND_IT = 1
38 |     NOT_ME = 2
39 | 
40 | 
41 | class MachineState(object):
42 |     """
43 |     This enum represents the different states a state machine can be in.
44 |     """
45 |     START = 0
46 |     ERROR = 1
47 |     ITS_ME = 2
48 | 
49 | 
50 | class SequenceLikelihood(object):
51 |     """
52 |     This enum represents the likelihood of a character following the previous one.
53 |     """
54 |     NEGATIVE = 0
55 |     UNLIKELY = 1
56 |     LIKELY = 2
57 |     POSITIVE = 3
58 | 
59 |     @classmethod
60 |     def get_num_categories(cls):
61 |         """:returns: The number of likelihood categories in the enum."""
62 |         return 4
63 | 
64 | 
65 | class CharacterCategory(object):
66 |     """
67 |     This enum represents the different categories language models for
68 |     ``SingleByteCharsetProber`` put characters into.
69 | 
70 |     Anything less than CONTROL is considered a letter.
71 |     """
72 |     UNDEFINED = 255
73 |     LINE_BREAK = 254
74 |     SYMBOL = 253
75 |     DIGIT = 252
76 |     CONTROL = 251
77 | 


--------------------------------------------------------------------------------
/thirdparty/chardet/escprober.py:
--------------------------------------------------------------------------------
  1 | ######################## BEGIN LICENSE BLOCK ########################
  2 | # The Original Code is mozilla.org code.
  3 | #
  4 | # The Initial Developer of the Original Code is
  5 | # Netscape Communications Corporation.
  6 | # Portions created by the Initial Developer are Copyright (C) 1998
  7 | # the Initial Developer. All Rights Reserved.
  8 | #
  9 | # Contributor(s):
 10 | #   Mark Pilgrim - port to Python
 11 | #
 12 | # This library is free software; you can redistribute it and/or
 13 | # modify it under the terms of the GNU Lesser General Public
 14 | # License as published by the Free Software Foundation; either
 15 | # version 2.1 of the License, or (at your option) any later version.
 16 | #
 17 | # This library is distributed in the hope that it will be useful,
 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 20 | # Lesser General Public License for more details.
 21 | #
 22 | # You should have received a copy of the GNU Lesser General Public
 23 | # License along with this library; if not, write to the Free Software
 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 25 | # 02110-1301  USA
 26 | ######################### END LICENSE BLOCK #########################
 27 | 
 28 | from .charsetprober import CharSetProber
 29 | from .codingstatemachine import CodingStateMachine
 30 | from .enums import LanguageFilter, ProbingState, MachineState
 31 | from .escsm import (HZ_SM_MODEL, ISO2022CN_SM_MODEL, ISO2022JP_SM_MODEL,
 32 |                     ISO2022KR_SM_MODEL)
 33 | 
 34 | 
 35 | class EscCharSetProber(CharSetProber):
 36 |     """
 37 |     This CharSetProber uses a "code scheme" approach for detecting encodings,
 38 |     whereby easily recognizable escape or shift sequences are relied on to
 39 |     identify these encodings.
 40 |     """
 41 | 
 42 |     def __init__(self, lang_filter=None):
 43 |         super(EscCharSetProber, self).__init__(lang_filter=lang_filter)
 44 |         self.coding_sm = []
 45 |         if self.lang_filter & LanguageFilter.CHINESE_SIMPLIFIED:
 46 |             self.coding_sm.append(CodingStateMachine(HZ_SM_MODEL))
 47 |             self.coding_sm.append(CodingStateMachine(ISO2022CN_SM_MODEL))
 48 |         if self.lang_filter & LanguageFilter.JAPANESE:
 49 |             self.coding_sm.append(CodingStateMachine(ISO2022JP_SM_MODEL))
 50 |         if self.lang_filter & LanguageFilter.KOREAN:
 51 |             self.coding_sm.append(CodingStateMachine(ISO2022KR_SM_MODEL))
 52 |         self.active_sm_count = None
 53 |         self._detected_charset = None
 54 |         self._detected_language = None
 55 |         self._state = None
 56 |         self.reset()
 57 | 
 58 |     def reset(self):
 59 |         super(EscCharSetProber, self).reset()
 60 |         for coding_sm in self.coding_sm:
 61 |             if not coding_sm:
 62 |                 continue
 63 |             coding_sm.active = True
 64 |             coding_sm.reset()
 65 |         self.active_sm_count = len(self.coding_sm)
 66 |         self._detected_charset = None
 67 |         self._detected_language = None
 68 | 
 69 |     @property
 70 |     def charset_name(self):
 71 |         return self._detected_charset
 72 | 
 73 |     @property
 74 |     def language(self):
 75 |         return self._detected_language
 76 | 
 77 |     def get_confidence(self):
 78 |         if self._detected_charset:
 79 |             return 0.99
 80 |         else:
 81 |             return 0.00
 82 | 
 83 |     def feed(self, byte_str):
 84 |         for c in byte_str:
 85 |             for coding_sm in self.coding_sm:
 86 |                 if not coding_sm or not coding_sm.active:
 87 |                     continue
 88 |                 coding_state = coding_sm.next_state(c)
 89 |                 if coding_state == MachineState.ERROR:
 90 |                     coding_sm.active = False
 91 |                     self.active_sm_count -= 1
 92 |                     if self.active_sm_count <= 0:
 93 |                         self._state = ProbingState.NOT_ME
 94 |                         return self.state
 95 |                 elif coding_state == MachineState.ITS_ME:
 96 |                     self._state = ProbingState.FOUND_IT
 97 |                     self._detected_charset = coding_sm.get_coding_state_machine()
 98 |                     self._detected_language = coding_sm.language
 99 |                     return self.state
100 | 
101 |         return self.state
102 | 


--------------------------------------------------------------------------------
/thirdparty/chardet/eucjpprober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is mozilla.org code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 1998
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301  USA
26 | ######################### END LICENSE BLOCK #########################
27 | 
28 | from .enums import ProbingState, MachineState
29 | from .mbcharsetprober import MultiByteCharSetProber
30 | from .codingstatemachine import CodingStateMachine
31 | from .chardistribution import EUCJPDistributionAnalysis
32 | from .jpcntx import EUCJPContextAnalysis
33 | from .mbcssm import EUCJP_SM_MODEL
34 | 
35 | 
36 | class EUCJPProber(MultiByteCharSetProber):
37 |     def __init__(self):
38 |         super(EUCJPProber, self).__init__()
39 |         self.coding_sm = CodingStateMachine(EUCJP_SM_MODEL)
40 |         self.distribution_analyzer = EUCJPDistributionAnalysis()
41 |         self.context_analyzer = EUCJPContextAnalysis()
42 |         self.reset()
43 | 
44 |     def reset(self):
45 |         super(EUCJPProber, self).reset()
46 |         self.context_analyzer.reset()
47 | 
48 |     @property
49 |     def charset_name(self):
50 |         return "EUC-JP"
51 | 
52 |     @property
53 |     def language(self):
54 |         return "Japanese"
55 | 
56 |     def feed(self, byte_str):
57 |         for i in range(len(byte_str)):
58 |             # PY3K: byte_str is a byte array, so byte_str[i] is an int, not a byte
59 |             coding_state = self.coding_sm.next_state(byte_str[i])
60 |             if coding_state == MachineState.ERROR:
61 |                 self.logger.debug('%s %s prober hit error at byte %s',
62 |                                   self.charset_name, self.language, i)
63 |                 self._state = ProbingState.NOT_ME
64 |                 break
65 |             elif coding_state == MachineState.ITS_ME:
66 |                 self._state = ProbingState.FOUND_IT
67 |                 break
68 |             elif coding_state == MachineState.START:
69 |                 char_len = self.coding_sm.get_current_charlen()
70 |                 if i == 0:
71 |                     self._last_char[1] = byte_str[0]
72 |                     self.context_analyzer.feed(self._last_char, char_len)
73 |                     self.distribution_analyzer.feed(self._last_char, char_len)
74 |                 else:
75 |                     self.context_analyzer.feed(byte_str[i - 1:i + 1],
76 |                                                 char_len)
77 |                     self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
78 |                                                      char_len)
79 | 
80 |         self._last_char[0] = byte_str[-1]
81 | 
82 |         if self.state == ProbingState.DETECTING:
83 |             if (self.context_analyzer.got_enough_data() and
84 |                (self.get_confidence() > self.SHORTCUT_THRESHOLD)):
85 |                 self._state = ProbingState.FOUND_IT
86 | 
87 |         return self.state
88 | 
89 |     def get_confidence(self):
90 |         context_conf = self.context_analyzer.get_confidence()
91 |         distrib_conf = self.distribution_analyzer.get_confidence()
92 |         return max(context_conf, distrib_conf)
93 | 


--------------------------------------------------------------------------------
/thirdparty/chardet/euckrprober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is mozilla.org code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 1998
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301  USA
26 | ######################### END LICENSE BLOCK #########################
27 | 
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import EUCKRDistributionAnalysis
31 | from .mbcssm import EUCKR_SM_MODEL
32 | 
33 | 
34 | class EUCKRProber(MultiByteCharSetProber):
35 |     def __init__(self):
36 |         super(EUCKRProber, self).__init__()
37 |         self.coding_sm = CodingStateMachine(EUCKR_SM_MODEL)
38 |         self.distribution_analyzer = EUCKRDistributionAnalysis()
39 |         self.reset()
40 | 
41 |     @property
42 |     def charset_name(self):
43 |         return "EUC-KR"
44 | 
45 |     @property
46 |     def language(self):
47 |         return "Korean"
48 | 


--------------------------------------------------------------------------------
/thirdparty/chardet/euctwprober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is mozilla.org code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 1998
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301  USA
26 | ######################### END LICENSE BLOCK #########################
27 | 
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import EUCTWDistributionAnalysis
31 | from .mbcssm import EUCTW_SM_MODEL
32 | 
33 | class EUCTWProber(MultiByteCharSetProber):
34 |     def __init__(self):
35 |         super(EUCTWProber, self).__init__()
36 |         self.coding_sm = CodingStateMachine(EUCTW_SM_MODEL)
37 |         self.distribution_analyzer = EUCTWDistributionAnalysis()
38 |         self.reset()
39 | 
40 |     @property
41 |     def charset_name(self):
42 |         return "EUC-TW"
43 | 
44 |     @property
45 |     def language(self):
46 |         return "Taiwan"
47 | 


--------------------------------------------------------------------------------
/thirdparty/chardet/gb2312prober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is mozilla.org code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 1998
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301  USA
26 | ######################### END LICENSE BLOCK #########################
27 | 
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import GB2312DistributionAnalysis
31 | from .mbcssm import GB2312_SM_MODEL
32 | 
33 | class GB2312Prober(MultiByteCharSetProber):
34 |     def __init__(self):
35 |         super(GB2312Prober, self).__init__()
36 |         self.coding_sm = CodingStateMachine(GB2312_SM_MODEL)
37 |         self.distribution_analyzer = GB2312DistributionAnalysis()
38 |         self.reset()
39 | 
40 |     @property
41 |     def charset_name(self):
42 |         return "GB2312"
43 | 
44 |     @property
45 |     def language(self):
46 |         return "Chinese"
47 | 


--------------------------------------------------------------------------------
/thirdparty/chardet/latin1prober.py:
--------------------------------------------------------------------------------
  1 | ######################## BEGIN LICENSE BLOCK ########################
  2 | # The Original Code is Mozilla Universal charset detector code.
  3 | #
  4 | # The Initial Developer of the Original Code is
  5 | # Netscape Communications Corporation.
  6 | # Portions created by the Initial Developer are Copyright (C) 2001
  7 | # the Initial Developer. All Rights Reserved.
  8 | #
  9 | # Contributor(s):
 10 | #   Mark Pilgrim - port to Python
 11 | #   Shy Shalom - original C code
 12 | #
 13 | # This library is free software; you can redistribute it and/or
 14 | # modify it under the terms of the GNU Lesser General Public
 15 | # License as published by the Free Software Foundation; either
 16 | # version 2.1 of the License, or (at your option) any later version.
 17 | #
 18 | # This library is distributed in the hope that it will be useful,
 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 21 | # Lesser General Public License for more details.
 22 | #
 23 | # You should have received a copy of the GNU Lesser General Public
 24 | # License along with this library; if not, write to the Free Software
 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 26 | # 02110-1301  USA
 27 | ######################### END LICENSE BLOCK #########################
 28 | 
 29 | from .charsetprober import CharSetProber
 30 | from .enums import ProbingState
 31 | 
 32 | FREQ_CAT_NUM = 4
 33 | 
 34 | UDF = 0  # undefined
 35 | OTH = 1  # other
 36 | ASC = 2  # ascii capital letter
 37 | ASS = 3  # ascii small letter
 38 | ACV = 4  # accent capital vowel
 39 | ACO = 5  # accent capital other
 40 | ASV = 6  # accent small vowel
 41 | ASO = 7  # accent small other
 42 | CLASS_NUM = 8  # total classes
 43 | 
 44 | Latin1_CharToClass = (
 45 |     OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 00 - 07
 46 |     OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 08 - 0F
 47 |     OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 10 - 17
 48 |     OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 18 - 1F
 49 |     OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 20 - 27
 50 |     OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 28 - 2F
 51 |     OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 30 - 37
 52 |     OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 38 - 3F
 53 |     OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   # 40 - 47
 54 |     ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   # 48 - 4F
 55 |     ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   # 50 - 57
 56 |     ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH,   # 58 - 5F
 57 |     OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   # 60 - 67
 58 |     ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   # 68 - 6F
 59 |     ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   # 70 - 77
 60 |     ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH,   # 78 - 7F
 61 |     OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH,   # 80 - 87
 62 |     OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF,   # 88 - 8F
 63 |     UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 90 - 97
 64 |     OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO,   # 98 - 9F
 65 |     OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # A0 - A7
 66 |     OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # A8 - AF
 67 |     OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # B0 - B7
 68 |     OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # B8 - BF
 69 |     ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO,   # C0 - C7
 70 |     ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV,   # C8 - CF
 71 |     ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH,   # D0 - D7
 72 |     ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO,   # D8 - DF
 73 |     ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO,   # E0 - E7
 74 |     ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV,   # E8 - EF
 75 |     ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH,   # F0 - F7
 76 |     ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO,   # F8 - FF
 77 | )
 78 | 
 79 | # 0 : illegal
 80 | # 1 : very unlikely
 81 | # 2 : normal
 82 | # 3 : very likely
 83 | Latin1ClassModel = (
 84 | # UDF OTH ASC ASS ACV ACO ASV ASO
 85 |     0,  0,  0,  0,  0,  0,  0,  0,  # UDF
 86 |     0,  3,  3,  3,  3,  3,  3,  3,  # OTH
 87 |     0,  3,  3,  3,  3,  3,  3,  3,  # ASC
 88 |     0,  3,  3,  3,  1,  1,  3,  3,  # ASS
 89 |     0,  3,  3,  3,  1,  2,  1,  2,  # ACV
 90 |     0,  3,  3,  3,  3,  3,  3,  3,  # ACO
 91 |     0,  3,  1,  3,  1,  1,  1,  3,  # ASV
 92 |     0,  3,  1,  3,  1,  1,  3,  3,  # ASO
 93 | )
 94 | 
 95 | 
 96 | class Latin1Prober(CharSetProber):
 97 |     def __init__(self):
 98 |         super(Latin1Prober, self).__init__()
 99 |         self._last_char_class = None
100 |         self._freq_counter = None
101 |         self.reset()
102 | 
103 |     def reset(self):
104 |         self._last_char_class = OTH
105 |         self._freq_counter = [0] * FREQ_CAT_NUM
106 |         CharSetProber.reset(self)
107 | 
108 |     @property
109 |     def charset_name(self):
110 |         return "ISO-8859-1"
111 | 
112 |     @property
113 |     def language(self):
114 |         return ""
115 | 
116 |     def feed(self, byte_str):
117 |         byte_str = self.filter_with_english_letters(byte_str)
118 |         for c in byte_str:
119 |             char_class = Latin1_CharToClass[c]
120 |             freq = Latin1ClassModel[(self._last_char_class * CLASS_NUM)
121 |                                     + char_class]
122 |             if freq == 0:
123 |                 self._state = ProbingState.NOT_ME
124 |                 break
125 |             self._freq_counter[freq] += 1
126 |             self._last_char_class = char_class
127 | 
128 |         return self.state
129 | 
130 |     def get_confidence(self):
131 |         if self.state == ProbingState.NOT_ME:
132 |             return 0.01
133 | 
134 |         total = sum(self._freq_counter)
135 |         if total < 0.01:
136 |             confidence = 0.0
137 |         else:
138 |             confidence = ((self._freq_counter[3] - self._freq_counter[1] * 20.0)
139 |                           / total)
140 |         if confidence < 0.0:
141 |             confidence = 0.0
142 |         # lower the confidence of latin1 so that other more accurate
143 |         # detector can take priority.
144 |         confidence = confidence * 0.73
145 |         return confidence
146 | 


--------------------------------------------------------------------------------
/thirdparty/chardet/mbcharsetprober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is Mozilla Universal charset detector code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 2001
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #   Shy Shalom - original C code
12 | #   Proofpoint, Inc.
13 | #
14 | # This library is free software; you can redistribute it and/or
15 | # modify it under the terms of the GNU Lesser General Public
16 | # License as published by the Free Software Foundation; either
17 | # version 2.1 of the License, or (at your option) any later version.
18 | #
19 | # This library is distributed in the hope that it will be useful,
20 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
21 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22 | # Lesser General Public License for more details.
23 | #
24 | # You should have received a copy of the GNU Lesser General Public
25 | # License along with this library; if not, write to the Free Software
26 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
27 | # 02110-1301  USA
28 | ######################### END LICENSE BLOCK #########################
29 | 
30 | from .charsetprober import CharSetProber
31 | from .enums import ProbingState, MachineState
32 | 
33 | 
34 | class MultiByteCharSetProber(CharSetProber):
35 |     """
36 |     MultiByteCharSetProber
37 |     """
38 | 
39 |     def __init__(self, lang_filter=None):
40 |         super(MultiByteCharSetProber, self).__init__(lang_filter=lang_filter)
41 |         self.distribution_analyzer = None
42 |         self.coding_sm = None
43 |         self._last_char = [0, 0]
44 | 
45 |     def reset(self):
46 |         super(MultiByteCharSetProber, self).reset()
47 |         if self.coding_sm:
48 |             self.coding_sm.reset()
49 |         if self.distribution_analyzer:
50 |             self.distribution_analyzer.reset()
51 |         self._last_char = [0, 0]
52 | 
53 |     @property
54 |     def charset_name(self):
55 |         raise NotImplementedError
56 | 
57 |     @property
58 |     def language(self):
59 |         raise NotImplementedError
60 | 
61 |     def feed(self, byte_str):
62 |         for i in range(len(byte_str)):
63 |             coding_state = self.coding_sm.next_state(byte_str[i])
64 |             if coding_state == MachineState.ERROR:
65 |                 self.logger.debug('%s %s prober hit error at byte %s',
66 |                                   self.charset_name, self.language, i)
67 |                 self._state = ProbingState.NOT_ME
68 |                 break
69 |             elif coding_state == MachineState.ITS_ME:
70 |                 self._state = ProbingState.FOUND_IT
71 |                 break
72 |             elif coding_state == MachineState.START:
73 |                 char_len = self.coding_sm.get_current_charlen()
74 |                 if i == 0:
75 |                     self._last_char[1] = byte_str[0]
76 |                     self.distribution_analyzer.feed(self._last_char, char_len)
77 |                 else:
78 |                     self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
79 |                                                     char_len)
80 | 
81 |         self._last_char[0] = byte_str[-1]
82 | 
83 |         if self.state == ProbingState.DETECTING:
84 |             if (self.distribution_analyzer.got_enough_data() and
85 |                     (self.get_confidence() > self.SHORTCUT_THRESHOLD)):
86 |                 self._state = ProbingState.FOUND_IT
87 | 
88 |         return self.state
89 | 
90 |     def get_confidence(self):
91 |         return self.distribution_analyzer.get_confidence()
92 | 


--------------------------------------------------------------------------------
/thirdparty/chardet/mbcsgroupprober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is Mozilla Universal charset detector code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 2001
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #   Shy Shalom - original C code
12 | #   Proofpoint, Inc.
13 | #
14 | # This library is free software; you can redistribute it and/or
15 | # modify it under the terms of the GNU Lesser General Public
16 | # License as published by the Free Software Foundation; either
17 | # version 2.1 of the License, or (at your option) any later version.
18 | #
19 | # This library is distributed in the hope that it will be useful,
20 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
21 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22 | # Lesser General Public License for more details.
23 | #
24 | # You should have received a copy of the GNU Lesser General Public
25 | # License along with this library; if not, write to the Free Software
26 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
27 | # 02110-1301  USA
28 | ######################### END LICENSE BLOCK #########################
29 | 
30 | from .charsetgroupprober import CharSetGroupProber
31 | from .utf8prober import UTF8Prober
32 | from .sjisprober import SJISProber
33 | from .eucjpprober import EUCJPProber
34 | from .gb2312prober import GB2312Prober
35 | from .euckrprober import EUCKRProber
36 | from .cp949prober import CP949Prober
37 | from .big5prober import Big5Prober
38 | from .euctwprober import EUCTWProber
39 | 
40 | 
41 | class MBCSGroupProber(CharSetGroupProber):
42 |     def __init__(self, lang_filter=None):
43 |         super(MBCSGroupProber, self).__init__(lang_filter=lang_filter)
44 |         self.probers = [
45 |             UTF8Prober(),
46 |             SJISProber(),
47 |             EUCJPProber(),
48 |             GB2312Prober(),
49 |             EUCKRProber(),
50 |             CP949Prober(),
51 |             Big5Prober(),
52 |             EUCTWProber()
53 |         ]
54 |         self.reset()
55 | 


--------------------------------------------------------------------------------
/thirdparty/chardet/sbcharsetprober.py:
--------------------------------------------------------------------------------
  1 | ######################## BEGIN LICENSE BLOCK ########################
  2 | # The Original Code is Mozilla Universal charset detector code.
  3 | #
  4 | # The Initial Developer of the Original Code is
  5 | # Netscape Communications Corporation.
  6 | # Portions created by the Initial Developer are Copyright (C) 2001
  7 | # the Initial Developer. All Rights Reserved.
  8 | #
  9 | # Contributor(s):
 10 | #   Mark Pilgrim - port to Python
 11 | #   Shy Shalom - original C code
 12 | #
 13 | # This library is free software; you can redistribute it and/or
 14 | # modify it under the terms of the GNU Lesser General Public
 15 | # License as published by the Free Software Foundation; either
 16 | # version 2.1 of the License, or (at your option) any later version.
 17 | #
 18 | # This library is distributed in the hope that it will be useful,
 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 21 | # Lesser General Public License for more details.
 22 | #
 23 | # You should have received a copy of the GNU Lesser General Public
 24 | # License along with this library; if not, write to the Free Software
 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 26 | # 02110-1301  USA
 27 | ######################### END LICENSE BLOCK #########################
 28 | 
 29 | from .charsetprober import CharSetProber
 30 | from .enums import CharacterCategory, ProbingState, SequenceLikelihood
 31 | 
 32 | 
 33 | class SingleByteCharSetProber(CharSetProber):
 34 |     SAMPLE_SIZE = 64
 35 |     SB_ENOUGH_REL_THRESHOLD = 1024  #  0.25 * SAMPLE_SIZE^2
 36 |     POSITIVE_SHORTCUT_THRESHOLD = 0.95
 37 |     NEGATIVE_SHORTCUT_THRESHOLD = 0.05
 38 | 
 39 |     def __init__(self, model, reversed=False, name_prober=None):
 40 |         super(SingleByteCharSetProber, self).__init__()
 41 |         self._model = model
 42 |         # TRUE if we need to reverse every pair in the model lookup
 43 |         self._reversed = reversed
 44 |         # Optional auxiliary prober for name decision
 45 |         self._name_prober = name_prober
 46 |         self._last_order = None
 47 |         self._seq_counters = None
 48 |         self._total_seqs = None
 49 |         self._total_char = None
 50 |         self._freq_char = None
 51 |         self.reset()
 52 | 
 53 |     def reset(self):
 54 |         super(SingleByteCharSetProber, self).reset()
 55 |         # char order of last character
 56 |         self._last_order = 255
 57 |         self._seq_counters = [0] * SequenceLikelihood.get_num_categories()
 58 |         self._total_seqs = 0
 59 |         self._total_char = 0
 60 |         # characters that fall in our sampling range
 61 |         self._freq_char = 0
 62 | 
 63 |     @property
 64 |     def charset_name(self):
 65 |         if self._name_prober:
 66 |             return self._name_prober.charset_name
 67 |         else:
 68 |             return self._model['charset_name']
 69 | 
 70 |     @property
 71 |     def language(self):
 72 |         if self._name_prober:
 73 |             return self._name_prober.language
 74 |         else:
 75 |             return self._model.get('language')
 76 | 
 77 |     def feed(self, byte_str):
 78 |         if not self._model['keep_english_letter']:
 79 |             byte_str = self.filter_international_words(byte_str)
 80 |         if not byte_str:
 81 |             return self.state
 82 |         char_to_order_map = self._model['char_to_order_map']
 83 |         for i, c in enumerate(byte_str):
 84 |             # XXX: Order is in range 1-64, so one would think we want 0-63 here,
 85 |             #      but that leads to 27 more test failures than before.
 86 |             order = char_to_order_map[c]
 87 |             # XXX: This was SYMBOL_CAT_ORDER before, with a value of 250, but
 88 |             #      CharacterCategory.SYMBOL is actually 253, so we use CONTROL
 89 |             #      to make it closer to the original intent. The only difference
 90 |             #      is whether or not we count digits and control characters for
 91 |             #      _total_char purposes.
 92 |             if order < CharacterCategory.CONTROL:
 93 |                 self._total_char += 1
 94 |             if order < self.SAMPLE_SIZE:
 95 |                 self._freq_char += 1
 96 |                 if self._last_order < self.SAMPLE_SIZE:
 97 |                     self._total_seqs += 1
 98 |                     if not self._reversed:
 99 |                         i = (self._last_order * self.SAMPLE_SIZE) + order
100 |                         model = self._model['precedence_matrix'][i]
101 |                     else:  # reverse the order of the letters in the lookup
102 |                         i = (order * self.SAMPLE_SIZE) + self._last_order
103 |                         model = self._model['precedence_matrix'][i]
104 |                     self._seq_counters[model] += 1
105 |             self._last_order = order
106 | 
107 |         charset_name = self._model['charset_name']
108 |         if self.state == ProbingState.DETECTING:
109 |             if self._total_seqs > self.SB_ENOUGH_REL_THRESHOLD:
110 |                 confidence = self.get_confidence()
111 |                 if confidence > self.POSITIVE_SHORTCUT_THRESHOLD:
112 |                     self.logger.debug('%s confidence = %s, we have a winner',
113 |                                       charset_name, confidence)
114 |                     self._state = ProbingState.FOUND_IT
115 |                 elif confidence < self.NEGATIVE_SHORTCUT_THRESHOLD:
116 |                     self.logger.debug('%s confidence = %s, below negative '
117 |                                       'shortcut threshhold %s', charset_name,
118 |                                       confidence,
119 |                                       self.NEGATIVE_SHORTCUT_THRESHOLD)
120 |                     self._state = ProbingState.NOT_ME
121 | 
122 |         return self.state
123 | 
124 |     def get_confidence(self):
125 |         r = 0.01
126 |         if self._total_seqs > 0:
127 |             r = ((1.0 * self._seq_counters[SequenceLikelihood.POSITIVE]) /
128 |                  self._total_seqs / self._model['typical_positive_ratio'])
129 |             r = r * self._freq_char / self._total_char
130 |             if r >= 1.0:
131 |                 r = 0.99
132 |         return r
133 | 


--------------------------------------------------------------------------------
/thirdparty/chardet/sbcsgroupprober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is Mozilla Universal charset detector code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 2001
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #   Shy Shalom - original C code
12 | #
13 | # This library is free software; you can redistribute it and/or
14 | # modify it under the terms of the GNU Lesser General Public
15 | # License as published by the Free Software Foundation; either
16 | # version 2.1 of the License, or (at your option) any later version.
17 | #
18 | # This library is distributed in the hope that it will be useful,
19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21 | # Lesser General Public License for more details.
22 | #
23 | # You should have received a copy of the GNU Lesser General Public
24 | # License along with this library; if not, write to the Free Software
25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26 | # 02110-1301  USA
27 | ######################### END LICENSE BLOCK #########################
28 | 
29 | from .charsetgroupprober import CharSetGroupProber
30 | from .sbcharsetprober import SingleByteCharSetProber
31 | from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel,
32 |                                 Latin5CyrillicModel, MacCyrillicModel,
33 |                                 Ibm866Model, Ibm855Model)
34 | from .langgreekmodel import Latin7GreekModel, Win1253GreekModel
35 | from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel
36 | # from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel
37 | from .langthaimodel import TIS620ThaiModel
38 | from .langhebrewmodel import Win1255HebrewModel
39 | from .hebrewprober import HebrewProber
40 | from .langturkishmodel import Latin5TurkishModel
41 | 
42 | 
43 | class SBCSGroupProber(CharSetGroupProber):
44 |     def __init__(self):
45 |         super(SBCSGroupProber, self).__init__()
46 |         self.probers = [
47 |             SingleByteCharSetProber(Win1251CyrillicModel),
48 |             SingleByteCharSetProber(Koi8rModel),
49 |             SingleByteCharSetProber(Latin5CyrillicModel),
50 |             SingleByteCharSetProber(MacCyrillicModel),
51 |             SingleByteCharSetProber(Ibm866Model),
52 |             SingleByteCharSetProber(Ibm855Model),
53 |             SingleByteCharSetProber(Latin7GreekModel),
54 |             SingleByteCharSetProber(Win1253GreekModel),
55 |             SingleByteCharSetProber(Latin5BulgarianModel),
56 |             SingleByteCharSetProber(Win1251BulgarianModel),
57 |             # TODO: Restore Hungarian encodings (iso-8859-2 and windows-1250)
58 |             #       after we retrain model.
59 |             # SingleByteCharSetProber(Latin2HungarianModel),
60 |             # SingleByteCharSetProber(Win1250HungarianModel),
61 |             SingleByteCharSetProber(TIS620ThaiModel),
62 |             SingleByteCharSetProber(Latin5TurkishModel),
63 |         ]
64 |         hebrew_prober = HebrewProber()
65 |         logical_hebrew_prober = SingleByteCharSetProber(Win1255HebrewModel,
66 |                                                         False, hebrew_prober)
67 |         visual_hebrew_prober = SingleByteCharSetProber(Win1255HebrewModel, True,
68 |                                                        hebrew_prober)
69 |         hebrew_prober.set_model_probers(logical_hebrew_prober, visual_hebrew_prober)
70 |         self.probers.extend([hebrew_prober, logical_hebrew_prober,
71 |                              visual_hebrew_prober])
72 | 
73 |         self.reset()
74 | 


--------------------------------------------------------------------------------
/thirdparty/chardet/sjisprober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is mozilla.org code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 1998
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301  USA
26 | ######################### END LICENSE BLOCK #########################
27 | 
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import SJISDistributionAnalysis
31 | from .jpcntx import SJISContextAnalysis
32 | from .mbcssm import SJIS_SM_MODEL
33 | from .enums import ProbingState, MachineState
34 | 
35 | 
36 | class SJISProber(MultiByteCharSetProber):
37 |     def __init__(self):
38 |         super(SJISProber, self).__init__()
39 |         self.coding_sm = CodingStateMachine(SJIS_SM_MODEL)
40 |         self.distribution_analyzer = SJISDistributionAnalysis()
41 |         self.context_analyzer = SJISContextAnalysis()
42 |         self.reset()
43 | 
44 |     def reset(self):
45 |         super(SJISProber, self).reset()
46 |         self.context_analyzer.reset()
47 | 
48 |     @property
49 |     def charset_name(self):
50 |         return self.context_analyzer.charset_name
51 | 
52 |     @property
53 |     def language(self):
54 |         return "Japanese"
55 | 
56 |     def feed(self, byte_str):
57 |         for i in range(len(byte_str)):
58 |             coding_state = self.coding_sm.next_state(byte_str[i])
59 |             if coding_state == MachineState.ERROR:
60 |                 self.logger.debug('%s %s prober hit error at byte %s',
61 |                                   self.charset_name, self.language, i)
62 |                 self._state = ProbingState.NOT_ME
63 |                 break
64 |             elif coding_state == MachineState.ITS_ME:
65 |                 self._state = ProbingState.FOUND_IT
66 |                 break
67 |             elif coding_state == MachineState.START:
68 |                 char_len = self.coding_sm.get_current_charlen()
69 |                 if i == 0:
70 |                     self._last_char[1] = byte_str[0]
71 |                     self.context_analyzer.feed(self._last_char[2 - char_len:],
72 |                                                char_len)
73 |                     self.distribution_analyzer.feed(self._last_char, char_len)
74 |                 else:
75 |                     self.context_analyzer.feed(byte_str[i + 1 - char_len:i + 3
76 |                                                         - char_len], char_len)
77 |                     self.distribution_analyzer.feed(byte_str[i - 1:i + 1],
78 |                                                     char_len)
79 | 
80 |         self._last_char[0] = byte_str[-1]
81 | 
82 |         if self.state == ProbingState.DETECTING:
83 |             if (self.context_analyzer.got_enough_data() and
84 |                (self.get_confidence() > self.SHORTCUT_THRESHOLD)):
85 |                 self._state = ProbingState.FOUND_IT
86 | 
87 |         return self.state
88 | 
89 |     def get_confidence(self):
90 |         context_conf = self.context_analyzer.get_confidence()
91 |         distrib_conf = self.distribution_analyzer.get_confidence()
92 |         return max(context_conf, distrib_conf)
93 | 


--------------------------------------------------------------------------------
/thirdparty/chardet/utf8prober.py:
--------------------------------------------------------------------------------
 1 | ######################## BEGIN LICENSE BLOCK ########################
 2 | # The Original Code is mozilla.org code.
 3 | #
 4 | # The Initial Developer of the Original Code is
 5 | # Netscape Communications Corporation.
 6 | # Portions created by the Initial Developer are Copyright (C) 1998
 7 | # the Initial Developer. All Rights Reserved.
 8 | #
 9 | # Contributor(s):
10 | #   Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301  USA
26 | ######################### END LICENSE BLOCK #########################
27 | 
28 | from .charsetprober import CharSetProber
29 | from .enums import ProbingState, MachineState
30 | from .codingstatemachine import CodingStateMachine
31 | from .mbcssm import UTF8_SM_MODEL
32 | 
33 | 
34 | 
35 | class UTF8Prober(CharSetProber):
36 |     ONE_CHAR_PROB = 0.5
37 | 
38 |     def __init__(self):
39 |         super(UTF8Prober, self).__init__()
40 |         self.coding_sm = CodingStateMachine(UTF8_SM_MODEL)
41 |         self._num_mb_chars = None
42 |         self.reset()
43 | 
44 |     def reset(self):
45 |         super(UTF8Prober, self).reset()
46 |         self.coding_sm.reset()
47 |         self._num_mb_chars = 0
48 | 
49 |     @property
50 |     def charset_name(self):
51 |         return "utf-8"
52 | 
53 |     @property
54 |     def language(self):
55 |         return ""
56 | 
57 |     def feed(self, byte_str):
58 |         for c in byte_str:
59 |             coding_state = self.coding_sm.next_state(c)
60 |             if coding_state == MachineState.ERROR:
61 |                 self._state = ProbingState.NOT_ME
62 |                 break
63 |             elif coding_state == MachineState.ITS_ME:
64 |                 self._state = ProbingState.FOUND_IT
65 |                 break
66 |             elif coding_state == MachineState.START:
67 |                 if self.coding_sm.get_current_charlen() >= 2:
68 |                     self._num_mb_chars += 1
69 | 
70 |         if self.state == ProbingState.DETECTING:
71 |             if self.get_confidence() > self.SHORTCUT_THRESHOLD:
72 |                 self._state = ProbingState.FOUND_IT
73 | 
74 |         return self.state
75 | 
76 |     def get_confidence(self):
77 |         unlike = 0.99
78 |         if self._num_mb_chars < 6:
79 |             unlike *= self.ONE_CHAR_PROB ** self._num_mb_chars
80 |             return 1.0 - unlike
81 |         else:
82 |             return unlike
83 | 


--------------------------------------------------------------------------------
/thirdparty/chardet/version.py:
--------------------------------------------------------------------------------
 1 | """
 2 | This module exists only to simplify retrieving the version number of chardet
 3 | from within setup.py and from chardet subpackages.
 4 | 
 5 | :author: Dan Blanchard (dan.blanchard@gmail.com)
 6 | """
 7 | 
 8 | __version__ = "3.0.4"
 9 | VERSION = __version__.split('.')
10 | 


--------------------------------------------------------------------------------
/thirdparty/connection/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jkgh006/assetscan/12492689a4ec214fb68019ffa2b1903f9ae4c836/thirdparty/connection/__init__.py


--------------------------------------------------------------------------------
/thirdparty/connection/compat.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | PY2 = sys.version_info[0] == 2
 4 | 
 5 | if PY2:
 6 |     string_types = basestring,
 7 |     from urllib import quote_plus, urlencode, unquote
 8 |     from urlparse import  urlparse
 9 |     from itertools import imap as map
10 | else:
11 |     string_types = str, bytes
12 |     from urllib.parse import quote_plus, urlencode, urlparse, unquote
13 |     map = map
14 | 


--------------------------------------------------------------------------------
/thirdparty/connection/exceptions.py:
--------------------------------------------------------------------------------
  1 | __all__ = [
  2 |     'ImproperlyConfigured', 'ElasticsearchException', 'SerializationError',
  3 |     'TransportError', 'NotFoundError', 'ConflictError', 'RequestError', 'ConnectionError',
  4 |     'SSLError', 'ConnectionTimeout'
  5 | ]
  6 | 
  7 | class ImproperlyConfigured(Exception):
  8 |     """
  9 |     Exception raised when the config passed to the client is inconsistent or invalid.
 10 |     """
 11 | 
 12 | 
 13 | class ElasticsearchException(Exception):
 14 |     """
 15 |     Base class for all exceptions raised by this package's operations (doesn't
 16 |     apply to :class:`~elasticsearch.ImproperlyConfigured`).
 17 |     """
 18 | 
 19 | 
 20 | class SerializationError(ElasticsearchException):
 21 |     """
 22 |     Data passed in failed to serialize properly in the ``Serializer`` being
 23 |     used.
 24 |     """
 25 | 
 26 | 
 27 | class TransportError(ElasticsearchException):
 28 |     """
 29 |     Exception raised when ES returns a non-OK (>=400) HTTP status code. Or when
 30 |     an actual connection error happens; in that case the ``status_code`` will
 31 |     be set to ``'N/A'``.
 32 |     """
 33 |     @property
 34 |     def status_code(self):
 35 |         """
 36 |         The HTTP status code of the response that precipitated the error or
 37 |         ``'N/A'`` if not applicable.
 38 |         """
 39 |         return self.args[0]
 40 | 
 41 |     @property
 42 |     def error(self):
 43 |         """ A string error message. """
 44 |         return self.args[1]
 45 | 
 46 |     @property
 47 |     def info(self):
 48 |         """ Dict of returned error info from ES, where available. """
 49 |         return self.args[2]
 50 | 
 51 |     def __str__(self):
 52 |         cause = ''
 53 |         try:
 54 |             if self.info:
 55 |                 cause = ', %r' % self.info['error']['root_cause'][0]['reason']
 56 |         except LookupError:
 57 |             pass
 58 |         return 'TransportError(%s, %r%s)' % (self.status_code, self.error, cause)
 59 | 
 60 | 
 61 | class ConnectionError(TransportError):
 62 |     """
 63 |     Error raised when there was an exception while talking to ES. Original
 64 |     exception from the underlying :class:`~elasticsearch.Connection`
 65 |     implementation is available as ``.info.``
 66 |     """
 67 |     def __str__(self):
 68 |         return 'ConnectionError(%s) caused by: %s(%s)' % (
 69 |             self.error, self.info.__class__.__name__, self.info)
 70 | 
 71 | 
 72 | class SSLError(ConnectionError):
 73 |     """ Error raised when encountering SSL errors. """
 74 | 
 75 | 
 76 | class ConnectionTimeout(ConnectionError):
 77 |     """ A network timeout. Doesn't cause a node retry by default. """
 78 |     def __str__(self):
 79 |         return 'ConnectionTimeout caused by - %s(%s)' % (
 80 |             self.info.__class__.__name__, self.info)
 81 | 
 82 | 
 83 | class NotFoundError(TransportError):
 84 |     """ Exception representing a 404 status code. """
 85 | 
 86 | 
 87 | class ConflictError(TransportError):
 88 |     """ Exception representing a 409 status code. """
 89 | 
 90 | 
 91 | class RequestError(TransportError):
 92 |     """ Exception representing a 400 status code. """
 93 | 
 94 | 
 95 | class AuthenticationException(TransportError):
 96 |     """ Exception representing a 401 status code. """
 97 | 
 98 | 
 99 | class AuthorizationException(TransportError):
100 |     """ Exception representing a 403 status code. """
101 | 
102 | # more generic mappings from status_code to python exceptions
103 | HTTP_EXCEPTIONS = {
104 |     400: RequestError,
105 |     401: AuthenticationException,
106 |     403: AuthorizationException,
107 |     404: NotFoundError,
108 |     409: ConflictError,
109 | }
110 | 


--------------------------------------------------------------------------------
/thirdparty/connection/http_urllib3.py:
--------------------------------------------------------------------------------
 1 | # -*- coding:utf-8 -*-
 2 | from urllib import urlencode
 3 | import urllib3
 4 | from thirdparty.connection.exceptions import ConnectionError
 5 | urllib3.disable_warnings()
 6 | 
 7 | class HttpUtil():
 8 |     def __init__(self):
 9 |         self.pool = urllib3.PoolManager()
10 |         self.default_headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36',"Cookie":"rememberMe=xx"}
11 |     def request(self, url, params=None,body=None, timeout=None,headers={},redirect=False,**kwargs):
12 |         if params:
13 |             url = '%s?%s' % (url, urlencode(params))
14 |         if body:
15 |             method = "POST"
16 |         else:
17 |             method = "GET"
18 |         try:
19 |             kw = {}
20 |             if timeout:
21 |                 kw['timeout'] = timeout
22 |             if not isinstance(url, str):
23 |                 url = url.encode('utf-8')
24 |             if not isinstance(method, str):
25 |                 method = method.encode('utf-8')
26 |             if redirect:
27 |                 retries = 3
28 |             else:
29 |                 retries = False
30 |             headers.update(self.default_headers)
31 |             response = self.pool.request(method, url, body, retries=retries,redirect=redirect, headers=headers,timeout=urllib3.Timeout(connect=timeout, read=2.0),**kwargs)
32 |             response.content = response.data
33 |             response.status_code = response.status
34 |             response.resp_headers = response.getheaders()
35 |         except Exception as e:
36 |             raise ConnectionError('N/A', str(e), e)
37 |         return response


--------------------------------------------------------------------------------
/thirdparty/connection/urllib3/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | urllib3 - Thread-safe connection pooling and re-using.
 3 | """
 4 | 
 5 | from __future__ import absolute_import
 6 | import warnings
 7 | 
 8 | from .connectionpool import (
 9 |     HTTPConnectionPool,
10 |     HTTPSConnectionPool,
11 |     connection_from_url
12 | )
13 | 
14 | from . import exceptions
15 | from .filepost import encode_multipart_formdata
16 | from .poolmanager import PoolManager, ProxyManager, proxy_from_url
17 | from .response import HTTPResponse
18 | from .util.request import make_headers
19 | from .util.url import get_host
20 | from .util.timeout import Timeout
21 | from .util.retry import Retry
22 | 
23 | 
24 | # Set default logging handler to avoid "No handler found" warnings.
25 | import logging
26 | try:  # Python 2.7+
27 |     from logging import NullHandler
28 | except ImportError:
29 |     class NullHandler(logging.Handler):
30 |         def emit(self, record):
31 |             pass
32 | 
33 | __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)'
34 | __license__ = 'MIT'
35 | __version__ = '1.22'
36 | 
37 | __all__ = (
38 |     'HTTPConnectionPool',
39 |     'HTTPSConnectionPool',
40 |     'PoolManager',
41 |     'ProxyManager',
42 |     'HTTPResponse',
43 |     'Retry',
44 |     'Timeout',
45 |     'add_stderr_logger',
46 |     'connection_from_url',
47 |     'disable_warnings',
48 |     'encode_multipart_formdata',
49 |     'get_host',
50 |     'make_headers',
51 |     'proxy_from_url',
52 | )
53 | 
54 | logging.getLogger(__name__).addHandler(NullHandler())
55 | 
56 | 
57 | def add_stderr_logger(level=logging.DEBUG):
58 |     """
59 |     Helper for quickly adding a StreamHandler to the logger. Useful for
60 |     debugging.
61 | 
62 |     Returns the handler after adding it.
63 |     """
64 |     # This method needs to be in this __init__.py to get the __name__ correct
65 |     # even if urllib3 is vendored within another package.
66 |     logger = logging.getLogger(__name__)
67 |     handler = logging.StreamHandler()
68 |     handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
69 |     logger.addHandler(handler)
70 |     logger.setLevel(level)
71 |     logger.debug('Added a stderr logging handler to logger: %s', __name__)
72 |     return handler
73 | 
74 | 
75 | # ... Clean up.
76 | del NullHandler
77 | 
78 | 
79 | # All warning filters *must* be appended unless you're really certain that they
80 | # shouldn't be: otherwise, it's very hard for users to use most Python
81 | # mechanisms to silence them.
82 | # SecurityWarning's always go off by default.
83 | warnings.simplefilter('always', exceptions.SecurityWarning, append=True)
84 | # SubjectAltNameWarning's should go off once per host
85 | warnings.simplefilter('default', exceptions.SubjectAltNameWarning, append=True)
86 | # InsecurePlatformWarning's don't vary between requests, so we keep it default.
87 | warnings.simplefilter('default', exceptions.InsecurePlatformWarning,
88 |                       append=True)
89 | # SNIMissingWarnings should go off only once.
90 | warnings.simplefilter('default', exceptions.SNIMissingWarning, append=True)
91 | 
92 | 
93 | def disable_warnings(category=exceptions.HTTPWarning):
94 |     """
95 |     Helper for quickly disabling all urllib3 warnings.
96 |     """
97 |     warnings.simplefilter('ignore', category)
98 | 


--------------------------------------------------------------------------------
/thirdparty/connection/urllib3/contrib/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jkgh006/assetscan/12492689a4ec214fb68019ffa2b1903f9ae4c836/thirdparty/connection/urllib3/contrib/__init__.py


--------------------------------------------------------------------------------
/thirdparty/connection/urllib3/contrib/_securetransport/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jkgh006/assetscan/12492689a4ec214fb68019ffa2b1903f9ae4c836/thirdparty/connection/urllib3/contrib/_securetransport/__init__.py


--------------------------------------------------------------------------------
/thirdparty/connection/urllib3/contrib/ntlmpool.py:
--------------------------------------------------------------------------------
  1 | """
  2 | NTLM authenticating pool, contributed by erikcederstran
  3 | 
  4 | Issue #10, see: http://code.google.com/p/urllib3/issues/detail?id=10
  5 | """
  6 | from __future__ import absolute_import
  7 | 
  8 | from logging import getLogger
  9 | from ntlm import ntlm
 10 | 
 11 | from .. import HTTPSConnectionPool
 12 | from ..packages.six.moves.http_client import HTTPSConnection
 13 | 
 14 | 
 15 | log = getLogger(__name__)
 16 | 
 17 | 
 18 | class NTLMConnectionPool(HTTPSConnectionPool):
 19 |     """
 20 |     Implements an NTLM authentication version of an urllib3 connection pool
 21 |     """
 22 | 
 23 |     scheme = 'https'
 24 | 
 25 |     def __init__(self, user, pw, authurl, *args, **kwargs):
 26 |         """
 27 |         authurl is a random URL on the server that is protected by NTLM.
 28 |         user is the Windows user, probably in the DOMAIN\\username format.
 29 |         pw is the password for the user.
 30 |         """
 31 |         super(NTLMConnectionPool, self).__init__(*args, **kwargs)
 32 |         self.authurl = authurl
 33 |         self.rawuser = user
 34 |         user_parts = user.split('\\', 1)
 35 |         self.domain = user_parts[0].upper()
 36 |         self.user = user_parts[1]
 37 |         self.pw = pw
 38 | 
 39 |     def _new_conn(self):
 40 |         # Performs the NTLM handshake that secures the connection. The socket
 41 |         # must be kept open while requests are performed.
 42 |         self.num_connections += 1
 43 |         log.debug('Starting NTLM HTTPS connection no. %d: https://%s%s',
 44 |                   self.num_connections, self.host, self.authurl)
 45 | 
 46 |         headers = {}
 47 |         headers['Connection'] = 'Keep-Alive'
 48 |         req_header = 'Authorization'
 49 |         resp_header = 'www-authenticate'
 50 | 
 51 |         conn = HTTPSConnection(host=self.host, port=self.port)
 52 | 
 53 |         # Send negotiation message
 54 |         headers[req_header] = (
 55 |             'NTLM %s' % ntlm.create_NTLM_NEGOTIATE_MESSAGE(self.rawuser))
 56 |         log.debug('Request headers: %s', headers)
 57 |         conn.request('GET', self.authurl, None, headers)
 58 |         res = conn.getresponse()
 59 |         reshdr = dict(res.getheaders())
 60 |         log.debug('Response status: %s %s', res.status, res.reason)
 61 |         log.debug('Response headers: %s', reshdr)
 62 |         log.debug('Response data: %s [...]', res.read(100))
 63 | 
 64 |         # Remove the reference to the socket, so that it can not be closed by
 65 |         # the response object (we want to keep the socket open)
 66 |         res.fp = None
 67 | 
 68 |         # Server should respond with a challenge message
 69 |         auth_header_values = reshdr[resp_header].split(', ')
 70 |         auth_header_value = None
 71 |         for s in auth_header_values:
 72 |             if s[:5] == 'NTLM ':
 73 |                 auth_header_value = s[5:]
 74 |         if auth_header_value is None:
 75 |             raise Exception('Unexpected %s response header: %s' %
 76 |                             (resp_header, reshdr[resp_header]))
 77 | 
 78 |         # Send authentication message
 79 |         ServerChallenge, NegotiateFlags = \
 80 |             ntlm.parse_NTLM_CHALLENGE_MESSAGE(auth_header_value)
 81 |         auth_msg = ntlm.create_NTLM_AUTHENTICATE_MESSAGE(ServerChallenge,
 82 |                                                          self.user,
 83 |                                                          self.domain,
 84 |                                                          self.pw,
 85 |                                                          NegotiateFlags)
 86 |         headers[req_header] = 'NTLM %s' % auth_msg
 87 |         log.debug('Request headers: %s', headers)
 88 |         conn.request('GET', self.authurl, None, headers)
 89 |         res = conn.getresponse()
 90 |         log.debug('Response status: %s %s', res.status, res.reason)
 91 |         log.debug('Response headers: %s', dict(res.getheaders()))
 92 |         log.debug('Response data: %s [...]', res.read()[:100])
 93 |         if res.status != 200:
 94 |             if res.status == 401:
 95 |                 raise Exception('Server rejected request: wrong '
 96 |                                 'username or password')
 97 |             raise Exception('Wrong server response: %s %s' %
 98 |                             (res.status, res.reason))
 99 | 
100 |         res.fp = None
101 |         log.debug('Connection established')
102 |         return conn
103 | 
104 |     def urlopen(self, method, url, body=None, headers=None, retries=3,
105 |                 redirect=True, assert_same_host=True):
106 |         if headers is None:
107 |             headers = {}
108 |         headers['Connection'] = 'Keep-Alive'
109 |         return super(NTLMConnectionPool, self).urlopen(method, url, body,
110 |                                                        headers, retries,
111 |                                                        redirect,
112 |                                                        assert_same_host)
113 | 


--------------------------------------------------------------------------------
/thirdparty/connection/urllib3/contrib/socks.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | This module contains provisional support for SOCKS proxies from within
  4 | urllib3. This module supports SOCKS4 (specifically the SOCKS4A variant) and
  5 | SOCKS5. To enable its functionality, either install PySocks or install this
  6 | module with the ``socks`` extra.
  7 | 
  8 | The SOCKS implementation supports the full range of urllib3 features. It also
  9 | supports the following SOCKS features:
 10 | 
 11 | - SOCKS4
 12 | - SOCKS4a
 13 | - SOCKS5
 14 | - Usernames and passwords for the SOCKS proxy
 15 | 
 16 | Known Limitations:
 17 | 
 18 | - Currently PySocks does not support contacting remote websites via literal
 19 |   IPv6 addresses. Any such connection attempt will fail. You must use a domain
 20 |   name.
 21 | - Currently PySocks does not support IPv6 connections to the SOCKS proxy. Any
 22 |   such connection attempt will fail.
 23 | """
 24 | from __future__ import absolute_import
 25 | 
 26 | try:
 27 |     import socks
 28 | except ImportError:
 29 |     import warnings
 30 |     from ..exceptions import DependencyWarning
 31 | 
 32 |     warnings.warn((
 33 |         'SOCKS support in urllib3 requires the installation of optional '
 34 |         'dependencies: specifically, PySocks.  For more information, see '
 35 |         'https://urllib3.readthedocs.io/en/latest/contrib.html#socks-proxies'
 36 |         ),
 37 |         DependencyWarning
 38 |     )
 39 |     raise
 40 | 
 41 | from socket import error as SocketError, timeout as SocketTimeout
 42 | 
 43 | from ..connection import (
 44 |     HTTPConnection, HTTPSConnection
 45 | )
 46 | from ..connectionpool import (
 47 |     HTTPConnectionPool, HTTPSConnectionPool
 48 | )
 49 | from ..exceptions import ConnectTimeoutError, NewConnectionError
 50 | from ..poolmanager import PoolManager
 51 | from ..util.url import parse_url
 52 | 
 53 | try:
 54 |     import ssl
 55 | except ImportError:
 56 |     ssl = None
 57 | 
 58 | 
 59 | class SOCKSConnection(HTTPConnection):
 60 |     """
 61 |     A plain-text HTTP connection that connects via a SOCKS proxy.
 62 |     """
 63 |     def __init__(self, *args, **kwargs):
 64 |         self._socks_options = kwargs.pop('_socks_options')
 65 |         super(SOCKSConnection, self).__init__(*args, **kwargs)
 66 | 
 67 |     def _new_conn(self):
 68 |         """
 69 |         Establish a new connection via the SOCKS proxy.
 70 |         """
 71 |         extra_kw = {}
 72 |         if self.source_address:
 73 |             extra_kw['source_address'] = self.source_address
 74 | 
 75 |         if self.socket_options:
 76 |             extra_kw['socket_options'] = self.socket_options
 77 | 
 78 |         try:
 79 |             conn = socks.create_connection(
 80 |                 (self.host, self.port),
 81 |                 proxy_type=self._socks_options['socks_version'],
 82 |                 proxy_addr=self._socks_options['proxy_host'],
 83 |                 proxy_port=self._socks_options['proxy_port'],
 84 |                 proxy_username=self._socks_options['username'],
 85 |                 proxy_password=self._socks_options['password'],
 86 |                 proxy_rdns=self._socks_options['rdns'],
 87 |                 timeout=self.timeout,
 88 |                 **extra_kw
 89 |             )
 90 | 
 91 |         except SocketTimeout as e:
 92 |             raise ConnectTimeoutError(
 93 |                 self, "Connection to %s timed out. (connect timeout=%s)" %
 94 |                 (self.host, self.timeout))
 95 | 
 96 |         except socks.ProxyError as e:
 97 |             # This is fragile as hell, but it seems to be the only way to raise
 98 |             # useful errors here.
 99 |             if e.socket_err:
100 |                 error = e.socket_err
101 |                 if isinstance(error, SocketTimeout):
102 |                     raise ConnectTimeoutError(
103 |                         self,
104 |                         "Connection to %s timed out. (connect timeout=%s)" %
105 |                         (self.host, self.timeout)
106 |                     )
107 |                 else:
108 |                     raise NewConnectionError(
109 |                         self,
110 |                         "Failed to establish a new connection: %s" % error
111 |                     )
112 |             else:
113 |                 raise NewConnectionError(
114 |                     self,
115 |                     "Failed to establish a new connection: %s" % e
116 |                 )
117 | 
118 |         except SocketError as e:  # Defensive: PySocks should catch all these.
119 |             raise NewConnectionError(
120 |                 self, "Failed to establish a new connection: %s" % e)
121 | 
122 |         return conn
123 | 
124 | 
125 | # We don't need to duplicate the Verified/Unverified distinction from
126 | # urllib3/connection.py here because the HTTPSConnection will already have been
127 | # correctly set to either the Verified or Unverified form by that module. This
128 | # means the SOCKSHTTPSConnection will automatically be the correct type.
129 | class SOCKSHTTPSConnection(SOCKSConnection, HTTPSConnection):
130 |     pass
131 | 
132 | 
133 | class SOCKSHTTPConnectionPool(HTTPConnectionPool):
134 |     ConnectionCls = SOCKSConnection
135 | 
136 | 
137 | class SOCKSHTTPSConnectionPool(HTTPSConnectionPool):
138 |     ConnectionCls = SOCKSHTTPSConnection
139 | 
140 | 
141 | class SOCKSProxyManager(PoolManager):
142 |     """
143 |     A version of the urllib3 ProxyManager that routes connections via the
144 |     defined SOCKS proxy.
145 |     """
146 |     pool_classes_by_scheme = {
147 |         'http': SOCKSHTTPConnectionPool,
148 |         'https': SOCKSHTTPSConnectionPool,
149 |     }
150 | 
151 |     def __init__(self, proxy_url, username=None, password=None,
152 |                  num_pools=10, headers=None, **connection_pool_kw):
153 |         parsed = parse_url(proxy_url)
154 | 
155 |         if parsed.scheme == 'socks5':
156 |             socks_version = socks.PROXY_TYPE_SOCKS5
157 |             rdns = False
158 |         elif parsed.scheme == 'socks5h':
159 |             socks_version = socks.PROXY_TYPE_SOCKS5
160 |             rdns = True
161 |         elif parsed.scheme == 'socks4':
162 |             socks_version = socks.PROXY_TYPE_SOCKS4
163 |             rdns = False
164 |         elif parsed.scheme == 'socks4a':
165 |             socks_version = socks.PROXY_TYPE_SOCKS4
166 |             rdns = True
167 |         else:
168 |             raise ValueError(
169 |                 "Unable to determine SOCKS version from %s" % proxy_url
170 |             )
171 | 
172 |         self.proxy_url = proxy_url
173 | 
174 |         socks_options = {
175 |             'socks_version': socks_version,
176 |             'proxy_host': parsed.host,
177 |             'proxy_port': parsed.port,
178 |             'username': username,
179 |             'password': password,
180 |             'rdns': rdns
181 |         }
182 |         connection_pool_kw['_socks_options'] = socks_options
183 | 
184 |         super(SOCKSProxyManager, self).__init__(
185 |             num_pools, headers, **connection_pool_kw
186 |         )
187 | 
188 |         self.pool_classes_by_scheme = SOCKSProxyManager.pool_classes_by_scheme
189 | 


--------------------------------------------------------------------------------
/thirdparty/connection/urllib3/exceptions.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | from .packages.six.moves.http_client import (
  3 |     IncompleteRead as httplib_IncompleteRead
  4 | )
  5 | # Base Exceptions
  6 | 
  7 | 
  8 | class HTTPError(Exception):
  9 |     "Base exception used by this module."
 10 |     pass
 11 | 
 12 | 
 13 | class HTTPWarning(Warning):
 14 |     "Base warning used by this module."
 15 |     pass
 16 | 
 17 | 
 18 | class PoolError(HTTPError):
 19 |     "Base exception for errors caused within a pool."
 20 |     def __init__(self, pool, message):
 21 |         self.pool = pool
 22 |         HTTPError.__init__(self, "%s: %s" % (pool, message))
 23 | 
 24 |     def __reduce__(self):
 25 |         # For pickling purposes.
 26 |         return self.__class__, (None, None)
 27 | 
 28 | 
 29 | class RequestError(PoolError):
 30 |     "Base exception for PoolErrors that have associated URLs."
 31 |     def __init__(self, pool, url, message):
 32 |         self.url = url
 33 |         PoolError.__init__(self, pool, message)
 34 | 
 35 |     def __reduce__(self):
 36 |         # For pickling purposes.
 37 |         return self.__class__, (None, self.url, None)
 38 | 
 39 | 
 40 | class SSLError(HTTPError):
 41 |     "Raised when SSL certificate fails in an HTTPS connection."
 42 |     pass
 43 | 
 44 | 
 45 | class ProxyError(HTTPError):
 46 |     "Raised when the connection to a proxy fails."
 47 |     pass
 48 | 
 49 | 
 50 | class DecodeError(HTTPError):
 51 |     "Raised when automatic decoding based on Content-Type fails."
 52 |     pass
 53 | 
 54 | 
 55 | class ProtocolError(HTTPError):
 56 |     "Raised when something unexpected happens mid-request/response."
 57 |     pass
 58 | 
 59 | 
 60 | #: Renamed to ProtocolError but aliased for backwards compatibility.
 61 | ConnectionError = ProtocolError
 62 | 
 63 | 
 64 | # Leaf Exceptions
 65 | 
 66 | class MaxRetryError(RequestError):
 67 |     """Raised when the maximum number of retries is exceeded.
 68 | 
 69 |     :param pool: The connection pool
 70 |     :type pool: :class:`~urllib3.connectionpool.HTTPConnectionPool`
 71 |     :param string url: The requested Url
 72 |     :param exceptions.Exception reason: The underlying error
 73 | 
 74 |     """
 75 | 
 76 |     def __init__(self, pool, url, reason=None):
 77 |         self.reason = reason
 78 | 
 79 |         message = "Max retries exceeded with url: %s (Caused by %r)" % (
 80 |             url, reason)
 81 | 
 82 |         RequestError.__init__(self, pool, url, message)
 83 | 
 84 | 
 85 | class HostChangedError(RequestError):
 86 |     "Raised when an existing pool gets a request for a foreign host."
 87 | 
 88 |     def __init__(self, pool, url, retries=3):
 89 |         message = "Tried to open a foreign host with url: %s" % url
 90 |         RequestError.__init__(self, pool, url, message)
 91 |         self.retries = retries
 92 | 
 93 | 
 94 | class TimeoutStateError(HTTPError):
 95 |     """ Raised when passing an invalid state to a timeout """
 96 |     pass
 97 | 
 98 | 
 99 | class TimeoutError(HTTPError):
100 |     """ Raised when a socket timeout error occurs.
101 | 
102 |     Catching this error will catch both :exc:`ReadTimeoutErrors
103 |     <ReadTimeoutError>` and :exc:`ConnectTimeoutErrors <ConnectTimeoutError>`.
104 |     """
105 |     pass
106 | 
107 | 
108 | class ReadTimeoutError(TimeoutError, RequestError):
109 |     "Raised when a socket timeout occurs while receiving data from a server"
110 |     pass
111 | 
112 | 
113 | # This timeout error does not have a URL attached and needs to inherit from the
114 | # base HTTPError
115 | class ConnectTimeoutError(TimeoutError):
116 |     "Raised when a socket timeout occurs while connecting to a server"
117 |     pass
118 | 
119 | 
120 | class NewConnectionError(ConnectTimeoutError, PoolError):
121 |     "Raised when we fail to establish a new connection. Usually ECONNREFUSED."
122 |     pass
123 | 
124 | 
125 | class EmptyPoolError(PoolError):
126 |     "Raised when a pool runs out of connections and no more are allowed."
127 |     pass
128 | 
129 | 
130 | class ClosedPoolError(PoolError):
131 |     "Raised when a request enters a pool after the pool has been closed."
132 |     pass
133 | 
134 | 
135 | class LocationValueError(ValueError, HTTPError):
136 |     "Raised when there is something wrong with a given URL input."
137 |     pass
138 | 
139 | 
140 | class LocationParseError(LocationValueError):
141 |     "Raised when get_host or similar fails to parse the URL input."
142 | 
143 |     def __init__(self, location):
144 |         message = "Failed to parse: %s" % location
145 |         HTTPError.__init__(self, message)
146 | 
147 |         self.location = location
148 | 
149 | 
150 | class ResponseError(HTTPError):
151 |     "Used as a container for an error reason supplied in a MaxRetryError."
152 |     GENERIC_ERROR = 'too many error responses'
153 |     SPECIFIC_ERROR = 'too many {status_code} error responses'
154 | 
155 | 
156 | class SecurityWarning(HTTPWarning):
157 |     "Warned when perfoming security reducing actions"
158 |     pass
159 | 
160 | 
161 | class SubjectAltNameWarning(SecurityWarning):
162 |     "Warned when connecting to a host with a certificate missing a SAN."
163 |     pass
164 | 
165 | 
166 | class InsecureRequestWarning(SecurityWarning):
167 |     "Warned when making an unverified HTTPS request."
168 |     pass
169 | 
170 | 
171 | class SystemTimeWarning(SecurityWarning):
172 |     "Warned when system time is suspected to be wrong"
173 |     pass
174 | 
175 | 
176 | class InsecurePlatformWarning(SecurityWarning):
177 |     "Warned when certain SSL configuration is not available on a platform."
178 |     pass
179 | 
180 | 
181 | class SNIMissingWarning(HTTPWarning):
182 |     "Warned when making a HTTPS request without SNI available."
183 |     pass
184 | 
185 | 
186 | class DependencyWarning(HTTPWarning):
187 |     """
188 |     Warned when an attempt is made to import a module with missing optional
189 |     dependencies.
190 |     """
191 |     pass
192 | 
193 | 
194 | class ResponseNotChunked(ProtocolError, ValueError):
195 |     "Response needs to be chunked in order to read it as chunks."
196 |     pass
197 | 
198 | 
199 | class BodyNotHttplibCompatible(HTTPError):
200 |     """
201 |     Body should be httplib.HTTPResponse like (have an fp attribute which
202 |     returns raw chunks) for read_chunked().
203 |     """
204 |     pass
205 | 
206 | 
207 | class IncompleteRead(HTTPError, httplib_IncompleteRead):
208 |     """
209 |     Response length doesn't match expected Content-Length
210 | 
211 |     Subclass of http_client.IncompleteRead to allow int value
212 |     for `partial` to avoid creating large objects on streamed
213 |     reads.
214 |     """
215 |     def __init__(self, partial, expected):
216 |         super(IncompleteRead, self).__init__(partial, expected)
217 | 
218 |     def __repr__(self):
219 |         return ('IncompleteRead(%i bytes read, '
220 |                 '%i more expected)' % (self.partial, self.expected))
221 | 
222 | 
223 | class InvalidHeader(HTTPError):
224 |     "The header provided was somehow invalid."
225 |     pass
226 | 
227 | 
228 | class ProxySchemeUnknown(AssertionError, ValueError):
229 |     "ProxyManager does not support the supplied scheme"
230 |     # TODO(t-8ch): Stop inheriting from AssertionError in v2.0.
231 | 
232 |     def __init__(self, scheme):
233 |         message = "Not supported proxy scheme %s" % scheme
234 |         super(ProxySchemeUnknown, self).__init__(message)
235 | 
236 | 
237 | class HeaderParsingError(HTTPError):
238 |     "Raised by assert_header_parsing, but we convert it to a log.warning statement."
239 |     def __init__(self, defects, unparsed_data):
240 |         message = '%s, unparsed data: %r' % (defects or 'Unknown', unparsed_data)
241 |         super(HeaderParsingError, self).__init__(message)
242 | 
243 | 
244 | class UnrewindableBodyError(HTTPError):
245 |     "urllib3 encountered an error when trying to rewind a body"
246 |     pass
247 | 


--------------------------------------------------------------------------------
/thirdparty/connection/urllib3/fields.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | import email.utils
  3 | import mimetypes
  4 | 
  5 | from .packages import six
  6 | 
  7 | 
  8 | def guess_content_type(filename, default='application/octet-stream'):
  9 |     """
 10 |     Guess the "Content-Type" of a file.
 11 | 
 12 |     :param filename:
 13 |         The filename to guess the "Content-Type" of using :mod:`mimetypes`.
 14 |     :param default:
 15 |         If no "Content-Type" can be guessed, default to `default`.
 16 |     """
 17 |     if filename:
 18 |         return mimetypes.guess_type(filename)[0] or default
 19 |     return default
 20 | 
 21 | 
 22 | def format_header_param(name, value):
 23 |     """
 24 |     Helper function to format and quote a single header parameter.
 25 | 
 26 |     Particularly useful for header parameters which might contain
 27 |     non-ASCII values, like file names. This follows RFC 2231, as
 28 |     suggested by RFC 2388 Section 4.4.
 29 | 
 30 |     :param name:
 31 |         The name of the parameter, a string expected to be ASCII only.
 32 |     :param value:
 33 |         The value of the parameter, provided as a unicode string.
 34 |     """
 35 |     if not any(ch in value for ch in '"\\\r\n'):
 36 |         result = '%s="%s"' % (name, value)
 37 |         try:
 38 |             result.encode('ascii')
 39 |         except (UnicodeEncodeError, UnicodeDecodeError):
 40 |             pass
 41 |         else:
 42 |             return result
 43 |     if not six.PY3 and isinstance(value, six.text_type):  # Python 2:
 44 |         value = value.encode('utf-8')
 45 |     value = email.utils.encode_rfc2231(value, 'utf-8')
 46 |     value = '%s*=%s' % (name, value)
 47 |     return value
 48 | 
 49 | 
 50 | class RequestField(object):
 51 |     """
 52 |     A data container for request body parameters.
 53 | 
 54 |     :param name:
 55 |         The name of this request field.
 56 |     :param data:
 57 |         The data/value body.
 58 |     :param filename:
 59 |         An optional filename of the request field.
 60 |     :param headers:
 61 |         An optional dict-like object of headers to initially use for the field.
 62 |     """
 63 |     def __init__(self, name, data, filename=None, headers=None):
 64 |         self._name = name
 65 |         self._filename = filename
 66 |         self.data = data
 67 |         self.headers = {}
 68 |         if headers:
 69 |             self.headers = dict(headers)
 70 | 
 71 |     @classmethod
 72 |     def from_tuples(cls, fieldname, value):
 73 |         """
 74 |         A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters.
 75 | 
 76 |         Supports constructing :class:`~urllib3.fields.RequestField` from
 77 |         parameter of key/value strings AND key/filetuple. A filetuple is a
 78 |         (filename, data, MIME type) tuple where the MIME type is optional.
 79 |         For example::
 80 | 
 81 |             'foo': 'bar',
 82 |             'fakefile': ('foofile.txt', 'contents of foofile'),
 83 |             'realfile': ('barfile.txt', open('realfile').read()),
 84 |             'typedfile': ('bazfile.bin', open('bazfile').read(), 'image/jpeg'),
 85 |             'nonamefile': 'contents of nonamefile field',
 86 | 
 87 |         Field names and filenames must be unicode.
 88 |         """
 89 |         if isinstance(value, tuple):
 90 |             if len(value) == 3:
 91 |                 filename, data, content_type = value
 92 |             else:
 93 |                 filename, data = value
 94 |                 content_type = guess_content_type(filename)
 95 |         else:
 96 |             filename = None
 97 |             content_type = None
 98 |             data = value
 99 | 
100 |         request_param = cls(fieldname, data, filename=filename)
101 |         request_param.make_multipart(content_type=content_type)
102 | 
103 |         return request_param
104 | 
105 |     def _render_part(self, name, value):
106 |         """
107 |         Overridable helper function to format a single header parameter.
108 | 
109 |         :param name:
110 |             The name of the parameter, a string expected to be ASCII only.
111 |         :param value:
112 |             The value of the parameter, provided as a unicode string.
113 |         """
114 |         return format_header_param(name, value)
115 | 
116 |     def _render_parts(self, header_parts):
117 |         """
118 |         Helper function to format and quote a single header.
119 | 
120 |         Useful for single headers that are composed of multiple items. E.g.,
121 |         'Content-Disposition' fields.
122 | 
123 |         :param header_parts:
124 |             A sequence of (k, v) typles or a :class:`dict` of (k, v) to format
125 |             as `k1="v1"; k2="v2"; ...`.
126 |         """
127 |         parts = []
128 |         iterable = header_parts
129 |         if isinstance(header_parts, dict):
130 |             iterable = header_parts.items()
131 | 
132 |         for name, value in iterable:
133 |             if value is not None:
134 |                 parts.append(self._render_part(name, value))
135 | 
136 |         return '; '.join(parts)
137 | 
138 |     def render_headers(self):
139 |         """
140 |         Renders the headers for this request field.
141 |         """
142 |         lines = []
143 | 
144 |         sort_keys = ['Content-Disposition', 'Content-Type', 'Content-Location']
145 |         for sort_key in sort_keys:
146 |             if self.headers.get(sort_key, False):
147 |                 lines.append('%s: %s' % (sort_key, self.headers[sort_key]))
148 | 
149 |         for header_name, header_value in self.headers.items():
150 |             if header_name not in sort_keys:
151 |                 if header_value:
152 |                     lines.append('%s: %s' % (header_name, header_value))
153 | 
154 |         lines.append('\r\n')
155 |         return '\r\n'.join(lines)
156 | 
157 |     def make_multipart(self, content_disposition=None, content_type=None,
158 |                        content_location=None):
159 |         """
160 |         Makes this request field into a multipart request field.
161 | 
162 |         This method overrides "Content-Disposition", "Content-Type" and
163 |         "Content-Location" headers to the request parameter.
164 | 
165 |         :param content_type:
166 |             The 'Content-Type' of the request body.
167 |         :param content_location:
168 |             The 'Content-Location' of the request body.
169 | 
170 |         """
171 |         self.headers['Content-Disposition'] = content_disposition or 'form-data'
172 |         self.headers['Content-Disposition'] += '; '.join([
173 |             '', self._render_parts(
174 |                 (('name', self._name), ('filename', self._filename))
175 |             )
176 |         ])
177 |         self.headers['Content-Type'] = content_type
178 |         self.headers['Content-Location'] = content_location
179 | 


--------------------------------------------------------------------------------
/thirdparty/connection/urllib3/filepost.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import
 2 | import codecs
 3 | 
 4 | from uuid import uuid4
 5 | from io import BytesIO
 6 | 
 7 | from .packages import six
 8 | from .packages.six import b
 9 | from .fields import RequestField
10 | 
11 | writer = codecs.lookup('utf-8')[3]
12 | 
13 | 
14 | def choose_boundary():
15 |     """
16 |     Our embarrassingly-simple replacement for mimetools.choose_boundary.
17 |     """
18 |     return uuid4().hex
19 | 
20 | 
21 | def iter_field_objects(fields):
22 |     """
23 |     Iterate over fields.
24 | 
25 |     Supports list of (k, v) tuples and dicts, and lists of
26 |     :class:`~urllib3.fields.RequestField`.
27 | 
28 |     """
29 |     if isinstance(fields, dict):
30 |         i = six.iteritems(fields)
31 |     else:
32 |         i = iter(fields)
33 | 
34 |     for field in i:
35 |         if isinstance(field, RequestField):
36 |             yield field
37 |         else:
38 |             yield RequestField.from_tuples(*field)
39 | 
40 | 
41 | def iter_fields(fields):
42 |     """
43 |     .. deprecated:: 1.6
44 | 
45 |     Iterate over fields.
46 | 
47 |     The addition of :class:`~urllib3.fields.RequestField` makes this function
48 |     obsolete. Instead, use :func:`iter_field_objects`, which returns
49 |     :class:`~urllib3.fields.RequestField` objects.
50 | 
51 |     Supports list of (k, v) tuples and dicts.
52 |     """
53 |     if isinstance(fields, dict):
54 |         return ((k, v) for k, v in six.iteritems(fields))
55 | 
56 |     return ((k, v) for k, v in fields)
57 | 
58 | 
59 | def encode_multipart_formdata(fields, boundary=None):
60 |     """
61 |     Encode a dictionary of ``fields`` using the multipart/form-data MIME format.
62 | 
63 |     :param fields:
64 |         Dictionary of fields or list of (key, :class:`~urllib3.fields.RequestField`).
65 | 
66 |     :param boundary:
67 |         If not specified, then a random boundary will be generated using
68 |         :func:`mimetools.choose_boundary`.
69 |     """
70 |     body = BytesIO()
71 |     if boundary is None:
72 |         boundary = choose_boundary()
73 | 
74 |     for field in iter_field_objects(fields):
75 |         body.write(b('--%s\r\n' % (boundary)))
76 | 
77 |         writer(body).write(field.render_headers())
78 |         data = field.data
79 | 
80 |         if isinstance(data, int):
81 |             data = str(data)  # Backwards compatibility
82 | 
83 |         if isinstance(data, six.text_type):
84 |             writer(body).write(data)
85 |         else:
86 |             body.write(data)
87 | 
88 |         body.write(b'\r\n')
89 | 
90 |     body.write(b('--%s--\r\n' % (boundary)))
91 | 
92 |     content_type = str('multipart/form-data; boundary=%s' % boundary)
93 | 
94 |     return body.getvalue(), content_type
95 | 


--------------------------------------------------------------------------------
/thirdparty/connection/urllib3/packages/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | 
3 | from . import ssl_match_hostname
4 | 
5 | __all__ = ('ssl_match_hostname', )
6 | 


--------------------------------------------------------------------------------
/thirdparty/connection/urllib3/packages/backports/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jkgh006/assetscan/12492689a4ec214fb68019ffa2b1903f9ae4c836/thirdparty/connection/urllib3/packages/backports/__init__.py


--------------------------------------------------------------------------------
/thirdparty/connection/urllib3/packages/backports/makefile.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | backports.makefile
 4 | ~~~~~~~~~~~~~~~~~~
 5 | 
 6 | Backports the Python 3 ``socket.makefile`` method for use with anything that
 7 | wants to create a "fake" socket object.
 8 | """
 9 | import io
10 | 
11 | from socket import SocketIO
12 | 
13 | 
14 | def backport_makefile(self, mode="r", buffering=None, encoding=None,
15 |                       errors=None, newline=None):
16 |     """
17 |     Backport of ``socket.makefile`` from Python 3.5.
18 |     """
19 |     if not set(mode) <= set(["r", "w", "b"]):
20 |         raise ValueError(
21 |             "invalid mode %r (only r, w, b allowed)" % (mode,)
22 |         )
23 |     writing = "w" in mode
24 |     reading = "r" in mode or not writing
25 |     assert reading or writing
26 |     binary = "b" in mode
27 |     rawmode = ""
28 |     if reading:
29 |         rawmode += "r"
30 |     if writing:
31 |         rawmode += "w"
32 |     raw = SocketIO(self, rawmode)
33 |     self._makefile_refs += 1
34 |     if buffering is None:
35 |         buffering = -1
36 |     if buffering < 0:
37 |         buffering = io.DEFAULT_BUFFER_SIZE
38 |     if buffering == 0:
39 |         if not binary:
40 |             raise ValueError("unbuffered streams must be binary")
41 |         return raw
42 |     if reading and writing:
43 |         buffer = io.BufferedRWPair(raw, raw, buffering)
44 |     elif reading:
45 |         buffer = io.BufferedReader(raw, buffering)
46 |     else:
47 |         assert writing
48 |         buffer = io.BufferedWriter(raw, buffering)
49 |     if binary:
50 |         return buffer
51 |     text = io.TextIOWrapper(buffer, encoding, errors, newline)
52 |     text.mode = mode
53 |     return text
54 | 


--------------------------------------------------------------------------------
/thirdparty/connection/urllib3/packages/ssl_match_hostname/__init__.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | try:
 4 |     # Our match_hostname function is the same as 3.5's, so we only want to
 5 |     # import the match_hostname function if it's at least that good.
 6 |     if sys.version_info < (3, 5):
 7 |         raise ImportError("Fallback to vendored code")
 8 | 
 9 |     from ssl import CertificateError, match_hostname
10 | except ImportError:
11 |     try:
12 |         # Backport of the function from a pypi module
13 |         from backports.ssl_match_hostname import CertificateError, match_hostname
14 |     except ImportError:
15 |         # Our vendored copy
16 |         from ._implementation import CertificateError, match_hostname
17 | 
18 | # Not needed, but documenting what we provide.
19 | __all__ = ('CertificateError', 'match_hostname')
20 | 


--------------------------------------------------------------------------------
/thirdparty/connection/urllib3/packages/ssl_match_hostname/_implementation.py:
--------------------------------------------------------------------------------
  1 | """The match_hostname() function from Python 3.3.3, essential when using SSL."""
  2 | 
  3 | # Note: This file is under the PSF license as the code comes from the python
  4 | # stdlib.   http://docs.python.org/3/license.html
  5 | 
  6 | import re
  7 | import sys
  8 | 
  9 | # ipaddress has been backported to 2.6+ in pypi.  If it is installed on the
 10 | # system, use it to handle IPAddress ServerAltnames (this was added in
 11 | # python-3.5) otherwise only do DNS matching.  This allows
 12 | # backports.ssl_match_hostname to continue to be used all the way back to
 13 | # python-2.4.
 14 | try:
 15 |     import ipaddress
 16 | except ImportError:
 17 |     ipaddress = None
 18 | 
 19 | __version__ = '3.5.0.1'
 20 | 
 21 | 
 22 | class CertificateError(ValueError):
 23 |     pass
 24 | 
 25 | 
 26 | def _dnsname_match(dn, hostname, max_wildcards=1):
 27 |     """Matching according to RFC 6125, section 6.4.3
 28 | 
 29 |     http://tools.ietf.org/html/rfc6125#section-6.4.3
 30 |     """
 31 |     pats = []
 32 |     if not dn:
 33 |         return False
 34 | 
 35 |     # Ported from python3-syntax:
 36 |     # leftmost, *remainder = dn.split(r'.')
 37 |     parts = dn.split(r'.')
 38 |     leftmost = parts[0]
 39 |     remainder = parts[1:]
 40 | 
 41 |     wildcards = leftmost.count('*')
 42 |     if wildcards > max_wildcards:
 43 |         # Issue #17980: avoid denials of service by refusing more
 44 |         # than one wildcard per fragment.  A survey of established
 45 |         # policy among SSL implementations showed it to be a
 46 |         # reasonable choice.
 47 |         raise CertificateError(
 48 |             "too many wildcards in certificate DNS name: " + repr(dn))
 49 | 
 50 |     # speed up common case w/o wildcards
 51 |     if not wildcards:
 52 |         return dn.lower() == hostname.lower()
 53 | 
 54 |     # RFC 6125, section 6.4.3, subitem 1.
 55 |     # The client SHOULD NOT attempt to match a presented identifier in which
 56 |     # the wildcard character comprises a label other than the left-most label.
 57 |     if leftmost == '*':
 58 |         # When '*' is a fragment by itself, it matches a non-empty dotless
 59 |         # fragment.
 60 |         pats.append('[^.]+')
 61 |     elif leftmost.startswith('xn--') or hostname.startswith('xn--'):
 62 |         # RFC 6125, section 6.4.3, subitem 3.
 63 |         # The client SHOULD NOT attempt to match a presented identifier
 64 |         # where the wildcard character is embedded within an A-label or
 65 |         # U-label of an internationalized domain name.
 66 |         pats.append(re.escape(leftmost))
 67 |     else:
 68 |         # Otherwise, '*' matches any dotless string, e.g. www*
 69 |         pats.append(re.escape(leftmost).replace(r'\*', '[^.]*'))
 70 | 
 71 |     # add the remaining fragments, ignore any wildcards
 72 |     for frag in remainder:
 73 |         pats.append(re.escape(frag))
 74 | 
 75 |     pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
 76 |     return pat.match(hostname)
 77 | 
 78 | 
 79 | def _to_unicode(obj):
 80 |     if isinstance(obj, str) and sys.version_info < (3,):
 81 |         obj = unicode(obj, encoding='ascii', errors='strict')
 82 |     return obj
 83 | 
 84 | def _ipaddress_match(ipname, host_ip):
 85 |     """Exact matching of IP addresses.
 86 | 
 87 |     RFC 6125 explicitly doesn't define an algorithm for this
 88 |     (section 1.7.2 - "Out of Scope").
 89 |     """
 90 |     # OpenSSL may add a trailing newline to a subjectAltName's IP address
 91 |     # Divergence from upstream: ipaddress can't handle byte str
 92 |     ip = ipaddress.ip_address(_to_unicode(ipname).rstrip())
 93 |     return ip == host_ip
 94 | 
 95 | 
 96 | def match_hostname(cert, hostname):
 97 |     """Verify that *cert* (in decoded format as returned by
 98 |     SSLSocket.getpeercert()) matches the *hostname*.  RFC 2818 and RFC 6125
 99 |     rules are followed, but IP addresses are not accepted for *hostname*.
100 | 
101 |     CertificateError is raised on failure. On success, the function
102 |     returns nothing.
103 |     """
104 |     if not cert:
105 |         raise ValueError("empty or no certificate, match_hostname needs a "
106 |                          "SSL socket or SSL context with either "
107 |                          "CERT_OPTIONAL or CERT_REQUIRED")
108 |     try:
109 |         # Divergence from upstream: ipaddress can't handle byte str
110 |         host_ip = ipaddress.ip_address(_to_unicode(hostname))
111 |     except ValueError:
112 |         # Not an IP address (common case)
113 |         host_ip = None
114 |     except UnicodeError:
115 |         # Divergence from upstream: Have to deal with ipaddress not taking
116 |         # byte strings.  addresses should be all ascii, so we consider it not
117 |         # an ipaddress in this case
118 |         host_ip = None
119 |     except AttributeError:
120 |         # Divergence from upstream: Make ipaddress library optional
121 |         if ipaddress is None:
122 |             host_ip = None
123 |         else:
124 |             raise
125 |     dnsnames = []
126 |     san = cert.get('subjectAltName', ())
127 |     for key, value in san:
128 |         if key == 'DNS':
129 |             if host_ip is None and _dnsname_match(value, hostname):
130 |                 return
131 |             dnsnames.append(value)
132 |         elif key == 'IP Address':
133 |             if host_ip is not None and _ipaddress_match(value, host_ip):
134 |                 return
135 |             dnsnames.append(value)
136 |     if not dnsnames:
137 |         # The subject is only checked when there is no dNSName entry
138 |         # in subjectAltName
139 |         for sub in cert.get('subject', ()):
140 |             for key, value in sub:
141 |                 # XXX according to RFC 2818, the most specific Common Name
142 |                 # must be used.
143 |                 if key == 'commonName':
144 |                     if _dnsname_match(value, hostname):
145 |                         return
146 |                     dnsnames.append(value)
147 |     if len(dnsnames) > 1:
148 |         raise CertificateError("hostname %r "
149 |             "doesn't match either of %s"
150 |             % (hostname, ', '.join(map(repr, dnsnames))))
151 |     elif len(dnsnames) == 1:
152 |         raise CertificateError("hostname %r "
153 |             "doesn't match %r"
154 |             % (hostname, dnsnames[0]))
155 |     else:
156 |         raise CertificateError("no appropriate commonName or "
157 |             "subjectAltName fields were found")
158 | 


--------------------------------------------------------------------------------
/thirdparty/connection/urllib3/request.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | 
  3 | from .filepost import encode_multipart_formdata
  4 | from .packages.six.moves.urllib.parse import urlencode
  5 | 
  6 | 
  7 | __all__ = ['RequestMethods']
  8 | 
  9 | 
 10 | class RequestMethods(object):
 11 |     """
 12 |     Convenience mixin for classes who implement a :meth:`urlopen` method, such
 13 |     as :class:`~urllib3.connectionpool.HTTPConnectionPool` and
 14 |     :class:`~urllib3.poolmanager.PoolManager`.
 15 | 
 16 |     Provides behavior for making common types of HTTP request methods and
 17 |     decides which type of request field encoding to use.
 18 | 
 19 |     Specifically,
 20 | 
 21 |     :meth:`.request_encode_url` is for sending requests whose fields are
 22 |     encoded in the URL (such as GET, HEAD, DELETE).
 23 | 
 24 |     :meth:`.request_encode_body` is for sending requests whose fields are
 25 |     encoded in the *body* of the request using multipart or www-form-urlencoded
 26 |     (such as for POST, PUT, PATCH).
 27 | 
 28 |     :meth:`.request` is for making any kind of request, it will look up the
 29 |     appropriate encoding format and use one of the above two methods to make
 30 |     the request.
 31 | 
 32 |     Initializer parameters:
 33 | 
 34 |     :param headers:
 35 |         Headers to include with all requests, unless other headers are given
 36 |         explicitly.
 37 |     """
 38 | 
 39 |     _encode_url_methods = set(['DELETE', 'GET', 'HEAD', 'OPTIONS'])
 40 | 
 41 |     def __init__(self, headers=None):
 42 |         self.headers = headers or {}
 43 | 
 44 |     def urlopen(self, method, url, body=None, headers=None,
 45 |                 encode_multipart=True, multipart_boundary=None,
 46 |                 **kw):  # Abstract
 47 |         raise NotImplemented("Classes extending RequestMethods must implement "
 48 |                              "their own ``urlopen`` method.")
 49 | 
 50 |     def request(self, method, url, fields=None, headers=None, **urlopen_kw):
 51 |         """
 52 |         Make a request using :meth:`urlopen` with the appropriate encoding of
 53 |         ``fields`` based on the ``method`` used.
 54 | 
 55 |         This is a convenience method that requires the least amount of manual
 56 |         effort. It can be used in most situations, while still having the
 57 |         option to drop down to more specific methods when necessary, such as
 58 |         :meth:`request_encode_url`, :meth:`request_encode_body`,
 59 |         or even the lowest level :meth:`urlopen`.
 60 |         """
 61 |         method = method.upper()
 62 | 
 63 |         if method in self._encode_url_methods:
 64 |             return self.request_encode_url(method, url, fields=fields,
 65 |                                            headers=headers,
 66 |                                            **urlopen_kw)
 67 |         else:
 68 |             return self.request_encode_body(method, url, fields=fields,
 69 |                                             headers=headers,
 70 |                                             **urlopen_kw)
 71 | 
 72 |     def request_encode_url(self, method, url, fields=None, headers=None,
 73 |                            **urlopen_kw):
 74 |         """
 75 |         Make a request using :meth:`urlopen` with the ``fields`` encoded in
 76 |         the url. This is useful for request methods like GET, HEAD, DELETE, etc.
 77 |         """
 78 |         if headers is None:
 79 |             headers = self.headers
 80 | 
 81 |         extra_kw = {'headers': headers}
 82 |         extra_kw.update(urlopen_kw)
 83 | 
 84 |         if fields:
 85 |             url += '?' + urlencode(fields)
 86 | 
 87 |         return self.urlopen(method, url, **extra_kw)
 88 | 
 89 |     def request_encode_body(self, method, url, fields=None, headers=None,
 90 |                             encode_multipart=True, multipart_boundary=None,
 91 |                             **urlopen_kw):
 92 |         """
 93 |         Make a request using :meth:`urlopen` with the ``fields`` encoded in
 94 |         the body. This is useful for request methods like POST, PUT, PATCH, etc.
 95 | 
 96 |         When ``encode_multipart=True`` (default), then
 97 |         :meth:`urllib3.filepost.encode_multipart_formdata` is used to encode
 98 |         the payload with the appropriate content type. Otherwise
 99 |         :meth:`urllib.urlencode` is used with the
100 |         'application/x-www-form-urlencoded' content type.
101 | 
102 |         Multipart encoding must be used when posting files, and it's reasonably
103 |         safe to use it in other times too. However, it may break request
104 |         signing, such as with OAuth.
105 | 
106 |         Supports an optional ``fields`` parameter of key/value strings AND
107 |         key/filetuple. A filetuple is a (filename, data, MIME type) tuple where
108 |         the MIME type is optional. For example::
109 | 
110 |             fields = {
111 |                 'foo': 'bar',
112 |                 'fakefile': ('foofile.txt', 'contents of foofile'),
113 |                 'realfile': ('barfile.txt', open('realfile').read()),
114 |                 'typedfile': ('bazfile.bin', open('bazfile').read(),
115 |                               'image/jpeg'),
116 |                 'nonamefile': 'contents of nonamefile field',
117 |             }
118 | 
119 |         When uploading a file, providing a filename (the first parameter of the
120 |         tuple) is optional but recommended to best mimick behavior of browsers.
121 | 
122 |         Note that if ``headers`` are supplied, the 'Content-Type' header will
123 |         be overwritten because it depends on the dynamic random boundary string
124 |         which is used to compose the body of the request. The random boundary
125 |         string can be explicitly set with the ``multipart_boundary`` parameter.
126 |         """
127 |         if headers is None:
128 |             headers = self.headers
129 | 
130 |         extra_kw = {'headers': {}}
131 | 
132 |         if fields:
133 |             if 'body' in urlopen_kw:
134 |                 raise TypeError(
135 |                     "request got values for both 'fields' and 'body', can only specify one.")
136 | 
137 |             if encode_multipart:
138 |                 body, content_type = encode_multipart_formdata(fields, boundary=multipart_boundary)
139 |             else:
140 |                 body, content_type = urlencode(fields), 'application/x-www-form-urlencoded'
141 | 
142 |             extra_kw['body'] = body
143 |             extra_kw['headers'] = {'Content-Type': content_type}
144 | 
145 |         extra_kw['headers'].update(headers)
146 |         extra_kw.update(urlopen_kw)
147 | 
148 |         return self.urlopen(method, url, **extra_kw)
149 | 


--------------------------------------------------------------------------------
/thirdparty/connection/urllib3/util/__init__.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import
 2 | # For backwards compatibility, provide imports that used to be here.
 3 | from .connection import is_connection_dropped
 4 | from .request import make_headers
 5 | from .response import is_fp_closed
 6 | from .ssl_ import (
 7 |     SSLContext,
 8 |     HAS_SNI,
 9 |     IS_PYOPENSSL,
10 |     IS_SECURETRANSPORT,
11 |     assert_fingerprint,
12 |     resolve_cert_reqs,
13 |     resolve_ssl_version,
14 |     ssl_wrap_socket,
15 | )
16 | from .timeout import (
17 |     current_time,
18 |     Timeout,
19 | )
20 | 
21 | from .retry import Retry
22 | from .url import (
23 |     get_host,
24 |     parse_url,
25 |     split_first,
26 |     Url,
27 | )
28 | from .wait import (
29 |     wait_for_read,
30 |     wait_for_write
31 | )
32 | 
33 | __all__ = (
34 |     'HAS_SNI',
35 |     'IS_PYOPENSSL',
36 |     'IS_SECURETRANSPORT',
37 |     'SSLContext',
38 |     'Retry',
39 |     'Timeout',
40 |     'Url',
41 |     'assert_fingerprint',
42 |     'current_time',
43 |     'is_connection_dropped',
44 |     'is_fp_closed',
45 |     'get_host',
46 |     'parse_url',
47 |     'make_headers',
48 |     'resolve_cert_reqs',
49 |     'resolve_ssl_version',
50 |     'split_first',
51 |     'ssl_wrap_socket',
52 |     'wait_for_read',
53 |     'wait_for_write'
54 | )
55 | 


--------------------------------------------------------------------------------
/thirdparty/connection/urllib3/util/connection.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | import socket
  3 | from .wait import wait_for_read
  4 | from .selectors import HAS_SELECT, SelectorError
  5 | 
  6 | 
  7 | def is_connection_dropped(conn):  # Platform-specific
  8 |     """
  9 |     Returns True if the connection is dropped and should be closed.
 10 | 
 11 |     :param conn:
 12 |         :class:`httplib.HTTPConnection` object.
 13 | 
 14 |     Note: For platforms like AppEngine, this will always return ``False`` to
 15 |     let the platform handle connection recycling transparently for us.
 16 |     """
 17 |     sock = getattr(conn, 'sock', False)
 18 |     if sock is False:  # Platform-specific: AppEngine
 19 |         return False
 20 |     if sock is None:  # Connection already closed (such as by httplib).
 21 |         return True
 22 | 
 23 |     if not HAS_SELECT:
 24 |         return False
 25 | 
 26 |     try:
 27 |         return bool(wait_for_read(sock, timeout=0.0))
 28 |     except SelectorError:
 29 |         return True
 30 | 
 31 | 
 32 | # This function is copied from socket.py in the Python 2.7 standard
 33 | # library test suite. Added to its signature is only `socket_options`.
 34 | # One additional modification is that we avoid binding to IPv6 servers
 35 | # discovered in DNS if the system doesn't have IPv6 functionality.
 36 | def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
 37 |                       source_address=None, socket_options=None):
 38 |     """Connect to *address* and return the socket object.
 39 | 
 40 |     Convenience function.  Connect to *address* (a 2-tuple ``(host,
 41 |     port)``) and return the socket object.  Passing the optional
 42 |     *timeout* parameter will set the timeout on the socket instance
 43 |     before attempting to connect.  If no *timeout* is supplied, the
 44 |     global default timeout setting returned by :func:`getdefaulttimeout`
 45 |     is used.  If *source_address* is set it must be a tuple of (host, port)
 46 |     for the socket to bind as a source address before making the connection.
 47 |     An host of '' or port 0 tells the OS to use the default.
 48 |     """
 49 | 
 50 |     host, port = address
 51 |     if host.startswith('['):
 52 |         host = host.strip('[]')
 53 |     err = None
 54 | 
 55 |     # Using the value from allowed_gai_family() in the context of getaddrinfo lets
 56 |     # us select whether to work with IPv4 DNS records, IPv6 records, or both.
 57 |     # The original create_connection function always returns all records.
 58 |     family = allowed_gai_family()
 59 | 
 60 |     for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
 61 |         af, socktype, proto, canonname, sa = res
 62 |         sock = None
 63 |         try:
 64 |             sock = socket.socket(af, socktype, proto)
 65 | 
 66 |             # If provided, set socket level options before connecting.
 67 |             _set_socket_options(sock, socket_options)
 68 | 
 69 |             if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
 70 |                 sock.settimeout(timeout)
 71 |             if source_address:
 72 |                 sock.bind(source_address)
 73 |             sock.connect(sa)
 74 |             return sock
 75 | 
 76 |         except socket.error as e:
 77 |             err = e
 78 |             if sock is not None:
 79 |                 sock.close()
 80 |                 sock = None
 81 | 
 82 |     if err is not None:
 83 |         raise err
 84 | 
 85 |     raise socket.error("getaddrinfo returns an empty list")
 86 | 
 87 | 
 88 | def _set_socket_options(sock, options):
 89 |     if options is None:
 90 |         return
 91 | 
 92 |     for opt in options:
 93 |         sock.setsockopt(*opt)
 94 | 
 95 | 
 96 | def allowed_gai_family():
 97 |     """This function is designed to work in the context of
 98 |     getaddrinfo, where family=socket.AF_UNSPEC is the default and
 99 |     will perform a DNS search for both IPv6 and IPv4 records."""
100 | 
101 |     family = socket.AF_INET
102 |     if HAS_IPV6:
103 |         family = socket.AF_UNSPEC
104 |     return family
105 | 
106 | 
107 | def _has_ipv6(host):
108 |     """ Returns True if the system can bind an IPv6 address. """
109 |     sock = None
110 |     has_ipv6 = False
111 | 
112 |     if socket.has_ipv6:
113 |         # has_ipv6 returns true if cPython was compiled with IPv6 support.
114 |         # It does not tell us if the system has IPv6 support enabled. To
115 |         # determine that we must bind to an IPv6 address.
116 |         # https://github.com/shazow/urllib3/pull/611
117 |         # https://bugs.python.org/issue658327
118 |         try:
119 |             sock = socket.socket(socket.AF_INET6)
120 |             sock.bind((host, 0))
121 |             has_ipv6 = True
122 |         except Exception:
123 |             pass
124 | 
125 |     if sock:
126 |         sock.close()
127 |     return has_ipv6
128 | 
129 | 
130 | HAS_IPV6 = _has_ipv6('::1')
131 | 


--------------------------------------------------------------------------------
/thirdparty/connection/urllib3/util/request.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | from base64 import b64encode
  3 | 
  4 | from ..packages.six import b, integer_types
  5 | from ..exceptions import UnrewindableBodyError
  6 | 
  7 | ACCEPT_ENCODING = 'gzip,deflate'
  8 | _FAILEDTELL = object()
  9 | 
 10 | 
 11 | def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
 12 |                  basic_auth=None, proxy_basic_auth=None, disable_cache=None):
 13 |     """
 14 |     Shortcuts for generating request headers.
 15 | 
 16 |     :param keep_alive:
 17 |         If ``True``, adds 'connection: keep-alive' header.
 18 | 
 19 |     :param accept_encoding:
 20 |         Can be a boolean, list, or string.
 21 |         ``True`` translates to 'gzip,deflate'.
 22 |         List will get joined by comma.
 23 |         String will be used as provided.
 24 | 
 25 |     :param user_agent:
 26 |         String representing the user-agent you want, such as
 27 |         "python-urllib3/0.6"
 28 | 
 29 |     :param basic_auth:
 30 |         Colon-separated username:password string for 'authorization: basic ...'
 31 |         auth header.
 32 | 
 33 |     :param proxy_basic_auth:
 34 |         Colon-separated username:password string for 'proxy-authorization: basic ...'
 35 |         auth header.
 36 | 
 37 |     :param disable_cache:
 38 |         If ``True``, adds 'cache-control: no-cache' header.
 39 | 
 40 |     Example::
 41 | 
 42 |         >>> make_headers(keep_alive=True, user_agent="Batman/1.0")
 43 |         {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'}
 44 |         >>> make_headers(accept_encoding=True)
 45 |         {'accept-encoding': 'gzip,deflate'}
 46 |     """
 47 |     headers = {}
 48 |     if accept_encoding:
 49 |         if isinstance(accept_encoding, str):
 50 |             pass
 51 |         elif isinstance(accept_encoding, list):
 52 |             accept_encoding = ','.join(accept_encoding)
 53 |         else:
 54 |             accept_encoding = ACCEPT_ENCODING
 55 |         headers['accept-encoding'] = accept_encoding
 56 | 
 57 |     if user_agent:
 58 |         headers['user-agent'] = user_agent
 59 | 
 60 |     if keep_alive:
 61 |         headers['connection'] = 'keep-alive'
 62 | 
 63 |     if basic_auth:
 64 |         headers['authorization'] = 'Basic ' + \
 65 |             b64encode(b(basic_auth)).decode('utf-8')
 66 | 
 67 |     if proxy_basic_auth:
 68 |         headers['proxy-authorization'] = 'Basic ' + \
 69 |             b64encode(b(proxy_basic_auth)).decode('utf-8')
 70 | 
 71 |     if disable_cache:
 72 |         headers['cache-control'] = 'no-cache'
 73 | 
 74 |     return headers
 75 | 
 76 | 
 77 | def set_file_position(body, pos):
 78 |     """
 79 |     If a position is provided, move file to that point.
 80 |     Otherwise, we'll attempt to record a position for future use.
 81 |     """
 82 |     if pos is not None:
 83 |         rewind_body(body, pos)
 84 |     elif getattr(body, 'tell', None) is not None:
 85 |         try:
 86 |             pos = body.tell()
 87 |         except (IOError, OSError):
 88 |             # This differentiates from None, allowing us to catch
 89 |             # a failed `tell()` later when trying to rewind the body.
 90 |             pos = _FAILEDTELL
 91 | 
 92 |     return pos
 93 | 
 94 | 
 95 | def rewind_body(body, body_pos):
 96 |     """
 97 |     Attempt to rewind body to a certain position.
 98 |     Primarily used for request redirects and retries.
 99 | 
100 |     :param body:
101 |         File-like object that supports seek.
102 | 
103 |     :param int pos:
104 |         Position to seek to in file.
105 |     """
106 |     body_seek = getattr(body, 'seek', None)
107 |     if body_seek is not None and isinstance(body_pos, integer_types):
108 |         try:
109 |             body_seek(body_pos)
110 |         except (IOError, OSError):
111 |             raise UnrewindableBodyError("An error occurred when rewinding request "
112 |                                         "body for redirect/retry.")
113 |     elif body_pos is _FAILEDTELL:
114 |         raise UnrewindableBodyError("Unable to record file position for rewinding "
115 |                                     "request body during a redirect/retry.")
116 |     else:
117 |         raise ValueError("body_pos must be of type integer, "
118 |                          "instead it was %s." % type(body_pos))
119 | 


--------------------------------------------------------------------------------
/thirdparty/connection/urllib3/util/response.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import
 2 | from ..packages.six.moves import http_client as httplib
 3 | 
 4 | from ..exceptions import HeaderParsingError
 5 | 
 6 | 
 7 | def is_fp_closed(obj):
 8 |     """
 9 |     Checks whether a given file-like object is closed.
10 | 
11 |     :param obj:
12 |         The file-like object to check.
13 |     """
14 | 
15 |     try:
16 |         # Check `isclosed()` first, in case Python3 doesn't set `closed`.
17 |         # GH Issue #928
18 |         return obj.isclosed()
19 |     except AttributeError:
20 |         pass
21 | 
22 |     try:
23 |         # Check via the official file-like-object way.
24 |         return obj.closed
25 |     except AttributeError:
26 |         pass
27 | 
28 |     try:
29 |         # Check if the object is a container for another file-like object that
30 |         # gets released on exhaustion (e.g. HTTPResponse).
31 |         return obj.fp is None
32 |     except AttributeError:
33 |         pass
34 | 
35 |     raise ValueError("Unable to determine whether fp is closed.")
36 | 
37 | 
38 | def assert_header_parsing(headers):
39 |     """
40 |     Asserts whether all headers have been successfully parsed.
41 |     Extracts encountered errors from the result of parsing headers.
42 | 
43 |     Only works on Python 3.
44 | 
45 |     :param headers: Headers to verify.
46 |     :type headers: `httplib.HTTPMessage`.
47 | 
48 |     :raises urllib3.exceptions.HeaderParsingError:
49 |         If parsing errors are found.
50 |     """
51 | 
52 |     # This will fail silently if we pass in the wrong kind of parameter.
53 |     # To make debugging easier add an explicit check.
54 |     if not isinstance(headers, httplib.HTTPMessage):
55 |         raise TypeError('expected httplib.Message, got {0}.'.format(
56 |             type(headers)))
57 | 
58 |     defects = getattr(headers, 'defects', None)
59 |     get_payload = getattr(headers, 'get_payload', None)
60 | 
61 |     unparsed_data = None
62 |     if get_payload:  # Platform-specific: Python 3.
63 |         unparsed_data = get_payload()
64 | 
65 |     if defects or unparsed_data:
66 |         raise HeaderParsingError(defects=defects, unparsed_data=unparsed_data)
67 | 
68 | 
69 | def is_response_to_head(response):
70 |     """
71 |     Checks whether the request of a response has been a HEAD-request.
72 |     Handles the quirks of AppEngine.
73 | 
74 |     :param conn:
75 |     :type conn: :class:`httplib.HTTPResponse`
76 |     """
77 |     # FIXME: Can we do this somehow without accessing private httplib _method?
78 |     method = response._method
79 |     if isinstance(method, int):  # Platform-specific: Appengine
80 |         return method == 3
81 |     return method.upper() == 'HEAD'
82 | 


--------------------------------------------------------------------------------
/thirdparty/connection/urllib3/util/url.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | from collections import namedtuple
  3 | 
  4 | from ..exceptions import LocationParseError
  5 | 
  6 | 
  7 | url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment']
  8 | 
  9 | # We only want to normalize urls with an HTTP(S) scheme.
 10 | # urllib3 infers URLs without a scheme (None) to be http.
 11 | NORMALIZABLE_SCHEMES = ('http', 'https', None)
 12 | 
 13 | 
 14 | class Url(namedtuple('Url', url_attrs)):
 15 |     """
 16 |     Datastructure for representing an HTTP URL. Used as a return value for
 17 |     :func:`parse_url`. Both the scheme and host are normalized as they are
 18 |     both case-insensitive according to RFC 3986.
 19 |     """
 20 |     __slots__ = ()
 21 | 
 22 |     def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None,
 23 |                 query=None, fragment=None):
 24 |         if path and not path.startswith('/'):
 25 |             path = '/' + path
 26 |         if scheme:
 27 |             scheme = scheme.lower()
 28 |         if host and scheme in NORMALIZABLE_SCHEMES:
 29 |             host = host.lower()
 30 |         return super(Url, cls).__new__(cls, scheme, auth, host, port, path,
 31 |                                        query, fragment)
 32 | 
 33 |     @property
 34 |     def hostname(self):
 35 |         """For backwards-compatibility with urlparse. We're nice like that."""
 36 |         return self.host
 37 | 
 38 |     @property
 39 |     def request_uri(self):
 40 |         """Absolute path including the query string."""
 41 |         uri = self.path or '/'
 42 | 
 43 |         if self.query is not None:
 44 |             uri += '?' + self.query
 45 | 
 46 |         return uri
 47 | 
 48 |     @property
 49 |     def netloc(self):
 50 |         """Network location including host and port"""
 51 |         if self.port:
 52 |             return '%s:%d' % (self.host, self.port)
 53 |         return self.host
 54 | 
 55 |     @property
 56 |     def url(self):
 57 |         """
 58 |         Convert self into a url
 59 | 
 60 |         This function should more or less round-trip with :func:`.parse_url`. The
 61 |         returned url may not be exactly the same as the url inputted to
 62 |         :func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls
 63 |         with a blank port will have : removed).
 64 | 
 65 |         Example: ::
 66 | 
 67 |             >>> U = parse_url('http://google.com/mail/')
 68 |             >>> U.url
 69 |             'http://google.com/mail/'
 70 |             >>> Url('http', 'username:password', 'host.com', 80,
 71 |             ... '/path', 'query', 'fragment').url
 72 |             'http://username:password@host.com:80/path?query#fragment'
 73 |         """
 74 |         scheme, auth, host, port, path, query, fragment = self
 75 |         url = ''
 76 | 
 77 |         # We use "is not None" we want things to happen with empty strings (or 0 port)
 78 |         if scheme is not None:
 79 |             url += scheme + '://'
 80 |         if auth is not None:
 81 |             url += auth + '@'
 82 |         if host is not None:
 83 |             url += host
 84 |         if port is not None:
 85 |             url += ':' + str(port)
 86 |         if path is not None:
 87 |             url += path
 88 |         if query is not None:
 89 |             url += '?' + query
 90 |         if fragment is not None:
 91 |             url += '#' + fragment
 92 | 
 93 |         return url
 94 | 
 95 |     def __str__(self):
 96 |         return self.url
 97 | 
 98 | 
 99 | def split_first(s, delims):
100 |     """
101 |     Given a string and an iterable of delimiters, split on the first found
102 |     delimiter. Return two split parts and the matched delimiter.
103 | 
104 |     If not found, then the first part is the full input string.
105 | 
106 |     Example::
107 | 
108 |         >>> split_first('foo/bar?baz', '?/=')
109 |         ('foo', 'bar?baz', '/')
110 |         >>> split_first('foo/bar?baz', '123')
111 |         ('foo/bar?baz', '', None)
112 | 
113 |     Scales linearly with number of delims. Not ideal for large number of delims.
114 |     """
115 |     min_idx = None
116 |     min_delim = None
117 |     for d in delims:
118 |         idx = s.find(d)
119 |         if idx < 0:
120 |             continue
121 | 
122 |         if min_idx is None or idx < min_idx:
123 |             min_idx = idx
124 |             min_delim = d
125 | 
126 |     if min_idx is None or min_idx < 0:
127 |         return s, '', None
128 | 
129 |     return s[:min_idx], s[min_idx + 1:], min_delim
130 | 
131 | 
132 | def parse_url(url):
133 |     """
134 |     Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
135 |     performed to parse incomplete urls. Fields not provided will be None.
136 | 
137 |     Partly backwards-compatible with :mod:`urlparse`.
138 | 
139 |     Example::
140 | 
141 |         >>> parse_url('http://google.com/mail/')
142 |         Url(scheme='http', host='google.com', port=None, path='/mail/', ...)
143 |         >>> parse_url('google.com:80')
144 |         Url(scheme=None, host='google.com', port=80, path=None, ...)
145 |         >>> parse_url('/foo?bar')
146 |         Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)
147 |     """
148 | 
149 |     # While this code has overlap with stdlib's urlparse, it is much
150 |     # simplified for our needs and less annoying.
151 |     # Additionally, this implementations does silly things to be optimal
152 |     # on CPython.
153 | 
154 |     if not url:
155 |         # Empty
156 |         return Url()
157 | 
158 |     scheme = None
159 |     auth = None
160 |     host = None
161 |     port = None
162 |     path = None
163 |     fragment = None
164 |     query = None
165 | 
166 |     # Scheme
167 |     if '://' in url:
168 |         scheme, url = url.split('://', 1)
169 | 
170 |     # Find the earliest Authority Terminator
171 |     # (http://tools.ietf.org/html/rfc3986#section-3.2)
172 |     url, path_, delim = split_first(url, ['/', '?', '#'])
173 | 
174 |     if delim:
175 |         # Reassemble the path
176 |         path = delim + path_
177 | 
178 |     # Auth
179 |     if '@' in url:
180 |         # Last '@' denotes end of auth part
181 |         auth, url = url.rsplit('@', 1)
182 | 
183 |     # IPv6
184 |     if url and url[0] == '[':
185 |         host, url = url.split(']', 1)
186 |         host += ']'
187 | 
188 |     # Port
189 |     if ':' in url:
190 |         _host, port = url.split(':', 1)
191 | 
192 |         if not host:
193 |             host = _host
194 | 
195 |         if port:
196 |             # If given, ports must be integers. No whitespace, no plus or
197 |             # minus prefixes, no non-integer digits such as ^2 (superscript).
198 |             if not port.isdigit():
199 |                 raise LocationParseError(url)
200 |             try:
201 |                 port = int(port)
202 |             except ValueError:
203 |                 raise LocationParseError(url)
204 |         else:
205 |             # Blank ports are cool, too. (rfc3986#section-3.2.3)
206 |             port = None
207 | 
208 |     elif not host and url:
209 |         host = url
210 | 
211 |     if not path:
212 |         return Url(scheme, auth, host, port, path, query, fragment)
213 | 
214 |     # Fragment
215 |     if '#' in path:
216 |         path, fragment = path.split('#', 1)
217 | 
218 |     # Query
219 |     if '?' in path:
220 |         path, query = path.split('?', 1)
221 | 
222 |     return Url(scheme, auth, host, port, path, query, fragment)
223 | 
224 | 
225 | def get_host(url):
226 |     """
227 |     Deprecated. Use :func:`parse_url` instead.
228 |     """
229 |     p = parse_url(url)
230 |     return p.scheme or 'http', p.hostname, p.port
231 | 


--------------------------------------------------------------------------------
/thirdparty/connection/urllib3/util/wait.py:
--------------------------------------------------------------------------------
 1 | from .selectors import (
 2 |     HAS_SELECT,
 3 |     DefaultSelector,
 4 |     EVENT_READ,
 5 |     EVENT_WRITE
 6 | )
 7 | 
 8 | 
 9 | def _wait_for_io_events(socks, events, timeout=None):
10 |     """ Waits for IO events to be available from a list of sockets
11 |     or optionally a single socket if passed in. Returns a list of
12 |     sockets that can be interacted with immediately. """
13 |     if not HAS_SELECT:
14 |         raise ValueError('Platform does not have a selector')
15 |     if not isinstance(socks, list):
16 |         # Probably just a single socket.
17 |         if hasattr(socks, "fileno"):
18 |             socks = [socks]
19 |         # Otherwise it might be a non-list iterable.
20 |         else:
21 |             socks = list(socks)
22 |     with DefaultSelector() as selector:
23 |         for sock in socks:
24 |             selector.register(sock, events)
25 |         return [key[0].fileobj for key in
26 |                 selector.select(timeout) if key[1] & events]
27 | 
28 | 
29 | def wait_for_read(socks, timeout=None):
30 |     """ Waits for reading to be available from a list of sockets
31 |     or optionally a single socket if passed in. Returns a list of
32 |     sockets that can be read from immediately. """
33 |     return _wait_for_io_events(socks, EVENT_READ, timeout)
34 | 
35 | 
36 | def wait_for_write(socks, timeout=None):
37 |     """ Waits for writing to be available from a list of sockets
38 |     or optionally a single socket if passed in. Returns a list of
39 |     sockets that can be written to immediately. """
40 |     return _wait_for_io_events(socks, EVENT_WRITE, timeout)
41 | 


--------------------------------------------------------------------------------