├── README.md ├── common.py ├── config.py ├── controller.py ├── dict ├── configfile.lst ├── dependents.lst ├── directory.lst ├── directory_common.lst ├── filename.lst ├── package_ext.lst └── tmpfile_ext.lst ├── getlinks.py ├── libs ├── .tld_set ├── FuzzUrlGenerator.py ├── GenerateDict.py ├── GetAllLink.py ├── HttpFuzzEnginer.py ├── UrlSplitParser.py ├── __init__.py ├── requests │ ├── __init__.py │ ├── adapters.py │ ├── api.py │ ├── auth.py │ ├── cacert.pem │ ├── certs.py │ ├── compat.py │ ├── cookies.py │ ├── exceptions.py │ ├── hooks.py │ ├── models.py │ ├── packages │ │ ├── __init__.py │ │ ├── chardet │ │ │ ├── __init__.py │ │ │ ├── big5freq.py │ │ │ ├── big5prober.py │ │ │ ├── chardetect.py │ │ │ ├── chardistribution.py │ │ │ ├── charsetgroupprober.py │ │ │ ├── charsetprober.py │ │ │ ├── codingstatemachine.py │ │ │ ├── compat.py │ │ │ ├── constants.py │ │ │ ├── cp949prober.py │ │ │ ├── escprober.py │ │ │ ├── escsm.py │ │ │ ├── eucjpprober.py │ │ │ ├── euckrfreq.py │ │ │ ├── euckrprober.py │ │ │ ├── euctwfreq.py │ │ │ ├── euctwprober.py │ │ │ ├── gb2312freq.py │ │ │ ├── gb2312prober.py │ │ │ ├── hebrewprober.py │ │ │ ├── jisfreq.py │ │ │ ├── jpcntx.py │ │ │ ├── langbulgarianmodel.py │ │ │ ├── langcyrillicmodel.py │ │ │ ├── langgreekmodel.py │ │ │ ├── langhebrewmodel.py │ │ │ ├── langhungarianmodel.py │ │ │ ├── langthaimodel.py │ │ │ ├── latin1prober.py │ │ │ ├── mbcharsetprober.py │ │ │ ├── mbcsgroupprober.py │ │ │ ├── mbcssm.py │ │ │ ├── sbcharsetprober.py │ │ │ ├── sbcsgroupprober.py │ │ │ ├── sjisprober.py │ │ │ ├── universaldetector.py │ │ │ └── utf8prober.py │ │ └── urllib3 │ │ │ ├── __init__.py │ │ │ ├── _collections.py │ │ │ ├── connection.py │ │ │ ├── connectionpool.py │ │ │ ├── contrib │ │ │ ├── __init__.py │ │ │ ├── ntlmpool.py │ │ │ └── pyopenssl.py │ │ │ ├── exceptions.py │ │ │ ├── fields.py │ │ │ ├── filepost.py │ │ │ ├── packages │ │ │ ├── __init__.py │ │ │ ├── ordered_dict.py │ │ │ ├── six.py │ │ │ └── ssl_match_hostname │ │ │ │ ├── __init__.py │ │ 
│ │ └── _implementation.py │ │ │ ├── poolmanager.py │ │ │ ├── request.py │ │ │ ├── response.py │ │ │ └── util │ │ │ ├── __init__.py │ │ │ ├── connection.py │ │ │ ├── request.py │ │ │ ├── response.py │ │ │ ├── retry.py │ │ │ ├── ssl_.py │ │ │ ├── timeout.py │ │ │ └── url.py │ ├── sessions.py │ ├── status_codes.py │ ├── structures.py │ └── utils.py ├── tldextract.py ├── utils │ ├── FileUtils.py │ ├── Queue.py │ ├── __init__.py │ └── exrex.py └── wyparser.py ├── requirements.txt └── wyspider.py /README.md: -------------------------------------------------------------------------------- 1 | # weakfilescan 2 | 基于爬虫,动态收集扫描目标相关信息后进行二次整理形成字典规则,利用动态规则的多线程敏感信息泄露检测工具,支持多种个性化定制选项,包括: 3 | * 规则字典多样化定义(支持正则、整数、字符、日期) 4 | * 扫描域名策略(域名全称、主域名、域名的名字) 5 | * 自定义HTTP状态码 6 | * 支持动态配置HTTP脚本扩展名 7 | * 自定义判断文件是否存在正则 8 | * 返回结果集误报清洗选项 9 | * HTTPS服务器证书校验 10 | * 线程数定义 11 | * HTTP请求超时时间 12 | * 是否允许URL重定向 13 | * 是否开启Session支持,在发出的所有请求之间保持cookies 14 | * 是否允许随机User-Agent 15 | * 是否允许随机X-Forwarded-For 16 | * 动态代理列表配置(支持TOR) 17 | * HTTP头自定义 18 | 19 | 更多使用详情参照 [/config.py](https://github.com/ring04h/weakfilescan/blob/master/config.py) 20 | 21 | # 快速开始 22 | ``` shell 23 | python wyspider.py http://wuyun.org php 24 | ``` 25 | 26 | # 字典支持规则 27 | ## 规则使用简介 28 | 在字典中使用规则引擎,必须以 **{** 括号开头,并以 **}$** 结尾,类型后面跟的 **#** 代表生成数据的长度,**$** 代表单步值,开始-结束,数据的起始区间设置。 29 | ``` 30 | {规则=类型#长度$step:开始-结束}$ 31 | ``` 32 | | 规则 | 说明 | 33 | | :-------- |:--------| 34 | | re | 正则引擎 | 35 | | int | 整数 | 36 | | str | 字符 | 37 | | date | 日期 | 38 | 39 | 正则引擎类型 40 | ------------ 41 | 使用实例 42 | {re=引擎名称:正则表达式}$ 43 | ``` python 44 | {re=exrex:[0-9]}$ 45 | [u'0', u'1', u'2', u'3', u'4', u'5', u'6', u'7', u'8', u'9'] 46 | {re=exrex:[aA]dmin[1-5]}$ 47 | [u'admin1', u'admin2', u'admin3', u'admin4', u'admin5', u'Admin1', u'Admin2', u'Admin3', u'Admin4', u'Admin5'] 48 | ``` 49 | 50 | 整数类规则 51 | ------------ 52 | | 类型 | 使用实例 | 53 | | :-------- |:--------| 54 | | 顺序递进 处理step | {int=series$单步值:开始数字-结束数字}$ | 55 | ``` python 56 | {int=series$2:0-10}$ 
57 | [0, 2, 4, 6, 8, 10] 58 | ``` 59 | 60 | | 类型 | 使用实例 | 61 | | :-------- |:--------| 62 | | 连号数字 | {int=digits#长度:开始数字-结束数字}$ | 63 | ``` python 64 | {int=digits#3:0-9}$ 65 | [123, 234, 345, 456, 567, 678, 789] 66 | ``` 67 | 68 | | 类型 | 使用实例 | 69 | | :-------- |:--------| 70 | | 重叠数字 | {int=overlap#长度:开始数字-结束数字}$ | 71 | ``` python 72 | {int=overlap#4:0-9}$ 73 | [1111, 2222, 3333, 4444, 5555, 6666, 7777, 8888, 9999] 74 | ``` 75 | 76 | 字符类规则 77 | ------------ 78 | | 类型 | 使用实例 | 79 | | :-------- |:--------| 80 | | 顺序递进 处理step | {str=letters#长度:开始字符-结束字符}$ | 81 | ``` python 82 | {str=letters#3:a-g}$ 83 | ['abc', 'bcd', 'cde', 'def', 'efg'] 84 | ``` 85 | 86 | | 类型 | 使用实例 | 87 | | :-------- |:--------| 88 | | 重叠字母 | {str=overlap#长度:开始字符-结束字符}$ | 89 | ``` python 90 | {str=overlap#4:a-g}$ 91 | ['aaaa', 'bbbb', 'cccc', 'dddd', 'eeee', 'ffff', 'gggg'] 92 | ``` 93 | 94 | 日期类规则 95 | ------------ 96 | | 类型 | 使用实例 | 97 | | :-------- |:--------| 98 | | 年 | {date=year:开始年份-结束年份}$ | 99 | ``` python 100 | {date=year:2010-2015}$ 101 | [2010, 2011, 2012, 2013, 2014, 2015] 102 | ``` 103 | 104 | | 类型 | 使用实例 | 105 | | :-------- |:--------| 106 | | 月 | {date=mon:开始月份-结束月份}$ | 107 | ``` python 108 | {date=mon:01-12}$ 109 | [1, 01, 2, 02, 3, 03, ‘...’, 9, 09] 110 | ``` 111 | 112 | | 类型 | 使用实例 | 113 | | :-------- |:--------| 114 | | 日 | {date=day:开始日-结束日}$ | 115 | ``` python 116 | {date=day:01-31}$ 117 | [1, 01, 2, 02, 3, 03, 4, 04, 5, 05, ‘...’, 31] 118 | ``` 119 | 120 | | 类型 | 使用实例 | 121 | | :-------- |:--------| 122 | | 年月 | {date=year_mon:开始年月-结束年月}$ | 123 | ``` python 124 | {date=year_mon:201501-201504}$ 125 | [201501, 20151, 201502, 20152, ‘...’, 201504] 126 | ``` 127 | 128 | | 类型 | 使用实例 | 129 | | :-------- |:--------| 130 | | 月日 | {date=mon_day:开始月日-结束月日}$ | 131 | ``` python 132 | {date=mon_day:0501-0531}$ 133 | [0501, 51, 0502, 52, 0506, 56, 0511, 511, ‘...’, 0530,530] 134 | ``` 135 | 136 | | 类型 | 使用实例 | 137 | | :-------- |:--------| 138 | | 年月日 | {date=year_mon_day:开始年月日-结束年月日}$ | 
def get_baseurl(link):
    """Return the leading ``scheme://host`` part of *link*.

    Everything after the network location (path, query, fragment) is
    dropped.  Scheme-relative links such as ``//host/path`` yield ``//host``.

    Args:
        link: URL string to truncate.

    Returns:
        The prefix of *link* up to and including its network location, or
        ``None`` when *link* has no network location (e.g. a relative path).
    """
    netloc = urlparse.urlparse(link).netloc
    if not netloc:
        # Relative links carry no host, so there is no base URL to report.
        return None
    # Anchor on '//' + host instead of splitting on the bare host text: a
    # short host name ('t', 'http', ...) can also occur inside the scheme,
    # and splitting on it would cut the URL in the wrong place.
    head, marker, _ = link.partition('//' + netloc)
    return head + marker
def http_request_get(url, body_content_workflow=False, allow_redirects=allow_redirects):
    """Issue a GET request for *url* using the globally configured options.

    Headers, timeout, proxies and certificate verification come from the
    module-level configuration.  ``requests`` is a shared ``Session`` when
    ``allow_http_session`` is enabled, so cookies persist between calls.

    Args:
        url: address to fetch.
        body_content_workflow: forwarded as ``stream``; when true the body
            is not downloaded until it is accessed.
        allow_redirects: whether redirects are followed.

    Returns:
        The response object, or an empty ``Response`` instance when the
        request raised any exception (best effort: callers never see the
        error, only a response without a status code).
    """
    try:
        options = dict(
            stream=body_content_workflow,
            headers=headers,
            timeout=timeout,
            proxies=proxies,
            allow_redirects=allow_redirects,
            verify=allow_ssl_verify,
        )
        return requests.get(url, **options)
    except Exception:
        # Hand back an empty requests result object instead of failing.
        return __requests__.models.Response()
def get_segments(url):
    """List the directory URLs that make up the path of *url*.

    The URL is split with ``UrlSplitParser``; every entry of its
    ``get_paths()['segment']`` list is prefixed with the parser's base URL.

    Args:
        url: absolute URL to decompose.

    Returns:
        A list of strings, one per path segment reported by the parser.
    """
    splitter = UrlSplitParser(urlparse.urlparse(url))
    return [splitter.baseurl + piece for piece in splitter.get_paths()['segment']]
class LinksParser(object):
    """Collect the link targets referenced by a fetched HTML page.

    The page is parsed with BeautifulSoup (``lxml`` parser) and the
    ``href``/``src`` targets of ``<a>``, ``<link>``, ``<img>`` and
    ``<script>`` tags are gathered, per tag name, in ``self.url_links``.
    """

    def __init__(self, html_content):
        """Parse *html_content*, an object exposing ``url`` and ``text``."""
        super(LinksParser, self).__init__()
        self.html_content = html_content
        # One bucket of absolute URLs per tag name.
        self.url_links = {
            'a': [],
            'link': [],
            'img': [],
            'script': [],
        }
        self.url = self.html_content.url
        self.baseurl = get_baseurl(self.url)
        self.soup = BeautifulSoup(self.html_content.text, 'lxml')

    def complet_url(self, link):
        """Turn *link* into an absolute URL, or return ``False`` to skip it.

        * links starting with ``/`` or ``.`` are joined onto the site base URL;
        * links starting with ``http`` (which covers ``https``) are returned
          unchanged;
        * fragment links (``#...``) are joined onto the page URL, to cope
          with sites that route through the fragment;
        * anything else (bare relative names, ``mailto:``, empty strings, ...)
          yields ``False``.
        """
        if link.startswith(('/', '.')):
            return urlparse.urljoin(self.baseurl, link)
        if link.startswith('http'):
            return link
        if link.startswith('#'):
            return urlparse.urljoin(self.url, link)
        return False

    def _collect(self, tag_name, attr_name):
        """Append every usable *attr_name* target of *tag_name* tags to its bucket."""
        bucket = self.url_links[tag_name]
        for tag in self.soup.find_all(tag_name):
            if attr_name not in tag.attrs:
                continue
            resolved = self.complet_url(tag.attrs[attr_name].strip())
            if resolved:
                bucket.append(resolved)
        return self.url_links

    def getall(self):
        """Scan all four tag kinds and return ``{page_url: url_links}``.

        Each bucket is de-duplicated, so the order of links within a bucket
        is not preserved.
        """
        self.get_tag_a()
        self.get_tag_link()
        self.get_tag_img()
        self.get_tag_script()
        for category in list(self.url_links):
            self.url_links[category] = list(set(self.url_links[category]))
        return {self.url: self.url_links}

    def get_tag_a(self):
        """Collect ``<a href>`` targets; returns ``self.url_links``."""
        return self._collect('a', 'href')

    def get_tag_link(self):
        """Collect ``<link href>`` targets; returns ``self.url_links``."""
        return self._collect('link', 'href')

    def get_tag_img(self):
        """Collect ``<img src>`` targets; returns ``self.url_links``."""
        return self._collect('img', 'src')

    def get_tag_script(self):
        """Collect ``<script src>`` targets; returns ``self.url_links``."""
        return self._collect('script', 'src')
= ['.sql','.bak','.sql.tar.gz','.sql.zip','.sql.rar'] 40 | 41 | # 线程数 42 | threads_count = 32 43 | 44 | # ------------------------------------------------- 45 | # requests 配置项 46 | # ------------------------------------------------- 47 | 48 | # 超时时间 49 | timeout = 10 50 | 51 | # 是否允许URL重定向 52 | allow_redirects = True 53 | 54 | # 是否允许继承http Request类的Session支持,在发出的所有请求之间保持cookies。 55 | allow_http_session = True 56 | 57 | # 是否允许随机User-Agent 58 | allow_random_useragent = True 59 | 60 | # 是否允许随机X-Forwarded-For 61 | allow_random_x_forward = True 62 | 63 | # 代理配置 64 | proxies = { 65 | # "http": "http://user:pass@10.10.1.10:3128/", 66 | # "https": "http://10.10.1.10:1080", 67 | # "http": "http://127.0.0.1:8118", # TOR 洋葱路由器 68 | } 69 | 70 | # 随机HTTP头 71 | USER_AGENTS = [ 72 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20", 73 | "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)", 74 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)", 75 | "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)", 76 | "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)", 77 | "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)", 78 | "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)", 79 | "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)", 80 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)", 81 | 
def random_x_forwarded_for(condition=False):
    """Produce the address used for the spoofed forwarded-for header.

    Args:
        condition: when true, build a random dotted quad whose four octets
            are each drawn from 1..254; otherwise use the fixed address.

    Returns:
        A dotted-quad string; ``'8.8.8.8'`` when *condition* is false.
    """
    if not condition:
        return '8.8.8.8'
    octets = [random.randint(1, 254) for _ in range(4)]
    return '%d.%d.%d.%d' % tuple(octets)
def start_wyspider(siteurl): # entry point: run the crawler, then the fuzzers
    """Crawl *siteurl*, build fuzz candidates from what was found, and probe them.

    Steps, in order:
      1. normalise the target URL and derive the domain filter;
      2. load the wordlists (backup/temp extensions, file names, directories);
      3. crawl the site and derive known directories and per-URL file variants;
      4. drop servers that are dead or whose error handling makes results
         unjudgeable (``checksite_possibility`` reports ``considered`` False);
      5. fuzz directories under the site root, then fuzz files in every
         directory that survived;
      6. replace any result bucket larger than ``resulst_cnt_val`` with a
         ``'misdescription cleaned'`` marker, treating it as false positives.

    Args:
        siteurl: target site; ``http://`` is prepended when it has no ``://``.

    Returns:
        ``{'dirs': possibility_urls, 'files': existing_files}`` where ``dirs``
        maps a server URL to a list of directory URLs and ``files`` maps a
        server URL to ``{first_path_segment: [file URLs]}``.
    """
    # Normalise the target.
    if "://" not in siteurl:
        siteurl = 'http://%s' % siteurl.rstrip('/')
    siteurl = siteurl.rstrip('/')
    basedomain = get_basedomain(siteurl)

    print '-' * 50
    print '* scan %s start' % siteurl
    print '-' * 50

    # Load the wordlists.
    fuzz_bak = ProcessDic(package_ext_dict).parser()
    fuzz_tmp = ProcessDic(tempfile_ext_dict).parser()

    bak_ext_re = '|'.join(fuzz_bak).replace('.', '\.') # regex alternation of the common backup extensions
    fuzz_filename_replace = {'%EXT%':default_extion,'%BAK_EXT%':bak_ext_re}
    fuzz_filename = ProcessDic(filename_dict,fuzz_filename_replace).parser()

    fuzz_webdirs = ProcessDic(directory_dict).parser()
    fuzz_webdirs_common = ProcessDic(directory_common_dict).parser() # loaded but not used below

    # Crawl: given a siteurl, returns the link resources up to three levels deep.
    link_datas = GetAllLink(siteurl).start()

    fuzzdir_request_set = {} # directory fuzz requests, keyed by server URL
    fuzzfile_request_set = [] # file fuzz requests derived from crawled URLs

    # Walk the crawled links to collect every known web directory and to
    # generate the per-URL file variants.
    url_webdirs = []
    for category in link_datas.keys():
        if category == 'a': # all <a href> resources
            for key in link_datas[category].keys(): # this key is only the domain-level entry
                for second_key in link_datas[category][key].keys(): # second-level URLs
                    url_webdirs.extend(get_segments(second_key))
                    urlgenerator_obj = UrlGenerator(second_key, fuzz_bak, fuzz_tmp, extion=default_extion)
                    fuzzfile_request_set.extend(urlgenerator_obj.generator())
                    for url_values in link_datas[category][key][second_key]: # third-level links under that URL
                        url_webdirs.extend(get_segments(url_values))
                        urlgenerator_obj = UrlGenerator(url_values, fuzz_bak, fuzz_tmp, extion=default_extion)
                        fuzzfile_request_set.extend(urlgenerator_obj.generator())
        else: # static resources (img, link, script)
            pass # static resources are not processed for now

    url_webdirs = list(set(url_webdirs))
    possibility_urls = {siteurl:[]} # directories to fuzz, keyed by server URL
    possibility_files = {siteurl:[]} # files to fuzz, keyed by server URL

    for webdir in url_webdirs: # group the known directories by server
        if basedomain in webdir: # keep only URLs matching the domain filter
            httpurl = urlparse.urlparse(webdir).scheme+'://'+urlparse.urlparse(webdir).netloc+'/'
            if not possibility_urls.has_key(httpurl):
                possibility_urls[httpurl] = []
            possibility_urls[httpurl].append(webdir.rstrip('/')+'/')

    possibility_info = {} # per-server result of checksite_possibility
    for httpurl in possibility_urls.keys(): # drop servers that cannot be judged reliably
        if not checksite_isalive(httpurl): # no status code came back: request failed
            del possibility_urls[httpurl]
        else:
            possibility = checksite_possibility(httpurl)
            if not possibility['considered']: # server masks errors; fuzz results would be meaningless
                del possibility_urls[httpurl]
            else:
                possibility_info[httpurl] = possibility

    if checksite_isalive(siteurl): # the root server answers
        siteurl_possibility = checksite_possibility(siteurl)
        if siteurl_possibility['considered']: # only fuzz the root when its responses can be judged
            # Build the directory fuzz list for the site root.
            possibility_info[siteurl] = siteurl_possibility
            for root_fuzz_dir in fuzz_webdirs:
                url = siteurl.rstrip('/')+root_fuzz_dir.rstrip('/')+'/'
                if not fuzzdir_request_set.has_key(siteurl):
                    fuzzdir_request_set[siteurl] = []
                fuzzdir_request_set[siteurl].append(url)
            # NOTE(review): the nesting of the next four statements (after the
            # loop rather than inside it) is inferred — confirm against the
            # original file.
            rootdir = siteurl.rstrip('/') # add the site root itself
            fuzzdir_request_set[siteurl].append(rootdir) # ...and the file variants derived from it
            urlgenerator_obj = UrlGenerator(rootdir, fuzz_bak, fuzz_tmp, extion=default_extion)
            possibility_files[siteurl].extend(urlgenerator_obj.generator())

    for http_siteurl in fuzzdir_request_set.keys():
        if not possibility_urls.has_key(http_siteurl): possibility_urls[http_siteurl] = []
        # Build the directory URL list handed to the HTTP fuzz engine.
        request_webdirs = list(set(fuzzdir_request_set[http_siteurl]))
        refer_to_val = possibility_info[http_siteurl]['refer_to_val']
        httpfuzz_result = FuzzEnginer(request_webdirs, refer_to_val=refer_to_val).start()
        for status_code in httpfuzz_result: # fold the multithreaded fuzz results back in
            for url in httpfuzz_result[status_code].keys():
                possibility_urls[http_siteurl].append(url.rstrip('/')+'/')
        possibility_urls[http_siteurl] = list(set(possibility_urls[http_siteurl]))

    existing_files = {} # files found to exist
    for httpsite in possibility_urls.keys(): # combine every directory with every wordlist file name
        if not possibility_files.has_key(httpsite):
            possibility_files[httpsite] = []
        for http_dirurl in possibility_urls[httpsite]:
            for fuzz_request_file in fuzz_filename:
                fuzz_request_path = http_dirurl.rstrip('/')+'/'+fuzz_request_file
                possibility_files[httpsite].append(fuzz_request_path)

    # ----------------------------------------------------
    # Sort the crawl-derived file variants into their servers.
    # ----------------------------------------------------
    for fuzzfile in fuzzfile_request_set:
        if basedomain in fuzzfile:
            httpurl = urlparse.urlparse(fuzzfile).scheme+'://'+urlparse.urlparse(fuzzfile).netloc+'/'
            if not possibility_files.has_key(httpurl):
                possibility_files[httpurl] = []
            possibility_files[httpurl].append(fuzzfile)

    for http_fileurl in possibility_files.keys(): # drop servers that cannot be judged reliably
        if not checksite_isalive(http_fileurl): # no status code came back: request failed
            del possibility_files[http_fileurl]
        else:
            possibility = checksite_possibility(http_fileurl)
            if not possibility['considered']: # server masks errors; fuzz results would be meaningless
                del possibility_files[http_fileurl]
            else:
                possibility_info[http_fileurl] = possibility

    for http_fileurl in possibility_files.keys():
        request_files = list(set(possibility_files[http_fileurl]))
        refer_to_val = possibility_info[http_fileurl]['refer_to_val']
        httpfuzz_result = FuzzEnginer(request_files, refer_to_val=refer_to_val).start()
        for status_code in httpfuzz_result: # fold the multithreaded fuzz results back in
            for fileurl in httpfuzz_result[status_code].keys():
                if not existing_files.has_key(http_fileurl):
                    existing_files[http_fileurl] = {}
                first_segment = get_first_segment(fileurl) # group hits by their first path segment
                if not existing_files[http_fileurl].has_key(first_segment):
                    existing_files[http_fileurl][first_segment] = []
                existing_files[http_fileurl][first_segment].append(fileurl)

    print '-' * 50
    print '* scan complete...'
    print '-' * 50

    # False-positive cleanup: an oversized bucket is assumed to be noise.
    for httpsite in existing_files.keys():
        for first_segment in existing_files[httpsite].keys():
            if len(existing_files[httpsite][first_segment]) > resulst_cnt_val:
                existing_files[httpsite][first_segment] = ['misdescription cleaned']

    for httpsite in possibility_urls.keys():
        if len(possibility_urls[httpsite]) > resulst_cnt_val:
            possibility_urls[httpsite] = ['misdescription cleaned']

    return {'dirs':possibility_urls,'files':existing_files}
/phpsso_server/caches/configs/system.php -------------------------------------------------------------------------------- /dict/dependents.lst: -------------------------------------------------------------------------------- 1 | %DEPEN_NAME%{re=exrex:[0-9]}$ 2 | %DEPEN_NAME%{re=exrex:(200[0-9])|(201[0-5])}$ 3 | %DEPEN_NAME%{re=exrex:(!?!|!!!)|(@?@|@@@)|123}$ 4 | %EXT%{re=exrex:(!?!|!!!)|(@?@|@@@)|123}$ 5 | %DOMAIN%{re=exrex:(!?!|!!!)|(@?@|@@@)|123}$ 6 | %DOMAIN%{re=exrex:(!?!|!!!)|(@?@|@@@)|123}$ 7 | %DOMAIN%{re=exrex:(!?!|!!!)|(@?@|@@@)|123}$ 8 | admin{re=exrex:[0-9]}$ -------------------------------------------------------------------------------- /dict/directory.lst: -------------------------------------------------------------------------------- 1 | /{date=year:2010-2015}$ 2 | /{date=year_mon:201001-201512}$ 3 | /a3 4 | /abstract 5 | /account 6 | /act 7 | /action 8 | /activity 9 | /ad 10 | /address 11 | /ajax 12 | /alarm 13 | /api 14 | /app 15 | /ar 16 | /attachment 17 | /auth 18 | /authority 19 | /autounittestcode 20 | /award 21 | /back 22 | /backup 23 | /bak 24 | /base 25 | /bd 26 | /bg 27 | /bin 28 | /blacklist 29 | /blog 30 | /bootstrap 31 | /brand 32 | /build 33 | /cache 34 | /caching 35 | /cacti 36 | /zabbix 37 | /cake 38 | /captcha 39 | /category 40 | /cdn 41 | /ch 42 | /check 43 | /city 44 | /class 45 | /classes 46 | /classic 47 | /client 48 | /cluster 49 | /collection 50 | /gouwu 51 | /group 52 | /gss -------------------------------------------------------------------------------- /dict/directory_common.lst: -------------------------------------------------------------------------------- 1 | /{re=exrex:[0-9]|[a-z]}$ 2 | /{re=exrex:(test|admin|manager|manage)[0-9]?}$ 3 | /comment 4 | /commit 5 | /common 6 | /commons 7 | /components 8 | /conf 9 | /config 10 | /confs 11 | /console 12 | /consumer 13 | /content 14 | /control 15 | /controllers 16 | /core 17 | /crontab 18 | /crud 19 | /css 20 | /daily 21 | /dashboard 22 | /data 23 | /database 24 | /db 25 | 
/default 26 | /demo 27 | /dev 28 | /doc 29 | /download 30 | /duty 31 | /es 32 | /eva 33 | /examples 34 | /excel 35 | /export 36 | /ext 37 | /fckeditor 38 | /FCKeditor 39 | /fe 40 | /feature 41 | /file 42 | /files 43 | /finance 44 | /flashchart 45 | /follow 46 | /frame 47 | /framework 48 | /ft 49 | /gallery 50 | /game 51 | /gift 52 | /gold 53 | /hello 54 | /helper 55 | /helpers 56 | /history 57 | /home 58 | /hr 59 | /htdocs 60 | /html 61 | /hunter 62 | /image 63 | /img11 64 | /import 65 | /improve 66 | /inc 67 | /include 68 | /includes 69 | /index 70 | /info 71 | /install 72 | /interface 73 | /item 74 | /jobconsume 75 | /jobs 76 | /jpgraph 77 | /json 78 | /kindeditor 79 | /l 80 | /languages 81 | /lib 82 | /libraries 83 | /libs 84 | /link 85 | /lite 86 | /local 87 | /log 88 | /login 89 | /logs 90 | /mail 91 | /main 92 | /maintenance 93 | /manage 94 | /manager 95 | /manufacturer 96 | /menus 97 | /message 98 | /mis 99 | /model 100 | /models 101 | /module 102 | /modules 103 | /monitor 104 | /movie 105 | /mysql 106 | /n 107 | /nav 108 | /network 109 | /news 110 | /notice 111 | /nw 112 | /oauth 113 | /other 114 | /page 115 | /pages 116 | /passport 117 | /pay 118 | /pcheck 119 | /people 120 | /person 121 | /php 122 | /phpadmin 123 | /phprpc 124 | /phptest 125 | /picture 126 | /pl 127 | /platform 128 | /pm 129 | /portal 130 | /post 131 | /product 132 | /project 133 | /protected 134 | /proxy 135 | /ps 136 | /public 137 | /qq 138 | /question 139 | /quote 140 | /redirect 141 | /redisclient 142 | /report 143 | /resource 144 | /resources 145 | /s 146 | /save 147 | /schedule 148 | /schema 149 | /script 150 | /scripts 151 | /search 152 | /security 153 | /server 154 | /service 155 | /shell 156 | /show 157 | /simple 158 | /site 159 | /skin 160 | /sms 161 | /soap 162 | /sola 163 | /sort 164 | /spider 165 | /sql 166 | /stat 167 | /static 168 | /statistics 169 | /stats 170 | /submit 171 | /subways 172 | /survey 173 | /sv 174 | /syslog 175 | /system 176 | /tag 177 | /task 178 | /tasks 
179 | /tcpdf 180 | /template 181 | /templates 182 | /test 183 | /tests 184 | /ticket 185 | /tieba 186 | /tmp 187 | /token 188 | /tool 189 | /tools 190 | /top 191 | /tpl 192 | /txt 193 | /upload 194 | /uploadify 195 | /uploads 196 | /url 197 | /user 198 | /util 199 | /v1 200 | /v2 201 | /phpmyadmin 202 | /vendor 203 | /view 204 | /views 205 | /web 206 | /weixin 207 | /widgets 208 | /wm 209 | /zabiix 210 | /nagios 211 | /wordpress 212 | /workspace 213 | /ws 214 | /www 215 | /www2 216 | /bbs 217 | /bbs2 218 | /forum 219 | /wwwroot 220 | /zone 221 | /admin 222 | /admin_bak -------------------------------------------------------------------------------- /dict/filename.lst: -------------------------------------------------------------------------------- 1 | {re=exrex:[0-9]|[a-z]}$.%EXT% 2 | {re=exrex:www|www[0-9]|wwwroot|webroot|html|htdocs|var|nginx|apache|tomcat|php|jsp|asp}$.%EXT% 3 | {date=year:2010-2015}$.%EXT% 4 | {int=digits#3:0-9}$.%EXT% 5 | {re=exrex:(wwwroot|www|htdocs|web|ww|w|website|backup|back|site|http|admin|default|webroot|index|main|global|include|includes|common)(%BAK_EXT%)}$ 6 | .svn/entries 7 | .git/config 8 | .DS_Store 9 | activity.%EXT% 10 | add.%EXT% 11 | admin.html 12 | admin.%EXT% 13 | api.%EXT% 14 | auto.%EXT% 15 | base.%EXT% 16 | cache.%EXT% 17 | call.%EXT% 18 | callback.%EXT% 19 | check.%EXT% 20 | combine.%EXT% 21 | common.%EXT% 22 | communicate.%EXT% 23 | compare.%EXT% 24 | conf.%EXT% 25 | config.%EXT% 26 | connect.%EXT% 27 | debug.%EXT% 28 | core.%EXT% 29 | cp.%EXT% 30 | crossdomain.%EXT% 31 | crossdomain2.%EXT% 32 | curl.%EXT% 33 | data.%EXT% 34 | db.%EXT% 35 | define.%EXT% 36 | demo.%EXT% 37 | demo1.%EXT% 38 | demo2.%EXT% 39 | demo3.%EXT% 40 | devid.%EXT% 41 | dispatch.%EXT% 42 | dump.%EXT% 43 | error.%EXT% 44 | export.%EXT% 45 | extra.%EXT% 46 | faq.html 47 | file.%EXT% 48 | forum.%EXT% 49 | hello.%EXT% 50 | help.%EXT% 51 | ile.%EXT% 52 | index.%EXT% 53 | index.%EXT%.rar 54 | index.%EXT%~ 55 | index1.%EXT% 56 | info.%EXT% 57 | 
interface.%EXT% 58 | json.%EXT% 59 | jsonp.%EXT% 60 | key.%EXT% 61 | left.%EXT% 62 | log.%EXT% 63 | logger.%EXT% 64 | login.html 65 | login.%EXT% 66 | login2.%EXT% 67 | logout.%EXT% 68 | memcache.%EXT% 69 | msg.%EXT% 70 | notify.%EXT% 71 | nspj.%EXT% 72 | oauth2.%EXT% 73 | ogin.%EXT% 74 | onfig.%EXT% 75 | page.%EXT% 76 | pay.%EXT% 77 | %EXT%.%EXT% 78 | %EXT%info.%EXT% 79 | pic.%EXT% 80 | point.%EXT% 81 | popen.%EXT% 82 | portalindex.%EXT% 83 | protect.%EXT% 84 | pv.%EXT% 85 | qq.%EXT% 86 | query.%EXT% 87 | rank.%EXT% 88 | read.%EXT% 89 | reader.%EXT% 90 | readfile.%EXT% 91 | register.%EXT% 92 | search.%EXT% 93 | serverinfo.%EXT% 94 | setting.%EXT% 95 | single.%EXT% 96 | stat.%EXT% 97 | status.%EXT% 98 | style.%EXT% 99 | survey.%EXT% 100 | test.html 101 | test.%EXT% 102 | time.%EXT% 103 | tips.%EXT% 104 | tool.%EXT% 105 | tools.%EXT% 106 | top.%EXT% 107 | txsphzh.%EXT% 108 | tz.%EXT% 109 | up.%EXT% 110 | update.%EXT% 111 | upload.html 112 | upload.%EXT% 113 | uploadfile.%EXT% 114 | userinfo.%EXT% 115 | version.jsp 116 | webadmin.%EXT% 117 | weixin.%EXT% 118 | wiki.%EXT% 119 | ws.%EXT% 120 | wx.%EXT% 121 | xmlrpc.%EXT% 122 | database.inc 123 | common.inc 124 | db.inc 125 | connect.inc 126 | conn.inc 127 | sql.inc 128 | debug.inc -------------------------------------------------------------------------------- /dict/package_ext.lst: -------------------------------------------------------------------------------- 1 | .rar 2 | .zip 3 | .gz 4 | .tar 5 | .tgz 6 | .tar.gz 7 | .7z 8 | .z 9 | .bz2 10 | .tar.bz2 11 | .iso 12 | .cab -------------------------------------------------------------------------------- /dict/tmpfile_ext.lst: -------------------------------------------------------------------------------- 1 | ~ 2 | .~ 3 | .bak 4 | .BAK 5 | .{re=exrex:[0-3]}$ 6 | {re=exrex:[0-3]}$ 7 | .swp 8 | .tmp 9 | ! 
def start_getlinks(siteurl):
    """Crawl *siteurl* and return the collected links as a JSON string.

    :param siteurl: Site address, with or without an ``http://`` or
        ``https://`` scheme. A bare host name is treated as plain HTTP.
    :returns: ``json.dumps`` output (2-space indent) of whatever
        ``GetAllLink(siteurl).start()`` returns.
    """
    # Only prepend a scheme when none is present. The previous check looked
    # for 'http://' alone, so 'https://host' became 'http://https://host'.
    if not siteurl.lower().startswith(('http://', 'https://')):
        siteurl = 'http://%s' % siteurl
    # Drop trailing slashes so the crawler always gets a canonical base URL.
    siteurl = siteurl.rstrip('/')
    link_datas = GetAllLink(siteurl).start()
    return json.dumps(link_datas, indent=2)
class ProcessDic(object):
    """Load a dictionary file and expand it into a flat list of fuzz entries.

    Each line of the file first has the fixed placeholders from
    ``replace_dict`` substituted (for example ``{'%EXT%': 'jsp'}``) and is
    then handed to ``DictParser``; when the parser yields results they
    replace the line, otherwise the line is kept as-is.
    """

    def __init__(self, dicfile, replace_dict=None):
        """
        :param dicfile: Path of the dictionary file to read.
        :param replace_dict: Optional mapping of placeholder -> replacement
            applied to every line before pattern expansion.
        """
        super(ProcessDic, self).__init__()
        self.dicfile = dicfile
        # Use a fresh dict per instance instead of a shared mutable default.
        self.replace_dict = {} if replace_dict is None else replace_dict

    def parser(self):
        """Return the expanded list of dictionary entries.

        :returns: list of strings, in file order, with each line replaced by
            its ``DictParser`` expansion when the parser produced one.
        """
        # Read the dictionary file into memory.
        lines = list(FileUtils.getLines(self.dicfile))

        # Apply every fixed placeholder substitution to every line.
        # ``str.replace`` is a no-op when the placeholder is absent, so no
        # separate membership test is needed.
        for placeholder, value in self.replace_dict.items():
            lines = [line.replace(placeholder, value) for line in lines]

        # Run each line through the pattern engine once.
        fuzz_lst = []
        for line in lines:
            expanded = DictParser(line).parse()
            if expanded:
                fuzz_lst.extend(expanded)
            else:
                fuzz_lst.append(line)
        return fuzz_lst
25 | while True: 26 | if self.queue.empty(): 27 | break 28 | # 用hack方法,no_timeout读取Queue队列,直接异常退出线程避免阻塞 29 | try: 30 | url = self.queue.get_nowait() 31 | response_obj = LinksParser(http_request_get(url)) 32 | resources[url] = response_obj.getall() 33 | except Exception, e: 34 | # print e # 队列阻塞 35 | break 36 | 37 | def start(self): 38 | # 爬取三层结构基本上就能够覆盖90%链接了,动态结构层将来改进 39 | link_datas = { 40 | 'a' : {}, 41 | 'img' : {}, 42 | 'link' : {}, 43 | 'script' : {} 44 | } 45 | 46 | # 获取根网页的所有链接内容 47 | response_obj = LinksParser(http_request_get(self.siteurl)) 48 | links_res = response_obj.getall() 49 | 50 | for key in links_res.keys(): # 只有A链接的资源需要进一步处理,img, link, script,硬编码实现 51 | link_arr = {} 52 | for link in links_res[key]['a']: 53 | link_arr[link] = [] 54 | link_datas['a'][key] = link_arr 55 | 56 | for proc_key in ['img', 'link', 'script']: # 硬编码逐个处理静态资源 57 | netloc = urlparse.urlparse(key).netloc 58 | link_datas[proc_key][netloc] = [] 59 | for link in links_res[key][proc_key]: 60 | link_datas[proc_key][netloc].append(link) 61 | 62 | # 生成任务队列 63 | queue = Queue.Queue() 64 | 65 | second_links = link_arr.keys() # 开始处理第二层链接,不用递归方法 66 | for url in second_links: 67 | netloc = urlparse.urlparse(url).netloc 68 | if self.basedomain in netloc: # 检查链接的hostname部分是否与主域名相同 69 | queue.put(url) 70 | 71 | global resources 72 | resources = {} 73 | threads = [] # 初始化线程组 74 | for i in xrange(threads_count): 75 | threads.append(self.WyWorker(queue)) 76 | for t in threads: # 启动线程 77 | t.start() 78 | for t in threads: # 等待线程执行结束后,回到主线程中 79 | t.join() 80 | 81 | for rkey in resources.keys(): # 多线程任务结束,遍历结果数组 82 | if link_datas['a'][key].has_key(rkey): 83 | link_datas['a'][key][rkey] = resources[rkey].values()[0]['a'] 84 | 85 | for proc_key in ['img', 'link', 'script']: # 硬编码逐个处理静态资源 86 | for rkey in resources.keys(): 87 | for link in resources[rkey].keys(): # 初始化创建对应的[netloc]数组 88 | netloc = urlparse.urlparse(link).netloc 89 | if not link_datas[proc_key].has_key(netloc): 90 | 
link_datas[proc_key][netloc] = [] 91 | for res_link in resources[rkey][link][proc_key]: # 处理所有netloc对应的静态资源压入数组 92 | link_datas[proc_key][netloc].append(res_link) 93 | 94 | return link_datas 95 | 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /libs/HttpFuzzEnginer.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # 传递一个queue队列,判断是否该队列内URL的HTTP请求状态,是否符合config内定义的exclude_status状态码 3 | # email: ringzero@0x557.org 4 | 5 | import sys 6 | sys.path.append("../") 7 | from config import * 8 | from common import * 9 | import threading 10 | import Queue 11 | import re 12 | 13 | class FuzzEnginer(object): 14 | """docstring for FuzzEnginer""" 15 | def __init__(self, urls, refer_to_val=0): 16 | super(FuzzEnginer, self).__init__() 17 | self.urls = urls 18 | self.refer_to_val = refer_to_val 19 | 20 | class FuzzWorker(threading.Thread): 21 | def __init__(self, queue): 22 | threading.Thread.__init__(self) 23 | self.queue = queue 24 | 25 | def run(self): 26 | while True: 27 | if self.queue.empty(): 28 | break 29 | try: # 用hack方法,no_timeout读取Queue队列,直接异常退出线程避免阻塞 30 | url = self.queue.get_nowait() 31 | results = http_request_get(url) 32 | # print "[%s] %s => %s" % (results.status_code, url, results.url) # 客户端调试信息 33 | if results.status_code in exclude_status: 34 | print "[%s] %s => %s" % (results.status_code, url, results.url) # 客户端调试信息 35 | # 加入是否为备份文件结尾判断 36 | if results.headers.get('content-length'): # 存在content-length属性 37 | is_redirect = True if len(results.history) > 0 else False 38 | if not is_redirect: # 未发生url跳转 39 | # 如果返回了content-length属性,同时大小<100KB,加入404错误定义检测 1000 = 1k, 100kb = 100000 40 | if int(results.headers.get('content-length')) < 20000: 41 | regex = re.compile(page_not_found_reg) 42 | if not regex.findall(results.text): # print '找到错误定义,成功返回404信息' 43 | resources[results.status_code][url] = 
class UrlSplitParser(object):
    """Split a parsed URL into the pieces used to build fuzzing candidates.

    The constructor takes the object returned by ``urlparse.urlparse`` and
    exposes the URL components together with ``self.dependent``: a
    de-duplicated list of words taken from the query string, the fragment,
    the path, the domain labels and the file extension.
    """

    def __init__(self, urlobj, extion=default_extion):
        """
        :param urlobj: Result of ``urlparse.urlparse(url)``.
        :param extion: Extension to assume when the URL path has none.
        """
        super(UrlSplitParser, self).__init__()
        self.url = urlobj.geturl()
        self.scheme = urlobj.scheme
        self.netloc = urlobj.netloc
        self.path = urlobj.path
        self.paths = self.split_path()
        self.query = urlobj.query
        self.fragment = urlobj.fragment
        # Run the TLD extraction once instead of once per attribute.
        tld_info = extract(urlobj.netloc)
        self.domain = tld_info.domain
        self.rootdomain = tld_info.registered_domain
        self.subdomain = tld_info.subdomain.split('.')
        self.domain_info = self.get_domain_info()
        self.extion = extion
        self.file_ext = self.get_extion()
        self.urlfile = self.get_urlfile()
        self.baseurl = self.scheme + '://' + self.netloc
        self.dependent = self.get_dependent()

    def parse(self):
        """Return the main URL components as a plain dict."""
        urlsplit = {}
        urlsplit['url'] = self.url
        urlsplit['scheme'] = self.scheme
        urlsplit['netloc'] = self.netloc
        urlsplit['query'] = self.split_query()
        urlsplit['path'] = self.split_path()
        urlsplit['extion'] = self.get_extion()
        urlsplit['fragment'] = self.fragment
        return urlsplit

    def split_query(self):
        """Return the query string as a ``{key: value}`` dict.

        A parameter without ``=`` maps to ``''``. Empty pairs (an empty query
        string or a stray ``&``) are skipped, so a URL without a query yields
        ``{}`` rather than ``{'': ''}``.
        """
        query = {}
        for pair in self.query.split('&'):
            if not pair:
                continue
            parts = pair.split('=')
            query[parts[0]] = parts[1] if len(parts) > 1 else ''
        return query

    def split_path(self):
        """Return the non-empty ``/``-separated segments of the URL path."""
        return [segment for segment in self.path.split('/') if segment != '']

    def split_fragment(self):
        """Return the non-empty ``=``-separated pieces of the URL fragment."""
        return [piece for piece in self.fragment.split('=') if piece != '']

    def get_domain_info(self):
        """Return the subdomain labels followed by the registered domain name.

        NOTE: this appends to ``self.subdomain`` in place, so after
        construction ``self.subdomain`` and ``self.domain_info`` are the same
        list. That aliasing is kept as-is for backward compatibility.
        """
        subdomain = self.subdomain
        subdomain.append(self.domain)
        if '' in subdomain:
            subdomain.remove('')
        return subdomain

    def get_dependent(self):
        """Build the de-duplicated list of words derived from this URL."""
        query = self.split_query()
        dependent = []
        dependent.extend(query.keys())
        dependent.extend(query.values())
        dependent.extend(self.split_fragment())
        dependent.extend(self.get_paths()['path'])
        dependent.extend(self.domain_info)
        dependent.append(self.file_ext)
        dependent = list(set(dependent))
        if '' in dependent:
            dependent.remove('')
        return dependent

    def get_extion(self):
        """Return the extension of the last path segment, or the default.

        The default (``self.extion``) is used when the path is empty or its
        last segment contains no ``.``.
        """
        path = self.split_path()
        if path:
            filename = path[-1].split('.')
            if len(filename) > 1:
                return filename[-1]
        return self.extion

    def get_urlfile(self):
        """Return the URL path completed with a script file name/extension."""
        urlfile = self.path
        file_ext = self.get_extion()
        if file_ext:
            if urlfile == '/':
                urlfile = urlfile + 'index.' + file_ext
            elif urlfile == '':
                urlfile = urlfile + '/index.' + file_ext
            # Compare against '.ext', not 'ext': a name that merely ends in
            # the extension letters (e.g. '/myphp') still needs the suffix.
            elif not urlfile.endswith('.' + file_ext):
                urlfile = urlfile + '.' + file_ext
        return urlfile

    def get_paths(self):
        """Return the directory prefixes and path names of the URL.

        :returns: ``{'segment': [...], 'path': [...]}`` where ``segment``
            holds ``'/'`` followed by every cumulative directory prefix and
            ``path`` holds every path component; when the path does not end
            in ``/`` the final component is treated as a file and listed
            without its extension.
        """
        paths = []
        segments = ['/']
        fullpath = ''
        # A path that does not end in '/' finishes with a file name.
        ends_with_file = not self.path.endswith('/')
        last_index = len(self.paths) - 1
        for index, pathline in enumerate(self.paths):
            # Identify the file by position, not by value: an earlier
            # directory with the same name as the last component must still
            # be counted as a directory.
            if ends_with_file and index == last_index:
                suffix = '.' + self.file_ext
                # Strip only the trailing extension; str.replace would also
                # remove matching text in the middle of the name.
                if '.' in pathline and pathline.endswith(suffix):
                    pathline = pathline[:-len(suffix)]
                paths.append(pathline)
            else:
                paths.append(pathline)
                fullpath += '/' + pathline
                segments.append(fullpath)

        return {'segment': segments, 'path': paths}
def request(method, url, **kwargs):
    """Constructs and sends a :class:`Request <Request>`.
    Returns :class:`Response <Response>` object.

    :param method: method for the new :class:`Request` object.
    :param url: URL for the new :class:`Request` object.
    :param params: (optional) Dictionary or bytes to be sent in the query string for the :class:`Request`.
    :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`.
    :param json: (optional) json data to send in the body of the :class:`Request`.
    :param headers: (optional) Dictionary of HTTP Headers to send with the :class:`Request`.
    :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`.
    :param files: (optional) Dictionary of ``'name': file-like-objects`` (or ``{'name': ('filename', fileobj)}``) for multipart encoding upload.
    :param auth: (optional) Auth tuple to enable Basic/Digest/Custom HTTP Auth.
    :param timeout: (optional) How long to wait for the server to send data
        before giving up, as a float, or a (connect timeout, read timeout)
        tuple.
    :type timeout: float or tuple
    :param allow_redirects: (optional) Boolean. Set to True if POST/PUT/DELETE redirect following is allowed.
    :type allow_redirects: bool
    :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy.
    :param verify: (optional) if ``True``, the SSL cert will be verified. A CA_BUNDLE path can also be provided.
    :param stream: (optional) if ``False``, the response content will be immediately downloaded.
    :param cert: (optional) if String, path to ssl client cert file (.pem). If Tuple, ('cert', 'key') pair.

    Usage::

      >>> import requests
      >>> req = requests.request('GET', 'http://httpbin.org/get')
    """

    session = sessions.Session()
    try:
        return session.request(method=method, url=url, **kwargs)
    finally:
        # Close the session on every exit path, including when the request
        # raises. This avoids leaving sockets open, which can trigger a
        # ResourceWarning in some cases and look like a memory leak in
        # others.
        session.close()
def options(url, **kwargs):
    r"""Sends a OPTIONS request. Returns :class:`Response` object.

    :param url: URL for the new :class:`Request` object.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    """
    # The docstring is raw so that "\*" is not an invalid escape sequence.

    kwargs.setdefault('allow_redirects', True)
    return request('options', url, **kwargs)


def head(url, **kwargs):
    r"""Sends a HEAD request. Returns :class:`Response` object.

    :param url: URL for the new :class:`Request` object.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    """

    # Unlike GET/OPTIONS, HEAD does not follow redirects unless asked to.
    kwargs.setdefault('allow_redirects', False)
    return request('head', url, **kwargs)


def post(url, data=None, json=None, **kwargs):
    r"""Sends a POST request. Returns :class:`Response` object.

    :param url: URL for the new :class:`Request` object.
    :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`.
    :param json: (optional) json data to send in the body of the :class:`Request`.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    """

    return request('post', url, data=data, json=json, **kwargs)


def put(url, data=None, **kwargs):
    r"""Sends a PUT request. Returns :class:`Response` object.

    :param url: URL for the new :class:`Request` object.
    :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    """

    return request('put', url, data=data, **kwargs)
def delete(url, **kwargs):
    r"""Sends a DELETE request. Returns :class:`Response` object.

    :param url: URL for the new :class:`Request` object.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    """
    # The docstring is raw so that "\*" is not an invalid escape sequence.

    return request('delete', url, **kwargs)
class HTTPDigestAuth(AuthBase):
    """Attaches HTTP Digest Authentication to the given Request object."""

    def __init__(self, username, password):
        self.username = username
        self.password = password
        # Nonce of the last challenge answered, and how often it was reused.
        self.last_nonce = ''
        self.nonce_count = 0
        # Parsed fields of the most recent WWW-Authenticate digest challenge.
        self.chal = {}
        # Body file position recorded in __call__, used to rewind on resend.
        self.pos = None
        self.num_401_calls = 1

    def build_digest_header(self, method, url):
        """Build the ``Authorization`` header value for the stored challenge.

        :param method: HTTP method of the request being authenticated.
        :param url: Full request URL; its path and query form the digest URI.
        :returns: The ``'Digest ...'`` header string, or ``None`` when the
            challenge uses an unsupported algorithm or qop.
        """

        realm = self.chal['realm']
        nonce = self.chal['nonce']
        qop = self.chal.get('qop')
        algorithm = self.chal.get('algorithm')
        opaque = self.chal.get('opaque')

        # Stays None for algorithms other than MD5, MD5-SESS and SHA, so that
        # the check below returns None instead of raising UnboundLocalError.
        hash_utf8 = None

        if algorithm is None:
            _algorithm = 'MD5'
        else:
            _algorithm = algorithm.upper()
        # lambdas assume digest modules are imported at the top level
        if _algorithm == 'MD5' or _algorithm == 'MD5-SESS':
            def md5_utf8(x):
                if isinstance(x, str):
                    x = x.encode('utf-8')
                return hashlib.md5(x).hexdigest()
            hash_utf8 = md5_utf8
        elif _algorithm == 'SHA':
            def sha_utf8(x):
                if isinstance(x, str):
                    x = x.encode('utf-8')
                return hashlib.sha1(x).hexdigest()
            hash_utf8 = sha_utf8

        KD = lambda s, d: hash_utf8("%s:%s" % (s, d))

        if hash_utf8 is None:
            return None

        # XXX not implemented yet
        entdig = None
        p_parsed = urlparse(url)
        path = p_parsed.path
        if p_parsed.query:
            path += '?' + p_parsed.query

        A1 = '%s:%s:%s' % (self.username, realm, self.password)
        A2 = '%s:%s' % (method, path)

        HA1 = hash_utf8(A1)
        HA2 = hash_utf8(A2)

        # The nonce count increases only while the server keeps the same nonce.
        if nonce == self.last_nonce:
            self.nonce_count += 1
        else:
            self.nonce_count = 1
        ncvalue = '%08x' % self.nonce_count
        # Client nonce: hash of the counter, server nonce, time and random bytes.
        s = str(self.nonce_count).encode('utf-8')
        s += nonce.encode('utf-8')
        s += time.ctime().encode('utf-8')
        s += os.urandom(8)

        cnonce = (hashlib.sha1(s).hexdigest()[:16])
        noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, HA2)
        if _algorithm == 'MD5-SESS':
            HA1 = hash_utf8('%s:%s:%s' % (HA1, nonce, cnonce))

        if qop is None:
            respdig = KD(HA1, "%s:%s" % (nonce, HA2))
        elif qop == 'auth' or 'auth' in qop.split(','):
            respdig = KD(HA1, noncebit)
        else:
            # XXX handle auth-int.
            return None

        self.last_nonce = nonce

        # XXX should the partial digests be encoded too?
        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (self.username, realm, nonce, path, respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if algorithm:
            base += ', algorithm="%s"' % algorithm
        if entdig:
            base += ', digest="%s"' % entdig
        if qop:
            base += ', qop="auth", nc=%s, cnonce="%s"' % (ncvalue, cnonce)

        return 'Digest %s' % (base)

    def handle_redirect(self, r, **kwargs):
        """Reset num_401_calls counter on redirects."""
        if r.is_redirect:
            self.num_401_calls = 1

    def handle_401(self, r, **kwargs):
        """Takes the given response and tries digest-auth, if needed."""

        if self.pos is not None:
            # Rewind the file position indicator of the body to where
            # it was to resend the request.
            r.request.body.seek(self.pos)
        num_401_calls = getattr(self, 'num_401_calls', 1)
        s_auth = r.headers.get('www-authenticate', '')

        # Answer a digest challenge at most once per request chain.
        if 'digest' in s_auth.lower() and num_401_calls < 2:

            self.num_401_calls += 1
            pat = re.compile(r'digest ', flags=re.IGNORECASE)
            self.chal = parse_dict_header(pat.sub('', s_auth, count=1))

            # Consume content and release the original connection
            # to allow our new request to reuse the same one.
            r.content
            r.raw.release_conn()
            prep = r.request.copy()
            extract_cookies_to_jar(prep._cookies, r.request, r.raw)
            prep.prepare_cookies(prep._cookies)

            prep.headers['Authorization'] = self.build_digest_header(
                prep.method, prep.url)
            _r = r.connection.send(prep, **kwargs)
            _r.history.append(r)
            _r.request = prep

            return _r

        self.num_401_calls = 1
        return r

    def __call__(self, r):
        """Attach digest credentials and response hooks to request *r*."""
        # If we have a saved nonce, skip the 401
        if self.last_nonce:
            r.headers['Authorization'] = self.build_digest_header(r.method, r.url)
        try:
            self.pos = r.body.tell()
        except AttributeError:
            # In the case of HTTPDigestAuth being reused and the body of
            # the previous request was a file-like object, pos has the
            # file position of the previous body. Ensure it's set to
            # None.
            self.pos = None
        r.register_hook('response', self.handle_401)
        r.register_hook('response', self.handle_redirect)
        return r
"""
pythoncompat

Interpreter, platform and standard-library compatibility names shared by the
rest of the package, so callers can import one spelling on Python 2 and 3.
"""

from .packages import chardet

import sys

# -------
# Pythons
# -------

# Syntax sugar.
_ver = sys.version_info

#: Python 2.x?
is_py2 = (_ver[0] == 2)

#: Python 3.x?
is_py3 = (_ver[0] == 3)

#: Python 3.0.x
is_py30 = (is_py3 and _ver[1] == 0)

#: Python 3.1.x
is_py31 = (is_py3 and _ver[1] == 1)

#: Python 3.2.x
is_py32 = (is_py3 and _ver[1] == 2)

#: Python 3.3.x
is_py33 = (is_py3 and _ver[1] == 3)

#: Python 3.4.x
is_py34 = (is_py3 and _ver[1] == 4)

#: Python 2.7.x
is_py27 = (is_py2 and _ver[1] == 7)

#: Python 2.6.x
is_py26 = (is_py2 and _ver[1] == 6)

#: Python 2.5.x
is_py25 = (is_py2 and _ver[1] == 5)

#: Python 2.4.x
is_py24 = (is_py2 and _ver[1] == 4)   # I'm assuming this is not by choice.


# ---------
# Platforms
# ---------


# Syntax sugar.  NOTE: ``_ver`` is rebound here from the version tuple to the
# lower-cased version string; the flags above were computed before this point.
_ver = sys.version.lower()

is_pypy = ('pypy' in _ver)
is_jython = ('jython' in _ver)
is_ironpython = ('iron' in _ver)

# Assume CPython, if nothing else.
is_cpython = not any((is_pypy, is_jython, is_ironpython))

# Windows-based system.
is_windows = 'win32' in str(sys.platform).lower()

# Standard Linux 2+ system.
is_linux = ('linux' in str(sys.platform).lower())
is_osx = ('darwin' in str(sys.platform).lower())
# sys.platform is 'hp-ux<major>' on HP-UX, so the old 'hpux' probe never hit.
is_hpux = ('hp-ux' in str(sys.platform).lower())
# sys.platform is 'sunos<major>' on Solaris; the old 'solar==' probe was a
# typo that could never match any platform string.
is_solaris = ('sunos' in str(sys.platform).lower())

try:
    import simplejson as json
except (ImportError, SyntaxError):
    # simplejson does not support Python 3.2, it throws a SyntaxError
    # because of u'...' Unicode literals.
    import json

# ---------
# Specifics
# ---------

if is_py2:
    from urllib import quote, unquote, quote_plus, unquote_plus, urlencode, getproxies, proxy_bypass
    from urlparse import urlparse, urlunparse, urljoin, urlsplit, urldefrag
    from urllib2 import parse_http_list
    import cookielib
    from Cookie import Morsel
    from StringIO import StringIO
    from .packages.urllib3.packages.ordered_dict import OrderedDict

    # On Python 2 ``str`` is the byte string; expose text/bytes under the
    # Python 3 names and keep the native type reachable as ``builtin_str``.
    builtin_str = str
    bytes = str
    str = unicode
    basestring = basestring
    numeric_types = (int, long, float)


elif is_py3:
    from urllib.parse import urlparse, urlunparse, urljoin, urlsplit, urlencode, quote, unquote, quote_plus, unquote_plus, urldefrag
    from urllib.request import parse_http_list, getproxies, proxy_bypass
    from http import cookiejar as cookielib
    from http.cookies import Morsel
    from io import StringIO
    from collections import OrderedDict

    builtin_str = str
    str = str
    bytes = bytes
    basestring = (str, bytes)
    numeric_types = (int, float)
class RequestException(IOError):
    """There was an ambiguous exception that occurred while handling your
    request."""

    def __init__(self, *args, **kwargs):
        """
        Initialize RequestException with `request` and `response` objects.

        Both are taken from the keyword arguments (default ``None``).  When
        no request is given, the one attached to the response is used.
        """
        resp = kwargs.pop('response', None)
        self.response = resp
        req = kwargs.pop('request', None)
        self.request = req
        # Fall back to the request carried by the response, if any.
        if resp is not None and not req and hasattr(resp, 'request'):
            self.request = resp.request
        super(RequestException, self).__init__(*args, **kwargs)


class HTTPError(RequestException):
    """An HTTP error occurred."""


class ConnectionError(RequestException):
    """A Connection error occurred."""


class ProxyError(ConnectionError):
    """A proxy error occurred."""


class SSLError(ConnectionError):
    """An SSL error occurred."""


class Timeout(RequestException):
    """The request timed out.

    Catching this error will catch both
    :exc:`~requests.exceptions.ConnectTimeout` and
    :exc:`~requests.exceptions.ReadTimeout` errors.
    """


class ConnectTimeout(ConnectionError, Timeout):
    """The request timed out while trying to connect to the remote server.

    Requests that produced this error are safe to retry.
    """


class ReadTimeout(Timeout):
    """The server did not send any data in the allotted amount of time."""


class URLRequired(RequestException):
    """A valid URL is required to make a request."""


class TooManyRedirects(RequestException):
    """Too many redirects."""


class MissingSchema(RequestException, ValueError):
    """The URL schema (e.g. http or https) is missing."""


class InvalidSchema(RequestException, ValueError):
    """See defaults.py for valid schemas."""


class InvalidURL(RequestException, ValueError):
    """ The URL provided was somehow invalid. """


class ChunkedEncodingError(RequestException):
    """The server declared chunked encoding but sent an invalid chunk."""


class ContentDecodingError(RequestException, BaseHTTPError):
    """Failed to decode response content"""


class StreamConsumedError(RequestException, TypeError):
    """The content for this response was already consumed"""


class RetryError(RequestException):
    """Custom retries logic failed"""
HOOKS = ['response']


def default_hooks():
    """Return a fresh mapping of every event in ``HOOKS`` to an empty list."""
    return dict((event, []) for event in HOOKS)

# TODO: response is the only one


def dispatch_hook(key, hooks, hook_data, **kwargs):
    """Dispatches a hook dictionary on a given piece of data.

    :param key: event name to look up in ``hooks``.
    :param hooks: mapping of event name to a callable or an iterable of
        callables; a falsy value means no hooks.
    :param hook_data: value handed to each hook.
    :param kwargs: forwarded to every hook call.
    :returns: ``hook_data`` after each hook ran in order; a hook's return
        value replaces the data unless it is ``None``.
    """
    registry = hooks or dict()
    if key not in registry:
        return hook_data

    callbacks = registry.get(key)
    # A single callable is treated as a one-element list.
    if hasattr(callbacks, '__call__'):
        callbacks = [callbacks]

    for callback in callbacks:
        outcome = callback(hook_data, **kwargs)
        if outcome is not None:
            hook_data = outcome

    return hook_data
def detect(aBuf):
    """Run a ``UniversalDetector`` over ``aBuf`` and return its result.

    :param aBuf: the byte string to analyse.
    :raises ValueError: if ``aBuf`` is ``unicode`` on Python 2, or is not
        ``bytes`` on Python 3.
    :returns: the detector's ``result`` attribute after feeding the buffer.
    """
    if version_info < (3, 0):
        rejected = isinstance(aBuf, unicode)
    else:
        rejected = not isinstance(aBuf, bytes)
    if rejected:
        raise ValueError('Expected a bytes object, not a unicode object')

    # Imported here rather than at module level, as in the original.
    from . import universaldetector
    detector = universaldetector.UniversalDetector()
    detector.reset()
    detector.feed(aBuf)
    detector.close()
    return detector.result
def description_of(lines, name='stdin'):
    """
    Return a string describing the probable encoding of a file or
    list of strings.

    :param lines: The lines to get the encoding of.
    :type lines: Iterable of bytes
    :param name: Name of file or collection of lines
    :type name: str
    :returns: ``'<name>: <encoding> with confidence <confidence>'`` or
        ``'<name>: no result'`` when no encoding was detected.
    """
    u = UniversalDetector()
    for line in lines:
        u.feed(line)
    u.close()
    result = u.result
    if result['encoding']:
        return '{0}: {1} with confidence {2}'.format(name, result['encoding'],
                                                     result['confidence'])
    return '{0}: no result'.format(name)


def main(argv=None):
    '''
    Handles command line arguments and gets things started.

    :param argv: List of arguments, as if specified on the command-line.
                 If None, ``sys.argv[1:]`` is used instead.
    :type argv: list of str
    '''
    # The detector is fed bytes (files are opened 'rb'), so the stdin
    # fallback must be binary too.  On Python 3 sys.stdin yields text and
    # the byte stream lives on its ``buffer`` attribute; on Python 2 there
    # is no such attribute and sys.stdin already yields bytes.
    binary_stdin = getattr(sys.stdin, 'buffer', sys.stdin)

    # Get command line arguments
    parser = argparse.ArgumentParser(
        description="Takes one or more file paths and reports their detected "
                    "encodings",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        conflict_handler='resolve')
    parser.add_argument('input',
                        help='File whose encoding we would like to determine.',
                        type=argparse.FileType('rb'), nargs='*',
                        default=[binary_stdin])
    parser.add_argument('--version', action='version',
                        version='%(prog)s {0}'.format(__version__))
    args = parser.parse_args(argv)

    for f in args.input:
        if f.isatty():
            print("You are running chardetect interactively. Press " +
                  "CTRL-D twice at the start of a blank line to signal the " +
                  "end of your input. If you want help, run chardetect " +
                  "--help\n", file=sys.stderr)
        print(description_of(f, f.name))
3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | from . 
class CharSetGroupProber(CharSetProber):
    """Prober that fans input out to the probers held in ``_mProbers``.

    ``_mProbers`` starts empty here; this class only iterates over it.
    """

    def __init__(self):
        CharSetProber.__init__(self)
        self._mActiveNum = 0
        self._mProbers = []
        self._mBestGuessProber = None

    def reset(self):
        """Reset every child prober, mark it active and recount them."""
        CharSetProber.reset(self)
        self._mActiveNum = 0
        for child in self._mProbers:
            if not child:
                continue
            child.reset()
            child.active = True
            self._mActiveNum += 1
        self._mBestGuessProber = None

    def get_charset_name(self):
        """Return the best-guess child's charset name, or None without one."""
        best = self._mBestGuessProber
        if not best:
            # get_confidence() may select a best-guess prober as a side effect.
            self.get_confidence()
            best = self._mBestGuessProber
        if not best:
            return None
        return best.get_charset_name()

    def feed(self, aBuf):
        """Feed ``aBuf`` to each active child and return this prober's state."""
        for child in self._mProbers:
            if not child or not child.active:
                continue
            status = child.feed(aBuf)
            if not status:
                continue
            if status == constants.eFoundIt:
                self._mBestGuessProber = child
                return self.get_state()
            if status == constants.eNotMe:
                child.active = False
                self._mActiveNum -= 1
                # With no active child left, the whole group rules itself out.
                if self._mActiveNum <= 0:
                    self._mState = constants.eNotMe
                    return self.get_state()
        return self.get_state()

    def get_confidence(self):
        """Return the highest confidence among the active children.

        Returns 0.99 / 0.01 straight away when the group state is already
        ``eFoundIt`` / ``eNotMe``; otherwise it also records the winning
        child as the best-guess prober.
        """
        state = self.get_state()
        if state == constants.eFoundIt:
            return 0.99
        if state == constants.eNotMe:
            return 0.01

        top_score = 0.0
        self._mBestGuessProber = None
        for child in self._mProbers:
            if not child:
                continue
            if not child.active:
                if constants._debug:
                    sys.stderr.write(child.get_charset_name()
                                     + ' not active\n')
                continue
            score = child.get_confidence()
            if constants._debug:
                sys.stderr.write('%s confidence = %s\n' %
                                 (child.get_charset_name(), score))
            if top_score < score:
                top_score = score
                self._mBestGuessProber = child

        if not self._mBestGuessProber:
            return 0.0
        return top_score
class CharSetProber:
    """Base prober: default state handling plus byte-filter helpers."""

    def __init__(self):
        pass

    def reset(self):
        """Put the prober back into the ``eDetecting`` state."""
        self._mState = constants.eDetecting

    def get_charset_name(self):
        """Return None; the base prober names no charset."""
        return None

    def feed(self, aBuf):
        """Do nothing with ``aBuf`` and return None."""
        return None

    def get_state(self):
        """Return the state stored by ``reset`` (or set by a subclass)."""
        return self._mState

    def get_confidence(self):
        """Return 0.0."""
        return 0.0

    def filter_high_bit_only(self, aBuf):
        """Replace each run of bytes 0x00-0x7F in ``aBuf`` with one space."""
        return re.sub(b'([\x00-\x7F])+', b' ', aBuf)

    def filter_without_english_letters(self, aBuf):
        """Replace each run of ASCII letters in ``aBuf`` with one space."""
        return re.sub(b'([A-Za-z])+', b' ', aBuf)

    def filter_with_english_letters(self, aBuf):
        """Return ``aBuf`` unchanged."""
        # TODO
        return aBuf
class CodingStateMachine:
    """Table-driven state machine stepped one byte at a time.

    ``sm`` is a model mapping read under the keys ``classTable``,
    ``charLenTable``, ``classFactor``, ``stateTable`` and ``name``.
    """

    def __init__(self, sm):
        self._mModel = sm
        self._mCurrentBytePos = 0
        self._mCurrentCharLen = 0
        self.reset()

    def reset(self):
        """Return to the start state."""
        self._mCurrentState = eStart

    def next_state(self, c):
        """Consume byte ``c`` and return the state the machine moves to."""
        model = self._mModel
        # for each byte we get its class
        # if it is first byte, we also get byte length
        # PY3K: aBuf is a byte stream, so c is an int, not a byte
        byte_class = model['classTable'][wrap_ord(c)]
        if self._mCurrentState == eStart:
            self._mCurrentBytePos = 0
            self._mCurrentCharLen = model['charLenTable'][byte_class]
        # from byte's class and stateTable, we get its next state
        table_index = self._mCurrentState * model['classFactor'] + byte_class
        self._mCurrentState = model['stateTable'][table_index]
        self._mCurrentBytePos += 1
        return self._mCurrentState

    def get_current_charlen(self):
        """Return the character length recorded at the last start state."""
        return self._mCurrentCharLen

    def get_coding_state_machine(self):
        """Return the model's ``name`` entry."""
        return self._mModel['name']
# String types of the running interpreter: (str, unicode) on Python 2,
# (bytes, str) on Python 3.  The conditional expression only evaluates the
# branch it selects, so ``unicode`` is never looked up on Python 3.
base_str = (str, unicode) if sys.version_info < (3, 0) else (bytes, str)


def wrap_ord(a):
    """Return ``ord(a)`` for a Python 2 string, otherwise ``a`` unchanged."""
    needs_ord = sys.version_info < (3, 0) and isinstance(a, base_str)
    return ord(a) if needs_ord else a
# Truthy value makes the group prober write per-prober details to stderr.
_debug = 0

# Prober states (stored in ``_mState`` / returned by ``feed``).
eDetecting, eFoundIt, eNotMe = 0, 1, 2

# Coding state machine states (compared against ``next_state`` results).
eStart, eError, eItsMe = 0, 1, 2

# NOTE(review): presumably the confidence above which detection may stop
# early -- not used in the code visible here; confirm against callers.
SHORTCUT_THRESHOLD = 0.95
class CP949Prober(MultiByteCharSetProber):
    """Multi-byte prober built on the CP949 state machine model."""

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        # NOTE: CP949 is a superset of EUC-KR, so the distribution should be
        #       not different.
        self._mDistributionAnalyzer = EUCKRDistributionAnalysis()
        self._mCodingSM = CodingStateMachine(CP949SMModel)
        self.reset()

    def get_charset_name(self):
        """Return the charset label reported by this prober."""
        return "CP949"
class EscCharSetProber(CharSetProber):
    """Prober that runs the HZ and ISO-2022 CN/JP/KR state machines side by
    side and reports the first one that recognises the input."""

    def __init__(self):
        CharSetProber.__init__(self)
        models = (HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel,
                  ISO2022KRSMModel)
        self._mCodingSM = [CodingStateMachine(model) for model in models]
        self.reset()

    def reset(self):
        """Reactivate and reset every state machine; forget any detection."""
        CharSetProber.reset(self)
        for machine in self._mCodingSM:
            if machine:
                machine.active = True
                machine.reset()
        self._mActiveSM = len(self._mCodingSM)
        self._mDetectedCharset = None

    def get_charset_name(self):
        """Return the detected charset name, or None if nothing matched yet."""
        return self._mDetectedCharset

    def get_confidence(self):
        """Return 0.99 once a charset has been detected, else 0.00."""
        return 0.99 if self._mDetectedCharset else 0.00

    def feed(self, aBuf):
        """Step every active machine over each byte of ``aBuf``.

        Returns the prober state: as soon as one machine reports ``eItsMe``,
        or once every machine has reported ``eError``, otherwise after the
        whole buffer has been consumed.
        """
        for c in aBuf:
            # PY3K: aBuf is a byte array, so c is an int, not a byte
            for machine in self._mCodingSM:
                if not (machine and machine.active):
                    continue
                outcome = machine.next_state(wrap_ord(c))
                if outcome == constants.eError:
                    machine.active = False
                    self._mActiveSM -= 1
                    if self._mActiveSM <= 0:
                        self._mState = constants.eNotMe
                        return self.get_state()
                elif outcome == constants.eItsMe:
                    self._mState = constants.eFoundIt
                    self._mDetectedCharset = machine.get_coding_state_machine()  # nopep8
                    return self.get_state()

        return self.get_state()
# State-machine models for the escape-sequence based encodings.
#
# Each model below bundles:
#   classTable   - 256-entry tuple mapping a byte value to a class number
#   classFactor  - integer stored alongside the tables
#   stateTable   - flat tuple of next states (eStart / eError / eItsMe or a
#                  numbered intermediate state)
#   charLenTable - tuple of zeros, one entry per class
#   name         - charset name string
# NOTE(review): stateTable is presumably indexed as
# state * classFactor + class by CodingStateMachine - confirm in
# codingstatemachine.py.

# HZ: ESC (0x1b) shares class 1 with 0x00 and all bytes >= 0x80;
# '{' (0x7b), '}' (0x7d) and '~' (0x7e) get classes 4, 5 and 2.
HZ_cls = (
1,0,0,0,0,0,0,0,  # 00 - 07
0,0,0,0,0,0,0,0,  # 08 - 0f
0,0,0,0,0,0,0,0,  # 10 - 17
0,0,0,1,0,0,0,0,  # 18 - 1f
0,0,0,0,0,0,0,0,  # 20 - 27
0,0,0,0,0,0,0,0,  # 28 - 2f
0,0,0,0,0,0,0,0,  # 30 - 37
0,0,0,0,0,0,0,0,  # 38 - 3f
0,0,0,0,0,0,0,0,  # 40 - 47
0,0,0,0,0,0,0,0,  # 48 - 4f
0,0,0,0,0,0,0,0,  # 50 - 57
0,0,0,0,0,0,0,0,  # 58 - 5f
0,0,0,0,0,0,0,0,  # 60 - 67
0,0,0,0,0,0,0,0,  # 68 - 6f
0,0,0,0,0,0,0,0,  # 70 - 77
0,0,0,4,0,5,2,0,  # 78 - 7f
1,1,1,1,1,1,1,1,  # 80 - 87
1,1,1,1,1,1,1,1,  # 88 - 8f
1,1,1,1,1,1,1,1,  # 90 - 97
1,1,1,1,1,1,1,1,  # 98 - 9f
1,1,1,1,1,1,1,1,  # a0 - a7
1,1,1,1,1,1,1,1,  # a8 - af
1,1,1,1,1,1,1,1,  # b0 - b7
1,1,1,1,1,1,1,1,  # b8 - bf
1,1,1,1,1,1,1,1,  # c0 - c7
1,1,1,1,1,1,1,1,  # c8 - cf
1,1,1,1,1,1,1,1,  # d0 - d7
1,1,1,1,1,1,1,1,  # d8 - df
1,1,1,1,1,1,1,1,  # e0 - e7
1,1,1,1,1,1,1,1,  # e8 - ef
1,1,1,1,1,1,1,1,  # f0 - f7
1,1,1,1,1,1,1,1,  # f8 - ff
)

# 48 entries; the trailing comments give the flat index range of each row.
HZ_st = (
eStart,eError,     3,eStart,eStart,eStart,eError,eError,# 00-07
eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f
eItsMe,eItsMe,eError,eError,eStart,eStart,     4,eError,# 10-17
     5,eError,     6,eError,     5,     5,     4,eError,# 18-1f
     4,eError,     4,     4,     4,eError,     4,eError,# 20-27
     4,eItsMe,eStart,eStart,eStart,eStart,eStart,eStart,# 28-2f
)

HZCharLenTable = (0, 0, 0, 0, 0, 0)

HZSMModel = {'classTable': HZ_cls,
             'classFactor': 6,
             'stateTable': HZ_st,
             'charLenTable': HZCharLenTable,
             'name': "HZ-GB-2312"}

# ISO-2022-CN: ESC (0x1b) is class 1, ')' (0x29) class 3, 'C' (0x43) class 4;
# 0x00 and all bytes >= 0x80 are class 2.  Only classes 0-4 occur in this
# table although classFactor below is 9.
ISO2022CN_cls = (
2,0,0,0,0,0,0,0,  # 00 - 07
0,0,0,0,0,0,0,0,  # 08 - 0f
0,0,0,0,0,0,0,0,  # 10 - 17
0,0,0,1,0,0,0,0,  # 18 - 1f
0,0,0,0,0,0,0,0,  # 20 - 27
0,3,0,0,0,0,0,0,  # 28 - 2f
0,0,0,0,0,0,0,0,  # 30 - 37
0,0,0,0,0,0,0,0,  # 38 - 3f
0,0,0,4,0,0,0,0,  # 40 - 47
0,0,0,0,0,0,0,0,  # 48 - 4f
0,0,0,0,0,0,0,0,  # 50 - 57
0,0,0,0,0,0,0,0,  # 58 - 5f
0,0,0,0,0,0,0,0,  # 60 - 67
0,0,0,0,0,0,0,0,  # 68 - 6f
0,0,0,0,0,0,0,0,  # 70 - 77
0,0,0,0,0,0,0,0,  # 78 - 7f
2,2,2,2,2,2,2,2,  # 80 - 87
2,2,2,2,2,2,2,2,  # 88 - 8f
2,2,2,2,2,2,2,2,  # 90 - 97
2,2,2,2,2,2,2,2,  # 98 - 9f
2,2,2,2,2,2,2,2,  # a0 - a7
2,2,2,2,2,2,2,2,  # a8 - af
2,2,2,2,2,2,2,2,  # b0 - b7
2,2,2,2,2,2,2,2,  # b8 - bf
2,2,2,2,2,2,2,2,  # c0 - c7
2,2,2,2,2,2,2,2,  # c8 - cf
2,2,2,2,2,2,2,2,  # d0 - d7
2,2,2,2,2,2,2,2,  # d8 - df
2,2,2,2,2,2,2,2,  # e0 - e7
2,2,2,2,2,2,2,2,  # e8 - ef
2,2,2,2,2,2,2,2,  # f0 - f7
2,2,2,2,2,2,2,2,  # f8 - ff
)

# 64 entries; the trailing comments give the flat index range of each row.
ISO2022CN_st = (
eStart,     3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07
eStart,eError,eError,eError,eError,eError,eError,eError,# 08-0f
eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17
eItsMe,eItsMe,eItsMe,eError,eError,eError,     4,eError,# 18-1f
eError,eError,eError,eItsMe,eError,eError,eError,eError,# 20-27
     5,     6,eError,eError,eError,eError,eError,eError,# 28-2f
eError,eError,eError,eItsMe,eError,eError,eError,eError,# 30-37
eError,eError,eError,eError,eError,eItsMe,eError,eStart,# 38-3f
)

ISO2022CNCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0)

ISO2022CNSMModel = {'classTable': ISO2022CN_cls,
                    'classFactor': 9,
                    'stateTable': ISO2022CN_st,
                    'charLenTable': ISO2022CNCharLenTable,
                    'name': "ISO-2022-CN"}

# ISO-2022-JP: ESC (0x1b) is class 1; 0x00, 0x0e, 0x0f and all bytes >= 0x80
# are class 2; '$' '(' '@' 'B' 'D' 'I' 'J' get classes 7, 3, 6, 4, 8, 9, 5.
ISO2022JP_cls = (
2,0,0,0,0,0,0,0,  # 00 - 07
0,0,0,0,0,0,2,2,  # 08 - 0f
0,0,0,0,0,0,0,0,  # 10 - 17
0,0,0,1,0,0,0,0,  # 18 - 1f
0,0,0,0,7,0,0,0,  # 20 - 27
3,0,0,0,0,0,0,0,  # 28 - 2f
0,0,0,0,0,0,0,0,  # 30 - 37
0,0,0,0,0,0,0,0,  # 38 - 3f
6,0,4,0,8,0,0,0,  # 40 - 47
0,9,5,0,0,0,0,0,  # 48 - 4f
0,0,0,0,0,0,0,0,  # 50 - 57
0,0,0,0,0,0,0,0,  # 58 - 5f
0,0,0,0,0,0,0,0,  # 60 - 67
0,0,0,0,0,0,0,0,  # 68 - 6f
0,0,0,0,0,0,0,0,  # 70 - 77
0,0,0,0,0,0,0,0,  # 78 - 7f
2,2,2,2,2,2,2,2,  # 80 - 87
2,2,2,2,2,2,2,2,  # 88 - 8f
2,2,2,2,2,2,2,2,  # 90 - 97
2,2,2,2,2,2,2,2,  # 98 - 9f
2,2,2,2,2,2,2,2,  # a0 - a7
2,2,2,2,2,2,2,2,  # a8 - af
2,2,2,2,2,2,2,2,  # b0 - b7
2,2,2,2,2,2,2,2,  # b8 - bf
2,2,2,2,2,2,2,2,  # c0 - c7
2,2,2,2,2,2,2,2,  # c8 - cf
2,2,2,2,2,2,2,2,  # d0 - d7
2,2,2,2,2,2,2,2,  # d8 - df
2,2,2,2,2,2,2,2,  # e0 - e7
2,2,2,2,2,2,2,2,  # e8 - ef
2,2,2,2,2,2,2,2,  # f0 - f7
2,2,2,2,2,2,2,2,  # f8 - ff
)

# 72 entries; the trailing comments give the flat index range of each row.
ISO2022JP_st = (
eStart,     3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07
eStart,eStart,eError,eError,eError,eError,eError,eError,# 08-0f
eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17
eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,# 18-1f
eError,     5,eError,eError,eError,     4,eError,eError,# 20-27
eError,eError,eError,     6,eItsMe,eError,eItsMe,eError,# 28-2f
eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,# 30-37
eError,eError,eError,eItsMe,eError,eError,eError,eError,# 38-3f
eError,eError,eError,eError,eItsMe,eError,eStart,eStart,# 40-47
)

ISO2022JPCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)

ISO2022JPSMModel = {'classTable': ISO2022JP_cls,
                    'classFactor': 10,
                    'stateTable': ISO2022JP_st,
                    'charLenTable': ISO2022JPCharLenTable,
                    'name': "ISO-2022-JP"}

# ISO-2022-KR: ESC (0x1b) is class 1, '$' (0x24) class 3, ')' (0x29) class 4,
# 'C' (0x43) class 5; 0x00 and all bytes >= 0x80 are class 2.
ISO2022KR_cls = (
2,0,0,0,0,0,0,0,  # 00 - 07
0,0,0,0,0,0,0,0,  # 08 - 0f
0,0,0,0,0,0,0,0,  # 10 - 17
0,0,0,1,0,0,0,0,  # 18 - 1f
0,0,0,0,3,0,0,0,  # 20 - 27
0,4,0,0,0,0,0,0,  # 28 - 2f
0,0,0,0,0,0,0,0,  # 30 - 37
0,0,0,0,0,0,0,0,  # 38 - 3f
0,0,0,5,0,0,0,0,  # 40 - 47
0,0,0,0,0,0,0,0,  # 48 - 4f
0,0,0,0,0,0,0,0,  # 50 - 57
0,0,0,0,0,0,0,0,  # 58 - 5f
0,0,0,0,0,0,0,0,  # 60 - 67
0,0,0,0,0,0,0,0,  # 68 - 6f
0,0,0,0,0,0,0,0,  # 70 - 77
0,0,0,0,0,0,0,0,  # 78 - 7f
2,2,2,2,2,2,2,2,  # 80 - 87
2,2,2,2,2,2,2,2,  # 88 - 8f
2,2,2,2,2,2,2,2,  # 90 - 97
2,2,2,2,2,2,2,2,  # 98 - 9f
2,2,2,2,2,2,2,2,  # a0 - a7
2,2,2,2,2,2,2,2,  # a8 - af
2,2,2,2,2,2,2,2,  # b0 - b7
2,2,2,2,2,2,2,2,  # b8 - bf
2,2,2,2,2,2,2,2,  # c0 - c7
2,2,2,2,2,2,2,2,  # c8 - cf
2,2,2,2,2,2,2,2,  # d0 - d7
2,2,2,2,2,2,2,2,  # d8 - df
2,2,2,2,2,2,2,2,  # e0 - e7
2,2,2,2,2,2,2,2,  # e8 - ef
2,2,2,2,2,2,2,2,  # f0 - f7
2,2,2,2,2,2,2,2,  # f8 - ff
)

# 40 entries; the trailing comments give the flat index range of each row.
ISO2022KR_st = (
eStart,     3,eError,eStart,eStart,eStart,eError,eError,# 00-07
eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f
eItsMe,eItsMe,eError,eError,eError,     4,eError,eError,# 10-17
eError,eError,eError,eError,     5,eError,eError,eError,# 18-1f
eError,eError,eError,eItsMe,eStart,eStart,eStart,eStart,# 20-27
)

ISO2022KRCharLenTable = (0, 0, 0, 0, 0, 0)

ISO2022KRSMModel = {'classTable': ISO2022KR_cls,
                    'classFactor': 6,
                    'stateTable': ISO2022KR_st,
                    'charLenTable': ISO2022KRCharLenTable,
                    'name': "ISO-2022-KR"}

# flake8: noqa
mozilla.org code. 3 | # 4 | # The Initial Developer of the Original Code is 5 | # Netscape Communications Corporation. 6 | # Portions created by the Initial Developer are Copyright (C) 1998 7 | # the Initial Developer. All Rights Reserved. 8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # 12 | # This library is free software; you can redistribute it and/or 13 | # modify it under the terms of the GNU Lesser General Public 14 | # License as published by the Free Software Foundation; either 15 | # version 2.1 of the License, or (at your option) any later version. 16 | # 17 | # This library is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 | # Lesser General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Lesser General Public 23 | # License along with this library; if not, write to the Free Software 24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 25 | # 02110-1301 USA 26 | ######################### END LICENSE BLOCK ######################### 27 | 28 | import sys 29 | from . 
class EUCJPProber(MultiByteCharSetProber):
    """Prober for EUC-JP.

    Combines the EUC-JP coding state machine with both a character
    distribution analyzer and a context analyzer; the reported confidence
    is the larger of the two analyzers' confidences.
    """

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        self._mCodingSM = CodingStateMachine(EUCJPSMModel)
        self._mDistributionAnalyzer = EUCJPDistributionAnalysis()
        self._mContextAnalyzer = EUCJPContextAnalysis()
        self.reset()

    def reset(self):
        """Reset the base prober state and the context analyzer."""
        MultiByteCharSetProber.reset(self)
        self._mContextAnalyzer.reset()

    def get_charset_name(self):
        """Return the fixed charset name "EUC-JP"."""
        return "EUC-JP"

    def feed(self, aBuf):
        """Run *aBuf* through the state machine and both analyzers.

        Returns the prober state from ``get_state()``.  An empty buffer is
        a no-op; previously it fell through to ``aBuf[aLen - 1]`` and
        raised IndexError.
        """
        aLen = len(aBuf)
        if not aLen:
            # Nothing to analyse and no last byte to remember.
            return self.get_state()
        for i in range(aLen):
            # PY3K: aBuf is a byte array, so aBuf[i] is an int, not a byte
            codingState = self._mCodingSM.next_state(aBuf[i])
            if codingState == constants.eError:
                if constants._debug:
                    sys.stderr.write(self.get_charset_name()
                                     + ' prober hit error at byte ' + str(i)
                                     + '\n')
                self._mState = constants.eNotMe
                break
            elif codingState == constants.eItsMe:
                self._mState = constants.eFoundIt
                break
            elif codingState == constants.eStart:
                # Back at the start state: hand the last two bytes and the
                # reported character length to the analyzers.
                charLen = self._mCodingSM.get_current_charlen()
                if i == 0:
                    # The pair straddles buffers: _mLastChar[0] holds the
                    # final byte of the previous feed() call.
                    self._mLastChar[1] = aBuf[0]
                    self._mContextAnalyzer.feed(self._mLastChar, charLen)
                    self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
                else:
                    self._mContextAnalyzer.feed(aBuf[i - 1:i + 1], charLen)
                    self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
                                                     charLen)

        # Remember the final byte for the next call.
        self._mLastChar[0] = aBuf[aLen - 1]

        if self.get_state() == constants.eDetecting:
            # Shortcut: declare a match once there is enough data and the
            # confidence exceeds the threshold.
            if (self._mContextAnalyzer.got_enough_data() and
                    (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
                self._mState = constants.eFoundIt

        return self.get_state()

    def get_confidence(self):
        """Return the higher of the context and distribution confidences."""
        contxtCf = self._mContextAnalyzer.get_confidence()
        distribCf = self._mDistributionAnalyzer.get_confidence()
        return max(contxtCf, distribCf)
class EUCKRProber(MultiByteCharSetProber):
    """Multi-byte prober wired to the EUC-KR state machine and analyzer.

    All feeding and confidence logic is inherited from
    MultiByteCharSetProber; this class only supplies the model, the
    distribution analyzer and the charset name.
    """

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        # Replace the base class's None placeholders before resetting.
        self._mCodingSM = CodingStateMachine(EUCKRSMModel)
        self._mDistributionAnalyzer = EUCKRDistributionAnalysis()
        self.reset()

    def get_charset_name(self):
        """Return the fixed charset name "EUC-KR"."""
        return "EUC-KR"
class EUCTWProber(MultiByteCharSetProber):
    """Multi-byte prober wired to the EUC-TW state machine and analyzer.

    All feeding and confidence logic is inherited from
    MultiByteCharSetProber; this class only supplies the model, the
    distribution analyzer and the charset name.
    """

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        # Replace the base class's None placeholders before resetting.
        self._mCodingSM = CodingStateMachine(EUCTWSMModel)
        self._mDistributionAnalyzer = EUCTWDistributionAnalysis()
        self.reset()

    def get_charset_name(self):
        """Return the fixed charset name "EUC-TW"."""
        return "EUC-TW"
class GB2312Prober(MultiByteCharSetProber):
    """Multi-byte prober wired to the GB2312 state machine and analyzer.

    All feeding and confidence logic is inherited from
    MultiByteCharSetProber; this class only supplies the model, the
    distribution analyzer and the charset name.
    """

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        # Replace the base class's None placeholders before resetting.
        self._mCodingSM = CodingStateMachine(GB2312SMModel)
        self._mDistributionAnalyzer = GB2312DistributionAnalysis()
        self.reset()

    def get_charset_name(self):
        """Return the fixed charset name "GB2312"."""
        return "GB2312"
# Number of frequency categories (the values 0-3 in Latin1ClassModel).
FREQ_CAT_NUM = 4

UDF = 0  # undefined
OTH = 1  # other
ASC = 2  # ascii capital letter
ASS = 3  # ascii small letter
ACV = 4  # accent capital vowel
ACO = 5  # accent capital other
ASV = 6  # accent small vowel
ASO = 7  # accent small other
CLASS_NUM = 8  # total classes

# Byte value -> character class, 256 entries.
Latin1_CharToClass = (
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 00 - 07
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 08 - 0F
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 10 - 17
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 18 - 1F
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 20 - 27
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 28 - 2F
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 30 - 37
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 38 - 3F
    OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   # 40 - 47
    ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   # 48 - 4F
    ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,   # 50 - 57
    ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH,   # 58 - 5F
    OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   # 60 - 67
    ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   # 68 - 6F
    ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS,   # 70 - 77
    ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH,   # 78 - 7F
    OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH,   # 80 - 87
    OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF,   # 88 - 8F
    UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # 90 - 97
    OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO,   # 98 - 9F
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # A0 - A7
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # A8 - AF
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # B0 - B7
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,   # B8 - BF
    ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO,   # C0 - C7
    ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV,   # C8 - CF
    ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH,   # D0 - D7
    ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO,   # D8 - DF
    ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO,   # E0 - E7
    ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV,   # E8 - EF
    ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH,   # F0 - F7
    ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO,   # F8 - FF
)

# Likelihood of a class following another, indexed as
# previous_class * CLASS_NUM + current_class:
# 0 : illegal
# 1 : very unlikely
# 2 : normal
# 3 : very likely
Latin1ClassModel = (
    # UDF OTH ASC ASS ACV ACO ASV ASO
    0,  0,  0,  0,  0,  0,  0,  0,  # UDF
    0,  3,  3,  3,  3,  3,  3,  3,  # OTH
    0,  3,  3,  3,  3,  3,  3,  3,  # ASC
    0,  3,  3,  3,  1,  1,  3,  3,  # ASS
    0,  3,  3,  3,  1,  2,  1,  2,  # ACV
    0,  3,  3,  3,  3,  3,  3,  3,  # ACO
    0,  3,  1,  3,  1,  1,  1,  3,  # ASV
    0,  3,  1,  3,  1,  1,  3,  3,  # ASO
)


class Latin1Prober(CharSetProber):
    """Prober for windows-1252 based on character-class pair likelihoods."""

    def __init__(self):
        CharSetProber.__init__(self)
        self.reset()

    def reset(self):
        """Clear the previous-class marker and the likelihood counters."""
        self._mLastCharClass = OTH
        self._mFreqCounter = [0] * FREQ_CAT_NUM
        CharSetProber.reset(self)

    def get_charset_name(self):
        """Return the fixed charset name "windows-1252"."""
        return "windows-1252"

    def feed(self, aBuf):
        """Count class-pair likelihoods over *aBuf*; return the prober state.

        The buffer is first passed through ``filter_with_english_letters``.
        An illegal pair (likelihood 0) sets the state to ``eNotMe`` and
        stops processing.
        """
        filtered = self.filter_with_english_letters(aBuf)
        for ch in filtered:
            current = Latin1_CharToClass[wrap_ord(ch)]
            likelihood = Latin1ClassModel[
                (self._mLastCharClass * CLASS_NUM) + current]
            if likelihood == 0:
                self._mState = eNotMe
                break
            self._mFreqCounter[likelihood] += 1
            self._mLastCharClass = current

        return self.get_state()

    def get_confidence(self):
        """Return a confidence derived from the likelihood counters.

        Yields 0.01 once the prober is in ``eNotMe``, and 0.0 when nothing
        has been counted; otherwise "very likely" pairs count for and
        "very unlikely" pairs count (x20) against, clamped at zero.
        """
        if self.get_state() == eNotMe:
            return 0.01

        seen = sum(self._mFreqCounter)
        if seen < 0.01:
            return 0.0

        score = ((self._mFreqCounter[3] - self._mFreqCounter[1] * 20.0)
                 / seen)
        # lower the confidence of latin1 so that other more accurate
        # detector can take priority.
        return max(score, 0.0) * 0.73
class MultiByteCharSetProber(CharSetProber):
    """Base prober for multi-byte encodings.

    The constructor leaves ``_mCodingSM`` and ``_mDistributionAnalyzer`` as
    None; concrete probers assign them and override ``get_charset_name``.
    """

    def __init__(self):
        CharSetProber.__init__(self)
        self._mDistributionAnalyzer = None
        self._mCodingSM = None
        # [last byte of the previous buffer, first byte of the current one]
        self._mLastChar = [0, 0]

    def reset(self):
        """Reset prober state plus the state machine and analyzer, if set."""
        CharSetProber.reset(self)
        if self._mCodingSM:
            self._mCodingSM.reset()
        if self._mDistributionAnalyzer:
            self._mDistributionAnalyzer.reset()
        self._mLastChar = [0, 0]

    def get_charset_name(self):
        """Placeholder that returns None; concrete probers override it."""
        pass

    def feed(self, aBuf):
        """Run *aBuf* through the state machine and distribution analyzer.

        Returns the prober state from ``get_state()``.  An empty buffer is
        a no-op; previously it fell through to ``aBuf[aLen - 1]`` and
        raised IndexError.
        """
        aLen = len(aBuf)
        if not aLen:
            # Nothing to analyse and no last byte to remember.
            return self.get_state()
        for i in range(aLen):
            codingState = self._mCodingSM.next_state(aBuf[i])
            if codingState == constants.eError:
                if constants._debug:
                    sys.stderr.write(self.get_charset_name()
                                     + ' prober hit error at byte ' + str(i)
                                     + '\n')
                self._mState = constants.eNotMe
                break
            elif codingState == constants.eItsMe:
                self._mState = constants.eFoundIt
                break
            elif codingState == constants.eStart:
                # Back at the start state: hand the last two bytes and the
                # reported character length to the analyzer.
                charLen = self._mCodingSM.get_current_charlen()
                if i == 0:
                    # The pair straddles buffers: _mLastChar[0] holds the
                    # final byte of the previous feed() call.
                    self._mLastChar[1] = aBuf[0]
                    self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
                else:
                    self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
                                                     charLen)

        # Remember the final byte for the next call.
        self._mLastChar[0] = aBuf[aLen - 1]

        if self.get_state() == constants.eDetecting:
            # Shortcut: declare a match once there is enough data and the
            # confidence exceeds the threshold.
            if (self._mDistributionAnalyzer.got_enough_data() and
                    (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
                self._mState = constants.eFoundIt

        return self.get_state()

    def get_confidence(self):
        """Return the distribution analyzer's confidence."""
        return self._mDistributionAnalyzer.get_confidence()
class MBCSGroupProber(CharSetGroupProber):
    """Group prober holding one instance of each multi-byte prober."""

    def __init__(self):
        CharSetGroupProber.__init__(self)
        # Instantiated in this fixed order.
        prober_classes = (
            UTF8Prober,
            SJISProber,
            EUCJPProber,
            GB2312Prober,
            EUCKRProber,
            CP949Prober,
            Big5Prober,
            EUCTWProber,
        )
        self._mProbers = [make_prober() for make_prober in prober_classes]
        self.reset()
8 | # 9 | # Contributor(s): 10 | # Mark Pilgrim - port to Python 11 | # Shy Shalom - original C code 12 | # 13 | # This library is free software; you can redistribute it and/or 14 | # modify it under the terms of the GNU Lesser General Public 15 | # License as published by the Free Software Foundation; either 16 | # version 2.1 of the License, or (at your option) any later version. 17 | # 18 | # This library is distributed in the hope that it will be useful, 19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 | # Lesser General Public License for more details. 22 | # 23 | # You should have received a copy of the GNU Lesser General Public 24 | # License along with this library; if not, write to the Free Software 25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 26 | # 02110-1301 USA 27 | ######################### END LICENSE BLOCK ######################### 28 | 29 | import sys 30 | from . 
class SingleByteCharSetProber(CharSetProber):
    """Prober for a single-byte charset, driven by a language model dict.

    The model is read through the keys ``'charToOrderMap'``,
    ``'precedenceMatrix'``, ``'mTypicalPositiveRatio'``,
    ``'keepEnglishLetter'`` and ``'charsetName'``.
    """

    def __init__(self, model, reversed=False, nameProber=None):
        """Store the model and lookup options, then reset all counters.

        :param model: language model dict (see the class docstring).
        :param reversed: when true, every character pair is looked up in
            reverse order in the precedence matrix.
        :param nameProber: optional auxiliary prober; when given, its
            ``get_charset_name()`` is reported instead of the model's name.
        """
        CharSetProber.__init__(self)
        self._mModel = model
        # TRUE if we need to reverse every pair in the model lookup
        self._mReversed = reversed
        # Optional auxiliary prober for name decision
        self._mNameProber = nameProber
        self.reset()

    def reset(self):
        """Reset the prober state and every sequence/character counter."""
        CharSetProber.reset(self)
        # char order of last character
        self._mLastOrder = 255
        self._mSeqCounters = [0] * NUMBER_OF_SEQ_CAT
        self._mTotalSeqs = 0
        self._mTotalChar = 0
        # characters that fall in our sampling range
        self._mFreqChar = 0

    def get_charset_name(self):
        """Return the name prober's charset name, else the model's own."""
        if self._mNameProber:
            return self._mNameProber.get_charset_name()
        return self._mModel['charsetName']

    def feed(self, aBuf):
        """Update the counters from ``aBuf`` and return the prober state.

        Once more than ``SB_ENOUGH_REL_THRESHOLD`` sequences have been seen,
        a confidence above ``POSITIVE_SHORTCUT_THRESHOLD`` switches the state
        to ``eFoundIt`` and one below ``NEGATIVE_SHORTCUT_THRESHOLD`` switches
        it to ``eNotMe``.
        """
        if not self._mModel['keepEnglishLetter']:
            aBuf = self.filter_without_english_letters(aBuf)
        if not len(aBuf):
            return self.get_state()

        # Loop-invariant lookup, hoisted out of the per-byte loop.
        char_to_order = self._mModel['charToOrderMap']
        for c in aBuf:
            order = char_to_order[wrap_ord(c)]
            if order < SYMBOL_CAT_ORDER:
                self._mTotalChar += 1
            if order < SAMPLE_SIZE:
                self._mFreqChar += 1
                # Only pairs where both characters are in the sampling range
                # are scored against the precedence matrix.
                if self._mLastOrder < SAMPLE_SIZE:
                    self._mTotalSeqs += 1
                    if not self._mReversed:
                        i = (self._mLastOrder * SAMPLE_SIZE) + order
                    else:  # reverse the order of the letters in the lookup
                        i = (order * SAMPLE_SIZE) + self._mLastOrder
                    model = self._mModel['precedenceMatrix'][i]
                    self._mSeqCounters[model] += 1
            self._mLastOrder = order

        if self.get_state() == constants.eDetecting:
            if self._mTotalSeqs > SB_ENOUGH_REL_THRESHOLD:
                cf = self.get_confidence()
                if cf > POSITIVE_SHORTCUT_THRESHOLD:
                    if constants._debug:
                        # The split literal previously lacked the space
                        # between "a" and "winner".
                        sys.stderr.write('%s confidence = %s, we have a '
                                         'winner\n' %
                                         (self._mModel['charsetName'], cf))
                    self._mState = constants.eFoundIt
                elif cf < NEGATIVE_SHORTCUT_THRESHOLD:
                    if constants._debug:
                        # Same fix: keep "negative" and "shortcut" apart.
                        sys.stderr.write('%s confidence = %s, below negative '
                                         'shortcut threshhold %s\n' %
                                         (self._mModel['charsetName'], cf,
                                          NEGATIVE_SHORTCUT_THRESHOLD))
                    self._mState = constants.eNotMe

        return self.get_state()

    def get_confidence(self):
        """Return the confidence for this charset.

        The value is 0.01 while no sequence has been counted and is capped
        at 0.99.
        """
        r = 0.01
        if self._mTotalSeqs > 0:
            r = ((1.0 * self._mSeqCounters[POSITIVE_CAT]) / self._mTotalSeqs
                 / self._mModel['mTypicalPositiveRatio'])
            r = r * self._mFreqChar / self._mTotalChar
            if r >= 1.0:
                r = 0.99
        return r
class SBCSGroupProber(CharSetGroupProber):
    """Group prober that bundles every single-byte charset prober."""

    def __init__(self):
        CharSetGroupProber.__init__(self)

        # Plain model-driven probers, created in this order.
        plain_models = (
            Win1251CyrillicModel,
            Koi8rModel,
            Latin5CyrillicModel,
            MacCyrillicModel,
            Ibm866Model,
            Ibm855Model,
            Latin7GreekModel,
            Win1253GreekModel,
            Latin5BulgarianModel,
            Win1251BulgarianModel,
            Latin2HungarianModel,
            Win1250HungarianModel,
            TIS620ThaiModel,
        )
        self._mProbers = [SingleByteCharSetProber(m) for m in plain_models]

        # Hebrew uses one model twice (normal and reversed pair lookup); both
        # model probers report their name through the shared HebrewProber.
        hebrew = HebrewProber()
        logical = SingleByteCharSetProber(Win1255HebrewModel, False, hebrew)
        visual = SingleByteCharSetProber(Win1255HebrewModel, True, hebrew)
        hebrew.set_model_probers(logical, visual)
        self._mProbers.extend([hebrew, logical, visual])

        self.reset()
class SJISProber(MultiByteCharSetProber):
    """Multi-byte prober combining the SJIS state machine with a
    distribution analyser and a context analyser."""

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        self._mCodingSM = CodingStateMachine(SJISSMModel)
        self._mDistributionAnalyzer = SJISDistributionAnalysis()
        self._mContextAnalyzer = SJISContextAnalysis()
        self.reset()

    def reset(self):
        """Reset the base prober and the context analyser."""
        MultiByteCharSetProber.reset(self)
        self._mContextAnalyzer.reset()

    def get_charset_name(self):
        """Return the charset name reported by the context analyser."""
        return self._mContextAnalyzer.get_charset_name()

    def feed(self, aBuf):
        """Run ``aBuf`` through the state machine and return the new state."""
        buf_len = len(aBuf)
        for pos in range(buf_len):
            coding_state = self._mCodingSM.next_state(aBuf[pos])

            if coding_state == constants.eError:
                if constants._debug:
                    sys.stderr.write(self.get_charset_name()
                                     + ' prober hit error at byte '
                                     + str(pos) + '\n')
                self._mState = constants.eNotMe
                break

            if coding_state == constants.eItsMe:
                self._mState = constants.eFoundIt
                break

            if coding_state != constants.eStart:
                continue

            # A character was completed: hand it to both analysers.
            char_len = self._mCodingSM.get_current_charlen()
            if pos == 0:
                # The character may have begun in the previous buffer, so it
                # is assembled in the two-slot _mLastChar carry-over.
                self._mLastChar[1] = aBuf[0]
                self._mContextAnalyzer.feed(self._mLastChar[2 - char_len:],
                                            char_len)
                self._mDistributionAnalyzer.feed(self._mLastChar, char_len)
            else:
                ctx_start = pos + 1 - char_len
                self._mContextAnalyzer.feed(aBuf[ctx_start:ctx_start + 2],
                                            char_len)
                self._mDistributionAnalyzer.feed(aBuf[pos - 1:pos + 1],
                                                 char_len)

        # Remember the final byte for the next call.
        self._mLastChar[0] = aBuf[buf_len - 1]

        if self.get_state() == constants.eDetecting:
            enough = self._mContextAnalyzer.got_enough_data()
            if enough and self.get_confidence() > constants.SHORTCUT_THRESHOLD:
                self._mState = constants.eFoundIt

        return self.get_state()

    def get_confidence(self):
        """Return the higher of the context and distribution confidences."""
        context_cf = self._mContextAnalyzer.get_confidence()
        distribution_cf = self._mDistributionAnalyzer.get_confidence()
        return max(context_cf, distribution_cf)
MINIMUM_THRESHOLD = 0.20
# Values stored in UniversalDetector._mInputState.
ePureAscii = 0
eEscAscii = 1
eHighbyte = 2


class UniversalDetector:
    """Incremental charset detector.

    Pass byte chunks to :meth:`feed`, call :meth:`close` once the input is
    exhausted, then read ``result`` -- a dict with the keys ``'encoding'``
    and ``'confidence'``.  ``done`` turns true as soon as a decision is made.
    """

    def __init__(self):
        self._highBitDetector = re.compile(b'[\x80-\xFF]')
        self._escDetector = re.compile(b'(\033|~{)')
        # Probers are created lazily by feed() when the input calls for them.
        self._mEscCharSetProber = None
        self._mCharSetProbers = []
        self.reset()

    def reset(self):
        """Clear the result and state and reset every prober created so far."""
        self.result = {'encoding': None, 'confidence': 0.0}
        self.done = False
        self._mStart = True
        self._mGotData = False
        self._mInputState = ePureAscii
        self._mLastChar = b''
        if self._mEscCharSetProber:
            self._mEscCharSetProber.reset()
        for prober in self._mCharSetProbers:
            prober.reset()

    def feed(self, aBuf):
        """Consume one chunk of bytes.

        Does nothing once ``done`` is set or when ``aBuf`` is empty.  The
        first non-empty chunk is checked for a byte-order mark; a match
        settles the result immediately with confidence 1.0.
        """
        if self.done:
            return

        aLen = len(aBuf)
        if not aLen:
            return

        if not self._mGotData:
            # If the data starts with BOM, we know it is UTF.  The 4-byte
            # marks are tested before the 2-byte ones they begin with.
            if aBuf[:3] == codecs.BOM_UTF8:
                # EF BB BF  UTF-8 with BOM
                self.result = {'encoding': "UTF-8-SIG", 'confidence': 1.0}
            elif aBuf[:4] == codecs.BOM_UTF32_LE:
                # FF FE 00 00  UTF-32, little-endian BOM
                self.result = {'encoding': "UTF-32LE", 'confidence': 1.0}
            elif aBuf[:4] == codecs.BOM_UTF32_BE:
                # 00 00 FE FF  UTF-32, big-endian BOM
                self.result = {'encoding': "UTF-32BE", 'confidence': 1.0}
            elif aBuf[:4] == b'\xFE\xFF\x00\x00':
                # FE FF 00 00  UCS-4, unusual octet order BOM (3412)
                self.result = {
                    'encoding': "X-ISO-10646-UCS-4-3412",
                    'confidence': 1.0
                }
            elif aBuf[:4] == b'\x00\x00\xFF\xFE':
                # 00 00 FF FE  UCS-4, unusual octet order BOM (2143)
                self.result = {
                    'encoding': "X-ISO-10646-UCS-4-2143",
                    'confidence': 1.0
                }
            elif aBuf[:2] == codecs.BOM_LE:
                # FF FE  UTF-16, little endian BOM
                self.result = {'encoding': "UTF-16LE", 'confidence': 1.0}
            elif aBuf[:2] == codecs.BOM_BE:
                # FE FF  UTF-16, big endian BOM
                self.result = {'encoding': "UTF-16BE", 'confidence': 1.0}

        self._mGotData = True
        if self.result['encoding'] and (self.result['confidence'] > 0.0):
            self.done = True
            return

        if self._mInputState == ePureAscii:
            if self._highBitDetector.search(aBuf):
                self._mInputState = eHighbyte
            elif self._escDetector.search(self._mLastChar + aBuf):
                # The previous chunk's last byte is prepended so that a
                # two-byte "~{" split across chunks is still seen.
                self._mInputState = eEscAscii

        self._mLastChar = aBuf[-1:]

        if self._mInputState == eEscAscii:
            if not self._mEscCharSetProber:
                self._mEscCharSetProber = EscCharSetProber()
            if self._mEscCharSetProber.feed(aBuf) == constants.eFoundIt:
                self.result = {
                    'encoding': self._mEscCharSetProber.get_charset_name(),
                    'confidence': self._mEscCharSetProber.get_confidence()
                }
                self.done = True
        elif self._mInputState == eHighbyte:
            if not self._mCharSetProbers:
                self._mCharSetProbers = [MBCSGroupProber(), SBCSGroupProber(),
                                         Latin1Prober()]
            for prober in self._mCharSetProbers:
                if prober.feed(aBuf) == constants.eFoundIt:
                    self.result = {'encoding': prober.get_charset_name(),
                                   'confidence': prober.get_confidence()}
                    self.done = True
                    break

    def close(self):
        """Finish detection.

        Returns ``self.result`` when the input was pure ASCII or when a
        high-byte prober exceeds ``MINIMUM_THRESHOLD``; returns ``None`` in
        every other case (already done, no data fed, or nothing conclusive).
        """
        if self.done:
            return
        if not self._mGotData:
            if constants._debug:
                sys.stderr.write('no data received!\n')
            return
        self.done = True

        if self._mInputState == ePureAscii:
            self.result = {'encoding': 'ascii', 'confidence': 1.0}
            return self.result

        if self._mInputState == eHighbyte:
            maxProberConfidence = 0.0
            maxProber = None
            for prober in self._mCharSetProbers:
                if not prober:
                    continue
                proberConfidence = prober.get_confidence()
                if proberConfidence > maxProberConfidence:
                    maxProberConfidence = proberConfidence
                    maxProber = prober
            if maxProber and (maxProberConfidence > MINIMUM_THRESHOLD):
                self.result = {'encoding': maxProber.get_charset_name(),
                               'confidence': maxProber.get_confidence()}
                return self.result

        if constants._debug:
            sys.stderr.write('no probers hit minimum threshhold\n')
            # Group probers keep their children in ``_mProbers``; the list of
            # group probers is empty when only escape sequences were seen.
            if self._mCharSetProbers:
                for prober in self._mCharSetProbers[0]._mProbers:
                    if not prober:
                        continue
                    sys.stderr.write('%s confidence = %s\n' %
                                     (prober.get_charset_name(),
                                      prober.get_confidence()))
class UTF8Prober(CharSetProber):
    """Prober that validates input against the UTF-8 state machine and
    counts the multi-byte characters it completes."""

    def __init__(self):
        CharSetProber.__init__(self)
        self._mCodingSM = CodingStateMachine(UTF8SMModel)
        self.reset()

    def reset(self):
        """Reset the base prober, the state machine and the counter."""
        CharSetProber.reset(self)
        self._mCodingSM.reset()
        self._mNumOfMBChar = 0

    def get_charset_name(self):
        return "utf-8"

    def feed(self, aBuf):
        """Run ``aBuf`` through the state machine and return the new state."""
        for byte in aBuf:
            coding_state = self._mCodingSM.next_state(byte)

            if coding_state == constants.eError:
                self._mState = constants.eNotMe
                break

            if coding_state == constants.eItsMe:
                self._mState = constants.eFoundIt
                break

            # A completed character of two or more bytes counts as evidence.
            if (coding_state == constants.eStart
                    and self._mCodingSM.get_current_charlen() >= 2):
                self._mNumOfMBChar += 1

        if self.get_state() == constants.eDetecting:
            if self.get_confidence() > constants.SHORTCUT_THRESHOLD:
                self._mState = constants.eFoundIt

        return self.get_state()

    def get_confidence(self):
        """Return the confidence derived from the multi-byte character count.

        Six or more multi-byte characters give 0.99; below that, each one
        multiplies the remaining doubt by ``ONE_CHAR_PROB``.
        """
        doubt = 0.99
        seen = self._mNumOfMBChar
        if not seen < 6:
            return doubt
        for _ in range(0, seen):
            doubt = doubt * ONE_CHAR_PROB
        return 1.0 - doubt
__all__ = ['RecentlyUsedContainer', 'HTTPHeaderDict']


_Null = object()


class RecentlyUsedContainer(MutableMapping):
    """
    Provides a thread-safe dict-like container which maintains up to
    ``maxsize`` keys while throwing away the least-recently-used keys beyond
    ``maxsize``.

    :param maxsize:
        Maximum number of recent elements to retain.

    :param dispose_func:
        Optional callback. Every time an item is evicted from the container,
        ``dispose_func(value)`` is called with the evicted value.
    """

    ContainerCls = OrderedDict

    def __init__(self, maxsize=10, dispose_func=None):
        self._maxsize = maxsize
        self.dispose_func = dispose_func

        self._container = self.ContainerCls()
        self.lock = RLock()

    def __getitem__(self, key):
        # Re-insert the item, moving it to the end of the eviction line.
        with self.lock:
            item = self._container.pop(key)
            self._container[key] = item
            return item

    def __setitem__(self, key, value):
        evicted_value = _Null
        with self.lock:
            # Possibly evict the existing value of 'key'
            evicted_value = self._container.get(key, _Null)
            self._container[key] = value

            # If we didn't evict an existing value, we might have to evict the
            # least recently used item from the beginning of the container.
            if len(self._container) > self._maxsize:
                _key, evicted_value = self._container.popitem(last=False)

        # The callback runs after the lock has been released.
        if self.dispose_func and evicted_value is not _Null:
            self.dispose_func(evicted_value)

    def __delitem__(self, key):
        with self.lock:
            value = self._container.pop(key)

        if self.dispose_func:
            self.dispose_func(value)

    def __len__(self):
        with self.lock:
            return len(self._container)

    def __iter__(self):
        raise NotImplementedError('Iteration over this class is unlikely to be threadsafe.')

    def clear(self):
        """Empty the container, disposing of every value it held."""
        with self.lock:
            # Copy pointers to all values, then wipe the mapping
            values = list(self._container.values())
            self._container.clear()

        if self.dispose_func:
            for value in values:
                self.dispose_func(value)

    def keys(self):
        """Return a snapshot list of the keys, taken under the lock."""
        with self.lock:
            return list(self._container)


class HTTPHeaderDict(MutableMapping):
    """
    :param headers:
        An iterable of field-value pairs. Must not contain multiple field names
        when compared case-insensitively.

    :param kwargs:
        Additional field-value pairs to pass in to ``dict.update``.

    A ``dict`` like container for storing HTTP Headers.

    Field names are stored and compared case-insensitively in compliance with
    RFC 7230. Iteration provides the first case-sensitive key seen for each
    case-insensitive pair.

    Using ``__setitem__`` syntax overwrites fields that compare equal
    case-insensitively in order to maintain ``dict``'s api. For fields that
    compare equal, instead create a new ``HTTPHeaderDict`` and use ``.add``
    in a loop.

    If multiple fields that are equal case-insensitively are passed to the
    constructor or ``.update``, the behavior is undefined and some will be
    lost.

    >>> headers = HTTPHeaderDict()
    >>> headers.add('Set-Cookie', 'foo=bar')
    >>> headers.add('set-cookie', 'baz=quxx')
    >>> headers['content-length'] = '7'
    >>> headers['SET-cookie']
    'foo=bar, baz=quxx'
    >>> headers['Content-Length']
    '7'

    If you want to access the raw headers with their original casing
    for debugging purposes you can access the private ``._data`` attribute
    which is a normal python ``dict`` that maps the case-insensitive key to a
    list of tuples stored as (case-sensitive-original-name, value). Using the
    structure from above as our example:

    >>> headers._data
    {'set-cookie': [('Set-Cookie', 'foo=bar'), ('set-cookie', 'baz=quxx')],
    'content-length': [('content-length', '7')]}
    """

    def __init__(self, headers=None, **kwargs):
        self._data = {}
        if headers is None:
            headers = {}
        self.update(headers, **kwargs)

    def add(self, key, value):
        """Adds a (name, value) pair, doesn't overwrite the value if it already
        exists.

        >>> headers = HTTPHeaderDict(foo='bar')
        >>> headers.add('Foo', 'baz')
        >>> headers['foo']
        'bar, baz'
        """
        self._data.setdefault(key.lower(), []).append((key, value))

    def getlist(self, key):
        """Returns a list of all the values for the named field. Returns an
        empty list if the key doesn't exist.

        The values are returned exactly as they were stored; a value that
        itself contains ``', '`` is kept in one piece.
        """
        try:
            entries = self._data[key.lower()]
        except KeyError:
            return []
        return [value for _rawkey, value in entries]

    def copy(self):
        """Return a new ``HTTPHeaderDict`` holding the same (name, value)
        pairs, original casing included."""
        h = HTTPHeaderDict()
        for key in self._data:
            for rawkey, value in self._data[key]:
                h.add(rawkey, value)
        return h

    def __eq__(self, other):
        if not isinstance(other, Mapping):
            return False
        # Normalise the other mapping so keys compare case-insensitively.
        other = HTTPHeaderDict(other)
        return dict((k1, self[k1]) for k1 in self._data) == \
            dict((k2, other[k2]) for k2 in other._data)

    def __getitem__(self, key):
        values = self._data[key.lower()]
        return ', '.join(value[1] for value in values)

    def __setitem__(self, key, value):
        self._data[key.lower()] = [(key, value)]

    def __delitem__(self, key):
        del self._data[key.lower()]

    def __len__(self):
        return len(self._data)

    def __iter__(self):
        # Yield the original casing of the first name stored for each field.
        for headers in self._data.values():
            yield headers[0][0]

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, dict(self.items()))
class NTLMConnectionPool(HTTPSConnectionPool):
    """
    Implements an NTLM authentication version of an urllib3 connection pool
    """

    scheme = 'https'

    def __init__(self, user, pw, authurl, *args, **kwargs):
        """
        authurl is a random URL on the server that is protected by NTLM.
        user is the Windows user, probably in the DOMAIN\\username format.
        pw is the password for the user.

        Remaining positional and keyword arguments are passed on to the
        parent pool's constructor.
        """
        super(NTLMConnectionPool, self).__init__(*args, **kwargs)
        self.authurl = authurl
        self.rawuser = user
        # Split once on the first backslash: domain before it, user after it.
        # A ``user`` without a backslash raises IndexError two lines below.
        user_parts = user.split('\\', 1)
        self.domain = user_parts[0].upper()
        self.user = user_parts[1]
        self.pw = pw

    def _new_conn(self):
        """Open an HTTPS connection and run the NTLM handshake on it.

        Two GET requests are sent to ``authurl``: the first carries the
        negotiate message, the second the authenticate message built from
        the server's challenge.  Returns the connection.  Raises a plain
        ``Exception`` when no ``NTLM`` challenge is found in the first
        response or when the second response status is not 200.
        """
        # Performs the NTLM handshake that secures the connection. The socket
        # must be kept open while requests are performed.
        self.num_connections += 1
        log.debug('Starting NTLM HTTPS connection no. %d: https://%s%s' %
                  (self.num_connections, self.host, self.authurl))

        headers = {}
        headers['Connection'] = 'Keep-Alive'
        req_header = 'Authorization'
        resp_header = 'www-authenticate'

        conn = HTTPSConnection(host=self.host, port=self.port)

        # Send negotiation message
        headers[req_header] = (
            'NTLM %s' % ntlm.create_NTLM_NEGOTIATE_MESSAGE(self.rawuser))
        log.debug('Request headers: %s' % headers)
        conn.request('GET', self.authurl, None, headers)
        res = conn.getresponse()
        # NOTE(review): the lookup below uses the lower-case name
        # 'www-authenticate', so this presumably relies on getheaders()
        # returning lower-cased names -- confirm for http.client on Python 3.
        reshdr = dict(res.getheaders())
        log.debug('Response status: %s %s' % (res.status, res.reason))
        log.debug('Response headers: %s' % reshdr)
        log.debug('Response data: %s [...]' % res.read(100))

        # Remove the reference to the socket, so that it can not be closed by
        # the response object (we want to keep the socket open)
        res.fp = None

        # Server should respond with a challenge message
        auth_header_values = reshdr[resp_header].split(', ')
        auth_header_value = None
        # Keep the payload of the last entry that starts with 'NTLM '.
        for s in auth_header_values:
            if s[:5] == 'NTLM ':
                auth_header_value = s[5:]
        if auth_header_value is None:
            raise Exception('Unexpected %s response header: %s' %
                            (resp_header, reshdr[resp_header]))

        # Send authentication message
        ServerChallenge, NegotiateFlags = \
            ntlm.parse_NTLM_CHALLENGE_MESSAGE(auth_header_value)
        auth_msg = ntlm.create_NTLM_AUTHENTICATE_MESSAGE(ServerChallenge,
                                                         self.user,
                                                         self.domain,
                                                         self.pw,
                                                         NegotiateFlags)
        headers[req_header] = 'NTLM %s' % auth_msg
        log.debug('Request headers: %s' % headers)
        conn.request('GET', self.authurl, None, headers)
        res = conn.getresponse()
        log.debug('Response status: %s %s' % (res.status, res.reason))
        log.debug('Response headers: %s' % dict(res.getheaders()))
        log.debug('Response data: %s [...]' % res.read()[:100])
        if res.status != 200:
            if res.status == 401:
                raise Exception('Server rejected request: wrong '
                                'username or password')
            raise Exception('Wrong server response: %s %s' %
                            (res.status, res.reason))

        # Detach the socket again so the response object cannot close it.
        res.fp = None
        log.debug('Connection established')
        return conn

    def urlopen(self, method, url, body=None, headers=None, retries=3,
                redirect=True, assert_same_host=True):
        """Delegate to the parent ``urlopen`` with ``Connection: Keep-Alive``
        set on the request headers.

        NOTE: when the caller supplies ``headers``, that dict is modified in
        place.
        """
        if headers is None:
            headers = {}
        headers['Connection'] = 'Keep-Alive'
        return super(NTLMConnectionPool, self).urlopen(method, url, body,
                                                       headers, retries,
                                                       redirect,
                                                       assert_same_host)
class TimeoutError(HTTPError):
    """ Raised when a socket timeout error occurs.

    Catching this error will catch both :exc:`ReadTimeoutErrors
    <ReadTimeoutError>` and :exc:`ConnectTimeoutErrors <ConnectTimeoutError>`.
    """
    pass
class RequestField(object):
    """
    Container for one parameter of a request body.

    :param name:
        The name of this request field.
    :param data:
        The data/value body.
    :param filename:
        An optional filename of the request field.
    :param headers:
        An optional dict-like object of headers to initially use for the field.
    """

    def __init__(self, name, data, filename=None, headers=None):
        self._name = name
        self._filename = filename
        self.data = data
        # Copy the supplied headers so the caller's mapping is not shared.
        self.headers = dict(headers) if headers else {}

    @classmethod
    def from_tuples(cls, fieldname, value):
        """
        Build a :class:`~urllib3.fields.RequestField` from old-style
        parameters.

        ``value`` is either the plain field data, or a filetuple of
        ``(filename, data)`` / ``(filename, data, MIME type)``. When the MIME
        type is omitted from a filetuple it is guessed from the filename.
        For example::

            'foo': 'bar',
            'fakefile': ('foofile.txt', 'contents of foofile'),
            'realfile': ('barfile.txt', open('realfile').read()),
            'typedfile': ('bazfile.bin', open('bazfile').read(), 'image/jpeg'),
            'nonamefile': 'contents of nonamefile field',

        Field names and filenames must be unicode.
        """
        if not isinstance(value, tuple):
            filename, data, content_type = None, value, None
        elif len(value) == 3:
            filename, data, content_type = value
        else:
            filename, data = value
            content_type = guess_content_type(filename)

        field = cls(fieldname, data, filename=filename)
        field.make_multipart(content_type=content_type)
        return field

    def _render_part(self, name, value):
        """
        Overridable hook that formats a single header parameter.

        :param name:
            The name of the parameter, a string expected to be ASCII only.
        :param value:
            The value of the parameter, provided as a unicode string.
        """
        return format_header_param(name, value)

    def _render_parts(self, header_parts):
        """
        Format several header parameters and join them with ``'; '``.

        Pairs whose value is falsy are left out.

        :param header_parts:
            A sequence of (k, v) tuples or a :class:`dict` of (k, v) to format
            as `k1="v1"; k2="v2"; ...`.
        """
        if isinstance(header_parts, dict):
            pairs = header_parts.items()
        else:
            pairs = header_parts

        return '; '.join(
            self._render_part(key, val) for key, val in pairs if val
        )

    def render_headers(self):
        """
        Render the headers of this field as a CRLF-terminated header block.

        The three well-known headers come first, in a fixed order, followed
        by every other header; headers with a falsy value are skipped.
        """
        leading = ['Content-Disposition', 'Content-Type', 'Content-Location']

        rendered = []
        for key in leading:
            if self.headers.get(key, False):
                rendered.append('%s: %s' % (key, self.headers[key]))

        rendered.extend(
            '%s: %s' % (key, val)
            for key, val in self.headers.items()
            if key not in leading and val
        )

        rendered.append('\r\n')
        return '\r\n'.join(rendered)

    def make_multipart(self, content_disposition=None, content_type=None,
                       content_location=None):
        """
        Turn this field into a multipart request field.

        Overwrites the "Content-Disposition", "Content-Type" and
        "Content-Location" headers of the field.

        :param content_disposition:
            The 'Content-Disposition' type; 'form-data' when falsy.
        :param content_type:
            The 'Content-Type' of the request body.
        :param content_location:
            The 'Content-Location' of the request body.
        """
        self.headers['Content-Disposition'] = content_disposition or 'form-data'
        params = self._render_parts(
            (('name', self._name), ('filename', self._filename))
        )
        self.headers['Content-Disposition'] += '; ' + params
        self.headers['Content-Type'] = content_type
        self.headers['Content-Location'] = content_location
def encode_multipart_formdata(fields, boundary=None):
    """
    Encode ``fields`` as a multipart/form-data body.

    :param fields:
        Dictionary of fields or list of (key, :class:`~urllib3.fields.RequestField`).

    :param boundary:
        Boundary string to use; when ``None`` one is generated with
        :func:`choose_boundary`.

    Returns a ``(body_bytes, content_type)`` tuple.
    """
    if boundary is None:
        boundary = choose_boundary()

    buf = BytesIO()
    # UTF-8 stream writer used for every text (non-bytes) chunk.
    text_out = writer(buf)

    for part in iter_field_objects(fields):
        buf.write(b('--%s\r\n' % (boundary)))
        text_out.write(part.render_headers())

        payload = part.data
        if isinstance(payload, int):
            payload = str(payload)  # Backwards compatibility

        if isinstance(payload, six.text_type):
            text_out.write(payload)
        else:
            buf.write(payload)

        buf.write(b'\r\n')

    buf.write(b('--%s--\r\n' % (boundary)))

    return buf.getvalue(), str('multipart/form-data; boundary=%s' % boundary)
import ssl_match_hostname 4 | 5 | -------------------------------------------------------------------------------- /libs/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py: -------------------------------------------------------------------------------- 1 | try: 2 | # Python 3.2+ 3 | from ssl import CertificateError, match_hostname 4 | except ImportError: 5 | try: 6 | # Backport of the function from a pypi module 7 | from backports.ssl_match_hostname import CertificateError, match_hostname 8 | except ImportError: 9 | # Our vendored copy 10 | from ._implementation import CertificateError, match_hostname 11 | 12 | # Not needed, but documenting what we provide. 13 | __all__ = ('CertificateError', 'match_hostname') 14 | -------------------------------------------------------------------------------- /libs/requests/packages/urllib3/packages/ssl_match_hostname/_implementation.py: -------------------------------------------------------------------------------- 1 | """The match_hostname() function from Python 3.3.3, essential when using SSL.""" 2 | 3 | # Note: This file is under the PSF license as the code comes from the python 4 | # stdlib. http://docs.python.org/3/license.html 5 | 6 | import re 7 | 8 | __version__ = '3.4.0.2' 9 | 10 | class CertificateError(ValueError): 11 | pass 12 | 13 | 14 | def _dnsname_match(dn, hostname, max_wildcards=1): 15 | """Matching according to RFC 6125, section 6.4.3 16 | 17 | http://tools.ietf.org/html/rfc6125#section-6.4.3 18 | """ 19 | pats = [] 20 | if not dn: 21 | return False 22 | 23 | # Ported from python3-syntax: 24 | # leftmost, *remainder = dn.split(r'.') 25 | parts = dn.split(r'.') 26 | leftmost = parts[0] 27 | remainder = parts[1:] 28 | 29 | wildcards = leftmost.count('*') 30 | if wildcards > max_wildcards: 31 | # Issue #17980: avoid denials of service by refusing more 32 | # than one wildcard per fragment. A survey of established 33 | # policy among SSL implementations showed it to be a 34 | # reasonable choice. 
35 | raise CertificateError( 36 | "too many wildcards in certificate DNS name: " + repr(dn)) 37 | 38 | # speed up common case w/o wildcards 39 | if not wildcards: 40 | return dn.lower() == hostname.lower() 41 | 42 | # RFC 6125, section 6.4.3, subitem 1. 43 | # The client SHOULD NOT attempt to match a presented identifier in which 44 | # the wildcard character comprises a label other than the left-most label. 45 | if leftmost == '*': 46 | # When '*' is a fragment by itself, it matches a non-empty dotless 47 | # fragment. 48 | pats.append('[^.]+') 49 | elif leftmost.startswith('xn--') or hostname.startswith('xn--'): 50 | # RFC 6125, section 6.4.3, subitem 3. 51 | # The client SHOULD NOT attempt to match a presented identifier 52 | # where the wildcard character is embedded within an A-label or 53 | # U-label of an internationalized domain name. 54 | pats.append(re.escape(leftmost)) 55 | else: 56 | # Otherwise, '*' matches any dotless string, e.g. www* 57 | pats.append(re.escape(leftmost).replace(r'\*', '[^.]*')) 58 | 59 | # add the remaining fragments, ignore any wildcards 60 | for frag in remainder: 61 | pats.append(re.escape(frag)) 62 | 63 | pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) 64 | return pat.match(hostname) 65 | 66 | 67 | def match_hostname(cert, hostname): 68 | """Verify that *cert* (in decoded format as returned by 69 | SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125 70 | rules are followed, but IP addresses are not accepted for *hostname*. 71 | 72 | CertificateError is raised on failure. On success, the function 73 | returns nothing. 
class RequestMethods(object):
    """
    Convenience mixin for classes who implement a :meth:`urlopen` method, such
    as :class:`~urllib3.connectionpool.HTTPConnectionPool` and
    :class:`~urllib3.poolmanager.PoolManager`.

    Provides behavior for making common types of HTTP request methods and
    decides which type of request field encoding to use.

    Specifically,

    :meth:`.request_encode_url` is for sending requests whose fields are
    encoded in the URL (such as GET, HEAD, DELETE).

    :meth:`.request_encode_body` is for sending requests whose fields are
    encoded in the *body* of the request using multipart or www-form-urlencoded
    (such as for POST, PUT, PATCH).

    :meth:`.request` is for making any kind of request, it will look up the
    appropriate encoding format and use one of the above two methods to make
    the request.

    Initializer parameters:

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.
    """

    # Methods whose fields are encoded into the URL rather than the body.
    _encode_url_methods = set(['DELETE', 'GET', 'HEAD', 'OPTIONS'])

    def __init__(self, headers=None):
        self.headers = headers or {}

    def urlopen(self, method, url, body=None, headers=None,
                encode_multipart=True, multipart_boundary=None,
                **kw):  # Abstract
        """
        Abstract hook that performs the actual request.

        :raises NotImplementedError: always; subclasses must override it.
        """
        # NotImplemented is a comparison sentinel, not an exception class;
        # calling it raises TypeError. NotImplementedError is the right one.
        raise NotImplementedError("Classes extending RequestMethods must "
                                  "implement their own ``urlopen`` method.")

    def request(self, method, url, fields=None, headers=None, **urlopen_kw):
        """
        Make a request using :meth:`urlopen` with the appropriate encoding of
        ``fields`` based on the ``method`` used.

        This is a convenience method that requires the least amount of manual
        effort. It can be used in most situations, while still having the
        option to drop down to more specific methods when necessary, such as
        :meth:`request_encode_url`, :meth:`request_encode_body`,
        or even the lowest level :meth:`urlopen`.
        """
        method = method.upper()

        if method in self._encode_url_methods:
            return self.request_encode_url(method, url, fields=fields,
                                           headers=headers,
                                           **urlopen_kw)
        else:
            return self.request_encode_body(method, url, fields=fields,
                                            headers=headers,
                                            **urlopen_kw)

    def request_encode_url(self, method, url, fields=None, **urlopen_kw):
        """
        Make a request using :meth:`urlopen` with the ``fields`` encoded in
        the url. This is useful for request methods like GET, HEAD, DELETE, etc.

        All other keyword arguments (including ``headers``) are passed to
        :meth:`urlopen` unchanged.
        """
        if fields:
            url += '?' + urlencode(fields)
        return self.urlopen(method, url, **urlopen_kw)

    def request_encode_body(self, method, url, fields=None, headers=None,
                            encode_multipart=True, multipart_boundary=None,
                            **urlopen_kw):
        """
        Make a request using :meth:`urlopen` with the ``fields`` encoded in
        the body. This is useful for request methods like POST, PUT, PATCH, etc.

        When ``encode_multipart=True`` (default), then
        :meth:`urllib3.filepost.encode_multipart_formdata` is used to encode
        the payload with the appropriate content type. Otherwise
        :meth:`urllib.urlencode` is used with the
        'application/x-www-form-urlencoded' content type.

        Multipart encoding must be used when posting files, and it's reasonably
        safe to use it in other times too. However, it may break request
        signing, such as with OAuth.

        Supports an optional ``fields`` parameter of key/value strings AND
        key/filetuple. A filetuple is a (filename, data, MIME type) tuple where
        the MIME type is optional. For example::

            fields = {
                'foo': 'bar',
                'fakefile': ('foofile.txt', 'contents of foofile'),
                'realfile': ('barfile.txt', open('realfile').read()),
                'typedfile': ('bazfile.bin', open('bazfile').read(),
                              'image/jpeg'),
                'nonamefile': 'contents of nonamefile field',
            }

        When uploading a file, providing a filename (the first parameter of the
        tuple) is optional but recommended to best mimic behavior of browsers.

        Note that if ``headers`` are supplied, the 'Content-Type' header will
        be overwritten because it depends on the dynamic random boundary string
        which is used to compose the body of the request. The random boundary
        string can be explicitly set with the ``multipart_boundary`` parameter.

        :raises TypeError: if both ``fields`` and a ``body`` keyword are given.
        """
        if headers is None:
            headers = self.headers

        extra_kw = {'headers': {}}

        if fields:
            if 'body' in urlopen_kw:
                raise TypeError('request got values for both \'fields\' and \'body\', can only specify one.')

            if encode_multipart:
                body, content_type = encode_multipart_formdata(fields, boundary=multipart_boundary)
            else:
                body, content_type = urlencode(fields), 'application/x-www-form-urlencoded'

            extra_kw['body'] = body
            extra_kw['headers'] = {'Content-Type': content_type}

        # Caller/instance headers are merged into a fresh dict, so neither
        # ``headers`` nor ``self.headers`` is modified.
        extra_kw['headers'].update(headers)
        extra_kw.update(urlopen_kw)

        return self.urlopen(method, url, **extra_kw)
def is_connection_dropped(conn):  # Platform-specific
    """
    Returns True if the connection is dropped and should be closed.

    :param conn:
        :class:`httplib.HTTPConnection` object.

    Always returns a real ``bool``. A connection counts as dropped when its
    ``sock`` is ``None``, when its socket is readable while idle (buffered
    data or EOF), or when probing the socket with ``select`` fails.

    Note: For platforms like AppEngine, this will always return ``False`` to
    let the platform handle connection recycling transparently for us.
    """
    sock = getattr(conn, 'sock', False)
    if sock is False:  # Platform-specific: AppEngine
        return False
    if sock is None:  # Connection already closed (such as by httplib).
        return True

    if not poll:
        if not select:  # Platform-specific: AppEngine
            return False

        try:
            # select() returns the list of readable sockets; reduce it to a
            # bool so callers get the documented True/False.
            return bool(select([sock], [], [], 0.0)[0])
        except socket.error:
            return True

    # This version is better on platforms that support it.
    p = poll()
    p.register(sock, POLLIN)
    for (fno, ev) in p.poll(0.0):
        if fno == sock.fileno():
            # Either data is buffered (bad), or the connection is dropped.
            return True

    # Nothing readable: the connection is still usable.
    return False
def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
                 basic_auth=None, proxy_basic_auth=None, disable_cache=None):
    """
    Build a dict of common request headers from shortcut arguments.

    :param keep_alive:
        If ``True``, adds 'connection: keep-alive' header.

    :param accept_encoding:
        Can be a boolean, list, or string.
        ``True`` translates to 'gzip,deflate'.
        List will get joined by comma.
        String will be used as provided.

    :param user_agent:
        String representing the user-agent you want, such as
        "python-urllib3/0.6"

    :param basic_auth:
        Colon-separated username:password string for 'authorization: basic ...'
        auth header.

    :param proxy_basic_auth:
        Colon-separated username:password string for 'proxy-authorization: basic ...'
        auth header.

    :param disable_cache:
        If ``True``, adds 'cache-control: no-cache' header.

    Example::

        >>> make_headers(keep_alive=True, user_agent="Batman/1.0")
        {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'}
        >>> make_headers(accept_encoding=True)
        {'accept-encoding': 'gzip,deflate'}
    """
    result = {}

    if accept_encoding:
        if isinstance(accept_encoding, list):
            encoding = ','.join(accept_encoding)
        elif isinstance(accept_encoding, str):
            encoding = accept_encoding
        else:
            # Any other truthy value (e.g. ``True``) selects the default.
            encoding = ACCEPT_ENCODING
        result['accept-encoding'] = encoding

    if user_agent:
        result['user-agent'] = user_agent

    if keep_alive:
        result['connection'] = 'keep-alive'

    # Both credential headers share the same "Basic <base64>" encoding.
    credentials = (
        ('authorization', basic_auth),
        ('proxy-authorization', proxy_basic_auth),
    )
    for header_name, userpass in credentials:
        if userpass:
            token = b64encode(b(userpass)).decode('utf-8')
            result[header_name] = 'Basic ' + token

    if disable_cache:
        result['cache-control'] = 'no-cache'

    return result
18 | return obj.fp is None 19 | except AttributeError: 20 | pass 21 | 22 | raise ValueError("Unable to determine whether fp is closed.") 23 | -------------------------------------------------------------------------------- /libs/requests/packages/urllib3/util/ssl_.py: -------------------------------------------------------------------------------- 1 | from binascii import hexlify, unhexlify 2 | from hashlib import md5, sha1 3 | 4 | from ..exceptions import SSLError 5 | 6 | 7 | SSLContext = None 8 | HAS_SNI = False 9 | create_default_context = None 10 | 11 | import errno 12 | import ssl 13 | 14 | try: # Test for SSL features 15 | from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23 16 | from ssl import HAS_SNI # Has SNI? 17 | except ImportError: 18 | pass 19 | 20 | 21 | try: 22 | from ssl import OP_NO_SSLv2, OP_NO_SSLv3, OP_NO_COMPRESSION 23 | except ImportError: 24 | OP_NO_SSLv2, OP_NO_SSLv3 = 0x1000000, 0x2000000 25 | OP_NO_COMPRESSION = 0x20000 26 | 27 | try: 28 | from ssl import _DEFAULT_CIPHERS 29 | except ImportError: 30 | _DEFAULT_CIPHERS = ( 31 | 'ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES:ECDH+HIGH:' 32 | 'DH+HIGH:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+HIGH:RSA+3DES:ECDH+RC4:' 33 | 'DH+RC4:RSA+RC4:!aNULL:!eNULL:!MD5' 34 | ) 35 | 36 | try: 37 | from ssl import SSLContext # Modern SSL? 
def assert_fingerprint(cert, fingerprint):
    """
    Check that the given fingerprint matches the supplied certificate.

    :param cert:
        Certificate as bytes object.
    :param fingerprint:
        Fingerprint as string of hexdigits, can be interspersed by colons.
    :raises SSLError:
        If the fingerprint has an unsupported length, is not valid
        hexadecimal, or does not match the digest of ``cert``.
    """

    # Maps the length of a digest to a possible hash function producing
    # this digest.
    hashfunc_map = {
        16: md5,
        20: sha1
    }

    fingerprint = fingerprint.replace(':', '').lower()
    digest_length, odd = divmod(len(fingerprint), 2)

    if odd or digest_length not in hashfunc_map:
        raise SSLError('Fingerprint is of invalid length.')

    # We need encode() here for py32; works on py2 and p33.
    try:
        fingerprint_bytes = unhexlify(fingerprint.encode())
    except (TypeError, ValueError):
        # unhexlify signals non-hex input with TypeError on Python 2 and
        # binascii.Error (a ValueError) on Python 3; report both the same
        # way as every other bad-fingerprint condition.
        raise SSLError('Fingerprint is not valid hexadecimal.')

    hashfunc = hashfunc_map[digest_length]

    cert_digest = hashfunc(cert).digest()

    if cert_digest != fingerprint_bytes:
        # hexlify() returns bytes on Python 3; decode so the message shows
        # plain hex digits instead of a b'...' repr.
        raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".'
                       .format(hexlify(fingerprint_bytes).decode('ascii'),
                               hexlify(cert_digest).decode('ascii')))
def resolve_ssl_version(candidate):
    """
    Resolve the argument to a protocol constant of the :mod:`ssl` module.

    ``None`` resolves to ``PROTOCOL_SSLv23``. A string is looked up as an
    attribute of :mod:`ssl`, first verbatim and then with a ``PROTOCOL_``
    prefix (so ``'SSLv23'`` works as well as ``'PROTOCOL_SSLv23'``); an
    unknown name raises :exc:`AttributeError`. Any other value is returned
    unchanged.
    """
    if candidate is None:
        return PROTOCOL_SSLv23

    if not isinstance(candidate, str):
        return candidate

    resolved = getattr(ssl, candidate, None)
    if resolved is not None:
        return resolved
    return getattr(ssl, 'PROTOCOL_' + candidate)
191 | :returns: 192 | Constructed SSLContext object with specified options 193 | :rtype: SSLContext 194 | """ 195 | context = SSLContext(ssl_version or ssl.PROTOCOL_SSLv23) 196 | 197 | if options is None: 198 | options = 0 199 | # SSLv2 is easily broken and is considered harmful and dangerous 200 | options |= OP_NO_SSLv2 201 | # SSLv3 has several problems and is now dangerous 202 | options |= OP_NO_SSLv3 203 | # Disable compression to prevent CRIME attacks for OpenSSL 1.0+ 204 | # (issue #309) 205 | options |= OP_NO_COMPRESSION 206 | 207 | context.options |= options 208 | 209 | if getattr(context, 'supports_set_ciphers', True): # Platform-specific: Python 2.6 210 | context.set_ciphers(ciphers or _DEFAULT_CIPHERS) 211 | 212 | context.verify_mode = cert_reqs 213 | if getattr(context, 'check_hostname', None) is not None: # Platform-specific: Python 3.2 214 | context.check_hostname = (context.verify_mode == ssl.CERT_REQUIRED) 215 | return context 216 | 217 | 218 | def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None, 219 | ca_certs=None, server_hostname=None, 220 | ssl_version=None, ciphers=None, ssl_context=None): 221 | """ 222 | All arguments except for server_hostname and ssl_context have the same 223 | meaning as they do when using :func:`ssl.wrap_socket`. 224 | 225 | :param server_hostname: 226 | When SNI is supported, the expected hostname of the certificate 227 | :param ssl_context: 228 | A pre-made :class:`SSLContext` object. If none is provided, one will 229 | be created using :func:`create_urllib3_context`. 230 | :param ciphers: 231 | A string of ciphers we wish the client to support. This is not 232 | supported on Python 2.6 as the ssl module does not support it. 
from collections import namedtuple


url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment']


class Url(namedtuple('Url', url_attrs)):
    """
    Datastructure for representing an HTTP URL. Used as a return value for
    :func:`parse_url`.

    Every field defaults to ``None``.
    """
    # Must be spelled ``__slots__``: a plain ``slots`` attribute does nothing
    # and every instance would still carry a per-instance ``__dict__``.
    __slots__ = ()

    def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None,
                query=None, fragment=None):
        return super(Url, cls).__new__(cls, scheme, auth, host, port, path,
                                       query, fragment)

    @property
    def hostname(self):
        """For backwards-compatibility with urlparse. We're nice like that."""
        return self.host

    @property
    def request_uri(self):
        """Absolute path including the query string."""
        uri = self.path or '/'

        if self.query is not None:
            uri += '?' + self.query

        return uri

    @property
    def netloc(self):
        """Network location including host and port"""
        # Truthiness test: a port of None (or 0) yields the bare host.
        if self.port:
            return '%s:%d' % (self.host, self.port)
        return self.host

    @property
    def url(self):
        """
        Convert self into a url

        This function should more or less round-trip with :func:`.parse_url`. The
        returned url may not be exactly the same as the url inputted to
        :func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls
        with a blank port will have : removed).

        Example: ::

            >>> U = parse_url('http://google.com/mail/')
            >>> U.url
            'http://google.com/mail/'
            >>> Url('http', 'username:password', 'host.com', 80,
            ... '/path', 'query', 'fragment').url
            'http://username:password@host.com:80/path?query#fragment'
        """
        scheme, auth, host, port, path, query, fragment = self
        url = ''

        # We use "is not None" because we want things to happen with empty
        # strings (or 0 port)
        if scheme is not None:
            url += scheme + '://'
        if auth is not None:
            url += auth + '@'
        if host is not None:
            url += host
        if port is not None:
            url += ':' + str(port)
        if path is not None:
            url += path
        if query is not None:
            url += '?' + query
        if fragment is not None:
            url += '#' + fragment

        return url

    def __str__(self):
        return self.url
def split_first(s, delims):
    """
    Given a string and an iterable of delimiters, split on the first found
    delimiter. Return two split parts and the matched delimiter.

    If not found, then the first part is the full input string.

    Example::

        >>> split_first('foo/bar?baz', '?/=')
        ('foo', 'bar?baz', '/')
        >>> split_first('foo/bar?baz', '123')
        ('foo/bar?baz', '', None)

    Scales linearly with number of delims. Not ideal for large number of delims.
    """
    min_idx = None
    min_delim = None
    for d in delims:
        idx = s.find(d)
        if idx < 0:
            continue

        if min_idx is None or idx < min_idx:
            min_idx = idx
            min_delim = d

    if min_idx is None or min_idx < 0:
        return s, '', None

    # The remainder starts one character past the match, i.e. the slice
    # skips exactly one character for the delimiter.
    return s[:min_idx], s[min_idx+1:], min_delim


def parse_url(url):
    """
    Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
    performed to parse incomplete urls. Fields not provided will be None.

    Partly backwards-compatible with :mod:`urlparse`.

    :raises LocationParseError:
        If the port is not a plain integer, or an IPv6 literal is opened
        with ``[`` but never closed with ``]``.

    Example::

        >>> parse_url('http://google.com/mail/')
        Url(scheme='http', host='google.com', port=None, path='/mail/', ...)
        >>> parse_url('google.com:80')
        Url(scheme=None, host='google.com', port=80, path=None, ...)
        >>> parse_url('/foo?bar')
        Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)
    """

    # While this code has overlap with stdlib's urlparse, it is much
    # simplified for our needs and less annoying.
    # Additionally, this implementation does silly things to be optimal
    # on CPython.

    if not url:
        # Empty
        return Url()

    scheme = None
    auth = None
    host = None
    port = None
    path = None
    fragment = None
    query = None

    # Scheme
    if '://' in url:
        scheme, url = url.split('://', 1)

    # Find the earliest Authority Terminator
    # (http://tools.ietf.org/html/rfc3986#section-3.2)
    url, path_, delim = split_first(url, ['/', '?', '#'])

    if delim:
        # Reassemble the path
        path = delim + path_

    # Auth
    if '@' in url:
        # Last '@' denotes end of auth part
        auth, url = url.rsplit('@', 1)

    # IPv6
    if url and url[0] == '[':
        if ']' not in url:
            # Unterminated literal: without this guard the tuple unpacking
            # below fails with a bare ValueError instead of a parse error.
            raise LocationParseError(url)
        host, url = url.split(']', 1)
        host += ']'

    # Port
    if ':' in url:
        _host, port = url.split(':', 1)

        if not host:
            host = _host

        if port:
            # If given, ports must be integers.
            if not port.isdigit():
                raise LocationParseError(url)
            try:
                port = int(port)
            except ValueError:
                # isdigit() also accepts digits such as superscript two,
                # which int() rejects.
                raise LocationParseError(url)
        else:
            # Blank ports are cool, too. (rfc3986#section-3.2.3)
            port = None

    elif not host and url:
        host = url

    if not path:
        return Url(scheme, auth, host, port, path, query, fragment)

    # Fragment
    if '#' in path:
        path, fragment = path.split('#', 1)

    # Query
    if '?' in path:
        path, query = path.split('?', 1)

    return Url(scheme, auth, host, port, path, query, fragment)


def get_host(url):
    """
    Deprecated. Use :func:`.parse_url` instead.

    Returns a ``(scheme, hostname, port)`` tuple; the scheme falls back to
    ``'http'`` when the url does not carry one.
    """
    p = parse_url(url)
    return p.scheme or 'http', p.hostname, p.port
8 | 100: ('continue',), 9 | 101: ('switching_protocols',), 10 | 102: ('processing',), 11 | 103: ('checkpoint',), 12 | 122: ('uri_too_long', 'request_uri_too_long'), 13 | 200: ('ok', 'okay', 'all_ok', 'all_okay', 'all_good', '\\o/', '✓'), 14 | 201: ('created',), 15 | 202: ('accepted',), 16 | 203: ('non_authoritative_info', 'non_authoritative_information'), 17 | 204: ('no_content',), 18 | 205: ('reset_content', 'reset'), 19 | 206: ('partial_content', 'partial'), 20 | 207: ('multi_status', 'multiple_status', 'multi_stati', 'multiple_stati'), 21 | 208: ('already_reported',), 22 | 226: ('im_used',), 23 | 24 | # Redirection. 25 | 300: ('multiple_choices',), 26 | 301: ('moved_permanently', 'moved', '\\o-'), 27 | 302: ('found',), 28 | 303: ('see_other', 'other'), 29 | 304: ('not_modified',), 30 | 305: ('use_proxy',), 31 | 306: ('switch_proxy',), 32 | 307: ('temporary_redirect', 'temporary_moved', 'temporary'), 33 | 308: ('permanent_redirect', 34 | 'resume_incomplete', 'resume',), # These 2 to be removed in 3.0 35 | 36 | # Client Error. 
37 | 400: ('bad_request', 'bad'), 38 | 401: ('unauthorized',), 39 | 402: ('payment_required', 'payment'), 40 | 403: ('forbidden',), 41 | 404: ('not_found', '-o-'), 42 | 405: ('method_not_allowed', 'not_allowed'), 43 | 406: ('not_acceptable',), 44 | 407: ('proxy_authentication_required', 'proxy_auth', 'proxy_authentication'), 45 | 408: ('request_timeout', 'timeout'), 46 | 409: ('conflict',), 47 | 410: ('gone',), 48 | 411: ('length_required',), 49 | 412: ('precondition_failed', 'precondition'), 50 | 413: ('request_entity_too_large',), 51 | 414: ('request_uri_too_large',), 52 | 415: ('unsupported_media_type', 'unsupported_media', 'media_type'), 53 | 416: ('requested_range_not_satisfiable', 'requested_range', 'range_not_satisfiable'), 54 | 417: ('expectation_failed',), 55 | 418: ('im_a_teapot', 'teapot', 'i_am_a_teapot'), 56 | 422: ('unprocessable_entity', 'unprocessable'), 57 | 423: ('locked',), 58 | 424: ('failed_dependency', 'dependency'), 59 | 425: ('unordered_collection', 'unordered'), 60 | 426: ('upgrade_required', 'upgrade'), 61 | 428: ('precondition_required', 'precondition'), 62 | 429: ('too_many_requests', 'too_many'), 63 | 431: ('header_fields_too_large', 'fields_too_large'), 64 | 444: ('no_response', 'none'), 65 | 449: ('retry_with', 'retry'), 66 | 450: ('blocked_by_windows_parental_controls', 'parental_controls'), 67 | 451: ('unavailable_for_legal_reasons', 'legal_reasons'), 68 | 499: ('client_closed_request',), 69 | 70 | # Server Error. 
# -*- coding: utf-8 -*-

"""
requests.structures
~~~~~~~~~~~~~~~~~~~

Data structures that power Requests.

"""

try:
    from collections.abc import Mapping, MutableMapping
except ImportError:  # Python 2: the ABCs live directly in ``collections``
    from collections import Mapping, MutableMapping


class CaseInsensitiveDict(MutableMapping):
    """
    A case-insensitive ``dict``-like object.

    Implements all methods and operations of
    ``collections.MutableMapping`` as well as dict's ``copy``. Also
    provides ``lower_items``.

    All keys are expected to be strings. The structure remembers the
    case of the last key to be set, and ``iter(instance)``,
    ``keys()``, ``items()``, ``iterkeys()``, and ``iteritems()``
    will contain case-sensitive keys. However, querying and contains
    testing is case insensitive::

        cid = CaseInsensitiveDict()
        cid['Accept'] = 'application/json'
        cid['aCCEPT'] == 'application/json'  # True
        list(cid) == ['Accept']  # True

    For example, ``headers['content-encoding']`` will return the
    value of a ``'Content-Encoding'`` response header, regardless
    of how the header name was originally stored.

    If the constructor, ``.update``, or equality comparison
    operations are given keys that have equal ``.lower()``s, the
    behavior is undefined.

    """
    def __init__(self, data=None, **kwargs):
        # Maps lowercased key -> (key as last set, value).
        self._store = dict()
        if data is None:
            data = {}
        self.update(data, **kwargs)

    def __setitem__(self, key, value):
        # Use the lowercased key for lookups, but store the actual
        # key alongside the value.
        self._store[key.lower()] = (key, value)

    def __getitem__(self, key):
        return self._store[key.lower()][1]

    def __delitem__(self, key):
        del self._store[key.lower()]

    def __iter__(self):
        return (casedkey for casedkey, mappedvalue in self._store.values())

    def __len__(self):
        return len(self._store)

    def lower_items(self):
        """Like iteritems(), but with all lowercase keys."""
        return (
            (lowerkey, keyval[1])
            for (lowerkey, keyval)
            in self._store.items()
        )

    def __eq__(self, other):
        if isinstance(other, Mapping):
            other = CaseInsensitiveDict(other)
        else:
            return NotImplemented
        # Compare insensitively
        return dict(self.lower_items()) == dict(other.lower_items())

    # Copy is required
    def copy(self):
        # _store values are (original key, value) pairs, which is exactly
        # what the constructor's update() call accepts.
        return CaseInsensitiveDict(self._store.values())

    def __repr__(self):
        return str(dict(self.items()))


class LookupDict(dict):
    """Dictionary lookup object.

    Entries are stored as instance attributes (see ``__getitem__``), not as
    regular dict items.
    """

    def __init__(self, name=None):
        self.name = name
        super(LookupDict, self).__init__()

    def __repr__(self):
        # The format string needs a %s placeholder; an empty string raises
        # "not all arguments converted" on every repr() call.
        return '<lookup \'%s\'>' % (self.name)

    def __getitem__(self, key):
        # We allow fall-through here, so values default to None

        return self.__dict__.get(key, None)

    def get(self, key, default=None):
        return self.__dict__.get(key, default)
# -*- coding: utf-8 -*-

import os
import os.path


class File(object):
    """A file on disk, addressed by a path built from ``pathComponents``.

    ``content`` is a plain data attribute: it is ``None`` until
    :meth:`update` (or a comparison) loads the file text into it. The
    former ``content()`` method was removed: the instance attribute of the
    same name always shadowed it, and its body could only raise.
    """

    def __init__(self, *pathComponents):
        self._path = FileUtils.buildPath(*pathComponents)
        self.content = None

    @property
    def path(self):
        """The path this object was built with (read-only)."""
        return self._path

    @path.setter
    def path(self, value):
        # ``NotImplemented`` is a comparison sentinel, not an exception;
        # raise the real exception class instead.
        raise NotImplementedError('File.path is read-only')

    def isValid(self):
        return FileUtils.isFile(self.path)

    def exists(self):
        return FileUtils.exists(self.path)

    def canRead(self):
        return FileUtils.canRead(self.path)

    def canWrite(self):
        return FileUtils.canWrite(self.path)

    def read(self):
        """Return the whole file as one string (always re-reads from disk)."""
        return FileUtils.read(self.path)

    def update(self):
        """Refresh the cached ``content`` attribute from disk."""
        self.content = self.read()

    def _loadedContent(self):
        # Load lazily; an empty cached value is re-read, mirroring the
        # ``if not self.content`` test of the method this replaces.
        if not self.content:
            self.content = self.read()
        return self.content

    def getLines(self):
        """Yield the file's lines with newline characters removed."""
        for line in FileUtils.getLines(self.path):
            yield line

    def __cmp__(self, other):
        """Order two files by their text content (Python 2 protocol).

        :raises TypeError: if ``other`` is not a :class:`File`.
        """
        if not isinstance(other, File):
            raise TypeError('cannot compare File with %s'
                            % type(other).__name__)
        mine = self._loadedContent()
        theirs = other._loadedContent()
        # Same result as cmp(), which only exists on Python 2.
        return (mine > theirs) - (mine < theirs)

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        pass


class FileUtils(object):
    """Static helpers for path building, access checks and simple reads."""

    @staticmethod
    def buildPath(*pathComponents):
        """Join the components with ``os.path.join``; no components -> ''."""
        if pathComponents:
            path = os.path.join(*pathComponents)
        else:
            path = ''
        return path

    @staticmethod
    def exists(fileName):
        return os.access(fileName, os.F_OK)

    @staticmethod
    def canRead(fileName):
        """True only if the access check passes and the file really opens."""
        if not os.access(fileName, os.R_OK):
            return False
        try:
            with open(fileName):
                pass
        except IOError:
            return False
        return True

    @staticmethod
    def canWrite(fileName):
        return os.access(fileName, os.W_OK)

    @staticmethod
    def read(fileName):
        """Return the full text of ``fileName``."""
        with open(fileName, 'r') as fd:
            # One read() instead of concatenating the file line by line.
            return fd.read()

    @staticmethod
    def getLines(fileName):
        """Yield each line of ``fileName`` with '\\n' characters removed."""
        with open(fileName, 'r') as fd:
            for line in fd:
                yield line.replace('\n', '')

    @staticmethod
    def isDir(fileName):
        return os.path.isdir(fileName)

    @staticmethod
    def isFile(fileName):
        return os.path.isfile(fileName)

    @staticmethod
    def createDirectory(directory):
        """Create ``directory`` (and parents) unless the path already exists."""
        if not FileUtils.exists(directory):
            os.makedirs(directory)

    @staticmethod
    def sizeHuman(num):
        """Format a byte count using B/KB/MB/GB/TB with a base of 1024."""
        base = 1024
        for x in ['B ','KB','MB','GB']:
            if num < base and num > -base:
                return "%3.0f%s" % (num, x)
            num /= base
        return "%3.0f %s" % (num, 'TB')
#!/usr/bin/env python
# encoding: utf-8
# email: ringzero@0x557.org
# http://github.com/ring04h/weakfilescan

"""
weakfilescan
usage: python wyspider.py http://wuyun.org
"""

import sys
import libs.requests as requests
from controller import *

if __name__ == "__main__":
    # NOTE(review): json and start_wyspider are not imported by name here;
    # presumably they arrive through the controller star-import - confirm.
    #
    # The usage text advertises a second argument, but only the target url
    # (argv[1]) is passed on, so one and two arguments behave identically.
    if len(sys.argv) in (2, 3):
        # print(...) with a single argument emits the same text under the
        # Python 2 print statement and the Python 3 print function.
        print(json.dumps(start_wyspider(sys.argv[1]), indent=2))
        sys.exit(0)
    else:
        print("usage: %s http://wuyun.org php" % sys.argv[0])
        sys.exit(-1)