├── README.md
├── pytorctl
├── GeoIPSupport.py
├── LICENSE
├── PathSupport.py
├── README
├── SQLSupport.py
├── ScanSupport.py
├── StatsSupport.py
├── TorCtl.py
├── TorUtil.py
├── __init__.py
└── example.py
├── requests
├── __init__.py
├── adapters.py
├── api.py
├── auth.py
├── cacert.pem
├── certs.py
├── compat.py
├── cookies.py
├── exceptions.py
├── hooks.py
├── models.py
├── packages
│ ├── __init__.py
│ ├── chardet
│ │ ├── __init__.py
│ │ ├── big5freq.py
│ │ ├── big5prober.py
│ │ ├── chardetect.py
│ │ ├── chardistribution.py
│ │ ├── charsetgroupprober.py
│ │ ├── charsetprober.py
│ │ ├── codingstatemachine.py
│ │ ├── compat.py
│ │ ├── constants.py
│ │ ├── cp949prober.py
│ │ ├── escprober.py
│ │ ├── escsm.py
│ │ ├── eucjpprober.py
│ │ ├── euckrfreq.py
│ │ ├── euckrprober.py
│ │ ├── euctwfreq.py
│ │ ├── euctwprober.py
│ │ ├── gb2312freq.py
│ │ ├── gb2312prober.py
│ │ ├── hebrewprober.py
│ │ ├── jisfreq.py
│ │ ├── jpcntx.py
│ │ ├── langbulgarianmodel.py
│ │ ├── langcyrillicmodel.py
│ │ ├── langgreekmodel.py
│ │ ├── langhebrewmodel.py
│ │ ├── langhungarianmodel.py
│ │ ├── langthaimodel.py
│ │ ├── latin1prober.py
│ │ ├── mbcharsetprober.py
│ │ ├── mbcsgroupprober.py
│ │ ├── mbcssm.py
│ │ ├── sbcharsetprober.py
│ │ ├── sbcsgroupprober.py
│ │ ├── sjisprober.py
│ │ ├── universaldetector.py
│ │ └── utf8prober.py
│ └── urllib3
│ │ ├── __init__.py
│ │ ├── _collections.py
│ │ ├── connection.py
│ │ ├── connectionpool.py
│ │ ├── contrib
│ │ ├── __init__.py
│ │ ├── ntlmpool.py
│ │ └── pyopenssl.py
│ │ ├── exceptions.py
│ │ ├── fields.py
│ │ ├── filepost.py
│ │ ├── packages
│ │ ├── __init__.py
│ │ ├── ordered_dict.py
│ │ ├── six.py
│ │ └── ssl_match_hostname
│ │ │ ├── __init__.py
│ │ │ └── _implementation.py
│ │ ├── poolmanager.py
│ │ ├── request.py
│ │ ├── response.py
│ │ └── util
│ │ ├── __init__.py
│ │ ├── connection.py
│ │ ├── request.py
│ │ ├── response.py
│ │ ├── retry.py
│ │ ├── ssl_.py
│ │ ├── timeout.py
│ │ └── url.py
├── sessions.py
├── status_codes.py
├── structures.py
└── utils.py
└── wytorproxy.py
/README.md:
--------------------------------------------------------------------------------
1 | # wytorproxy
2 | wyscan tor proxy lib & help doc
3 | (当你需要更多的IP,防止服务被屏蔽,写一个tor的控制脚本,就能每分钟刷新一下tor洋葱池的IP)
4 |
5 | BUG反馈
6 | -----------------------------------
7 | > 微博:http://weibo.com/ringzero
8 | > 邮箱:ringzero@0x557.org
9 |
10 | #### 使用实例
11 | > [root@10-8-11-221 wytorproxy]# python wytorproxy.py
12 | > {"code":0,"data":{"country":"美国","country_id":"US","area":"","area_id":"","region":"","region_id":"","city":"","city_id":"","county":"","county_id":"","isp":"","isp_id":"","ip":"96.47.226.20"}}
13 |
14 | 安装配置tor & privoxy
15 | -----------------------------------
16 | ### 安装tor
17 | yum -y install libevent libevent-devel libpcap-devel openssl-devel
18 | wget http://tor.hermetix.org/dist/tor-0.2.1.25.tar.gz
19 | tar zvxf tor-0.2.1.25.tar.gz
20 | cd tor-0.2.1.25
21 | ./configure && make && make install
22 | /* 后台启动 */
23 | nohup tor &
24 |
25 | ### 安装 privoxy 并配置与tor连接
26 | yum -y install privoxy
27 | /* 配置privoxy连接tor路由 */
28 | echo 'forward-socks5 / 127.0.0.1:9050 .' >> /etc/privoxy/config
29 | /* 启动privoxy服务 */
30 | service privoxy start
31 |
32 | 使用TOR代理
33 | -----------------------------------
34 | > 127.0.0.1:8118
35 | > 此时127.0.0.1的8118端口就接入tor的网络,享用匿名IP服务了
36 |
37 | ### 使用iptables做NAT转换,映射到外网IP上供更多的服务器使用
38 | (如果你想在外网使用的话,下面是实现方法)
39 |
40 | sed -i '/net.ipv4.ip_forward/ s/\(.*= \).*/\11/' /etc/sysctl.conf
41 | sysctl -p
42 | iptables -t nat -A PREROUTING -p tcp -i eth0 --dport 8778 -j DNAT --to 127.0.0.1:8118
43 | iptables -t nat -A POSTROUTING -j MASQUERADE
44 | service iptables save
45 | service iptables restart
46 |
--------------------------------------------------------------------------------
/pytorctl/GeoIPSupport.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # Copyright 2007 Johannes Renner and Mike Perry. See LICENSE file.
3 |
4 | import struct
5 | import socket
6 | import TorCtl
7 | import StatsSupport
8 |
9 | from TorUtil import plog
# Best-effort import of the optional GeoIP C extension.  The module still
# imports without it, but country lookups (get_country & friends) will fail
# at call time.
try:
    import GeoIP
    # GeoIP data object: choose database here
    geoip = GeoIP.new(GeoIP.GEOIP_STANDARD)
    #geoip = GeoIP.open("./GeoLiteCity.dat", GeoIP.GEOIP_STANDARD)
except Exception:
    # Was a bare "except:", which also swallows KeyboardInterrupt/SystemExit;
    # Exception is narrow enough for an optional-dependency probe.
    plog("NOTICE", "No GeoIP library. GeoIPSupport.py will not work correctly")
    # XXX: How do we bail entirely..
18 |
19 |
class Continent:
    """One continent, keyed by its two-letter code.

    The group attribute partitions the continents into groups so that the
    number of ocean crossings along a path can be determined; countries
    lists the two-letter country codes assigned to this continent.  Both
    are filled in by module-level setup code after construction.
    """
    def __init__(self, continent_code):
        self.code = continent_code
        self.group = None
        self.countries = []

    def contains(self, country_code):
        """Return True iff country_code is assigned to this continent."""
        return any(cc == country_code for cc in self.countries)
30 |
# Set countries to continents.  The group numbers below partition the
# continents for ocean-crossing counting (see Continent): Africa, Asia and
# Europe share group 1, Oceania is group 2, and the Americas share group 0.
africa = Continent("AF")
africa.group = 1
africa.countries = ["AO","BF","BI","BJ","BV","BW","CD","CF","CG","CI","CM",
    "CV","DJ","DZ","EG","EH","ER","ET","GA","GH","GM","GN","GQ","GW","HM","KE",
    "KM","LR","LS","LY","MA","MG","ML","MR","MU","MW","MZ","NA","NE","NG","RE",
    "RW","SC","SD","SH","SL","SN","SO","ST","SZ","TD","TF","TG","TN","TZ","UG",
    "YT","ZA","ZM","ZR","ZW"]

asia = Continent("AS")
asia.group = 1
asia.countries = ["AP","AE","AF","AM","AZ","BD","BH","BN","BT","CC","CN","CX",
    "CY","GE","HK","ID","IL","IN","IO","IQ","IR","JO","JP","KG","KH","KP","KR",
    "KW","KZ","LA","LB","LK","MM","MN","MO","MV","MY","NP","OM","PH","PK","PS",
    "QA","RU","SA","SG","SY","TH","TJ","TM","TP","TR","TW","UZ","VN","YE"]

europe = Continent("EU")
europe.group = 1
europe.countries = ["EU","AD","AL","AT","BA","BE","BG","BY","CH","CZ","DE",
    "DK","EE","ES","FI","FO","FR","FX","GB","GI","GR","HR","HU","IE","IS","IT",
    "LI","LT","LU","LV","MC","MD","MK","MT","NL","NO","PL","PT","RO","SE","SI",
    "SJ","SK","SM","UA","VA","YU"]

oceania = Continent("OC")
oceania.group = 2
oceania.countries = ["AS","AU","CK","FJ","FM","GU","KI","MH","MP","NC","NF",
    "NR","NU","NZ","PF","PG","PN","PW","SB","TK","TO","TV","UM","VU","WF","WS"]

north_america = Continent("NA")
north_america.group = 0
north_america.countries = ["CA","MX","US"]

# NOTE(review): this list mixes in Caribbean/Central American codes under the
# "SA" continent -- presumably intentional coarse grouping; verify if precise
# continent assignment ever matters.
south_america = Continent("SA")
south_america.group = 0
south_america.countries = ["AG","AI","AN","AR","AW","BB","BM","BO","BR","BS",
    "BZ","CL","CO","CR","CU","DM","DO","EC","FK","GD","GF","GL","GP","GS","GT",
    "GY","HN","HT","JM","KN","KY","LC","MQ","MS","NI","PA","PE","PM","PR","PY",
    "SA","SR","SV","TC","TT","UY","VC","VE","VG","VI"]

# List of continents, searched in order by get_continent()
continents = [africa, asia, europe, north_america, oceania, south_america]
72 |
def get_continent(country_code):
    """ Map a country code onto its Continent object (None if unknown) """
    found = next((cont for cont in continents if cont.contains(country_code)),
                 None)
    if found is None:
        plog("INFO", country_code + " is not on any continent")
    return found
80 |
def get_country(ip):
    """ Resolve a dotted-quad IP string to a country code via the library """
    country = geoip.country_code_by_addr(ip)
    return country
84 |
def get_country_from_record(ip):
    """ Get the country code out of a GeoLiteCity record (not used)

    Returns None when GeoIP has no record for this address.
    """
    record = geoip.record_by_addr(ip)
    # "is not None" instead of "!= None": identity is the correct way to
    # test for None; also make the miss-path return explicit.
    if record is not None:
        return record['country_code']
    return None
90 |
class GeoIPRouter(TorCtl.Router):
    # TODO: Its really shitty that this has to be a TorCtl.Router
    # and can't be a StatsRouter..
    """ Router class extended to GeoIP """
    def __init__(self, router):
        # Alias (not copy) the wrapped router's attribute dict so both
        # objects share state; the GeoIP attributes are bolted on top.
        self.__dict__ = router.__dict__
        self.country_code = get_country(self.get_ip_dotted())
        # Default both geo attributes so callers never hit AttributeError:
        # the original code left them unset when the country lookup
        # succeeded but the continent lookup failed (and left cont_group
        # unset when the country lookup failed).
        self.continent = None
        self.cont_group = None
        if self.country_code is not None:
            c = get_continent(self.country_code)
            if c is not None:
                self.continent = c.code
                self.cont_group = c.group
        else:
            plog("INFO", self.nickname + ": Country code not found")

    def get_ip_dotted(self):
        """ Convert long int back to dotted quad string """
        return socket.inet_ntoa(struct.pack('>I', self.ip))
110 |
class GeoIPConfig:
    """ Class to configure GeoIP-based path building """
    def __init__(self, unique_countries=None, continent_crossings=4,
       ocean_crossings=None, entry_country=None, middle_country=None,
       exit_country=None, excludes=None):
        # TODO: Somehow ensure validity of a configuration:
        # - continent_crossings >= ocean_crossings
        # - unique_countries=False --> continent_crossings!=None
        # - echelon? set entry_country to source and exit_country to None

        # Pin a specific country (single country code or None) to each of
        # the entry, middle and exit positions of a route.
        self.entry_country = entry_country
        self.middle_country = middle_country
        self.exit_country = exit_country

        # Countries that must never appear in a route
        # [(empty) list of country codes or None]
        self.excludes = excludes

        # Whether a country may be used at most once per route
        # [True --> unique, False --> same or None --> pass]
        self.unique_countries = unique_countries

        # Upper bounds on continent/ocean crossings within a single path
        # [integer number 0-n or None --> ContinentJumper/UniqueContinent]
        self.continent_crossings = continent_crossings
        self.ocean_crossings = ocean_crossings

        # Echelon mode (find an exit in the destination country, falling
        # back to exit_country when that country cannot be found) is off
        # by default.
        self.echelon = False
142 |
--------------------------------------------------------------------------------
/pytorctl/LICENSE:
--------------------------------------------------------------------------------
1 | ===============================================================================
2 | The Python Tor controller code is distributed under this license:
3 |
4 | Copyright 2005, Nick Mathewson, Roger Dingledine
5 | Copyright 2007-2010, Mike Perry
6 |
7 | Redistribution and use in source and binary forms, with or without
8 | modification, are permitted provided that the following conditions are
9 | met:
10 |
11 | * Redistributions of source code must retain the above copyright
12 | notice, this list of conditions and the following disclaimer.
13 |
14 | * Redistributions in binary form must reproduce the above
15 | copyright notice, this list of conditions and the following disclaimer
16 | in the documentation and/or other materials provided with the
17 | distribution.
18 |
19 | * Neither the names of the copyright owners nor the names of its
20 | contributors may be used to endorse or promote products derived from
21 | this software without specific prior written permission.
22 |
23 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 |
--------------------------------------------------------------------------------
/pytorctl/README:
--------------------------------------------------------------------------------
1 | Note: TorCtl is mostly unmaintained. It serves primarily as the support
2 | library for the Bandwidth Authorities, Exit Scanner, and other projects in
3 | TorFlow. For more actively maintained python libraries, you may consider using
4 | Stem or TxTorCon. See:
5 | https://stem.torproject.org/ and https://github.com/meejah/txtorcon
6 |
7 |
8 |
9 |
10 | TorCtl Python Bindings
11 |
12 |
13 | TorCtl is a python Tor controller with extensions to support path
14 | building and various constraints on node and path selection, as well as
15 | statistics gathering.
16 |
17 | Apps can hook into the TorCtl package at whatever level they wish.
18 |
19 | The lowest level of interaction is to use the TorCtl module
20 | (TorCtl/TorCtl.py). Typically this is done by importing TorCtl.TorCtl
21 | and creating a TorCtl.Connection and extending from TorCtl.EventHandler.
22 | This class receives Tor controller events packaged into python classes
23 | from a TorCtl.Connection.
24 |
25 | The next level up is to use the TorCtl.PathSupport module. This is done
26 | by importing TorCtl.PathSupport and instantiating or extending from
27 | PathSupport.PathBuilder, which itself extends from TorCtl.EventHandler.
28 | This class handles circuit construction and stream attachment subject to
29 | policies defined by PathSupport.NodeRestrictor and
30 | PathSupport.PathRestrictor implementations.
31 |
32 | If you are interested in gathering statistics, you can instead
33 | instantiate or extend from StatsSupport.StatsHandler, which is
34 | again an event handler with hooks to record statistics on circuit
35 | creation, stream bandwidth, and circuit failure information.
36 |
37 | All of these modules are pydoced. For more detailed information than
38 | the above overview, you can do:
39 |
40 | # pydoc TorCtl
41 | # pydoc PathSupport
42 | # pydoc StatsSupport
43 |
44 | There is a minimalistic example of usage of the basic TorCtl.Connection
45 | and TorCtl.EventHandler in run_example() in TorCtl.py in this directory.
46 | Other components also have unit tests at the end of their source files.
47 |
48 | For more extensive examples of the PathSupport and StatsSupport
49 | interfaces, see the TorFlow project at git url:
50 |
51 | git clone git://git.torproject.org/git/torflow.git
52 |
--------------------------------------------------------------------------------
/pytorctl/ScanSupport.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # Copyright 2009-2010 Mike Perry. See LICENSE file.
3 | import PathSupport
4 | import threading
5 | import copy
6 | import time
7 | import shutil
8 | import TorCtl
9 |
10 | from TorUtil import plog
11 |
# Placeholder for the lazily-imported SQLSupport module; SQLScanHandler
# performs the real import on first construction so that sqlalchemy is only
# required when SQL support is actually used.
SQLSupport = None
13 |
14 | # Note: be careful writing functions for this class. Remember that
15 | # the PathBuilder has its own thread that it receives events on
16 | # independent from your thread that calls into here.
class ScanHandler(PathSupport.PathBuilder):
    """Scanner-facing facade over PathSupport.PathBuilder.

    PathBuilder processes Tor controller events on its own thread, so the
    methods here hand a closure to that thread via schedule_selmgr()/
    schedule_low_prio()/schedule_immediate() and, when completion or a
    result is needed, block on a threading.Condition until the closure has
    run on the event thread.
    """
    def set_pct_rstr(self, percent_skip, percent_fast):
        # Fire-and-forget: update the selection manager's percentile window
        # from the event thread; no completion signal is required.
        def notlambda(sm):
            sm.percent_fast=percent_fast
            sm.percent_skip=percent_skip
        self.schedule_selmgr(notlambda)

    def reset_stats(self):
        # Fire-and-forget reset of the PathBuilder's accumulated stats.
        def notlambda(this):
            this.reset()
        self.schedule_low_prio(notlambda)

    def commit(self):
        """Block until all currently queued jobs have been run."""
        plog("INFO", "Scanner committing jobs...")
        cond = threading.Condition()
        def notlambda2(this):
            cond.acquire()
            this.run_all_jobs = False
            plog("INFO", "Commit done.")
            cond.notify()
            cond.release()

        def notlambda1(this):
            plog("INFO", "Committing jobs...")
            this.run_all_jobs = True
            # Queued from inside the event thread at low priority, so
            # notlambda2 only fires after the jobs released by
            # run_all_jobs have drained.
            self.schedule_low_prio(notlambda2)

        cond.acquire()
        self.schedule_immediate(notlambda1)

        cond.wait()
        cond.release()
        plog("INFO", "Scanner commit done.")

    def close_circuits(self):
        """Synchronously close all circuits on the event thread."""
        cond = threading.Condition()
        def notlambda(this):
            cond.acquire()
            this.close_all_circuits()
            cond.notify()
            cond.release()
        cond.acquire()
        self.schedule_low_prio(notlambda)
        cond.wait()
        cond.release()

    def close_streams(self, reason):
        """Synchronously close all streams with the given Tor reason code."""
        cond = threading.Condition()
        plog("NOTICE", "Wedged Tor stream. Closing all streams")
        def notlambda(this):
            cond.acquire()
            this.close_all_streams(reason)
            cond.notify()
            cond.release()
        cond.acquire()
        self.schedule_low_prio(notlambda)
        cond.wait()
        cond.release()

    def new_exit(self):
        """Request a fresh exit: set new_nym, clear bad restrictions, and
        flush Tor's DNS cache.  Blocks until done."""
        cond = threading.Condition()
        def notlambda(this):
            cond.acquire()
            this.new_nym = True
            if this.selmgr.bad_restrictions:
                plog("NOTICE", "Clearing bad restrictions with reconfigure..")
                this.selmgr.reconfigure(this.current_consensus())
            lines = this.c.sendAndRecv("SIGNAL CLEARDNSCACHE\r\n")
            for _,msg,more in lines:
                plog("DEBUG", msg)
            cond.notify()
            cond.release()
        cond.acquire()
        self.schedule_low_prio(notlambda)
        cond.wait()
        cond.release()

    def idhex_to_r(self, idhex):
        """Look up a Router by identity hex digest; None if unknown."""
        # The result is smuggled back on an ad-hoc attribute of the
        # Condition object.
        cond = threading.Condition()
        def notlambda(this):
            cond.acquire()
            # NOTE(review): reads self.routers rather than the `this`
            # callback argument -- presumably the same object; confirm.
            if idhex in self.routers:
                cond._result = self.routers[idhex]
            else:
                cond._result = None
            cond.notify()
            cond.release()
        cond.acquire()
        self.schedule_low_prio(notlambda)
        cond.wait()
        cond.release()
        return cond._result

    def name_to_idhex(self, nick):
        """Look up a router's identity hex digest by nickname; None if unknown."""
        cond = threading.Condition()
        def notlambda(this):
            cond.acquire()
            if nick in self.name_to_key:
                cond._result = self.name_to_key[nick]
            else:
                cond._result = None
            cond.notify()
            cond.release()
        cond.acquire()
        self.schedule_low_prio(notlambda)
        cond.wait()
        cond.release()
        return cond._result

    def rank_to_percent(self, rank):
        """Convert a bandwidth rank into a percentile of sorted_r."""
        cond = threading.Condition()
        def notlambda(this):
            cond.acquire()
            cond._pct = (100.0*rank)/len(this.sorted_r) # lol moar haxx
            cond.notify()
            cond.release()
        cond.acquire()
        self.schedule_low_prio(notlambda)
        cond.wait()
        cond.release()
        return cond._pct

    def percent_to_rank(self, pct):
        """Convert a percentile into a bandwidth rank into sorted_r."""
        cond = threading.Condition()
        def notlambda(this):
            cond.acquire()
            cond._rank = int(round((pct*len(this.sorted_r))/100.0,0)) # lol moar haxx
            cond.notify()
            cond.release()
        cond.acquire()
        self.schedule_low_prio(notlambda)
        cond.wait()
        cond.release()
        return cond._rank

    def get_exit_node(self):
        """Return a copy of the last exit used (or None)."""
        # Plain attribute read is safe from this thread under the GIL.
        ret = copy.copy(self.last_exit) # GIL FTW
        if ret:
            plog("DEBUG", "Got last exit of "+ret.idhex)
        else:
            plog("DEBUG", "No last exit.")
        return ret

    def set_exit_node(self, arg):
        """Pin the exit to `arg` (nickname/idhex); blocks until applied."""
        cond = threading.Condition()
        exit_name = arg
        plog("DEBUG", "Got Setexit: "+exit_name)
        def notlambda(sm):
            plog("DEBUG", "Job for setexit: "+exit_name)
            cond.acquire()
            # Clear last successful exit, we're running a new test
            self.last_exit = None
            sm.set_exit(exit_name)
            cond.notify()
            cond.release()
        cond.acquire()
        self.schedule_selmgr(notlambda)
        cond.wait()
        cond.release()
176 |
class SQLScanHandler(ScanHandler):
    """ScanHandler variant that mirrors scan results into a SQL database
    via the SQLSupport module (imported lazily to avoid a hard sqlalchemy
    dependency)."""
    def __init__(self, c, selmgr, RouterClass=TorCtl.Router,
                 strm_selector=PathSupport.StreamSelector):
        # Only require sqlalchemy if we really need it.
        global SQLSupport
        if SQLSupport is None:
            import SQLSupport
        ScanHandler.__init__(self, c, selmgr, RouterClass, strm_selector)

    def attach_sql_listener(self, db_uri):
        """Create/reset the database and register listeners that mirror
        consensus and stream events into it."""
        plog("DEBUG", "Got db: "+db_uri)
        SQLSupport.setup_db(db_uri, echo=False, drop=True)
        self.sql_consensus_listener = SQLSupport.ConsensusTrackerListener()
        self.add_event_listener(self.sql_consensus_listener)
        self.add_event_listener(SQLSupport.StreamListener())

    def write_sql_stats(self, rfilename=None, stats_filter=None):
        """Dump router stats to rfilename (default: timestamped file)."""
        if not rfilename:
            rfilename="./data/stats/sql-"+time.strftime("20%y-%m-%d-%H:%M:%S")
        cond = threading.Condition()
        def notlambda(h):
            cond.acquire()
            # NOTE(review): file() is Python-2-only and the handle is never
            # closed explicitly -- it is only reclaimed by the GC.
            SQLSupport.RouterStats.write_stats(file(rfilename, "w"),
                0, 100, order_by=SQLSupport.RouterStats.sbw,
                recompute=True, disp_clause=stats_filter)
            cond.notify()
            cond.release()
        cond.acquire()
        self.schedule_low_prio(notlambda)
        cond.wait()
        cond.release()

    def write_strm_bws(self, rfilename=None, slice_num=0, stats_filter=None):
        """Dump per-stream bandwidths for this slice to rfilename."""
        if not rfilename:
            rfilename="./data/stats/bws-"+time.strftime("20%y-%m-%d-%H:%M:%S")
        cond = threading.Condition()
        def notlambda(this):
            cond.acquire()
            f=file(rfilename, "w")
            f.write("slicenum="+str(slice_num)+"\n")
            SQLSupport.RouterStats.write_bws(f, 0, 100,
                order_by=SQLSupport.RouterStats.sbw,
                recompute=False, disp_clause=stats_filter)
            f.close()
            cond.notify()
            cond.release()
        cond.acquire()
        self.schedule_low_prio(notlambda)
        cond.wait()
        cond.release()

    def save_sql_file(self, sql_file, new_file):
        """Close the SQL session, copy the db file aside, and reset
        SQLSupport so a fresh session is created."""
        cond = threading.Condition()
        def notlambda(this):
            cond.acquire()
            SQLSupport.tc_session.close()
            try:
                shutil.copy(sql_file, new_file)
            except Exception,e:
                plog("WARN", "Error moving sql file: "+str(e))
            SQLSupport.reset_all()
            cond.notify()
            cond.release()
        cond.acquire()
        self.schedule_low_prio(notlambda)
        cond.wait()
        cond.release()

    def wait_for_consensus(self):
        """Block until the SQL consensus listener has fully processed the
        current consensus.  The closure reschedules itself on the event
        thread until CONSENSUS_DONE, then signals us."""
        cond = threading.Condition()
        def notlambda(this):
            if this.sql_consensus_listener.last_desc_at \
                    != SQLSupport.ConsensusTrackerListener.CONSENSUS_DONE:
                this.sql_consensus_listener.wait_for_signal = False
                plog("INFO", "Waiting on consensus result: "+str(this.run_all_jobs))
                this.schedule_low_prio(notlambda)
            else:
                cond.acquire()
                this.sql_consensus_listener.wait_for_signal = True
                cond.notify()
                cond.release()
        plog("DEBUG", "Checking for consensus")
        cond.acquire()
        self.schedule_low_prio(notlambda)
        cond.wait()
        cond.release()
        plog("INFO", "Consensus OK")

    def reset_stats(self):
        """Reset both the in-memory PathBuilder stats and the SQL tables,
        then rebuild rank history from the current consensus."""
        cond = threading.Condition()
        def notlambda(this):
            cond.acquire()
            ScanHandler.reset_stats(self)
            SQLSupport.reset_all()
            this.sql_consensus_listener.update_consensus()
            this.sql_consensus_listener._update_rank_history(this.sql_consensus_listener.consensus.ns_map.iterkeys())
            SQLSupport.refresh_all()
            cond.notify()
            cond.release()
        cond.acquire()
        self.schedule_low_prio(notlambda)
        cond.wait()
        cond.release()
280 |
--------------------------------------------------------------------------------
/pytorctl/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | TorCtl is a python Tor controller with extensions to support path
3 | building and various constraints on node and path selection, as well as
4 | statistics gathering.
5 |
6 | Apps can hook into the TorCtl package at whatever level they wish.
7 |
8 | The lowest level of interaction is to use the TorCtl module
9 | (TorCtl/TorCtl.py). Typically this is done by importing TorCtl.TorCtl
10 | and creating a TorCtl.Connection and extending from TorCtl.EventHandler.
11 | This class receives Tor controller events packaged into python classes
12 | from a TorCtl.Connection.
13 |
14 | The next level up is to use the TorCtl.PathSupport module. This is done
15 | by importing TorCtl.PathSupport and instantiating or extending from
16 | PathSupport.PathBuilder, which itself extends from TorCtl.EventHandler.
17 | This class handles circuit construction and stream attachment subject to
18 | policies defined by PathSupport.NodeRestrictor and
19 | PathSupport.PathRestrictor implementations.
20 |
21 | If you are interested in gathering statistics, you can instead
22 | instantiate or extend from StatsSupport.StatsHandler, which is
23 | again an event handler with hooks to record statistics on circuit
24 | creation, stream bandwidth, and circuit failure information.
25 | """
26 |
# Public submodules re-exported for "from TorCtl import *".
__all__ = ["TorUtil", "GeoIPSupport", "PathSupport", "TorCtl", "StatsSupport",
    "SQLSupport", "ScanSupport"]
29 |
--------------------------------------------------------------------------------
/pytorctl/example.py:
--------------------------------------------------------------------------------
1 | """
2 | The following is a simple example of TorCtl usage. This attaches a listener
3 | that prints the amount of traffic going over tor each second.
4 | """
5 |
6 | import time
7 | import TorCtl
8 |
class BandwidthListener(TorCtl.PostEventListener):
    """Event listener that prints each BW (bandwidth) event it receives."""
    def __init__(self):
        TorCtl.PostEventListener.__init__(self)

    def bandwidth_event(self, event):
        # Per the module docstring, BW events report the traffic going over
        # tor each second.
        print "tor read %i bytes and wrote %i bytes" % (event.read, event.written)
15 |
# Demo entry point: runs at import/exec time (no __main__ guard).
# constructs a listener that prints BW events
myListener = BandwidthListener()

# initiates a TorCtl connection, returning None if it was unsuccessful
conn = TorCtl.connect()

if conn:
    # tells tor to send us BW events
    conn.set_events(["BW"])

    # attaches the listener so it'll receive BW events
    conn.add_event_listener(myListener)

    # run until we get a keyboard interrupt
    try:
        while True:
            time.sleep(10)
    except KeyboardInterrupt: pass
34 |
35 |
--------------------------------------------------------------------------------
/requests/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # __
4 | # /__) _ _ _ _ _/ _
5 | # / ( (- (/ (/ (- _) / _)
6 | # /
7 |
8 | """
9 | requests HTTP library
10 | ~~~~~~~~~~~~~~~~~~~~~
11 |
12 | Requests is an HTTP library, written in Python, for human beings. Basic GET
13 | usage:
14 |
15 | >>> import requests
16 | >>> r = requests.get('https://www.python.org')
17 | >>> r.status_code
18 | 200
19 | >>> 'Python is a programming language' in r.content
20 | True
21 |
22 | ... or POST:
23 |
24 | >>> payload = dict(key1='value1', key2='value2')
25 | >>> r = requests.post('http://httpbin.org/post', data=payload)
26 | >>> print(r.text)
27 | {
28 | ...
29 | "form": {
30 | "key2": "value2",
31 | "key1": "value1"
32 | },
33 | ...
34 | }
35 |
36 | The other HTTP methods are supported - see `requests.api`. Full documentation
37 | is at <http://python-requests.org>.
38 |
39 | :copyright: (c) 2014 by Kenneth Reitz.
40 | :license: Apache 2.0, see LICENSE for more details.
41 |
42 | """
43 |
44 | __title__ = 'requests'
45 | __version__ = '2.5.1'
46 | __build__ = 0x020501
47 | __author__ = 'Kenneth Reitz'
48 | __license__ = 'Apache 2.0'
49 | __copyright__ = 'Copyright 2014 Kenneth Reitz'
50 |
51 | # Attempt to enable urllib3's SNI support, if possible
52 | try:
53 | from .packages.urllib3.contrib import pyopenssl
54 | pyopenssl.inject_into_urllib3()
55 | except ImportError:
56 | pass
57 |
58 | from . import utils
59 | from .models import Request, Response, PreparedRequest
60 | from .api import request, get, head, post, patch, put, delete, options
61 | from .sessions import session, Session
62 | from .status_codes import codes
63 | from .exceptions import (
64 | RequestException, Timeout, URLRequired,
65 | TooManyRedirects, HTTPError, ConnectionError
66 | )
67 |
68 | # Set default logging handler to avoid "No handler found" warnings.
69 | import logging
70 | try: # Python 2.7+
71 | from logging import NullHandler
72 | except ImportError:
73 | class NullHandler(logging.Handler):
74 | def emit(self, record):
75 | pass
76 |
77 | logging.getLogger(__name__).addHandler(NullHandler())
78 |
--------------------------------------------------------------------------------
/requests/api.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | """
4 | requests.api
5 | ~~~~~~~~~~~~
6 |
7 | This module implements the Requests API.
8 |
9 | :copyright: (c) 2012 by Kenneth Reitz.
10 | :license: Apache2, see LICENSE for more details.
11 |
12 | """
13 |
14 | from . import sessions
15 |
16 |
def request(method, url, **kwargs):
    """Constructs and sends a :class:`Request`.
    Returns :class:`Response` object.

    :param method: method for the new :class:`Request` object.
    :param url: URL for the new :class:`Request` object.
    :param params: (optional) Dictionary or bytes to be sent in the query string for the :class:`Request`.
    :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`.
    :param json: (optional) json data to send in the body of the :class:`Request`.
    :param headers: (optional) Dictionary of HTTP Headers to send with the :class:`Request`.
    :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`.
    :param files: (optional) Dictionary of ``'name': file-like-objects`` (or ``{'name': ('filename', fileobj)}``) for multipart encoding upload.
    :param auth: (optional) Auth tuple to enable Basic/Digest/Custom HTTP Auth.
    :param timeout: (optional) How long to wait for the server to send data
        before giving up, as a float, or a (connect timeout, read timeout) tuple.
    :type timeout: float or tuple
    :param allow_redirects: (optional) Boolean. Set to True if POST/PUT/DELETE redirect following is allowed.
    :type allow_redirects: bool
    :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy.
    :param verify: (optional) if ``True``, the SSL cert will be verified. A CA_BUNDLE path can also be provided.
    :param stream: (optional) if ``False``, the response content will be immediately downloaded.
    :param cert: (optional) if String, path to ssl client cert file (.pem). If Tuple, ('cert', 'key') pair.

    Usage::

      >>> import requests
      >>> req = requests.request('GET', 'http://httpbin.org/get')

    """

    session = sessions.Session()
    try:
        return session.request(method=method, url=url, **kwargs)
    finally:
        # Close the session even when the request raises, so sockets are
        # never leaked.  Explicit close also avoids ResourceWarnings and
        # what looks like a memory leak on the success path.
        session.close()
55 |
56 |
def get(url, **kwargs):
    """Send a GET request and return the :class:`Response` object.

    :param url: URL for the new :class:`Request` object.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    """

    # Redirect following is on by default for GET.
    if 'allow_redirects' not in kwargs:
        kwargs['allow_redirects'] = True
    return request('get', url, **kwargs)
66 |
67 |
def options(url, **kwargs):
    """Send an OPTIONS request and return the :class:`Response` object.

    :param url: URL for the new :class:`Request` object.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    """

    # Redirect following is on by default for OPTIONS.
    if 'allow_redirects' not in kwargs:
        kwargs['allow_redirects'] = True
    return request('options', url, **kwargs)
77 |
78 |
def head(url, **kwargs):
    """Send a HEAD request and return the :class:`Response` object.

    :param url: URL for the new :class:`Request` object.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    """

    # Unlike the other verbs, HEAD does not follow redirects by default.
    if 'allow_redirects' not in kwargs:
        kwargs['allow_redirects'] = False
    return request('head', url, **kwargs)
88 |
89 |
def post(url, data=None, json=None, **kwargs):
    """Send a POST request and return the :class:`Response` object.

    :param url: URL for the new :class:`Request` object.
    :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`.
    :param json: (optional) json data to send in the body of the :class:`Request`.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    """

    response = request('post', url, data=data, json=json, **kwargs)
    return response
100 |
101 |
def put(url, data=None, **kwargs):
    """Send a PUT request and return the :class:`Response` object.

    :param url: URL for the new :class:`Request` object.
    :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    """

    response = request('put', url, data=data, **kwargs)
    return response
111 |
112 |
def patch(url, data=None, json=None, **kwargs):
    """Sends a PATCH request. Returns :class:`Response` object.

    :param url: URL for the new :class:`Request` object.
    :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`.
    :param json: (optional) json data to send in the body of the :class:`Request`.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    """

    # Accept ``json`` explicitly for parity with :func:`post`; previously
    # it could only reach ``request`` via ``**kwargs``. Backward compatible:
    # a caller passing json= as a keyword gets identical behavior.
    return request('patch', url, data=data, json=json, **kwargs)
122 |
123 |
def delete(url, **kwargs):
    """Sends a DELETE request. Returns :class:`Response` object.

    :param url: URL for the new :class:`Request` object.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    """

    # All options flow through ``kwargs`` to ``request``.
    return request('delete', url, **kwargs)
132 |
--------------------------------------------------------------------------------
/requests/auth.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | """
4 | requests.auth
5 | ~~~~~~~~~~~~~
6 |
7 | This module contains the authentication handlers for Requests.
8 | """
9 |
10 | import os
11 | import re
12 | import time
13 | import hashlib
14 |
15 | from base64 import b64encode
16 |
17 | from .compat import urlparse, str
18 | from .cookies import extract_cookies_to_jar
19 | from .utils import parse_dict_header, to_native_string
20 | from .status_codes import codes
21 |
22 | CONTENT_TYPE_FORM_URLENCODED = 'application/x-www-form-urlencoded'
23 | CONTENT_TYPE_MULTI_PART = 'multipart/form-data'
24 |
25 |
def _basic_auth_str(username, password):
    """Return the value for a Basic ``Authorization`` header."""

    # RFC 2617: base64 of "username:password", encoded as latin1.
    credentials = '%s:%s' % (username, password)
    token = b64encode(credentials.encode('latin1')).strip()
    return 'Basic ' + to_native_string(token)
34 |
35 |
class AuthBase(object):
    """Base class that all auth implementations derive from"""

    def __call__(self, r):
        # Subclasses implement __call__ to mutate the outgoing request
        # ``r`` (e.g. set an auth header) and return it.
        raise NotImplementedError('Auth hooks must be callable.')
41 |
42 |
class HTTPBasicAuth(AuthBase):
    """Attaches HTTP Basic Authentication to the given Request object."""

    def __init__(self, username, password):
        self.username = username
        self.password = password

    def __call__(self, r):
        # Recompute the header on each call so credential changes on the
        # instance are picked up.
        auth_value = _basic_auth_str(self.username, self.password)
        r.headers['Authorization'] = auth_value
        return r
52 |
53 |
class HTTPProxyAuth(HTTPBasicAuth):
    """Attaches HTTP Proxy Authentication to a given Request object."""

    def __call__(self, r):
        # Same credential encoding as basic auth, but aimed at the proxy.
        header_value = _basic_auth_str(self.username, self.password)
        r.headers['Proxy-Authorization'] = header_value
        return r
59 |
60 |
class HTTPDigestAuth(AuthBase):
    """Attaches HTTP Digest Authentication to the given Request object."""
    # NOTE(review): all of the state below is per-instance and mutated on
    # every challenge/response, so sharing one instance across threads
    # looks racy -- confirm before using concurrently.
    def __init__(self, username, password):
        self.username = username
        self.password = password
        # Nonce from the most recent challenge; a non-empty value lets
        # __call__ build an Authorization header pre-emptively.
        self.last_nonce = ''
        # How many requests have used the current nonce (the "nc" field).
        self.nonce_count = 0
        # Parsed www-authenticate challenge parameters.
        self.chal = {}
        # Saved file position of a seekable request body, used to rewind
        # before resending after a 401.
        self.pos = None
        self.num_401_calls = 1

    def build_digest_header(self, method, url):
        """Build the value of a ``Digest`` Authorization header from the
        stored challenge (``self.chal``), the request *method* and *url*.

        Returns ``None`` when the challenge's qop is unsupported.
        """

        realm = self.chal['realm']
        nonce = self.chal['nonce']
        qop = self.chal.get('qop')
        algorithm = self.chal.get('algorithm')
        opaque = self.chal.get('opaque')

        if algorithm is None:
            _algorithm = 'MD5'
        else:
            _algorithm = algorithm.upper()
        # lambdas assume digest modules are imported at the top level
        if _algorithm == 'MD5' or _algorithm == 'MD5-SESS':
            def md5_utf8(x):
                if isinstance(x, str):
                    x = x.encode('utf-8')
                return hashlib.md5(x).hexdigest()
            hash_utf8 = md5_utf8
        elif _algorithm == 'SHA':
            def sha_utf8(x):
                if isinstance(x, str):
                    x = x.encode('utf-8')
                return hashlib.sha1(x).hexdigest()
            hash_utf8 = sha_utf8

        KD = lambda s, d: hash_utf8("%s:%s" % (s, d))

        # NOTE(review): for an unrecognized algorithm, hash_utf8 is never
        # bound, so this check raises UnboundLocalError instead of
        # returning None -- confirm whether that is intended.
        if hash_utf8 is None:
            return None

        # XXX not implemented yet
        entdig = None
        p_parsed = urlparse(url)
        # digest-uri is path plus query string, without scheme/host.
        path = p_parsed.path
        if p_parsed.query:
            path += '?' + p_parsed.query

        # A1/A2 strings as defined by RFC 2617.
        A1 = '%s:%s:%s' % (self.username, realm, self.password)
        A2 = '%s:%s' % (method, path)

        HA1 = hash_utf8(A1)
        HA2 = hash_utf8(A2)

        # Track how many requests have used this server nonce.
        if nonce == self.last_nonce:
            self.nonce_count += 1
        else:
            self.nonce_count = 1
        ncvalue = '%08x' % self.nonce_count
        # Client nonce: hash of count + server nonce + time + random
        # bytes, truncated to 16 hex characters.
        s = str(self.nonce_count).encode('utf-8')
        s += nonce.encode('utf-8')
        s += time.ctime().encode('utf-8')
        s += os.urandom(8)

        cnonce = (hashlib.sha1(s).hexdigest()[:16])
        noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, HA2)
        if _algorithm == 'MD5-SESS':
            HA1 = hash_utf8('%s:%s:%s' % (HA1, nonce, cnonce))

        if qop is None:
            respdig = KD(HA1, "%s:%s" % (nonce, HA2))
        elif qop == 'auth' or 'auth' in qop.split(','):
            respdig = KD(HA1, noncebit)
        else:
            # XXX handle auth-int.
            return None

        self.last_nonce = nonce

        # XXX should the partial digests be encoded too?
        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (self.username, realm, nonce, path, respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if algorithm:
            base += ', algorithm="%s"' % algorithm
        if entdig:
            base += ', digest="%s"' % entdig
        if qop:
            base += ', qop="auth", nc=%s, cnonce="%s"' % (ncvalue, cnonce)

        return 'Digest %s' % (base)

    def handle_redirect(self, r, **kwargs):
        """Reset num_401_calls counter on redirects."""
        if r.is_redirect:
            self.num_401_calls = 1

    def handle_401(self, r, **kwargs):
        """Takes the given response and tries digest-auth, if needed."""

        if self.pos is not None:
            # Rewind the file position indicator of the body to where
            # it was to resend the request.
            r.request.body.seek(self.pos)
        num_401_calls = getattr(self, 'num_401_calls', 1)
        s_auth = r.headers.get('www-authenticate', '')

        # Only retry once per challenge, to avoid an infinite 401 loop
        # when the credentials are rejected.
        if 'digest' in s_auth.lower() and num_401_calls < 2:

            self.num_401_calls += 1
            # Strip the leading "Digest " token and parse the remaining
            # comma-separated challenge parameters into a dict.
            pat = re.compile(r'digest ', flags=re.IGNORECASE)
            self.chal = parse_dict_header(pat.sub('', s_auth, count=1))

            # Consume content and release the original connection
            # to allow our new request to reuse the same one.
            r.content
            r.raw.release_conn()
            prep = r.request.copy()
            extract_cookies_to_jar(prep._cookies, r.request, r.raw)
            prep.prepare_cookies(prep._cookies)

            prep.headers['Authorization'] = self.build_digest_header(
                prep.method, prep.url)
            _r = r.connection.send(prep, **kwargs)
            # Record the original 401 in the retried request's history.
            _r.history.append(r)
            _r.request = prep

            return _r

        self.num_401_calls = 1
        return r

    def __call__(self, r):
        # If we have a saved nonce, skip the 401
        if self.last_nonce:
            r.headers['Authorization'] = self.build_digest_header(r.method, r.url)
        try:
            self.pos = r.body.tell()
        except AttributeError:
            # In the case of HTTPDigestAuth being reused and the body of
            # the previous request was a file-like object, pos has the
            # file position of the previous body. Ensure it's set to
            # None.
            self.pos = None
        r.register_hook('response', self.handle_401)
        r.register_hook('response', self.handle_redirect)
        return r
210 |
--------------------------------------------------------------------------------
/requests/certs.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | certs.py
6 | ~~~~~~~~
7 |
8 | This module returns the preferred default CA certificate bundle.
9 |
10 | If you are packaging Requests, e.g., for a Linux distribution or a managed
11 | environment, you can change the definition of where() to return a separately
12 | packaged CA bundle.
13 | """
14 | import os.path
15 |
try:
    # Prefer certifi's bundle when the package is installed.
    from certifi import where
except ImportError:
    def where():
        """Return the preferred certificate bundle."""
        # vendored bundle inside Requests
        here = os.path.dirname(__file__)
        return os.path.join(here, 'cacert.pem')

if __name__ == '__main__':
    print(where())
26 |
--------------------------------------------------------------------------------
/requests/compat.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | """
4 | pythoncompat
5 | """
6 |
7 | from .packages import chardet
8 |
9 | import sys
10 |
11 | # -------
12 | # Pythons
13 | # -------
14 |
15 | # Syntax sugar.
16 | _ver = sys.version_info
17 |
18 | #: Python 2.x?
19 | is_py2 = (_ver[0] == 2)
20 |
21 | #: Python 3.x?
22 | is_py3 = (_ver[0] == 3)
23 |
24 | #: Python 3.0.x
25 | is_py30 = (is_py3 and _ver[1] == 0)
26 |
27 | #: Python 3.1.x
28 | is_py31 = (is_py3 and _ver[1] == 1)
29 |
30 | #: Python 3.2.x
31 | is_py32 = (is_py3 and _ver[1] == 2)
32 |
33 | #: Python 3.3.x
34 | is_py33 = (is_py3 and _ver[1] == 3)
35 |
36 | #: Python 3.4.x
37 | is_py34 = (is_py3 and _ver[1] == 4)
38 |
39 | #: Python 2.7.x
40 | is_py27 = (is_py2 and _ver[1] == 7)
41 |
42 | #: Python 2.6.x
43 | is_py26 = (is_py2 and _ver[1] == 6)
44 |
45 | #: Python 2.5.x
46 | is_py25 = (is_py2 and _ver[1] == 5)
47 |
48 | #: Python 2.4.x
49 | is_py24 = (is_py2 and _ver[1] == 4) # I'm assuming this is not by choice.
50 |
51 |
52 | # ---------
53 | # Platforms
54 | # ---------
55 |
56 |
57 | # Syntax sugar.
58 | _ver = sys.version.lower()
59 |
60 | is_pypy = ('pypy' in _ver)
61 | is_jython = ('jython' in _ver)
62 | is_ironpython = ('iron' in _ver)
63 |
64 | # Assume CPython, if nothing else.
65 | is_cpython = not any((is_pypy, is_jython, is_ironpython))
66 |
67 | # Windows-based system.
68 | is_windows = 'win32' in str(sys.platform).lower()
69 |
70 | # Standard Linux 2+ system.
71 | is_linux = ('linux' in str(sys.platform).lower())
72 | is_osx = ('darwin' in str(sys.platform).lower())
73 | is_hpux = ('hpux' in str(sys.platform).lower()) # Complete guess.
74 | is_solaris = ('solar==' in str(sys.platform).lower()) # Complete guess.
75 |
# Prefer the third-party ``simplejson`` package when it is importable;
# fall back to the stdlib ``json`` module otherwise.
try:
    import simplejson as json
except (ImportError, SyntaxError):
    # simplejson does not support Python 3.2, it throws a SyntaxError
    # because of u'...' Unicode literals.
    import json
82 |
83 | # ---------
84 | # Specifics
85 | # ---------
86 |
if is_py2:
    # Python 2: pull URL/cookie helpers from their 2.x stdlib homes.
    from urllib import quote, unquote, quote_plus, unquote_plus, urlencode, getproxies, proxy_bypass
    from urlparse import urlparse, urlunparse, urljoin, urlsplit, urldefrag
    from urllib2 import parse_http_list
    import cookielib
    from Cookie import Morsel
    from StringIO import StringIO
    from .packages.urllib3.packages.ordered_dict import OrderedDict

    # Normalise string/number types so the rest of the package can use
    # ``str`` for text, ``bytes`` for binary data and ``builtin_str`` for
    # the interpreter's native string type on both major versions.
    builtin_str = str
    bytes = str
    str = unicode
    basestring = basestring
    numeric_types = (int, long, float)


elif is_py3:
    # Python 3: the same helpers now live in urllib.parse/request,
    # http.cookiejar, http.cookies, io and collections.
    from urllib.parse import urlparse, urlunparse, urljoin, urlsplit, urlencode, quote, unquote, quote_plus, unquote_plus, urldefrag
    from urllib.request import parse_http_list, getproxies, proxy_bypass
    from http import cookiejar as cookielib
    from http.cookies import Morsel
    from io import StringIO
    from collections import OrderedDict

    builtin_str = str
    str = str
    bytes = bytes
    basestring = (str, bytes)
    numeric_types = (int, float)
116 |
--------------------------------------------------------------------------------
/requests/exceptions.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | """
4 | requests.exceptions
5 | ~~~~~~~~~~~~~~~~~~~
6 |
7 | This module contains the set of Requests' exceptions.
8 |
9 | """
10 | from .packages.urllib3.exceptions import HTTPError as BaseHTTPError
11 |
12 |
class RequestException(IOError):
    """There was an ambiguous exception that occurred while handling your
    request."""

    def __init__(self, *args, **kwargs):
        """
        Initialize RequestException with `request` and `response` objects.
        """
        self.response = kwargs.pop('response', None)
        self.request = kwargs.pop('request', None)
        # Fall back to the request attached to the response when the
        # caller did not pass a request explicitly.
        no_explicit_request = not self.request
        if (self.response is not None and no_explicit_request and
                hasattr(self.response, 'request')):
            self.request = self.response.request
        super(RequestException, self).__init__(*args, **kwargs)
28 |
29 |
class HTTPError(RequestException):
    """An HTTP error occurred."""


class ConnectionError(RequestException):
    """A Connection error occurred.

    Note: this shadows the builtin ``ConnectionError`` within this
    module's namespace.
    """


class ProxyError(ConnectionError):
    """A proxy error occurred."""


class SSLError(ConnectionError):
    """An SSL error occurred."""


class Timeout(RequestException):
    """The request timed out.

    Catching this error will catch both
    :exc:`~requests.exceptions.ConnectTimeout` and
    :exc:`~requests.exceptions.ReadTimeout` errors.
    """


class ConnectTimeout(ConnectionError, Timeout):
    """The request timed out while trying to connect to the remote server.

    Requests that produced this error are safe to retry.
    """


class ReadTimeout(Timeout):
    """The server did not send any data in the allotted amount of time."""


class URLRequired(RequestException):
    """A valid URL is required to make a request."""


class TooManyRedirects(RequestException):
    """Too many redirects."""


class MissingSchema(RequestException, ValueError):
    """The URL schema (e.g. http or https) is missing."""


class InvalidSchema(RequestException, ValueError):
    """The URL schema provided is invalid or unsupported.

    (The old docstring pointed at defaults.py, which no longer exists
    in this tree.)
    """


class InvalidURL(RequestException, ValueError):
    """ The URL provided was somehow invalid. """


class ChunkedEncodingError(RequestException):
    """The server declared chunked encoding but sent an invalid chunk."""


class ContentDecodingError(RequestException, BaseHTTPError):
    """Failed to decode response content"""


class StreamConsumedError(RequestException, TypeError):
    """The content for this response was already consumed"""


class RetryError(RequestException):
    """Custom retries logic failed"""
100 |
--------------------------------------------------------------------------------
/requests/hooks.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | """
4 | requests.hooks
5 | ~~~~~~~~~~~~~~
6 |
7 | This module provides the capabilities for the Requests hooks system.
8 |
9 | Available hooks:
10 |
11 | ``response``:
12 | The response generated from a Request.
13 |
14 | """
15 |
16 |
HOOKS = ['response']


def default_hooks():
    """Return a fresh hooks mapping with an empty list per hook event."""
    return dict((event, []) for event in HOOKS)
25 |
26 | # TODO: response is the only one
27 |
28 |
def dispatch_hook(key, hooks, hook_data, **kwargs):
    """Dispatches a hook dictionary on a given piece of data.

    :param key: the hook event name (e.g. ``'response'``).
    :param hooks: mapping of event name to a callable or list of callables,
        or a falsy value for no hooks.
    :param hook_data: the object passed through each hook.
    :return: ``hook_data``, possibly replaced by hook return values.
    """

    hooks = hooks or dict()
    # Look up only the hooks registered for *key*. Previously a missing
    # key left ``hooks`` bound to the whole dict, so the loop below would
    # iterate the dict's keys and try to call them as hooks.
    hooks = hooks.get(key, [])

    # A single bare callable is shorthand for a one-item list.
    if hasattr(hooks, '__call__'):
        hooks = [hooks]

    for hook in hooks:
        _hook_data = hook(hook_data, **kwargs)
        if _hook_data is not None:
            # A hook may replace the data by returning a new object.
            hook_data = _hook_data

    return hook_data
46 |
--------------------------------------------------------------------------------
/requests/packages/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 |
3 | from . import urllib3
4 |
--------------------------------------------------------------------------------
/requests/packages/chardet/__init__.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # This library is free software; you can redistribute it and/or
3 | # modify it under the terms of the GNU Lesser General Public
4 | # License as published by the Free Software Foundation; either
5 | # version 2.1 of the License, or (at your option) any later version.
6 | #
7 | # This library is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10 | # Lesser General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Lesser General Public
13 | # License along with this library; if not, write to the Free Software
14 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
15 | # 02110-1301 USA
16 | ######################### END LICENSE BLOCK #########################
17 |
18 | __version__ = "2.3.0"
19 | from sys import version_info
20 |
21 |
def detect(aBuf):
    """Run the universal detector over *aBuf* and return its result dict."""
    # The detector works on raw bytes only; reject text objects up front.
    is_py2 = version_info < (3, 0)
    if ((is_py2 and isinstance(aBuf, unicode)) or
            (not is_py2 and not isinstance(aBuf, bytes))):
        raise ValueError('Expected a bytes object, not a unicode object')

    from . import universaldetector
    detector = universaldetector.UniversalDetector()
    detector.reset()
    detector.feed(aBuf)
    detector.close()
    return detector.result
33 |
--------------------------------------------------------------------------------
/requests/packages/chardet/big5prober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is Mozilla Communicator client code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import Big5DistributionAnalysis
31 | from .mbcssm import Big5SMModel
32 |
33 |
class Big5Prober(MultiByteCharSetProber):
    """Multi-byte prober for the Big5 (Traditional Chinese) encoding.

    Pairs the Big5 coding state machine with the Big5 character
    distribution analyser.
    """
    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        self._mCodingSM = CodingStateMachine(Big5SMModel)
        self._mDistributionAnalyzer = Big5DistributionAnalysis()
        self.reset()

    def get_charset_name(self):
        # Canonical charset name reported for this prober.
        return "Big5"
43 |
--------------------------------------------------------------------------------
/requests/packages/chardet/chardetect.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """
3 | Script which takes one or more file paths and reports on their detected
4 | encodings
5 |
6 | Example::
7 |
8 | % chardetect somefile someotherfile
9 | somefile: windows-1252 with confidence 0.5
10 | someotherfile: ascii with confidence 1.0
11 |
12 | If no paths are provided, it takes its input from stdin.
13 |
14 | """
15 |
16 | from __future__ import absolute_import, print_function, unicode_literals
17 |
18 | import argparse
19 | import sys
20 | from io import open
21 |
22 | from chardet import __version__
23 | from chardet.universaldetector import UniversalDetector
24 |
25 |
def description_of(lines, name='stdin'):
    """
    Return a string describing the probable encoding of a file or
    list of strings.

    :param lines: The lines to get the encoding of.
    :type lines: Iterable of bytes
    :param name: Name of file or collection of lines
    :type name: str
    """
    detector = UniversalDetector()
    for line in lines:
        detector.feed(line)
    detector.close()
    result = detector.result
    # No confident encoding found -> report "no result".
    if not result['encoding']:
        return '{0}: no result'.format(name)
    return '{0}: {1} with confidence {2}'.format(name, result['encoding'],
                                                 result['confidence'])
46 |
47 |
def main(argv=None):
    '''
    Handles command line arguments and gets things started.

    :param argv: List of arguments, as if specified on the command-line.
                 If None, ``sys.argv[1:]`` is used instead.
    :type argv: list of str
    '''
    # Get command line arguments
    parser = argparse.ArgumentParser(
        description="Takes one or more file paths and reports their detected \
                     encodings",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        conflict_handler='resolve')
    parser.add_argument('input',
                        help='File whose encoding we would like to determine.',
                        type=argparse.FileType('rb'), nargs='*',
                        default=[sys.stdin])
    parser.add_argument('--version', action='version',
                        version='%(prog)s {0}'.format(__version__))
    args = parser.parse_args(argv)

    for f in args.input:
        if f.isatty():
            # Reading from an interactive terminal: explain how to end input.
            print("You are running chardetect interactively. Press " +
                  "CTRL-D twice at the start of a blank line to signal the " +
                  "end of your input. If you want help, run chardetect " +
                  "--help\n", file=sys.stderr)
        print(description_of(f, f.name))
77 |
78 |
79 | if __name__ == '__main__':
80 | main()
81 |
--------------------------------------------------------------------------------
/requests/packages/chardet/charsetgroupprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is Mozilla Communicator client code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from . import constants
29 | import sys
30 | from .charsetprober import CharSetProber
31 |
32 |
class CharSetGroupProber(CharSetProber):
    """Composite prober: feeds data to a group of child probers and
    reports the state/confidence of the best candidate among them."""
    def __init__(self):
        CharSetProber.__init__(self)
        # Number of child probers still in the running.
        self._mActiveNum = 0
        # Child probers; populated by subclasses.
        self._mProbers = []
        self._mBestGuessProber = None

    def reset(self):
        CharSetProber.reset(self)
        self._mActiveNum = 0
        for prober in self._mProbers:
            if prober:
                prober.reset()
                prober.active = True
                self._mActiveNum += 1
        self._mBestGuessProber = None

    def get_charset_name(self):
        # get_confidence() has the side effect of electing the best
        # child prober; run it if no election has happened yet.
        if not self._mBestGuessProber:
            self.get_confidence()
            if not self._mBestGuessProber:
                return None
            # self._mBestGuessProber = self._mProbers[0]
        return self._mBestGuessProber.get_charset_name()

    def feed(self, aBuf):
        for prober in self._mProbers:
            if not prober:
                continue
            if not prober.active:
                continue
            st = prober.feed(aBuf)
            if not st:
                continue
            if st == constants.eFoundIt:
                # A child is certain: remember it and stop searching.
                self._mBestGuessProber = prober
                return self.get_state()
            elif st == constants.eNotMe:
                # Child ruled itself out; drop it from the active set.
                prober.active = False
                self._mActiveNum -= 1
                if self._mActiveNum <= 0:
                    # Every child has given up, so the group gives up too.
                    self._mState = constants.eNotMe
                    return self.get_state()
        return self.get_state()

    def get_confidence(self):
        st = self.get_state()
        if st == constants.eFoundIt:
            return 0.99
        elif st == constants.eNotMe:
            return 0.01
        # Otherwise report the best child confidence, remembering which
        # child produced it.
        bestConf = 0.0
        self._mBestGuessProber = None
        for prober in self._mProbers:
            if not prober:
                continue
            if not prober.active:
                if constants._debug:
                    sys.stderr.write(prober.get_charset_name()
                                     + ' not active\n')
                continue
            cf = prober.get_confidence()
            if constants._debug:
                sys.stderr.write('%s confidence = %s\n' %
                                 (prober.get_charset_name(), cf))
            if bestConf < cf:
                bestConf = cf
                self._mBestGuessProber = prober
        if not self._mBestGuessProber:
            return 0.0
        return bestConf
        # else:
        #     self._mBestGuessProber = self._mProbers[0]
        #     return self._mBestGuessProber.get_confidence()
107 |
--------------------------------------------------------------------------------
/requests/packages/chardet/charsetprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is Mozilla Universal charset detector code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 2001
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | # Shy Shalom - original C code
12 | #
13 | # This library is free software; you can redistribute it and/or
14 | # modify it under the terms of the GNU Lesser General Public
15 | # License as published by the Free Software Foundation; either
16 | # version 2.1 of the License, or (at your option) any later version.
17 | #
18 | # This library is distributed in the hope that it will be useful,
19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 | # Lesser General Public License for more details.
22 | #
23 | # You should have received a copy of the GNU Lesser General Public
24 | # License along with this library; if not, write to the Free Software
25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26 | # 02110-1301 USA
27 | ######################### END LICENSE BLOCK #########################
28 |
29 | from . import constants
30 | import re
31 |
32 |
class CharSetProber:
    """Interface shared by all charset probers.

    Subclasses are fed byte buffers, track a detection state and report
    a confidence score between 0.0 and 1.0.
    """

    def __init__(self):
        pass

    def reset(self):
        # Back to the initial "still detecting" state.
        self._mState = constants.eDetecting

    def get_charset_name(self):
        return None

    def feed(self, aBuf):
        pass

    def get_state(self):
        return self._mState

    def get_confidence(self):
        return 0.0

    def filter_high_bit_only(self, aBuf):
        # Collapse each run of ASCII bytes into a single space, keeping
        # only high-bit bytes for analysis.
        return re.sub(b'([\x00-\x7F])+', b' ', aBuf)

    def filter_without_english_letters(self, aBuf):
        # Collapse each run of ASCII letters into a single space.
        return re.sub(b'([A-Za-z])+', b' ', aBuf)

    def filter_with_english_letters(self, aBuf):
        # TODO
        return aBuf
63 |
--------------------------------------------------------------------------------
/requests/packages/chardet/codingstatemachine.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from .constants import eStart
29 | from .compat import wrap_ord
30 |
31 |
class CodingStateMachine:
    """Drives a byte-class state machine described by an ``sm`` model
    dict (classTable, classFactor, stateTable, charLenTable, name)."""

    def __init__(self, sm):
        self._mModel = sm
        self._mCurrentBytePos = 0
        self._mCurrentCharLen = 0
        self.reset()

    def reset(self):
        self._mCurrentState = eStart

    def next_state(self, c):
        # for each byte we get its class
        # if it is first byte, we also get byte length
        # PY3K: aBuf is a byte stream, so c is an int, not a byte
        model = self._mModel
        byteCls = model['classTable'][wrap_ord(c)]
        if self._mCurrentState == eStart:
            self._mCurrentBytePos = 0
            self._mCurrentCharLen = model['charLenTable'][byteCls]
        # from byte's class and stateTable, we get its next state
        next_index = self._mCurrentState * model['classFactor'] + byteCls
        self._mCurrentState = model['stateTable'][next_index]
        self._mCurrentBytePos += 1
        return self._mCurrentState

    def get_current_charlen(self):
        return self._mCurrentCharLen

    def get_coding_state_machine(self):
        return self._mModel['name']
62 |
--------------------------------------------------------------------------------
/requests/packages/chardet/compat.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # Contributor(s):
3 | # Ian Cordasco - port to Python
4 | #
5 | # This library is free software; you can redistribute it and/or
6 | # modify it under the terms of the GNU Lesser General Public
7 | # License as published by the Free Software Foundation; either
8 | # version 2.1 of the License, or (at your option) any later version.
9 | #
10 | # This library is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 | # Lesser General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU Lesser General Public
16 | # License along with this library; if not, write to the Free Software
17 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
18 | # 02110-1301 USA
19 | ######################### END LICENSE BLOCK #########################
20 |
21 | import sys
22 |
23 |
if sys.version_info >= (3, 0):
    base_str = (bytes, str)
else:
    base_str = (str, unicode)


def wrap_ord(a):
    """Return the numeric byte value of *a* in a Python-2/3 portable way.

    On Python 2, iterating a byte string yields length-1 strings, so the
    character is converted with ord(); on Python 3 (where iterating bytes
    already yields ints) the value is returned untouched.
    """
    if sys.version_info < (3, 0) and isinstance(a, base_str):
        return ord(a)
    return a
35 |
--------------------------------------------------------------------------------
/requests/packages/chardet/constants.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is Mozilla Universal charset detector code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 2001
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | # Shy Shalom - original C code
12 | #
13 | # This library is free software; you can redistribute it and/or
14 | # modify it under the terms of the GNU Lesser General Public
15 | # License as published by the Free Software Foundation; either
16 | # version 2.1 of the License, or (at your option) any later version.
17 | #
18 | # This library is distributed in the hope that it will be useful,
19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 | # Lesser General Public License for more details.
22 | #
23 | # You should have received a copy of the GNU Lesser General Public
24 | # License along with this library; if not, write to the Free Software
25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26 | # 02110-1301 USA
27 | ######################### END LICENSE BLOCK #########################
28 |
# Set non-zero to make probers write diagnostics to stderr.
_debug = 0

# Overall prober/detector result states.
eDetecting = 0
eFoundIt = 1
eNotMe = 2

# Per-byte coding state machine states.
eStart = 0
eError = 1
eItsMe = 2

# A prober whose confidence exceeds this threshold may end detection early.
SHORTCUT_THRESHOLD = 0.95
40 |
--------------------------------------------------------------------------------
/requests/packages/chardet/cp949prober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import EUCKRDistributionAnalysis
31 | from .mbcssm import CP949SMModel
32 |
33 |
class CP949Prober(MultiByteCharSetProber):
    """Prober for the CP949 (Unified Hangul Code) Korean encoding."""

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        # CP949 is a superset of EUC-KR, so the EUC-KR character
        # distribution analysis applies unchanged.
        self._mDistributionAnalyzer = EUCKRDistributionAnalysis()
        self._mCodingSM = CodingStateMachine(CP949SMModel)
        self.reset()

    def get_charset_name(self):
        """Canonical name of the charset this prober detects."""
        return "CP949"
45 |
--------------------------------------------------------------------------------
/requests/packages/chardet/escprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from . import constants
29 | from .escsm import (HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel,
30 | ISO2022KRSMModel)
31 | from .charsetprober import CharSetProber
32 | from .codingstatemachine import CodingStateMachine
33 | from .compat import wrap_ord
34 |
35 |
class EscCharSetProber(CharSetProber):
    """Detect escape-based encodings (HZ and the ISO-2022 family).

    Runs several coding state machines in parallel over the input; the
    first machine to reach eItsMe decides the charset, and the prober
    gives up once every machine has rejected the stream.
    """

    def __init__(self):
        CharSetProber.__init__(self)
        self._mCodingSM = [
            CodingStateMachine(HZSMModel),
            CodingStateMachine(ISO2022CNSMModel),
            CodingStateMachine(ISO2022JPSMModel),
            CodingStateMachine(ISO2022KRSMModel)
        ]
        self.reset()

    def reset(self):
        """Reactivate every state machine and forget any prior verdict."""
        CharSetProber.reset(self)
        for machine in self._mCodingSM:
            if machine:
                machine.active = True
                machine.reset()
        self._mActiveSM = len(self._mCodingSM)
        self._mDetectedCharset = None

    def get_charset_name(self):
        return self._mDetectedCharset

    def get_confidence(self):
        # An escape-sequence match is treated as nearly conclusive.
        return 0.99 if self._mDetectedCharset else 0.00

    def feed(self, aBuf):
        """Feed bytes to every live machine; stop on a verdict."""
        for c in aBuf:
            # PY3K: aBuf is a byte array, so c is an int, not a byte
            for machine in self._mCodingSM:
                if not machine or not machine.active:
                    continue
                state = machine.next_state(wrap_ord(c))
                if state == constants.eError:
                    # This machine rejects the input; retire it.
                    machine.active = False
                    self._mActiveSM -= 1
                    if self._mActiveSM <= 0:
                        self._mState = constants.eNotMe
                        return self.get_state()
                elif state == constants.eItsMe:
                    self._mState = constants.eFoundIt
                    self._mDetectedCharset = machine.get_coding_state_machine()  # nopep8
                    return self.get_state()

        return self.get_state()
87 |
--------------------------------------------------------------------------------
/requests/packages/chardet/escsm.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from .constants import eStart, eError, eItsMe
29 |
# Each *_cls table maps every byte value (0-255) to a character class for
# one escape-based encoding; each *_st table is the matching state
# transition matrix, flattened row-major with 'classFactor' columns, over
# the eStart/eError/eItsMe states plus numbered intermediate states.

# HZ-GB-2312 (RFC 1843 "HZ" encoding) tables.
HZ_cls = (
1,0,0,0,0,0,0,0,  # 00 - 07
0,0,0,0,0,0,0,0,  # 08 - 0f
0,0,0,0,0,0,0,0,  # 10 - 17
0,0,0,1,0,0,0,0,  # 18 - 1f
0,0,0,0,0,0,0,0,  # 20 - 27
0,0,0,0,0,0,0,0,  # 28 - 2f
0,0,0,0,0,0,0,0,  # 30 - 37
0,0,0,0,0,0,0,0,  # 38 - 3f
0,0,0,0,0,0,0,0,  # 40 - 47
0,0,0,0,0,0,0,0,  # 48 - 4f
0,0,0,0,0,0,0,0,  # 50 - 57
0,0,0,0,0,0,0,0,  # 58 - 5f
0,0,0,0,0,0,0,0,  # 60 - 67
0,0,0,0,0,0,0,0,  # 68 - 6f
0,0,0,0,0,0,0,0,  # 70 - 77
0,0,0,4,0,5,2,0,  # 78 - 7f
1,1,1,1,1,1,1,1,  # 80 - 87
1,1,1,1,1,1,1,1,  # 88 - 8f
1,1,1,1,1,1,1,1,  # 90 - 97
1,1,1,1,1,1,1,1,  # 98 - 9f
1,1,1,1,1,1,1,1,  # a0 - a7
1,1,1,1,1,1,1,1,  # a8 - af
1,1,1,1,1,1,1,1,  # b0 - b7
1,1,1,1,1,1,1,1,  # b8 - bf
1,1,1,1,1,1,1,1,  # c0 - c7
1,1,1,1,1,1,1,1,  # c8 - cf
1,1,1,1,1,1,1,1,  # d0 - d7
1,1,1,1,1,1,1,1,  # d8 - df
1,1,1,1,1,1,1,1,  # e0 - e7
1,1,1,1,1,1,1,1,  # e8 - ef
1,1,1,1,1,1,1,1,  # f0 - f7
1,1,1,1,1,1,1,1,  # f8 - ff
)

HZ_st = (
eStart,eError,     3,eStart,eStart,eStart,eError,eError,# 00-07
eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f
eItsMe,eItsMe,eError,eError,eStart,eStart,     4,eError,# 10-17
     5,eError,     6,eError,     5,     5,     4,eError,# 18-1f
     4,eError,     4,     4,     4,eError,     4,eError,# 20-27
     4,eItsMe,eStart,eStart,eStart,eStart,eStart,eStart,# 28-2f
)

HZCharLenTable = (0, 0, 0, 0, 0, 0)

HZSMModel = {'classTable': HZ_cls,
             'classFactor': 6,
             'stateTable': HZ_st,
             'charLenTable': HZCharLenTable,
             'name': "HZ-GB-2312"}

# ISO-2022-CN tables.
ISO2022CN_cls = (
2,0,0,0,0,0,0,0,  # 00 - 07
0,0,0,0,0,0,0,0,  # 08 - 0f
0,0,0,0,0,0,0,0,  # 10 - 17
0,0,0,1,0,0,0,0,  # 18 - 1f
0,0,0,0,0,0,0,0,  # 20 - 27
0,3,0,0,0,0,0,0,  # 28 - 2f
0,0,0,0,0,0,0,0,  # 30 - 37
0,0,0,0,0,0,0,0,  # 38 - 3f
0,0,0,4,0,0,0,0,  # 40 - 47
0,0,0,0,0,0,0,0,  # 48 - 4f
0,0,0,0,0,0,0,0,  # 50 - 57
0,0,0,0,0,0,0,0,  # 58 - 5f
0,0,0,0,0,0,0,0,  # 60 - 67
0,0,0,0,0,0,0,0,  # 68 - 6f
0,0,0,0,0,0,0,0,  # 70 - 77
0,0,0,0,0,0,0,0,  # 78 - 7f
2,2,2,2,2,2,2,2,  # 80 - 87
2,2,2,2,2,2,2,2,  # 88 - 8f
2,2,2,2,2,2,2,2,  # 90 - 97
2,2,2,2,2,2,2,2,  # 98 - 9f
2,2,2,2,2,2,2,2,  # a0 - a7
2,2,2,2,2,2,2,2,  # a8 - af
2,2,2,2,2,2,2,2,  # b0 - b7
2,2,2,2,2,2,2,2,  # b8 - bf
2,2,2,2,2,2,2,2,  # c0 - c7
2,2,2,2,2,2,2,2,  # c8 - cf
2,2,2,2,2,2,2,2,  # d0 - d7
2,2,2,2,2,2,2,2,  # d8 - df
2,2,2,2,2,2,2,2,  # e0 - e7
2,2,2,2,2,2,2,2,  # e8 - ef
2,2,2,2,2,2,2,2,  # f0 - f7
2,2,2,2,2,2,2,2,  # f8 - ff
)

ISO2022CN_st = (
eStart,     3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07
eStart,eError,eError,eError,eError,eError,eError,eError,# 08-0f
eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17
eItsMe,eItsMe,eItsMe,eError,eError,eError,     4,eError,# 18-1f
eError,eError,eError,eItsMe,eError,eError,eError,eError,# 20-27
     5,     6,eError,eError,eError,eError,eError,eError,# 28-2f
eError,eError,eError,eItsMe,eError,eError,eError,eError,# 30-37
eError,eError,eError,eError,eError,eItsMe,eError,eStart,# 38-3f
)

ISO2022CNCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0)

ISO2022CNSMModel = {'classTable': ISO2022CN_cls,
                    'classFactor': 9,
                    'stateTable': ISO2022CN_st,
                    'charLenTable': ISO2022CNCharLenTable,
                    'name': "ISO-2022-CN"}

# ISO-2022-JP tables.
ISO2022JP_cls = (
2,0,0,0,0,0,0,0,  # 00 - 07
0,0,0,0,0,0,2,2,  # 08 - 0f
0,0,0,0,0,0,0,0,  # 10 - 17
0,0,0,1,0,0,0,0,  # 18 - 1f
0,0,0,0,7,0,0,0,  # 20 - 27
3,0,0,0,0,0,0,0,  # 28 - 2f
0,0,0,0,0,0,0,0,  # 30 - 37
0,0,0,0,0,0,0,0,  # 38 - 3f
6,0,4,0,8,0,0,0,  # 40 - 47
0,9,5,0,0,0,0,0,  # 48 - 4f
0,0,0,0,0,0,0,0,  # 50 - 57
0,0,0,0,0,0,0,0,  # 58 - 5f
0,0,0,0,0,0,0,0,  # 60 - 67
0,0,0,0,0,0,0,0,  # 68 - 6f
0,0,0,0,0,0,0,0,  # 70 - 77
0,0,0,0,0,0,0,0,  # 78 - 7f
2,2,2,2,2,2,2,2,  # 80 - 87
2,2,2,2,2,2,2,2,  # 88 - 8f
2,2,2,2,2,2,2,2,  # 90 - 97
2,2,2,2,2,2,2,2,  # 98 - 9f
2,2,2,2,2,2,2,2,  # a0 - a7
2,2,2,2,2,2,2,2,  # a8 - af
2,2,2,2,2,2,2,2,  # b0 - b7
2,2,2,2,2,2,2,2,  # b8 - bf
2,2,2,2,2,2,2,2,  # c0 - c7
2,2,2,2,2,2,2,2,  # c8 - cf
2,2,2,2,2,2,2,2,  # d0 - d7
2,2,2,2,2,2,2,2,  # d8 - df
2,2,2,2,2,2,2,2,  # e0 - e7
2,2,2,2,2,2,2,2,  # e8 - ef
2,2,2,2,2,2,2,2,  # f0 - f7
2,2,2,2,2,2,2,2,  # f8 - ff
)

ISO2022JP_st = (
eStart,     3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07
eStart,eStart,eError,eError,eError,eError,eError,eError,# 08-0f
eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17
eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,# 18-1f
eError,     5,eError,eError,eError,     4,eError,eError,# 20-27
eError,eError,eError,     6,eItsMe,eError,eItsMe,eError,# 28-2f
eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,# 30-37
eError,eError,eError,eItsMe,eError,eError,eError,eError,# 38-3f
eError,eError,eError,eError,eItsMe,eError,eStart,eStart,# 40-47
)

ISO2022JPCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)

ISO2022JPSMModel = {'classTable': ISO2022JP_cls,
                    'classFactor': 10,
                    'stateTable': ISO2022JP_st,
                    'charLenTable': ISO2022JPCharLenTable,
                    'name': "ISO-2022-JP"}

# ISO-2022-KR tables.
ISO2022KR_cls = (
2,0,0,0,0,0,0,0,  # 00 - 07
0,0,0,0,0,0,0,0,  # 08 - 0f
0,0,0,0,0,0,0,0,  # 10 - 17
0,0,0,1,0,0,0,0,  # 18 - 1f
0,0,0,0,3,0,0,0,  # 20 - 27
0,4,0,0,0,0,0,0,  # 28 - 2f
0,0,0,0,0,0,0,0,  # 30 - 37
0,0,0,0,0,0,0,0,  # 38 - 3f
0,0,0,5,0,0,0,0,  # 40 - 47
0,0,0,0,0,0,0,0,  # 48 - 4f
0,0,0,0,0,0,0,0,  # 50 - 57
0,0,0,0,0,0,0,0,  # 58 - 5f
0,0,0,0,0,0,0,0,  # 60 - 67
0,0,0,0,0,0,0,0,  # 68 - 6f
0,0,0,0,0,0,0,0,  # 70 - 77
0,0,0,0,0,0,0,0,  # 78 - 7f
2,2,2,2,2,2,2,2,  # 80 - 87
2,2,2,2,2,2,2,2,  # 88 - 8f
2,2,2,2,2,2,2,2,  # 90 - 97
2,2,2,2,2,2,2,2,  # 98 - 9f
2,2,2,2,2,2,2,2,  # a0 - a7
2,2,2,2,2,2,2,2,  # a8 - af
2,2,2,2,2,2,2,2,  # b0 - b7
2,2,2,2,2,2,2,2,  # b8 - bf
2,2,2,2,2,2,2,2,  # c0 - c7
2,2,2,2,2,2,2,2,  # c8 - cf
2,2,2,2,2,2,2,2,  # d0 - d7
2,2,2,2,2,2,2,2,  # d8 - df
2,2,2,2,2,2,2,2,  # e0 - e7
2,2,2,2,2,2,2,2,  # e8 - ef
2,2,2,2,2,2,2,2,  # f0 - f7
2,2,2,2,2,2,2,2,  # f8 - ff
)

ISO2022KR_st = (
eStart,     3,eError,eStart,eStart,eStart,eError,eError,# 00-07
eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f
eItsMe,eItsMe,eError,eError,eError,     4,eError,eError,# 10-17
eError,eError,eError,eError,     5,eError,eError,eError,# 18-1f
eError,eError,eError,eItsMe,eStart,eStart,eStart,eStart,# 20-27
)

ISO2022KRCharLenTable = (0, 0, 0, 0, 0, 0)

ISO2022KRSMModel = {'classTable': ISO2022KR_cls,
                    'classFactor': 6,
                    'stateTable': ISO2022KR_st,
                    'charLenTable': ISO2022KRCharLenTable,
                    'name': "ISO-2022-KR"}
241 |
242 | # flake8: noqa
243 |
--------------------------------------------------------------------------------
/requests/packages/chardet/eucjpprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | import sys
29 | from . import constants
30 | from .mbcharsetprober import MultiByteCharSetProber
31 | from .codingstatemachine import CodingStateMachine
32 | from .chardistribution import EUCJPDistributionAnalysis
33 | from .jpcntx import EUCJPContextAnalysis
34 | from .mbcssm import EUCJPSMModel
35 |
36 |
class EUCJPProber(MultiByteCharSetProber):
    """Prober for the EUC-JP (Japanese) encoding.

    Combines state-machine byte validation with both a character
    distribution analysis and a Japanese context analysis; the reported
    confidence is the higher of the two scores.
    """

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        self._mCodingSM = CodingStateMachine(EUCJPSMModel)
        self._mDistributionAnalyzer = EUCJPDistributionAnalysis()
        self._mContextAnalyzer = EUCJPContextAnalysis()
        self.reset()

    def reset(self):
        MultiByteCharSetProber.reset(self)
        self._mContextAnalyzer.reset()

    def get_charset_name(self):
        return "EUC-JP"

    def feed(self, aBuf):
        """Run aBuf through the state machine, feeding each completed
        character pair to the context and distribution analyzers."""
        aLen = len(aBuf)
        for i in range(0, aLen):
            # PY3K: aBuf is a byte array, so aBuf[i] is an int, not a byte
            codingState = self._mCodingSM.next_state(aBuf[i])
            if codingState == constants.eError:
                if constants._debug:
                    sys.stderr.write(self.get_charset_name()
                                     + ' prober hit error at byte ' + str(i)
                                     + '\n')
                self._mState = constants.eNotMe
                break
            elif codingState == constants.eItsMe:
                self._mState = constants.eFoundIt
                break
            elif codingState == constants.eStart:
                charLen = self._mCodingSM.get_current_charlen()
                if i == 0:
                    # The first byte completes a character begun in the
                    # previous feed() call; pair it with the saved byte.
                    self._mLastChar[1] = aBuf[0]
                    self._mContextAnalyzer.feed(self._mLastChar, charLen)
                    self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
                else:
                    self._mContextAnalyzer.feed(aBuf[i - 1:i + 1], charLen)
                    self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
                                                     charLen)

        # Remember the final byte in case a character straddles buffers.
        self._mLastChar[0] = aBuf[aLen - 1]

        if self.get_state() == constants.eDetecting:
            # Shortcut: stop early once the evidence is strong enough.
            if (self._mContextAnalyzer.got_enough_data() and
                    (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
                self._mState = constants.eFoundIt

        return self.get_state()

    def get_confidence(self):
        """Return the better of the context and distribution scores."""
        contxtCf = self._mContextAnalyzer.get_confidence()
        distribCf = self._mDistributionAnalyzer.get_confidence()
        return max(contxtCf, distribCf)
91 |
--------------------------------------------------------------------------------
/requests/packages/chardet/euckrprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import EUCKRDistributionAnalysis
31 | from .mbcssm import EUCKRSMModel
32 |
33 |
class EUCKRProber(MultiByteCharSetProber):
    """Prober for the EUC-KR (Korean) encoding."""

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        self._mDistributionAnalyzer = EUCKRDistributionAnalysis()
        self._mCodingSM = CodingStateMachine(EUCKRSMModel)
        self.reset()

    def get_charset_name(self):
        """Canonical name of the charset this prober detects."""
        return "EUC-KR"
43 |
--------------------------------------------------------------------------------
/requests/packages/chardet/euctwprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import EUCTWDistributionAnalysis
31 | from .mbcssm import EUCTWSMModel
32 |
class EUCTWProber(MultiByteCharSetProber):
    """Prober for the EUC-TW (Taiwanese) encoding."""

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        self._mDistributionAnalyzer = EUCTWDistributionAnalysis()
        self._mCodingSM = CodingStateMachine(EUCTWSMModel)
        self.reset()

    def get_charset_name(self):
        """Canonical name of the charset this prober detects."""
        return "EUC-TW"
42 |
--------------------------------------------------------------------------------
/requests/packages/chardet/gb2312prober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import GB2312DistributionAnalysis
31 | from .mbcssm import GB2312SMModel
32 |
class GB2312Prober(MultiByteCharSetProber):
    """Prober for the GB2312 (Simplified Chinese) encoding."""

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        self._mDistributionAnalyzer = GB2312DistributionAnalysis()
        self._mCodingSM = CodingStateMachine(GB2312SMModel)
        self.reset()

    def get_charset_name(self):
        """Canonical name of the charset this prober detects."""
        return "GB2312"
42 |
--------------------------------------------------------------------------------
/requests/packages/chardet/latin1prober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is Mozilla Universal charset detector code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 2001
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | # Shy Shalom - original C code
12 | #
13 | # This library is free software; you can redistribute it and/or
14 | # modify it under the terms of the GNU Lesser General Public
15 | # License as published by the Free Software Foundation; either
16 | # version 2.1 of the License, or (at your option) any later version.
17 | #
18 | # This library is distributed in the hope that it will be useful,
19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 | # Lesser General Public License for more details.
22 | #
23 | # You should have received a copy of the GNU Lesser General Public
24 | # License along with this library; if not, write to the Free Software
25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26 | # 02110-1301 USA
27 | ######################### END LICENSE BLOCK #########################
28 |
29 | from .charsetprober import CharSetProber
30 | from .constants import eNotMe
31 | from .compat import wrap_ord
32 |
# Number of frequency categories tracked by Latin1Prober._mFreqCounter.
FREQ_CAT_NUM = 4

# Character classes for Latin-1 / windows-1252 bytes.
UDF = 0  # undefined
OTH = 1  # other
ASC = 2  # ascii capital letter
ASS = 3  # ascii small letter
ACV = 4  # accent capital vowel
ACO = 5  # accent capital other
ASV = 6  # accent small vowel
ASO = 7  # accent small other
CLASS_NUM = 8  # total classes

# Maps every byte value (0-255) to one of the classes above.
Latin1_CharToClass = (
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 00 - 07
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 08 - 0F
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 10 - 17
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 18 - 1F
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 20 - 27
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 28 - 2F
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 30 - 37
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 38 - 3F
    OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC,  # 40 - 47
    ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,  # 48 - 4F
    ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,  # 50 - 57
    ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH,  # 58 - 5F
    OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS,  # 60 - 67
    ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS,  # 68 - 6F
    ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS,  # 70 - 77
    ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH,  # 78 - 7F
    OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH,  # 80 - 87
    OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF,  # 88 - 8F
    UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 90 - 97
    OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO,  # 98 - 9F
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # A0 - A7
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # A8 - AF
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # B0 - B7
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # B8 - BF
    ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO,  # C0 - C7
    ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV,  # C8 - CF
    ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH,  # D0 - D7
    ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO,  # D8 - DF
    ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO,  # E0 - E7
    ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV,  # E8 - EF
    ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH,  # F0 - F7
    ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO,  # F8 - FF
)

# Bigram plausibility for (previous class, current class), flattened
# row-major with CLASS_NUM columns.
# 0 : illegal
# 1 : very unlikely
# 2 : normal
# 3 : very likely
Latin1ClassModel = (
    # UDF OTH ASC ASS ACV ACO ASV ASO
    0,  0,  0,  0,  0,  0,  0,  0,  # UDF
    0,  3,  3,  3,  3,  3,  3,  3,  # OTH
    0,  3,  3,  3,  3,  3,  3,  3,  # ASC
    0,  3,  3,  3,  1,  1,  3,  3,  # ASS
    0,  3,  3,  3,  1,  2,  1,  2,  # ACV
    0,  3,  3,  3,  3,  3,  3,  3,  # ACO
    0,  3,  1,  3,  1,  1,  1,  3,  # ASV
    0,  3,  1,  3,  1,  1,  3,  3,  # ASO
)
95 |
96 |
class Latin1Prober(CharSetProber):
    """Fallback prober for windows-1252 / Latin-1 text.

    Scores the input by how plausible each adjacent pair of character
    classes is, according to Latin1ClassModel.
    """

    def __init__(self):
        CharSetProber.__init__(self)
        self.reset()

    def reset(self):
        self._mLastCharClass = OTH
        self._mFreqCounter = [0] * FREQ_CAT_NUM
        CharSetProber.reset(self)

    def get_charset_name(self):
        return "windows-1252"

    def feed(self, aBuf):
        """Score each (previous, current) character-class bigram."""
        aBuf = self.filter_with_english_letters(aBuf)
        for byte in aBuf:
            cur_class = Latin1_CharToClass[wrap_ord(byte)]
            freq = Latin1ClassModel[(self._mLastCharClass * CLASS_NUM)
                                    + cur_class]
            if freq == 0:
                # Illegal pair: this cannot be Latin-1 text.
                self._mState = eNotMe
                break
            self._mFreqCounter[freq] += 1
            self._mLastCharClass = cur_class

        return self.get_state()

    def get_confidence(self):
        if self.get_state() == eNotMe:
            return 0.01

        total = sum(self._mFreqCounter)
        if total < 0.01:
            confidence = 0.0
        else:
            # "Very likely" pairs raise the score; "very unlikely" pairs
            # are heavily penalized.
            confidence = ((self._mFreqCounter[3]
                           - self._mFreqCounter[1] * 20.0) / total)
        confidence = max(confidence, 0.0)
        # Deliberately lowered so more specific probers win ties.
        return confidence * 0.73
140 |
--------------------------------------------------------------------------------
/requests/packages/chardet/mbcharsetprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is Mozilla Universal charset detector code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 2001
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | # Shy Shalom - original C code
12 | # Proofpoint, Inc.
13 | #
14 | # This library is free software; you can redistribute it and/or
15 | # modify it under the terms of the GNU Lesser General Public
16 | # License as published by the Free Software Foundation; either
17 | # version 2.1 of the License, or (at your option) any later version.
18 | #
19 | # This library is distributed in the hope that it will be useful,
20 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
21 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 | # Lesser General Public License for more details.
23 | #
24 | # You should have received a copy of the GNU Lesser General Public
25 | # License along with this library; if not, write to the Free Software
26 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
27 | # 02110-1301 USA
28 | ######################### END LICENSE BLOCK #########################
29 |
30 | import sys
31 | from . import constants
32 | from .charsetprober import CharSetProber
33 |
34 |
class MultiByteCharSetProber(CharSetProber):
    """Shared driver for the multi-byte charset probers.

    Concrete subclasses install a coding state machine and a character
    distribution analyzer; this base class pumps each input byte through
    both and tracks the detection state.
    """

    def __init__(self):
        CharSetProber.__init__(self)
        self._mDistributionAnalyzer = None
        self._mCodingSM = None
        # Two-byte carry buffer bridging chunk boundaries.
        self._mLastChar = [0, 0]

    def reset(self):
        CharSetProber.reset(self)
        if self._mCodingSM:
            self._mCodingSM.reset()
        if self._mDistributionAnalyzer:
            self._mDistributionAnalyzer.reset()
        self._mLastChar = [0, 0]

    def get_charset_name(self):
        # Subclasses supply the concrete charset name.
        pass

    def feed(self, aBuf):
        for pos in range(len(aBuf)):
            state = self._mCodingSM.next_state(aBuf[pos])
            if state == constants.eError:
                if constants._debug:
                    sys.stderr.write(self.get_charset_name()
                                     + ' prober hit error at byte ' + str(pos)
                                     + '\n')
                self._mState = constants.eNotMe
                break
            if state == constants.eItsMe:
                self._mState = constants.eFoundIt
                break
            if state == constants.eStart:
                char_len = self._mCodingSM.get_current_charlen()
                if pos:
                    self._mDistributionAnalyzer.feed(aBuf[pos - 1:pos + 1],
                                                     char_len)
                else:
                    # First byte of the chunk: pair it with the carried
                    # tail of the previous chunk.
                    self._mLastChar[1] = aBuf[0]
                    self._mDistributionAnalyzer.feed(self._mLastChar,
                                                     char_len)

        self._mLastChar[0] = aBuf[len(aBuf) - 1]

        if self.get_state() == constants.eDetecting:
            if (self._mDistributionAnalyzer.got_enough_data() and
                    (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
                self._mState = constants.eFoundIt

        return self.get_state()

    def get_confidence(self):
        return self._mDistributionAnalyzer.get_confidence()
87 |
--------------------------------------------------------------------------------
/requests/packages/chardet/mbcsgroupprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is Mozilla Universal charset detector code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 2001
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | # Shy Shalom - original C code
12 | # Proofpoint, Inc.
13 | #
14 | # This library is free software; you can redistribute it and/or
15 | # modify it under the terms of the GNU Lesser General Public
16 | # License as published by the Free Software Foundation; either
17 | # version 2.1 of the License, or (at your option) any later version.
18 | #
19 | # This library is distributed in the hope that it will be useful,
20 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
21 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 | # Lesser General Public License for more details.
23 | #
24 | # You should have received a copy of the GNU Lesser General Public
25 | # License along with this library; if not, write to the Free Software
26 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
27 | # 02110-1301 USA
28 | ######################### END LICENSE BLOCK #########################
29 |
30 | from .charsetgroupprober import CharSetGroupProber
31 | from .utf8prober import UTF8Prober
32 | from .sjisprober import SJISProber
33 | from .eucjpprober import EUCJPProber
34 | from .gb2312prober import GB2312Prober
35 | from .euckrprober import EUCKRProber
36 | from .cp949prober import CP949Prober
37 | from .big5prober import Big5Prober
38 | from .euctwprober import EUCTWProber
39 |
40 |
class MBCSGroupProber(CharSetGroupProber):
    """Group prober bundling every multi-byte charset detector."""

    def __init__(self):
        CharSetGroupProber.__init__(self)
        # Order matters: it is the order the group tries the charsets in.
        prober_classes = (
            UTF8Prober,
            SJISProber,
            EUCJPProber,
            GB2312Prober,
            EUCKRProber,
            CP949Prober,
            Big5Prober,
            EUCTWProber,
        )
        self._mProbers = [cls() for cls in prober_classes]
        self.reset()
55 |
--------------------------------------------------------------------------------
/requests/packages/chardet/sbcharsetprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is Mozilla Universal charset detector code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 2001
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | # Shy Shalom - original C code
12 | #
13 | # This library is free software; you can redistribute it and/or
14 | # modify it under the terms of the GNU Lesser General Public
15 | # License as published by the Free Software Foundation; either
16 | # version 2.1 of the License, or (at your option) any later version.
17 | #
18 | # This library is distributed in the hope that it will be useful,
19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 | # Lesser General Public License for more details.
22 | #
23 | # You should have received a copy of the GNU Lesser General Public
24 | # License along with this library; if not, write to the Free Software
25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26 | # 02110-1301 USA
27 | ######################### END LICENSE BLOCK #########################
28 |
29 | import sys
30 | from . import constants
31 | from .charsetprober import CharSetProber
32 | from .compat import wrap_ord
33 |
34 | SAMPLE_SIZE = 64
35 | SB_ENOUGH_REL_THRESHOLD = 1024
36 | POSITIVE_SHORTCUT_THRESHOLD = 0.95
37 | NEGATIVE_SHORTCUT_THRESHOLD = 0.05
38 | SYMBOL_CAT_ORDER = 250
39 | NUMBER_OF_SEQ_CAT = 4
40 | POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1
41 | #NEGATIVE_CAT = 0
42 |
43 |
class SingleByteCharSetProber(CharSetProber):
    """Prober for a single-byte charset described by a language model.

    ``model`` is a dict providing ``charToOrderMap``, ``precedenceMatrix``,
    ``mTypicalPositiveRatio``, ``keepEnglishLetter`` and ``charsetName``.
    Confidence is based on how often frequent-letter pairs occur in the
    order the model predicts.
    """

    def __init__(self, model, reversed=False, nameProber=None):
        CharSetProber.__init__(self)
        self._mModel = model
        # TRUE if we need to reverse every pair in the model lookup
        self._mReversed = reversed
        # Optional auxiliary prober for name decision
        self._mNameProber = nameProber
        self.reset()

    def reset(self):
        CharSetProber.reset(self)
        # char order of last character (255 = none seen yet)
        self._mLastOrder = 255
        self._mSeqCounters = [0] * NUMBER_OF_SEQ_CAT
        self._mTotalSeqs = 0
        self._mTotalChar = 0
        # characters that fall in our sampling range
        self._mFreqChar = 0

    def get_charset_name(self):
        # Defer to the name prober (e.g. HebrewProber) when one is attached.
        if self._mNameProber:
            return self._mNameProber.get_charset_name()
        else:
            return self._mModel['charsetName']

    def feed(self, aBuf):
        """Feed a chunk of bytes; returns the detection state."""
        if not self._mModel['keepEnglishLetter']:
            aBuf = self.filter_without_english_letters(aBuf)
        aLen = len(aBuf)
        if not aLen:
            return self.get_state()
        for c in aBuf:
            order = self._mModel['charToOrderMap'][wrap_ord(c)]
            if order < SYMBOL_CAT_ORDER:
                self._mTotalChar += 1
            if order < SAMPLE_SIZE:
                self._mFreqChar += 1
                if self._mLastOrder < SAMPLE_SIZE:
                    self._mTotalSeqs += 1
                    if not self._mReversed:
                        i = (self._mLastOrder * SAMPLE_SIZE) + order
                        model = self._mModel['precedenceMatrix'][i]
                    else:  # reverse the order of the letters in the lookup
                        i = (order * SAMPLE_SIZE) + self._mLastOrder
                        model = self._mModel['precedenceMatrix'][i]
                    self._mSeqCounters[model] += 1
            self._mLastOrder = order

        if self.get_state() == constants.eDetecting:
            if self._mTotalSeqs > SB_ENOUGH_REL_THRESHOLD:
                cf = self.get_confidence()
                if cf > POSITIVE_SHORTCUT_THRESHOLD:
                    # Fixed: the two literals previously concatenated
                    # without a space ("we have awinner").
                    if constants._debug:
                        sys.stderr.write('%s confidence = %s, we have a '
                                         'winner\n' %
                                         (self._mModel['charsetName'], cf))
                    self._mState = constants.eFoundIt
                elif cf < NEGATIVE_SHORTCUT_THRESHOLD:
                    # Fixed: missing space and "threshhold" typo in the
                    # concatenated debug message.
                    if constants._debug:
                        sys.stderr.write('%s confidence = %s, below negative '
                                         'shortcut threshold %s\n' %
                                         (self._mModel['charsetName'], cf,
                                          NEGATIVE_SHORTCUT_THRESHOLD))
                    self._mState = constants.eNotMe

        return self.get_state()

    def get_confidence(self):
        """Ratio of observed to typical positive sequences, scaled by the
        share of frequent characters; clamped just below 1.0."""
        r = 0.01
        if self._mTotalSeqs > 0:
            r = ((1.0 * self._mSeqCounters[POSITIVE_CAT]) / self._mTotalSeqs
                 / self._mModel['mTypicalPositiveRatio'])
            r = r * self._mFreqChar / self._mTotalChar
            if r >= 1.0:
                r = 0.99
        return r
121 |
--------------------------------------------------------------------------------
/requests/packages/chardet/sbcsgroupprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is Mozilla Universal charset detector code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 2001
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | # Shy Shalom - original C code
12 | #
13 | # This library is free software; you can redistribute it and/or
14 | # modify it under the terms of the GNU Lesser General Public
15 | # License as published by the Free Software Foundation; either
16 | # version 2.1 of the License, or (at your option) any later version.
17 | #
18 | # This library is distributed in the hope that it will be useful,
19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 | # Lesser General Public License for more details.
22 | #
23 | # You should have received a copy of the GNU Lesser General Public
24 | # License along with this library; if not, write to the Free Software
25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26 | # 02110-1301 USA
27 | ######################### END LICENSE BLOCK #########################
28 |
29 | from .charsetgroupprober import CharSetGroupProber
30 | from .sbcharsetprober import SingleByteCharSetProber
31 | from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel,
32 | Latin5CyrillicModel, MacCyrillicModel,
33 | Ibm866Model, Ibm855Model)
34 | from .langgreekmodel import Latin7GreekModel, Win1253GreekModel
35 | from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel
36 | from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel
37 | from .langthaimodel import TIS620ThaiModel
38 | from .langhebrewmodel import Win1255HebrewModel
39 | from .hebrewprober import HebrewProber
40 |
41 |
class SBCSGroupProber(CharSetGroupProber):
    """Group prober bundling every single-byte charset detector."""

    def __init__(self):
        CharSetGroupProber.__init__(self)
        # One plain prober per language model, in priority order.
        models = (
            Win1251CyrillicModel,
            Koi8rModel,
            Latin5CyrillicModel,
            MacCyrillicModel,
            Ibm866Model,
            Ibm855Model,
            Latin7GreekModel,
            Win1253GreekModel,
            Latin5BulgarianModel,
            Win1251BulgarianModel,
            Latin2HungarianModel,
            Win1250HungarianModel,
            TIS620ThaiModel,
        )
        self._mProbers = [SingleByteCharSetProber(m) for m in models]

        # Hebrew needs a coordinating prober that arbitrates between the
        # logical and the visual byte order of windows-1255.
        hebrew_prober = HebrewProber()
        logical_hebrew = SingleByteCharSetProber(Win1255HebrewModel,
                                                 False, hebrew_prober)
        visual_hebrew = SingleByteCharSetProber(Win1255HebrewModel, True,
                                                hebrew_prober)
        hebrew_prober.set_model_probers(logical_hebrew, visual_hebrew)
        self._mProbers.extend([hebrew_prober, logical_hebrew, visual_hebrew])

        self.reset()
70 |
--------------------------------------------------------------------------------
/requests/packages/chardet/sjisprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | import sys
29 | from .mbcharsetprober import MultiByteCharSetProber
30 | from .codingstatemachine import CodingStateMachine
31 | from .chardistribution import SJISDistributionAnalysis
32 | from .jpcntx import SJISContextAnalysis
33 | from .mbcssm import SJISSMModel
34 | from . import constants
35 |
36 |
class SJISProber(MultiByteCharSetProber):
    """Prober for Shift_JIS.

    Combines the inherited coding state machine / byte-distribution
    analysis with a Japanese context analyzer that also decides the
    reported charset name (Shift_JIS vs. its CP932 variant).
    """

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        self._mCodingSM = CodingStateMachine(SJISSMModel)
        self._mDistributionAnalyzer = SJISDistributionAnalysis()
        self._mContextAnalyzer = SJISContextAnalysis()
        self.reset()

    def reset(self):
        MultiByteCharSetProber.reset(self)
        self._mContextAnalyzer.reset()

    def get_charset_name(self):
        # The context analyzer owns the name decision.
        return self._mContextAnalyzer.get_charset_name()

    def feed(self, aBuf):
        """Feed a chunk of bytes; returns the detection state."""
        aLen = len(aBuf)
        for i in range(0, aLen):
            codingState = self._mCodingSM.next_state(aBuf[i])
            if codingState == constants.eError:
                if constants._debug:
                    sys.stderr.write(self.get_charset_name()
                                     + ' prober hit error at byte ' + str(i)
                                     + '\n')
                self._mState = constants.eNotMe
                break
            elif codingState == constants.eItsMe:
                self._mState = constants.eFoundIt
                break
            elif codingState == constants.eStart:
                # eStart means a complete character just finished decoding.
                charLen = self._mCodingSM.get_current_charlen()
                if i == 0:
                    # First byte of this chunk completes a character begun
                    # in the previous chunk, carried in self._mLastChar.
                    self._mLastChar[1] = aBuf[0]
                    self._mContextAnalyzer.feed(self._mLastChar[2 - charLen:],
                                                charLen)
                    self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
                else:
                    # Context analyzer gets the character's lead byte slice;
                    # distribution analyzer gets the trailing byte pair.
                    self._mContextAnalyzer.feed(aBuf[i + 1 - charLen:i + 3
                                                     - charLen], charLen)
                    self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
                                                     charLen)

        # Carry the final byte over to the next feed() call.
        self._mLastChar[0] = aBuf[aLen - 1]

        if self.get_state() == constants.eDetecting:
            if (self._mContextAnalyzer.got_enough_data() and
                    (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
                self._mState = constants.eFoundIt

        return self.get_state()

    def get_confidence(self):
        # Whichever analyzer is more certain wins.
        contxtCf = self._mContextAnalyzer.get_confidence()
        distribCf = self._mDistributionAnalyzer.get_confidence()
        return max(contxtCf, distribCf)
92 |
--------------------------------------------------------------------------------
/requests/packages/chardet/universaldetector.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is Mozilla Universal charset detector code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 2001
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | # Shy Shalom - original C code
12 | #
13 | # This library is free software; you can redistribute it and/or
14 | # modify it under the terms of the GNU Lesser General Public
15 | # License as published by the Free Software Foundation; either
16 | # version 2.1 of the License, or (at your option) any later version.
17 | #
18 | # This library is distributed in the hope that it will be useful,
19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 | # Lesser General Public License for more details.
22 | #
23 | # You should have received a copy of the GNU Lesser General Public
24 | # License along with this library; if not, write to the Free Software
25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26 | # 02110-1301 USA
27 | ######################### END LICENSE BLOCK #########################
28 |
29 | from . import constants
30 | import sys
31 | import codecs
32 | from .latin1prober import Latin1Prober # windows-1252
33 | from .mbcsgroupprober import MBCSGroupProber # multi-byte character sets
34 | from .sbcsgroupprober import SBCSGroupProber # single-byte character sets
35 | from .escprober import EscCharSetProber # ISO-2122, etc.
36 | import re
37 |
MINIMUM_THRESHOLD = 0.20
# Input-classifier states: plain ASCII, escape-sequence encodings,
# and anything containing bytes >= 0x80.
ePureAscii = 0
eEscAscii = 1
eHighbyte = 2


class UniversalDetector:
    """Incremental character-encoding detector.

    Feed successive byte chunks with ``feed`` and call ``close`` when the
    stream ends; the verdict lives in ``self.result``, a dict with
    ``encoding`` and ``confidence`` keys.
    """

    def __init__(self):
        # Any byte >= 0x80 means the input cannot be plain ASCII.
        self._highBitDetector = re.compile(b'[\x80-\xFF]')
        # ESC or '~{' suggests an escape-based encoding (ISO-2022, HZ).
        self._escDetector = re.compile(b'(\033|~{)')
        self._mEscCharSetProber = None
        self._mCharSetProbers = []
        self.reset()

    def reset(self):
        """Reset all state so the detector can be reused for a new stream."""
        self.result = {'encoding': None, 'confidence': 0.0}
        self.done = False
        self._mStart = True
        self._mGotData = False
        self._mInputState = ePureAscii
        self._mLastChar = b''
        if self._mEscCharSetProber:
            self._mEscCharSetProber.reset()
        for prober in self._mCharSetProbers:
            prober.reset()

    def feed(self, aBuf):
        """Consume a chunk of bytes, updating ``self.result``/``self.done``."""
        if self.done:
            return

        aLen = len(aBuf)
        if not aLen:
            return

        if not self._mGotData:
            # If the data starts with BOM, we know it is UTF.
            # NOTE: the 4-byte UTF-32 checks must come before the 2-byte
            # UTF-16 ones because BOM_UTF32_LE starts with BOM_LE.
            if aBuf[:3] == codecs.BOM_UTF8:
                # EF BB BF  UTF-8 with BOM
                self.result = {'encoding': "UTF-8-SIG", 'confidence': 1.0}
            elif aBuf[:4] == codecs.BOM_UTF32_LE:
                # FF FE 00 00  UTF-32, little-endian BOM
                self.result = {'encoding': "UTF-32LE", 'confidence': 1.0}
            elif aBuf[:4] == codecs.BOM_UTF32_BE:
                # 00 00 FE FF  UTF-32, big-endian BOM
                self.result = {'encoding': "UTF-32BE", 'confidence': 1.0}
            elif aBuf[:4] == b'\xFE\xFF\x00\x00':
                # FE FF 00 00  UCS-4, unusual octet order BOM (3412)
                self.result = {
                    'encoding': "X-ISO-10646-UCS-4-3412",
                    'confidence': 1.0
                }
            elif aBuf[:4] == b'\x00\x00\xFF\xFE':
                # 00 00 FF FE  UCS-4, unusual octet order BOM (2143)
                self.result = {
                    'encoding': "X-ISO-10646-UCS-4-2143",
                    'confidence': 1.0
                }
            elif aBuf[:2] == codecs.BOM_LE:
                # FF FE  UTF-16, little endian BOM
                self.result = {'encoding': "UTF-16LE", 'confidence': 1.0}
            elif aBuf[:2] == codecs.BOM_BE:
                # FE FF  UTF-16, big endian BOM
                self.result = {'encoding': "UTF-16BE", 'confidence': 1.0}

            self._mGotData = True
            if self.result['encoding'] and (self.result['confidence'] > 0.0):
                self.done = True
                return

        # Escalate the input state; it only ever moves away from pure ASCII.
        if self._mInputState == ePureAscii:
            if self._highBitDetector.search(aBuf):
                self._mInputState = eHighbyte
            elif ((self._mInputState == ePureAscii) and
                    self._escDetector.search(self._mLastChar + aBuf)):
                self._mInputState = eEscAscii

        # Keep the final byte so escape sequences split across chunks are seen.
        self._mLastChar = aBuf[-1:]

        if self._mInputState == eEscAscii:
            if not self._mEscCharSetProber:
                self._mEscCharSetProber = EscCharSetProber()
            if self._mEscCharSetProber.feed(aBuf) == constants.eFoundIt:
                self.result = {'encoding': self._mEscCharSetProber.get_charset_name(),
                               'confidence': self._mEscCharSetProber.get_confidence()}
                self.done = True
        elif self._mInputState == eHighbyte:
            if not self._mCharSetProbers:
                self._mCharSetProbers = [MBCSGroupProber(), SBCSGroupProber(),
                                         Latin1Prober()]
            for prober in self._mCharSetProbers:
                if prober.feed(aBuf) == constants.eFoundIt:
                    self.result = {'encoding': prober.get_charset_name(),
                                   'confidence': prober.get_confidence()}
                    self.done = True
                    break

    def close(self):
        """Finish detection and return ``self.result`` (or None if no
        high-byte verdict could be reached)."""
        if self.done:
            return
        if not self._mGotData:
            if constants._debug:
                sys.stderr.write('no data received!\n')
            return
        self.done = True

        if self._mInputState == ePureAscii:
            self.result = {'encoding': 'ascii', 'confidence': 1.0}
            return self.result

        if self._mInputState == eHighbyte:
            proberConfidence = None
            maxProberConfidence = 0.0
            maxProber = None
            for prober in self._mCharSetProbers:
                if not prober:
                    continue
                proberConfidence = prober.get_confidence()
                if proberConfidence > maxProberConfidence:
                    maxProberConfidence = proberConfidence
                    maxProber = prober
            if maxProber and (maxProberConfidence > MINIMUM_THRESHOLD):
                self.result = {'encoding': maxProber.get_charset_name(),
                               'confidence': maxProber.get_confidence()}
                return self.result

        if constants._debug:
            sys.stderr.write('no probers hit minimum threshhold\n')
            # Fixed: group probers keep their children in '_mProbers'
            # (see MBCSGroupProber.__init__); the old '.mProbers' raised
            # AttributeError whenever this debug branch ran.
            for prober in self._mCharSetProbers[0]._mProbers:
                if not prober:
                    continue
                sys.stderr.write('%s confidence = %s\n' %
                                 (prober.get_charset_name(),
                                  prober.get_confidence()))
171 |
--------------------------------------------------------------------------------
/requests/packages/chardet/utf8prober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from . import constants
29 | from .charsetprober import CharSetProber
30 | from .codingstatemachine import CodingStateMachine
31 | from .mbcssm import UTF8SMModel
32 |
33 | ONE_CHAR_PROB = 0.5
34 |
35 |
class UTF8Prober(CharSetProber):
    """Prober that recognizes UTF-8 via its coding state machine."""

    def __init__(self):
        CharSetProber.__init__(self)
        self._mCodingSM = CodingStateMachine(UTF8SMModel)
        self.reset()

    def reset(self):
        CharSetProber.reset(self)
        self._mCodingSM.reset()
        # Count of complete multi-byte sequences seen so far.
        self._mNumOfMBChar = 0

    def get_charset_name(self):
        return "utf-8"

    def feed(self, aBuf):
        for byte in aBuf:
            state = self._mCodingSM.next_state(byte)
            if state == constants.eError:
                self._mState = constants.eNotMe
                break
            if state == constants.eItsMe:
                self._mState = constants.eFoundIt
                break
            if state == constants.eStart:
                # A full character was decoded; remember multi-byte ones.
                if self._mCodingSM.get_current_charlen() >= 2:
                    self._mNumOfMBChar += 1

        if self.get_state() == constants.eDetecting:
            if self.get_confidence() > constants.SHORTCUT_THRESHOLD:
                self._mState = constants.eFoundIt

        return self.get_state()

    def get_confidence(self):
        # Each multi-byte character halves the odds this is NOT UTF-8.
        unlike = 0.99
        if self._mNumOfMBChar >= 6:
            return unlike
        return 1.0 - unlike * (ONE_CHAR_PROB ** self._mNumOfMBChar)
77 |
--------------------------------------------------------------------------------
/requests/packages/urllib3/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | urllib3 - Thread-safe connection pooling and re-using.
3 | """
4 |
5 | __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)'
6 | __license__ = 'MIT'
7 | __version__ = 'dev'
8 |
9 |
10 | from .connectionpool import (
11 | HTTPConnectionPool,
12 | HTTPSConnectionPool,
13 | connection_from_url
14 | )
15 |
16 | from . import exceptions
17 | from .filepost import encode_multipart_formdata
18 | from .poolmanager import PoolManager, ProxyManager, proxy_from_url
19 | from .response import HTTPResponse
20 | from .util.request import make_headers
21 | from .util.url import get_host
22 | from .util.timeout import Timeout
23 | from .util.retry import Retry
24 |
25 |
26 | # Set default logging handler to avoid "No handler found" warnings.
27 | import logging
# Use the stdlib NullHandler when available; otherwise install a minimal
# stand-in so importing urllib3 never triggers "No handler found" warnings.
try: # Python 2.7+
    from logging import NullHandler
except ImportError:
    class NullHandler(logging.Handler):
        def emit(self, record):
            pass

logging.getLogger(__name__).addHandler(NullHandler())
36 |
def add_stderr_logger(level=logging.DEBUG):
    """Attach a StreamHandler to the urllib3 logger for quick debugging.

    Returns the freshly configured handler so callers can remove or
    adjust it later.
    """
    # Living in this __init__.py keeps __name__ correct even when urllib3
    # is vendored inside another package.
    fmt = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    handler = logging.StreamHandler()
    handler.setFormatter(fmt)
    logger = logging.getLogger(__name__)
    logger.addHandler(handler)
    logger.setLevel(level)
    logger.debug('Added a stderr logging handler to logger: %s' % __name__)
    return handler
53 |
54 | # ... Clean up.
55 | del NullHandler
56 |
57 |
58 | # Set security warning to only go off once by default.
59 | import warnings
60 | warnings.simplefilter('always', exceptions.SecurityWarning)
61 |
def disable_warnings(category=exceptions.HTTPWarning):
    """
    Helper for quickly disabling all urllib3 warnings.

    :param category: warning class to silence; defaults to
        ``exceptions.HTTPWarning``, the root of urllib3's warning
        hierarchy, so every urllib3 warning is covered.
    """
    warnings.simplefilter('ignore', category)
67 |
--------------------------------------------------------------------------------
/requests/packages/urllib3/_collections.py:
--------------------------------------------------------------------------------
1 | from collections import Mapping, MutableMapping
try:
    from threading import RLock
except ImportError:  # Platform-specific: No threads available
    # Without threads there is nothing to lock against, so substitute a
    # do-nothing object exposing the same ``with`` interface.
    class RLock:
        def __enter__(self):
            pass

        def __exit__(self, exc_type, exc_value, traceback):
            pass
11 |
12 |
13 | try: # Python 2.7+
14 | from collections import OrderedDict
15 | except ImportError:
16 | from .packages.ordered_dict import OrderedDict
17 | from .packages.six import iterkeys, itervalues
18 |
19 |
20 | __all__ = ['RecentlyUsedContainer', 'HTTPHeaderDict']
21 |
22 |
# Sentinel distinguishing "no value" from a stored None.
_Null = object()


class RecentlyUsedContainer(MutableMapping):
    """
    Provides a thread-safe dict-like container which maintains up to
    ``maxsize`` keys while throwing away the least-recently-used keys beyond
    ``maxsize``.

    :param maxsize:
        Maximum number of recent elements to retain.

    :param dispose_func:
        Every time an item is evicted from the container,
        ``dispose_func(value)`` is called. Callback which will get called
    """

    ContainerCls = OrderedDict

    def __init__(self, maxsize=10, dispose_func=None):
        self._maxsize = maxsize
        self.dispose_func = dispose_func
        self._container = self.ContainerCls()
        self.lock = RLock()

    def __getitem__(self, key):
        with self.lock:
            # Pop and re-insert so the key moves to the most-recent end.
            value = self._container.pop(key)
            self._container[key] = value
        return value

    def __setitem__(self, key, value):
        evicted = _Null
        with self.lock:
            # Replacing an existing key evicts its previous value.
            evicted = self._container.get(key, _Null)
            self._container[key] = value

            # Otherwise, the insertion may push the least-recently-used
            # entry out of the front of the container.
            if len(self._container) > self._maxsize:
                _, evicted = self._container.popitem(last=False)

        # Dispose outside the lock to keep the critical section short.
        if self.dispose_func and evicted is not _Null:
            self.dispose_func(evicted)

    def __delitem__(self, key):
        with self.lock:
            removed = self._container.pop(key)

        if self.dispose_func:
            self.dispose_func(removed)

    def __len__(self):
        with self.lock:
            return len(self._container)

    def __iter__(self):
        raise NotImplementedError('Iteration over this class is unlikely to be threadsafe.')

    def clear(self):
        with self.lock:
            # Grab every value, then wipe the mapping in one go.
            doomed = list(itervalues(self._container))
            self._container.clear()

        if self.dispose_func:
            for value in doomed:
                self.dispose_func(value)

    def keys(self):
        with self.lock:
            return list(iterkeys(self._container))
99 |
class HTTPHeaderDict(MutableMapping):
    """
    A ``dict``-like container for HTTP headers with case-insensitive
    field names, per RFC 7230.

    :param headers:
        An iterable of field-value pairs. Must not contain multiple field
        names when compared case-insensitively.

    :param kwargs:
        Additional field-value pairs to pass in to ``dict.update``.

    Iteration yields the first case-sensitive spelling seen for each
    case-insensitive field.  ``__setitem__`` overwrites any field that
    compares equal case-insensitively (matching ``dict`` semantics); to
    accumulate several values under one field, use ``add`` instead.
    Passing multiple case-insensitively-equal fields to the constructor
    or ``update`` is undefined and some may be lost.

    >>> headers = HTTPHeaderDict()
    >>> headers.add('Set-Cookie', 'foo=bar')
    >>> headers.add('set-cookie', 'baz=quxx')
    >>> headers['content-length'] = '7'
    >>> headers['SET-cookie']
    'foo=bar, baz=quxx'
    >>> headers['Content-Length']
    '7'

    For debugging, the private ``._data`` attribute is a plain ``dict``
    mapping each lowercased field name to a list of
    (case-sensitive-original-name, value) tuples, e.g.::

        {'set-cookie': [('Set-Cookie', 'foo=bar'), ('set-cookie', 'baz=quxx')],
         'content-length': [('content-length', '7')]}
    """

    def __init__(self, headers=None, **kwargs):
        self._data = {}
        self.update({} if headers is None else headers, **kwargs)

    def add(self, key, value):
        """Adds a (name, value) pair, doesn't overwrite the value if it already
        exists.

        >>> headers = HTTPHeaderDict(foo='bar')
        >>> headers.add('Foo', 'baz')
        >>> headers['foo']
        'bar, baz'
        """
        field = key.lower()
        if field not in self._data:
            self._data[field] = []
        self._data[field].append((key, value))

    def getlist(self, key):
        """Returns a list of all the values for the named field. Returns an
        empty list if the key doesn't exist."""
        if key not in self:
            return []
        return self[key].split(', ')

    def copy(self):
        # Rebuild through add() so every original spelling survives.
        clone = HTTPHeaderDict()
        for field in self._data:
            for rawkey, value in self._data[field]:
                clone.add(rawkey, value)
        return clone

    def __eq__(self, other):
        if not isinstance(other, Mapping):
            return False
        other = HTTPHeaderDict(other)
        mine = dict((k, self[k]) for k in self._data)
        theirs = dict((k, other[k]) for k in other._data)
        return mine == theirs

    def __getitem__(self, key):
        # Repeated header fields combine into one comma-separated value.
        pairs = self._data[key.lower()]
        return ', '.join(pair[1] for pair in pairs)

    def __setitem__(self, key, value):
        self._data[key.lower()] = [(key, value)]

    def __delitem__(self, key):
        del self._data[key.lower()]

    def __len__(self):
        return len(self._data)

    def __iter__(self):
        # Yield the first-seen original spelling of each field.
        for pairs in itervalues(self._data):
            yield pairs[0][0]

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, dict(self.items()))
199 |
--------------------------------------------------------------------------------
/requests/packages/urllib3/connection.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | import sys
3 | import socket
4 | from socket import timeout as SocketTimeout
5 | import warnings
6 | from .packages import six
7 |
8 | try: # Python 3
9 | from http.client import HTTPConnection as _HTTPConnection, HTTPException
10 | except ImportError:
11 | from httplib import HTTPConnection as _HTTPConnection, HTTPException
12 |
13 |
# Sentinel class assigned to HTTPSConnection below before attempting the
# ssl import, so callers can recognise a missing-SSL build.
class DummyConnection(object):
    "Used to detect a failed ConnectionCls import."
    pass
17 |
18 |
try:  # Compiled with SSL?
    # Pre-set the sentinel; only replaced further down when ssl imported
    # successfully and the verified class is defined.
    HTTPSConnection = DummyConnection
    import ssl
    BaseSSLError = ssl.SSLError
except (ImportError, AttributeError):  # Platform-specific: No SSL.
    ssl = None

    # Stand-in so `except BaseSSLError` clauses elsewhere still compile
    # when the interpreter has no ssl support.
    class BaseSSLError(BaseException):
        pass


try:  # Python 3:
    # Not a no-op, we're adding this to the namespace so it can be imported.
    ConnectionError = ConnectionError
except NameError:  # Python 2:
    class ConnectionError(Exception):
        pass
36 |
37 |
38 | from .exceptions import (
39 | ConnectTimeoutError,
40 | SystemTimeWarning,
41 | SecurityWarning,
42 | )
43 | from .packages.ssl_match_hostname import match_hostname
44 |
45 | from .util.ssl_ import (
46 | resolve_cert_reqs,
47 | resolve_ssl_version,
48 | ssl_wrap_socket,
49 | assert_fingerprint,
50 | )
51 |
52 |
53 | from .util import connection
54 |
#: Default port for each supported URL scheme.
port_by_scheme = {
    'http': 80,
    'https': 443,
}

#: Sanity threshold for the local clock; VerifiedHTTPSConnection.connect()
#: warns when today() is earlier than this date.
RECENT_DATE = datetime.date(2014, 1, 1)
61 |
62 |
class HTTPConnection(_HTTPConnection, object):
    """
    An ``httplib.HTTPConnection`` subclass that papers over constructor
    differences between older and newer Pythons.

    Additional keyword parameters configure the connection:

    - ``strict``: See the documentation on :class:`urllib3.connectionpool.HTTPConnectionPool`
    - ``source_address``: Set the source address for the current connection.

      .. note:: This is ignored for Python 2.6. It is only applied for 2.7 and 3.x

    - ``socket_options``: Set specific options on the underlying socket. If not specified, then
      defaults are loaded from ``HTTPConnection.default_socket_options`` which includes disabling
      Nagle's algorithm (sets TCP_NODELAY to 1) unless the connection is behind a proxy.

      For example, to enable TCP Keep Alive in addition to the defaults,
      pass::

          HTTPConnection.default_socket_options + [
              (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1),
          ]

      An empty list (``[]``) disables the defaults entirely.
    """

    default_port = port_by_scheme['http']

    #: Disable Nagle's algorithm by default.
    #: ``[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]``
    default_socket_options = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]

    #: Whether this connection verifies the host's certificate.
    is_verified = False

    def __init__(self, *args, **kw):
        # Python 3's http.client no longer accepts 'strict'.
        if six.PY3:
            kw.pop('strict', None)

        # Save source_address before it may be popped; Python 2.6's
        # constructor does not understand the keyword.
        self.source_address = kw.get('source_address')

        if sys.version_info < (2, 7):  # Python 2.6
            kw.pop('source_address', None)

        #: Socket options supplied by the caller; defaults when omitted.
        self.socket_options = kw.pop('socket_options', self.default_socket_options)

        # The superclass re-assigns self.source_address on Python 2.7+.
        _HTTPConnection.__init__(self, *args, **kw)

    def _new_conn(self):
        """Open a fresh socket to ``self.host``, honouring the configured
        source address and socket options.

        :return: New socket connection.
        """
        connect_kw = {}
        if self.source_address:
            connect_kw['source_address'] = self.source_address
        if self.socket_options:
            connect_kw['socket_options'] = self.socket_options

        try:
            sock = connection.create_connection(
                (self.host, self.port), self.timeout, **connect_kw)
        except SocketTimeout:
            raise ConnectTimeoutError(
                self, "Connection to %s timed out. (connect timeout=%s)" %
                (self.host, self.timeout))

        return sock

    def _prepare_conn(self, conn):
        """Attach *conn* as this connection's socket, tunnelling if needed."""
        self.sock = conn
        # _tunnel_host appeared in Python 2.6.3
        # (http://hg.python.org/cpython/rev/0f57b30a152f), so guard for
        # pythons 2.6.0-2 which lack it.
        if getattr(self, '_tunnel_host', None):
            # TODO: Fix tunnel so it doesn't depend on self.sock state.
            self._tunnel()
            # A tunnelled connection must not be transparently reopened.
            self.auto_open = 0

    def connect(self):
        self._prepare_conn(self._new_conn())
157 |
158 |
class HTTPSConnection(HTTPConnection):
    """HTTPConnection whose socket is wrapped with (unverified) SSL."""

    default_port = port_by_scheme['https']

    def __init__(self, host, port=None, key_file=None, cert_file=None,
                 strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, **kw):
        HTTPConnection.__init__(self, host, port, strict=strict,
                                timeout=timeout, **kw)

        self.key_file = key_file
        self.cert_file = cert_file

        # Required property for Google AppEngine 1.9.0 which otherwise causes
        # HTTPS requests to go out as HTTP. (See Issue #356)
        self._protocol = 'https'

    def connect(self):
        sock = self._new_conn()
        self._prepare_conn(sock)
        # No certificate verification is performed here.
        self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file)
179 |
180 |
class VerifiedHTTPSConnection(HTTPSConnection):
    """
    Based on httplib.HTTPSConnection but wraps the socket with
    SSL certification.
    """
    # TLS verification settings; normally filled in via set_cert()
    # before connect() is called.
    cert_reqs = None
    ca_certs = None
    ssl_version = None
    assert_fingerprint = None

    def set_cert(self, key_file=None, cert_file=None,
                 cert_reqs=None, ca_certs=None,
                 assert_hostname=None, assert_fingerprint=None):
        # Record the verification parameters; connect() consumes them.
        self.key_file = key_file
        self.cert_file = cert_file
        self.cert_reqs = cert_reqs
        self.ca_certs = ca_certs
        self.assert_hostname = assert_hostname
        self.assert_fingerprint = assert_fingerprint

    def connect(self):
        # Add certificate verification
        conn = self._new_conn()

        resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs)
        resolved_ssl_version = resolve_ssl_version(self.ssl_version)

        hostname = self.host
        if getattr(self, '_tunnel_host', None):
            # _tunnel_host was added in Python 2.6.3
            # (See: http://hg.python.org/cpython/rev/0f57b30a152f)

            self.sock = conn
            # Calls self._set_hostport(), so self.host is
            # self._tunnel_host below.
            self._tunnel()
            # Mark this connection as not reusable
            self.auto_open = 0

            # Override the host with the one we're requesting data from.
            hostname = self._tunnel_host

        # A clock earlier than RECENT_DATE almost certainly means the
        # system time is wrong, which breaks certificate validity checks.
        is_time_off = datetime.date.today() < RECENT_DATE
        if is_time_off:
            warnings.warn((
                'System time is way off (before {0}). This will probably '
                'lead to SSL verification errors').format(RECENT_DATE),
                SystemTimeWarning
            )

        # Wrap socket using verification with the root certs in
        # trusted_root_certs
        self.sock = ssl_wrap_socket(conn, self.key_file, self.cert_file,
                                    cert_reqs=resolved_cert_reqs,
                                    ca_certs=self.ca_certs,
                                    server_hostname=hostname,
                                    ssl_version=resolved_ssl_version)

        if self.assert_fingerprint:
            # Pin by certificate fingerprint when one was supplied.
            assert_fingerprint(self.sock.getpeercert(binary_form=True),
                               self.assert_fingerprint)
        elif resolved_cert_reqs != ssl.CERT_NONE \
                and self.assert_hostname is not False:
            # Otherwise verify the hostname against the certificate, unless
            # the caller explicitly disabled that with assert_hostname=False.
            cert = self.sock.getpeercert()
            if not cert.get('subjectAltName', ()):
                warnings.warn((
                    'Certificate has no `subjectAltName`, falling back to check for a `commonName` for now. '
                    'This feature is being removed by major browsers and deprecated by RFC 2818. '
                    '(See https://github.com/shazow/urllib3/issues/497 for details.)'),
                    SecurityWarning
                )
            match_hostname(cert, self.assert_hostname or hostname)

        self.is_verified = (resolved_cert_reqs == ssl.CERT_REQUIRED
                            or self.assert_fingerprint is not None)
257 |
258 |
if ssl:
    # Make a copy for testing.
    UnverifiedHTTPSConnection = HTTPSConnection
    # With SSL available, the exported HTTPSConnection is the verifying one.
    HTTPSConnection = VerifiedHTTPSConnection
263 |
--------------------------------------------------------------------------------
/requests/packages/urllib3/contrib/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ring04h/wytorproxy/0e61fd5cebd55231a915e5d633904582fddcf77f/requests/packages/urllib3/contrib/__init__.py
--------------------------------------------------------------------------------
/requests/packages/urllib3/contrib/ntlmpool.py:
--------------------------------------------------------------------------------
1 | """
2 | NTLM authenticating pool, contributed by erikcederstran
3 |
4 | Issue #10, see: http://code.google.com/p/urllib3/issues/detail?id=10
5 | """
6 |
7 | try:
8 | from http.client import HTTPSConnection
9 | except ImportError:
10 | from httplib import HTTPSConnection
11 | from logging import getLogger
12 | from ntlm import ntlm
13 |
14 | from urllib3 import HTTPSConnectionPool
15 |
16 |
17 | log = getLogger(__name__)
18 |
19 |
class NTLMConnectionPool(HTTPSConnectionPool):
    """
    Implements an NTLM authentication version of an urllib3 connection pool
    """

    scheme = 'https'

    def __init__(self, user, pw, authurl, *args, **kwargs):
        """
        authurl is a random URL on the server that is protected by NTLM.
        user is the Windows user, probably in the DOMAIN\\username format.
        pw is the password for the user.
        """
        super(NTLMConnectionPool, self).__init__(*args, **kwargs)
        self.authurl = authurl
        self.rawuser = user
        # Split 'DOMAIN\\user' at the first backslash; raises IndexError
        # below if no backslash is present.
        user_parts = user.split('\\', 1)
        self.domain = user_parts[0].upper()
        self.user = user_parts[1]
        self.pw = pw

    def _new_conn(self):
        # Performs the NTLM handshake that secures the connection. The socket
        # must be kept open while requests are performed.
        self.num_connections += 1
        log.debug('Starting NTLM HTTPS connection no. %d: https://%s%s' %
                  (self.num_connections, self.host, self.authurl))

        headers = {}
        headers['Connection'] = 'Keep-Alive'
        req_header = 'Authorization'
        resp_header = 'www-authenticate'

        conn = HTTPSConnection(host=self.host, port=self.port)

        # Send negotiation message
        headers[req_header] = (
            'NTLM %s' % ntlm.create_NTLM_NEGOTIATE_MESSAGE(self.rawuser))
        log.debug('Request headers: %s' % headers)
        conn.request('GET', self.authurl, None, headers)
        res = conn.getresponse()
        reshdr = dict(res.getheaders())
        log.debug('Response status: %s %s' % (res.status, res.reason))
        log.debug('Response headers: %s' % reshdr)
        # NOTE(review): only the first 100 bytes are read here; presumably
        # the challenge response body is irrelevant -- confirm the rest
        # needn't be drained before reusing the socket.
        log.debug('Response data: %s [...]' % res.read(100))

        # Remove the reference to the socket, so that it can not be closed by
        # the response object (we want to keep the socket open)
        res.fp = None

        # Server should respond with a challenge message
        auth_header_values = reshdr[resp_header].split(', ')
        auth_header_value = None
        for s in auth_header_values:
            if s[:5] == 'NTLM ':
                auth_header_value = s[5:]
        if auth_header_value is None:
            raise Exception('Unexpected %s response header: %s' %
                            (resp_header, reshdr[resp_header]))

        # Send authentication message
        ServerChallenge, NegotiateFlags = \
            ntlm.parse_NTLM_CHALLENGE_MESSAGE(auth_header_value)
        auth_msg = ntlm.create_NTLM_AUTHENTICATE_MESSAGE(ServerChallenge,
                                                         self.user,
                                                         self.domain,
                                                         self.pw,
                                                         NegotiateFlags)
        headers[req_header] = 'NTLM %s' % auth_msg
        log.debug('Request headers: %s' % headers)
        conn.request('GET', self.authurl, None, headers)
        res = conn.getresponse()
        log.debug('Response status: %s %s' % (res.status, res.reason))
        log.debug('Response headers: %s' % dict(res.getheaders()))
        log.debug('Response data: %s [...]' % res.read()[:100])
        if res.status != 200:
            if res.status == 401:
                raise Exception('Server rejected request: wrong '
                                'username or password')
            raise Exception('Wrong server response: %s %s' %
                            (res.status, res.reason))

        # Again detach the socket from the response so it stays open.
        res.fp = None
        log.debug('Connection established')
        return conn

    def urlopen(self, method, url, body=None, headers=None, retries=3,
                redirect=True, assert_same_host=True):
        # Force Keep-Alive so the NTLM-authenticated socket is reused.
        if headers is None:
            headers = {}
        headers['Connection'] = 'Keep-Alive'
        return super(NTLMConnectionPool, self).urlopen(method, url, body,
                                                       headers, retries,
                                                       redirect,
                                                       assert_same_host)
115 |
--------------------------------------------------------------------------------
/requests/packages/urllib3/exceptions.py:
--------------------------------------------------------------------------------
1 |
2 | ## Base Exceptions
3 |
class HTTPError(Exception):
    """Base exception used by this module."""
7 |
class HTTPWarning(Warning):
    """Base warning used by this module."""
11 |
12 |
13 |
class PoolError(HTTPError):
    """Base exception for errors caused within a pool."""

    def __init__(self, pool, message):
        self.pool = pool
        HTTPError.__init__(self, "%s: %s" % (pool, message))

    def __reduce__(self):
        # Pickling support: pools aren't picklable, so rebuild empty.
        return self.__class__, (None, None)
23 |
24 |
class RequestError(PoolError):
    """Base exception for PoolErrors that have associated URLs."""

    def __init__(self, pool, url, message):
        self.url = url
        PoolError.__init__(self, pool, message)

    def __reduce__(self):
        # Pickling support: keep the URL, drop the unpicklable pool.
        return self.__class__, (None, self.url, None)
34 |
35 |
class SSLError(HTTPError):
    """Raised when SSL certificate fails in an HTTPS connection."""
39 |
40 |
class ProxyError(HTTPError):
    """Raised when the connection to a proxy fails."""
44 |
45 |
class DecodeError(HTTPError):
    """Raised when automatic decoding based on Content-Type fails."""
49 |
50 |
class ProtocolError(HTTPError):
    """Raised when something unexpected happens mid-request/response."""
54 |
55 |
#: Renamed to ProtocolError but aliased for backwards compatibility.
#: NOTE: within this module the name shadows Python 3's builtin
#: ConnectionError.
ConnectionError = ProtocolError
58 |
59 |
60 | ## Leaf Exceptions
61 |
class MaxRetryError(RequestError):
    """Raised when the maximum number of retries is exceeded.

    :param pool: The connection pool
    :type pool: :class:`~urllib3.connectionpool.HTTPConnectionPool`
    :param string url: The requested Url
    :param exceptions.Exception reason: The underlying error

    """

    def __init__(self, pool, url, reason=None):
        self.reason = reason
        detail = "Max retries exceeded with url: %s (Caused by %r)" % (
            url, reason)
        RequestError.__init__(self, pool, url, detail)
79 |
80 |
class HostChangedError(RequestError):
    """Raised when an existing pool gets a request for a foreign host."""

    def __init__(self, pool, url, retries=3):
        RequestError.__init__(
            self, pool, url, "Tried to open a foreign host with url: %s" % url)
        self.retries = retries
88 |
89 |
class TimeoutStateError(HTTPError):
    """Raised when passing an invalid state to a timeout."""
93 |
94 |
class TimeoutError(HTTPError):
    """ Raised when a socket timeout error occurs.

    Catching this error will catch both :exc:`ReadTimeoutErrors
    <ReadTimeoutError>` and :exc:`ConnectTimeoutErrors <ConnectTimeoutError>`.
    """
    pass
102 |
103 |
class ReadTimeoutError(TimeoutError, RequestError):
    """Raised when a socket timeout occurs while receiving data from a server."""
107 |
108 |
# This timeout error does not have a URL attached and needs to inherit from the
# base HTTPError
class ConnectTimeoutError(TimeoutError):
    """Raised when a socket timeout occurs while connecting to a server."""
114 |
115 |
class EmptyPoolError(PoolError):
    """Raised when a pool runs out of connections and no more are allowed."""
119 |
120 |
class ClosedPoolError(PoolError):
    """Raised when a request enters a pool after the pool has been closed."""
124 |
125 |
class LocationValueError(ValueError, HTTPError):
    """Raised when there is something wrong with a given URL input."""
129 |
130 |
class LocationParseError(LocationValueError):
    """Raised when get_host or similar fails to parse the URL input."""

    def __init__(self, location):
        # Bypasses LocationValueError and hands the formatted message
        # straight to HTTPError, as the original did.
        HTTPError.__init__(self, "Failed to parse: %s" % location)
        self.location = location
139 |
140 |
class ResponseError(HTTPError):
    """Used as a container for an error reason supplied in a MaxRetryError."""

    GENERIC_ERROR = 'too many error responses'
    SPECIFIC_ERROR = 'too many {status_code} error responses'
145 |
146 |
class SecurityWarning(HTTPWarning):
    "Warned when performing security reducing actions"
    pass
150 |
151 |
class InsecureRequestWarning(SecurityWarning):
    """Warned when making an unverified HTTPS request."""
155 |
156 |
class SystemTimeWarning(SecurityWarning):
    """Warned when system time is suspected to be wrong."""
160 |
--------------------------------------------------------------------------------
/requests/packages/urllib3/fields.py:
--------------------------------------------------------------------------------
1 | import email.utils
2 | import mimetypes
3 |
4 | from .packages import six
5 |
6 |
def guess_content_type(filename, default='application/octet-stream'):
    """
    Guess the "Content-Type" of a file.

    :param filename:
        The filename to guess the "Content-Type" of using :mod:`mimetypes`.
    :param default:
        If no "Content-Type" can be guessed, default to `default`.
    """
    if not filename:
        return default
    guessed = mimetypes.guess_type(filename)[0]
    return guessed or default
19 |
20 |
def format_header_param(name, value):
    """
    Format and quote a single header parameter.

    Particularly useful for header parameters which might contain
    non-ASCII values, like file names. This follows RFC 2231, as
    suggested by RFC 2388 Section 4.4.

    :param name:
        The name of the parameter, a string expected to be ASCII only.
    :param value:
        The value of the parameter, provided as a unicode string.
    """
    # Fast path: plain quoting works unless the value contains characters
    # that would break the quoted form or is not pure ASCII.
    if not any(ch in value for ch in '"\\\r\n'):
        candidate = '%s="%s"' % (name, value)
        try:
            candidate.encode('ascii')
        except UnicodeEncodeError:
            pass
        else:
            return candidate
    # Fall back to the RFC 2231 extended-parameter encoding.
    if not six.PY3:  # Python 2:
        value = value.encode('utf-8')
    encoded = email.utils.encode_rfc2231(value, 'utf-8')
    return '%s*=%s' % (name, encoded)
47 |
48 |
class RequestField(object):
    """
    A data container for request body parameters.

    :param name:
        The name of this request field.
    :param data:
        The data/value body.
    :param filename:
        An optional filename of the request field.
    :param headers:
        An optional dict-like object of headers to initially use for the field.
    """
    def __init__(self, name, data, filename=None, headers=None):
        self._name = name
        self._filename = filename
        self.data = data
        self.headers = dict(headers) if headers else {}

    @classmethod
    def from_tuples(cls, fieldname, value):
        """
        A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters.

        Supports constructing :class:`~urllib3.fields.RequestField` from
        parameter of key/value strings AND key/filetuple. A filetuple is a
        (filename, data, MIME type) tuple where the MIME type is optional.
        For example::

            'foo': 'bar',
            'fakefile': ('foofile.txt', 'contents of foofile'),
            'realfile': ('barfile.txt', open('realfile').read()),
            'typedfile': ('bazfile.bin', open('bazfile').read(), 'image/jpeg'),
            'nonamefile': 'contents of nonamefile field',

        Field names and filenames must be unicode.
        """
        if not isinstance(value, tuple):
            # A bare value: no filename, content type guessed later as None.
            filename, content_type, data = None, None, value
        elif len(value) == 3:
            filename, data, content_type = value
        else:
            filename, data = value
            content_type = guess_content_type(filename)

        field = cls(fieldname, data, filename=filename)
        field.make_multipart(content_type=content_type)
        return field

    def _render_part(self, name, value):
        """
        Overridable hook to format a single header parameter; delegates to
        :func:`format_header_param`.

        :param name:
            The name of the parameter, a string expected to be ASCII only.
        :param value:
            The value of the parameter, provided as a unicode string.
        """
        return format_header_param(name, value)

    def _render_parts(self, header_parts):
        """
        Format multiple ``k="v"`` parameters and join them with ``'; '``,
        skipping parameters whose value is falsy. Useful for composite
        headers such as 'Content-Disposition'.

        :param header_parts:
            A sequence of (k, v) tuples or a :class:`dict` of (k, v) to format
            as `k1="v1"; k2="v2"; ...`.
        """
        if isinstance(header_parts, dict):
            header_parts = header_parts.items()

        rendered = [self._render_part(name, value)
                    for name, value in header_parts if value]
        return '; '.join(rendered)

    def render_headers(self):
        """
        Render this field's headers as a CRLF-separated block: the
        well-known multipart headers first, in a fixed order, then any
        remaining headers, then a blank line terminator.
        """
        lines = []

        sort_keys = ['Content-Disposition', 'Content-Type', 'Content-Location']
        for sort_key in sort_keys:
            if self.headers.get(sort_key, False):
                lines.append('%s: %s' % (sort_key, self.headers[sort_key]))

        for header_name, header_value in self.headers.items():
            if header_name not in sort_keys and header_value:
                lines.append('%s: %s' % (header_name, header_value))

        lines.append('\r\n')
        return '\r\n'.join(lines)

    def make_multipart(self, content_disposition=None, content_type=None,
                       content_location=None):
        """
        Makes this request field into a multipart request field.

        This method overrides "Content-Disposition", "Content-Type" and
        "Content-Location" headers to the request parameter.

        :param content_type:
            The 'Content-Type' of the request body.
        :param content_location:
            The 'Content-Location' of the request body.

        """
        disposition = content_disposition or 'form-data'
        disposition += '; '.join([
            '', self._render_parts(
                (('name', self._name), ('filename', self._filename))
            )
        ])
        self.headers['Content-Disposition'] = disposition
        self.headers['Content-Type'] = content_type
        self.headers['Content-Location'] = content_location
178 |
--------------------------------------------------------------------------------
/requests/packages/urllib3/filepost.py:
--------------------------------------------------------------------------------
1 | import codecs
2 |
3 | from uuid import uuid4
4 | from io import BytesIO
5 |
6 | from .packages import six
7 | from .packages.six import b
8 | from .fields import RequestField
9 |
10 | writer = codecs.lookup('utf-8')[3]
11 |
12 |
def choose_boundary():
    """
    Return a random hex string usable as a multipart boundary
    (our embarrassingly-simple replacement for mimetools.choose_boundary).
    """
    boundary = uuid4()
    return boundary.hex
18 |
19 |
def iter_field_objects(fields):
    """
    Iterate over fields.

    Supports list of (k, v) tuples and dicts, and lists of
    :class:`~urllib3.fields.RequestField`.

    """
    if isinstance(fields, dict):
        source = six.iteritems(fields)
    else:
        source = iter(fields)

    for item in source:
        if isinstance(item, RequestField):
            yield item
        else:
            # Old-style (key, value) tuples are promoted to RequestField.
            yield RequestField.from_tuples(*item)
38 |
39 |
def iter_fields(fields):
    """
    .. deprecated:: 1.6

    Iterate over fields.

    The addition of :class:`~urllib3.fields.RequestField` makes this function
    obsolete. Instead, use :func:`iter_field_objects`, which returns
    :class:`~urllib3.fields.RequestField` objects.

    Supports list of (k, v) tuples and dicts.
    """
    if isinstance(fields, dict):
        return ((name, value) for name, value in six.iteritems(fields))
    return ((name, value) for name, value in fields)
56 |
57 |
def encode_multipart_formdata(fields, boundary=None):
    """
    Encode a dictionary of ``fields`` using the multipart/form-data MIME format.

    :param fields:
        Dictionary of fields or list of (key, :class:`~urllib3.fields.RequestField`).

    :param boundary:
        If not specified, then a random boundary will be generated using
        :func:`mimetools.choose_boundary`.
    """
    if boundary is None:
        boundary = choose_boundary()

    body = BytesIO()
    for field in iter_field_objects(fields):
        # Boundary marker, then the field's rendered headers (text goes
        # through the utf-8 stream writer).
        body.write(b('--%s\r\n' % (boundary)))
        writer(body).write(field.render_headers())

        data = field.data
        if isinstance(data, int):
            data = str(data)  # Backwards compatibility

        if isinstance(data, six.text_type):
            writer(body).write(data)
        else:
            body.write(data)

        body.write(b'\r\n')

    # Closing boundary marker.
    body.write(b('--%s--\r\n' % (boundary)))

    content_type = str('multipart/form-data; boundary=%s' % boundary)
    return body.getvalue(), content_type
94 |
--------------------------------------------------------------------------------
/requests/packages/urllib3/packages/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 |
3 | from . import ssl_match_hostname
4 |
5 |
--------------------------------------------------------------------------------
/requests/packages/urllib3/packages/ordered_dict.py:
--------------------------------------------------------------------------------
1 | # Backport of OrderedDict() class that runs on Python 2.4, 2.5, 2.6, 2.7 and pypy.
2 | # Passes Python2.7's test suite and incorporates all the latest updates.
3 | # Copyright 2009 Raymond Hettinger, released under the MIT License.
4 | # http://code.activestate.com/recipes/576693/
5 | try:
6 | from thread import get_ident as _get_ident
7 | except ImportError:
8 | from dummy_thread import get_ident as _get_ident
9 |
10 | try:
11 | from _abcoll import KeysView, ValuesView, ItemsView
12 | except ImportError:
13 | pass
14 |
15 |
16 | class OrderedDict(dict):
17 | 'Dictionary that remembers insertion order'
18 | # An inherited dict maps keys to values.
19 | # The inherited dict provides __getitem__, __len__, __contains__, and get.
20 | # The remaining methods are order-aware.
21 | # Big-O running times for all methods are the same as for regular dictionaries.
22 |
23 | # The internal self.__map dictionary maps keys to links in a doubly linked list.
24 | # The circular doubly linked list starts and ends with a sentinel element.
25 | # The sentinel element never gets deleted (this simplifies the algorithm).
26 | # Each link is stored as a list of length three: [PREV, NEXT, KEY].
27 |
    def __init__(self, *args, **kwds):
        '''Initialize an ordered dictionary. Signature is the same as for
        regular dictionaries, but keyword arguments are not recommended
        because their insertion order is arbitrary.

        '''
        if len(args) > 1:
            raise TypeError('expected at most 1 arguments, got %d' % len(args))
        try:
            # If __init__ runs twice, keep the existing linked list intact.
            self.__root
        except AttributeError:
            # Circular doubly linked list: the sentinel is its own
            # predecessor and successor while the dict is empty.
            self.__root = root = []  # sentinel node
            root[:] = [root, root, None]
            self.__map = {}
        # __update is an alias bound later in the class body (outside this
        # view) -- presumably MutableMapping.update; TODO confirm.
        self.__update(*args, **kwds)
43 |
    def __setitem__(self, key, value, dict_setitem=dict.__setitem__):
        'od.__setitem__(i, y) <==> od[i]=y'
        # Setting a new item creates a new link which goes at the end of the linked
        # list, and the inherited dictionary is updated with the new key/value pair.
        if key not in self:
            root = self.__root
            last = root[0]
            # Splice the new link in before the sentinel: it becomes the
            # successor of the old tail and the predecessor of root.
            last[1] = root[0] = self.__map[key] = [last, root, key]
        dict_setitem(self, key, value)
53 |
    def __delitem__(self, key, dict_delitem=dict.__delitem__):
        'od.__delitem__(y) <==> del od[y]'
        # Deleting an existing item uses self.__map to find the link which is
        # then removed by updating the links in the predecessor and successor nodes.
        dict_delitem(self, key)
        link_prev, link_next, key = self.__map.pop(key)
        # Bypass the removed link so its neighbours point at each other.
        link_prev[1] = link_next
        link_next[0] = link_prev
62 |
63 | def __iter__(self):
64 | 'od.__iter__() <==> iter(od)'
65 | root = self.__root
66 | curr = root[1]
67 | while curr is not root:
68 | yield curr[2]
69 | curr = curr[1]
70 |
71 | def __reversed__(self):
72 | 'od.__reversed__() <==> reversed(od)'
73 | root = self.__root
74 | curr = root[0]
75 | while curr is not root:
76 | yield curr[2]
77 | curr = curr[0]
78 |
79 | def clear(self):
80 | 'od.clear() -> None. Remove all items from od.'
81 | try:
82 | for node in self.__map.itervalues():
83 | del node[:]
84 | root = self.__root
85 | root[:] = [root, root, None]
86 | self.__map.clear()
87 | except AttributeError:
88 | pass
89 | dict.clear(self)
90 |
91 | def popitem(self, last=True):
92 | '''od.popitem() -> (k, v), return and remove a (key, value) pair.
93 | Pairs are returned in LIFO order if last is true or FIFO order if false.
94 |
95 | '''
96 | if not self:
97 | raise KeyError('dictionary is empty')
98 | root = self.__root
99 | if last:
100 | link = root[0]
101 | link_prev = link[0]
102 | link_prev[1] = root
103 | root[0] = link_prev
104 | else:
105 | link = root[1]
106 | link_next = link[1]
107 | root[1] = link_next
108 | link_next[0] = root
109 | key = link[2]
110 | del self.__map[key]
111 | value = dict.pop(self, key)
112 | return key, value
113 |
114 | # -- the following methods do not depend on the internal structure --
115 |
116 | def keys(self):
117 | 'od.keys() -> list of keys in od'
118 | return list(self)
119 |
120 | def values(self):
121 | 'od.values() -> list of values in od'
122 | return [self[key] for key in self]
123 |
124 | def items(self):
125 | 'od.items() -> list of (key, value) pairs in od'
126 | return [(key, self[key]) for key in self]
127 |
128 | def iterkeys(self):
129 | 'od.iterkeys() -> an iterator over the keys in od'
130 | return iter(self)
131 |
132 | def itervalues(self):
133 | 'od.itervalues -> an iterator over the values in od'
134 | for k in self:
135 | yield self[k]
136 |
137 | def iteritems(self):
138 | 'od.iteritems -> an iterator over the (key, value) items in od'
139 | for k in self:
140 | yield (k, self[k])
141 |
142 | def update(*args, **kwds):
143 | '''od.update(E, **F) -> None. Update od from dict/iterable E and F.
144 |
145 | If E is a dict instance, does: for k in E: od[k] = E[k]
146 | If E has a .keys() method, does: for k in E.keys(): od[k] = E[k]
147 | Or if E is an iterable of items, does: for k, v in E: od[k] = v
148 | In either case, this is followed by: for k, v in F.items(): od[k] = v
149 |
150 | '''
151 | if len(args) > 2:
152 | raise TypeError('update() takes at most 2 positional '
153 | 'arguments (%d given)' % (len(args),))
154 | elif not args:
155 | raise TypeError('update() takes at least 1 argument (0 given)')
156 | self = args[0]
157 | # Make progressively weaker assumptions about "other"
158 | other = ()
159 | if len(args) == 2:
160 | other = args[1]
161 | if isinstance(other, dict):
162 | for key in other:
163 | self[key] = other[key]
164 | elif hasattr(other, 'keys'):
165 | for key in other.keys():
166 | self[key] = other[key]
167 | else:
168 | for key, value in other:
169 | self[key] = value
170 | for key, value in kwds.items():
171 | self[key] = value
172 |
173 | __update = update # let subclasses override update without breaking __init__
174 |
175 | __marker = object()
176 |
177 | def pop(self, key, default=__marker):
178 | '''od.pop(k[,d]) -> v, remove specified key and return the corresponding value.
179 | If key is not found, d is returned if given, otherwise KeyError is raised.
180 |
181 | '''
182 | if key in self:
183 | result = self[key]
184 | del self[key]
185 | return result
186 | if default is self.__marker:
187 | raise KeyError(key)
188 | return default
189 |
190 | def setdefault(self, key, default=None):
191 | 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
192 | if key in self:
193 | return self[key]
194 | self[key] = default
195 | return default
196 |
197 | def __repr__(self, _repr_running={}):
198 | 'od.__repr__() <==> repr(od)'
199 | call_key = id(self), _get_ident()
200 | if call_key in _repr_running:
201 | return '...'
202 | _repr_running[call_key] = 1
203 | try:
204 | if not self:
205 | return '%s()' % (self.__class__.__name__,)
206 | return '%s(%r)' % (self.__class__.__name__, self.items())
207 | finally:
208 | del _repr_running[call_key]
209 |
210 | def __reduce__(self):
211 | 'Return state information for pickling'
212 | items = [[k, self[k]] for k in self]
213 | inst_dict = vars(self).copy()
214 | for k in vars(OrderedDict()):
215 | inst_dict.pop(k, None)
216 | if inst_dict:
217 | return (self.__class__, (items,), inst_dict)
218 | return self.__class__, (items,)
219 |
220 | def copy(self):
221 | 'od.copy() -> a shallow copy of od'
222 | return self.__class__(self)
223 |
224 | @classmethod
225 | def fromkeys(cls, iterable, value=None):
226 | '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
227 | and values equal to v (which defaults to None).
228 |
229 | '''
230 | d = cls()
231 | for key in iterable:
232 | d[key] = value
233 | return d
234 |
235 | def __eq__(self, other):
236 | '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
237 | while comparison to a regular mapping is order-insensitive.
238 |
239 | '''
240 | if isinstance(other, OrderedDict):
241 | return len(self)==len(other) and self.items() == other.items()
242 | return dict.__eq__(self, other)
243 |
244 | def __ne__(self, other):
245 | return not self == other
246 |
247 | # -- the following methods are only used in Python 2.7 --
248 |
249 | def viewkeys(self):
250 | "od.viewkeys() -> a set-like object providing a view on od's keys"
251 | return KeysView(self)
252 |
253 | def viewvalues(self):
254 | "od.viewvalues() -> an object providing a view on od's values"
255 | return ValuesView(self)
256 |
257 | def viewitems(self):
258 | "od.viewitems() -> a set-like object providing a view on od's items"
259 | return ItemsView(self)
260 |
--------------------------------------------------------------------------------
/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py:
--------------------------------------------------------------------------------
1 | try:
2 | # Python 3.2+
3 | from ssl import CertificateError, match_hostname
4 | except ImportError:
5 | try:
6 | # Backport of the function from a pypi module
7 | from backports.ssl_match_hostname import CertificateError, match_hostname
8 | except ImportError:
9 | # Our vendored copy
10 | from ._implementation import CertificateError, match_hostname
11 |
12 | # Not needed, but documenting what we provide.
13 | __all__ = ('CertificateError', 'match_hostname')
14 |
--------------------------------------------------------------------------------
/requests/packages/urllib3/packages/ssl_match_hostname/_implementation.py:
--------------------------------------------------------------------------------
1 | """The match_hostname() function from Python 3.3.3, essential when using SSL."""
2 |
3 | # Note: This file is under the PSF license as the code comes from the python
4 | # stdlib. http://docs.python.org/3/license.html
5 |
6 | import re
7 |
8 | __version__ = '3.4.0.2'
9 |
class CertificateError(ValueError):
    """Raised by match_hostname() when a certificate does not match the
    expected hostname (or when no usable name fields are present)."""
    pass
12 |
13 |
14 | def _dnsname_match(dn, hostname, max_wildcards=1):
15 | """Matching according to RFC 6125, section 6.4.3
16 |
17 | http://tools.ietf.org/html/rfc6125#section-6.4.3
18 | """
19 | pats = []
20 | if not dn:
21 | return False
22 |
23 | # Ported from python3-syntax:
24 | # leftmost, *remainder = dn.split(r'.')
25 | parts = dn.split(r'.')
26 | leftmost = parts[0]
27 | remainder = parts[1:]
28 |
29 | wildcards = leftmost.count('*')
30 | if wildcards > max_wildcards:
31 | # Issue #17980: avoid denials of service by refusing more
32 | # than one wildcard per fragment. A survey of established
33 | # policy among SSL implementations showed it to be a
34 | # reasonable choice.
35 | raise CertificateError(
36 | "too many wildcards in certificate DNS name: " + repr(dn))
37 |
38 | # speed up common case w/o wildcards
39 | if not wildcards:
40 | return dn.lower() == hostname.lower()
41 |
42 | # RFC 6125, section 6.4.3, subitem 1.
43 | # The client SHOULD NOT attempt to match a presented identifier in which
44 | # the wildcard character comprises a label other than the left-most label.
45 | if leftmost == '*':
46 | # When '*' is a fragment by itself, it matches a non-empty dotless
47 | # fragment.
48 | pats.append('[^.]+')
49 | elif leftmost.startswith('xn--') or hostname.startswith('xn--'):
50 | # RFC 6125, section 6.4.3, subitem 3.
51 | # The client SHOULD NOT attempt to match a presented identifier
52 | # where the wildcard character is embedded within an A-label or
53 | # U-label of an internationalized domain name.
54 | pats.append(re.escape(leftmost))
55 | else:
56 | # Otherwise, '*' matches any dotless string, e.g. www*
57 | pats.append(re.escape(leftmost).replace(r'\*', '[^.]*'))
58 |
59 | # add the remaining fragments, ignore any wildcards
60 | for frag in remainder:
61 | pats.append(re.escape(frag))
62 |
63 | pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
64 | return pat.match(hostname)
65 |
66 |
def match_hostname(cert, hostname):
    """Verify that *cert* (in decoded format as returned by
    SSLSocket.getpeercert()) matches the *hostname*.  RFC 2818 and RFC 6125
    rules are followed, but IP addresses are not accepted for *hostname*.

    CertificateError is raised on failure. On success, the function
    returns nothing.
    """
    if not cert:
        raise ValueError("empty or no certificate")

    dnsnames = []
    for key, value in cert.get('subjectAltName', ()):
        if key == 'DNS':
            if _dnsname_match(value, hostname):
                return
            dnsnames.append(value)

    if not dnsnames:
        # The subject commonName is consulted only when subjectAltName
        # carries no dNSName entries at all.
        for sub in cert.get('subject', ()):
            for key, value in sub:
                # XXX according to RFC 2818, the most specific Common Name
                # must be used.
                if key == 'commonName':
                    if _dnsname_match(value, hostname):
                        return
                    dnsnames.append(value)

    # Nothing matched — raise with whatever candidate names we collected.
    if len(dnsnames) > 1:
        raise CertificateError("hostname %r "
            "doesn't match either of %s"
            % (hostname, ', '.join(map(repr, dnsnames))))
    elif len(dnsnames) == 1:
        raise CertificateError("hostname %r "
            "doesn't match %r"
            % (hostname, dnsnames[0]))
    else:
        raise CertificateError("no appropriate commonName or "
            "subjectAltName fields were found")
106 |
--------------------------------------------------------------------------------
/requests/packages/urllib3/request.py:
--------------------------------------------------------------------------------
1 | try:
2 | from urllib.parse import urlencode
3 | except ImportError:
4 | from urllib import urlencode
5 |
6 | from .filepost import encode_multipart_formdata
7 |
8 |
9 | __all__ = ['RequestMethods']
10 |
11 |
class RequestMethods(object):
    """
    Convenience mixin for classes who implement a :meth:`urlopen` method, such
    as :class:`~urllib3.connectionpool.HTTPConnectionPool` and
    :class:`~urllib3.poolmanager.PoolManager`.

    Provides behavior for making common types of HTTP request methods and
    decides which type of request field encoding to use.

    Specifically,

    :meth:`.request_encode_url` is for sending requests whose fields are
    encoded in the URL (such as GET, HEAD, DELETE).

    :meth:`.request_encode_body` is for sending requests whose fields are
    encoded in the *body* of the request using multipart or www-form-urlencoded
    (such as for POST, PUT, PATCH).

    :meth:`.request` is for making any kind of request, it will look up the
    appropriate encoding format and use one of the above two methods to make
    the request.

    Initializer parameters:

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.
    """

    # Methods whose fields belong in the query string rather than the body.
    _encode_url_methods = set(['DELETE', 'GET', 'HEAD', 'OPTIONS'])

    def __init__(self, headers=None):
        self.headers = headers or {}

    def urlopen(self, method, url, body=None, headers=None,
                encode_multipart=True, multipart_boundary=None,
                **kw):  # Abstract
        # Bug fix: the original raised the ``NotImplemented`` singleton, which
        # is a comparison sentinel and not callable — attempting it produced a
        # confusing TypeError. ``NotImplementedError`` is the intended
        # exception for an abstract method.
        raise NotImplementedError("Classes extending RequestMethods must implement "
                                  "their own ``urlopen`` method.")

    def request(self, method, url, fields=None, headers=None, **urlopen_kw):
        """
        Make a request using :meth:`urlopen` with the appropriate encoding of
        ``fields`` based on the ``method`` used.

        This is a convenience method that requires the least amount of manual
        effort. It can be used in most situations, while still having the
        option to drop down to more specific methods when necessary, such as
        :meth:`request_encode_url`, :meth:`request_encode_body`,
        or even the lowest level :meth:`urlopen`.
        """
        method = method.upper()

        if method in self._encode_url_methods:
            return self.request_encode_url(method, url, fields=fields,
                                           headers=headers,
                                           **urlopen_kw)
        else:
            return self.request_encode_body(method, url, fields=fields,
                                            headers=headers,
                                            **urlopen_kw)

    def request_encode_url(self, method, url, fields=None, **urlopen_kw):
        """
        Make a request using :meth:`urlopen` with the ``fields`` encoded in
        the url. This is useful for request methods like GET, HEAD, DELETE, etc.
        """
        if fields:
            url += '?' + urlencode(fields)
        return self.urlopen(method, url, **urlopen_kw)

    def request_encode_body(self, method, url, fields=None, headers=None,
                            encode_multipart=True, multipart_boundary=None,
                            **urlopen_kw):
        """
        Make a request using :meth:`urlopen` with the ``fields`` encoded in
        the body. This is useful for request methods like POST, PUT, PATCH, etc.

        When ``encode_multipart=True`` (default), then
        :meth:`urllib3.filepost.encode_multipart_formdata` is used to encode
        the payload with the appropriate content type. Otherwise
        :meth:`urllib.urlencode` is used with the
        'application/x-www-form-urlencoded' content type.

        Multipart encoding must be used when posting files, and it's reasonably
        safe to use it in other times too. However, it may break request
        signing, such as with OAuth.

        Supports an optional ``fields`` parameter of key/value strings AND
        key/filetuple. A filetuple is a (filename, data, MIME type) tuple where
        the MIME type is optional. For example::

            fields = {
                'foo': 'bar',
                'fakefile': ('foofile.txt', 'contents of foofile'),
                'realfile': ('barfile.txt', open('realfile').read()),
                'typedfile': ('bazfile.bin', open('bazfile').read(),
                              'image/jpeg'),
                'nonamefile': 'contents of nonamefile field',
            }

        When uploading a file, providing a filename (the first parameter of the
        tuple) is optional but recommended to best mimick behavior of browsers.

        Note that if ``headers`` are supplied, the 'Content-Type' header will
        be overwritten because it depends on the dynamic random boundary string
        which is used to compose the body of the request. The random boundary
        string can be explicitly set with the ``multipart_boundary`` parameter.
        """
        if headers is None:
            headers = self.headers

        extra_kw = {'headers': {}}

        if fields:
            # ``fields`` and ``body`` are mutually exclusive ways to supply a
            # payload; accepting both silently would drop one of them.
            if 'body' in urlopen_kw:
                raise TypeError('request got values for both \'fields\' and \'body\', can only specify one.')

            if encode_multipart:
                body, content_type = encode_multipart_formdata(fields, boundary=multipart_boundary)
            else:
                body, content_type = urlencode(fields), 'application/x-www-form-urlencoded'

            extra_kw['body'] = body
            extra_kw['headers'] = {'Content-Type': content_type}

        # Caller-supplied headers and urlopen kwargs win over the defaults
        # computed above (except that fields always set the Content-Type).
        extra_kw['headers'].update(headers)
        extra_kw.update(urlopen_kw)

        return self.urlopen(method, url, **extra_kw)
142 |
--------------------------------------------------------------------------------
/requests/packages/urllib3/util/__init__.py:
--------------------------------------------------------------------------------
1 | # For backwards compatibility, provide imports that used to be here.
2 | from .connection import is_connection_dropped
3 | from .request import make_headers
4 | from .response import is_fp_closed
5 | from .ssl_ import (
6 | SSLContext,
7 | HAS_SNI,
8 | assert_fingerprint,
9 | resolve_cert_reqs,
10 | resolve_ssl_version,
11 | ssl_wrap_socket,
12 | )
13 | from .timeout import (
14 | current_time,
15 | Timeout,
16 | )
17 |
18 | from .retry import Retry
19 | from .url import (
20 | get_host,
21 | parse_url,
22 | split_first,
23 | Url,
24 | )
25 |
--------------------------------------------------------------------------------
/requests/packages/urllib3/util/connection.py:
--------------------------------------------------------------------------------
1 | import socket
2 | try:
3 | from select import poll, POLLIN
4 | except ImportError: # `poll` doesn't exist on OSX and other platforms
5 | poll = False
6 | try:
7 | from select import select
8 | except ImportError: # `select` doesn't exist on AppEngine.
9 | select = False
10 |
11 |
def is_connection_dropped(conn):  # Platform-specific
    """
    Returns True if the connection is dropped and should be closed.

    :param conn:
        :class:`httplib.HTTPConnection` object.

    Note: For platforms like AppEngine, this will always return ``False`` to
    let the platform handle connection recycling transparently for us.
    """
    sock = getattr(conn, 'sock', False)
    if sock is False:  # Platform-specific: AppEngine exposes no socket.
        return False
    if sock is None:  # httplib already closed this connection itself.
        return True

    if poll:
        # Preferred path: a zero-timeout poll() tells us whether the peer
        # hung up, or left data buffered (either way the connection is
        # unusable for a fresh request).
        watcher = poll()
        watcher.register(sock, POLLIN)
        for fd, _flags in watcher.poll(0.0):
            if fd == sock.fileno():
                return True
        return None

    if not select:  # Platform-specific: AppEngine has neither poll nor select.
        return False

    try:
        # Readability with nothing outstanding means EOF or stale data.
        return select([sock], [], [], 0.0)[0]
    except socket.error:
        return True
45 |
# This function is copied from socket.py in the Python 2.7 standard
# library test suite. Added to its signature is only `socket_options`.
def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
                      source_address=None, socket_options=None):
    """Connect to *address* and return the socket object.

    Convenience function.  Connect to *address* (a 2-tuple ``(host,
    port)``) and return the socket object.  Passing the optional
    *timeout* parameter will set the timeout on the socket instance
    before attempting to connect.  If no *timeout* is supplied, the
    global default timeout setting returned by :func:`getdefaulttimeout`
    is used.  If *source_address* is set it must be a tuple of (host, port)
    for the socket to bind as a source address before making the connection.
    An host of '' or port 0 tells the OS to use the default.
    """

    host, port = address
    err = None
    # Try each address family/socket type getaddrinfo offers (IPv4 and
    # IPv6) until one connects; remember the last error for the failure case.
    for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
        af, socktype, proto, canonname, sa = res
        sock = None
        try:
            sock = socket.socket(af, socktype, proto)

            # If provided, set socket level options before connecting.
            # This is the only addition urllib3 makes to this function.
            _set_socket_options(sock, socket_options)

            if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                sock.settimeout(timeout)
            if source_address:
                sock.bind(source_address)
            sock.connect(sa)
            return sock

        except socket.error as _:
            err = _
            # Close the half-constructed socket before trying the next
            # candidate so file descriptors are not leaked.
            if sock is not None:
                sock.close()

    if err is not None:
        raise err
    else:
        raise socket.error("getaddrinfo returns an empty list")
90 |
91 |
92 | def _set_socket_options(sock, options):
93 | if options is None:
94 | return
95 |
96 | for opt in options:
97 | sock.setsockopt(*opt)
98 |
--------------------------------------------------------------------------------
/requests/packages/urllib3/util/request.py:
--------------------------------------------------------------------------------
1 | from base64 import b64encode
2 |
3 | from ..packages.six import b
4 |
5 | ACCEPT_ENCODING = 'gzip,deflate'
6 |
7 |
def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
                 basic_auth=None, proxy_basic_auth=None, disable_cache=None):
    """
    Shortcuts for generating request headers.

    :param keep_alive:
        If ``True``, adds 'connection: keep-alive' header.

    :param accept_encoding:
        Can be a boolean, list, or string.
        ``True`` translates to 'gzip,deflate'.
        List will get joined by comma.
        String will be used as provided.

    :param user_agent:
        String representing the user-agent you want, such as
        "python-urllib3/0.6"

    :param basic_auth:
        Colon-separated username:password string for 'authorization: basic ...'
        auth header.

    :param proxy_basic_auth:
        Colon-separated username:password string for 'proxy-authorization: basic ...'
        auth header.

    :param disable_cache:
        If ``True``, adds 'cache-control: no-cache' header.

    Example::

        >>> make_headers(keep_alive=True, user_agent="Batman/1.0")
        {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'}
        >>> make_headers(accept_encoding=True)
        {'accept-encoding': 'gzip,deflate'}
    """
    headers = {}

    if accept_encoding:
        # Normalize: lists are comma-joined, strings pass through, and any
        # other truthy value (e.g. True) selects the default encodings.
        if isinstance(accept_encoding, list):
            accept_encoding = ','.join(accept_encoding)
        elif not isinstance(accept_encoding, str):
            accept_encoding = ACCEPT_ENCODING
        headers['accept-encoding'] = accept_encoding

    if user_agent:
        headers['user-agent'] = user_agent

    if keep_alive:
        headers['connection'] = 'keep-alive'

    if basic_auth:
        headers['authorization'] = ('Basic ' +
                                    b64encode(b(basic_auth)).decode('utf-8'))

    if proxy_basic_auth:
        headers['proxy-authorization'] = ('Basic ' +
                                          b64encode(b(proxy_basic_auth)).decode('utf-8'))

    if disable_cache:
        headers['cache-control'] = 'no-cache'

    return headers
72 |
--------------------------------------------------------------------------------
/requests/packages/urllib3/util/response.py:
--------------------------------------------------------------------------------
def is_fp_closed(obj):
    """
    Checks whether a given file-like object is closed.

    :param obj:
        The file-like object to check.

    :raises ValueError:
        If the object exposes neither a ``closed`` attribute nor an ``fp``
        attribute, so its state cannot be determined.
    """
    # Honest file objects advertise their state through ``closed``.
    if hasattr(obj, 'closed'):
        return obj.closed

    # Containers such as httplib's HTTPResponse drop their inner ``fp``
    # once the wrapped stream is exhausted, so a missing fp means closed.
    if hasattr(obj, 'fp'):
        return obj.fp is None

    raise ValueError("Unable to determine whether fp is closed.")
23 |
--------------------------------------------------------------------------------
/requests/packages/urllib3/util/ssl_.py:
--------------------------------------------------------------------------------
1 | from binascii import hexlify, unhexlify
2 | from hashlib import md5, sha1
3 |
4 | from ..exceptions import SSLError
5 |
6 |
7 | SSLContext = None
8 | HAS_SNI = False
9 | create_default_context = None
10 |
11 | import errno
12 | import ssl
13 |
14 | try: # Test for SSL features
15 | from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23
16 | from ssl import HAS_SNI # Has SNI?
17 | except ImportError:
18 | pass
19 |
20 |
21 | try:
22 | from ssl import OP_NO_SSLv2, OP_NO_SSLv3, OP_NO_COMPRESSION
23 | except ImportError:
24 | OP_NO_SSLv2, OP_NO_SSLv3 = 0x1000000, 0x2000000
25 | OP_NO_COMPRESSION = 0x20000
26 |
27 | try:
28 | from ssl import _DEFAULT_CIPHERS
29 | except ImportError:
30 | _DEFAULT_CIPHERS = (
31 | 'ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES:ECDH+HIGH:'
32 | 'DH+HIGH:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+HIGH:RSA+3DES:ECDH+RC4:'
33 | 'DH+RC4:RSA+RC4:!aNULL:!eNULL:!MD5'
34 | )
35 |
try:
    from ssl import SSLContext  # Modern SSL?
except ImportError:
    import sys

    # Minimal stand-in for ssl.SSLContext on Python 2.6/3.1: it only
    # records the configuration and replays it as keyword arguments to
    # ssl.wrap_socket() when the socket is finally wrapped.
    class SSLContext(object):  # Platform-specific: Python 2 & 3.1
        # ssl.wrap_socket() grew the ``ciphers`` argument in Python 2.7.
        supports_set_ciphers = sys.version_info >= (2, 7)

        def __init__(self, protocol_version):
            self.protocol = protocol_version
            # Use default values from a real SSLContext
            self.check_hostname = False
            self.verify_mode = ssl.CERT_NONE
            self.ca_certs = None
            self.options = 0
            self.certfile = None
            self.keyfile = None
            self.ciphers = None

        def load_cert_chain(self, certfile, keyfile):
            # Stored for later; applied in wrap_socket().
            self.certfile = certfile
            self.keyfile = keyfile

        def load_verify_locations(self, location):
            # Stored for later; applied in wrap_socket().
            self.ca_certs = location

        def set_ciphers(self, cipher_suite):
            if not self.supports_set_ciphers:
                raise TypeError(
                    'Your version of Python does not support setting '
                    'a custom cipher suite. Please upgrade to Python '
                    '2.7, 3.2, or later if you need this functionality.'
                )
            self.ciphers = cipher_suite

        def wrap_socket(self, socket, server_hostname=None):
            # server_hostname is accepted for interface parity but cannot
            # be honored: this fallback exists only where SNI is missing.
            kwargs = {
                'keyfile': self.keyfile,
                'certfile': self.certfile,
                'ca_certs': self.ca_certs,
                'cert_reqs': self.verify_mode,
                'ssl_version': self.protocol,
            }
            if self.supports_set_ciphers:  # Platform-specific: Python 2.7+
                return wrap_socket(socket, ciphers=self.ciphers, **kwargs)
            else:  # Platform-specific: Python 2.6
                return wrap_socket(socket, **kwargs)
83 |
84 |
def assert_fingerprint(cert, fingerprint):
    """
    Checks if given fingerprint matches the supplied certificate.

    :param cert:
        Certificate as bytes object.
    :param fingerprint:
        Fingerprint as string of hexdigits, can be interspersed by colons.
    """
    # The digest length (in bytes) identifies which hash produced the
    # fingerprint: 16 -> MD5, 20 -> SHA-1.
    hashfunc_map = {16: md5, 20: sha1}

    normalized = fingerprint.replace(':', '').lower()
    digest_length, odd = divmod(len(normalized), 2)
    if odd or digest_length not in hashfunc_map:
        raise SSLError('Fingerprint is of invalid length.')

    # We need encode() here for py32; works on py2 and p33.
    expected = unhexlify(normalized.encode())

    actual = hashfunc_map[digest_length](cert).digest()

    if actual != expected:
        raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".'
                       .format(hexlify(expected), hexlify(actual)))
119 |
120 |
def resolve_cert_reqs(candidate):
    """
    Resolves the argument to a numeric constant, which can be passed to
    the wrap_socket function/method from the ssl module.
    Defaults to :data:`ssl.CERT_NONE`.
    If given a string it is assumed to be the name of the constant in the
    :mod:`ssl` module or its abbrevation.
    (So you can specify `REQUIRED` instead of `CERT_REQUIRED`.
    If it's neither `None` nor a string we assume it is already the numeric
    constant which can directly be passed to wrap_socket.
    """
    if candidate is None:
        return CERT_NONE

    # Anything that is not a string is assumed to already be numeric.
    if not isinstance(candidate, str):
        return candidate

    # Try the exact constant name first, then the 'CERT_'-less shorthand.
    resolved = getattr(ssl, candidate, None)
    if resolved is not None:
        return resolved
    return getattr(ssl, 'CERT_' + candidate)
142 |
143 |
def resolve_ssl_version(candidate):
    """
    like resolve_cert_reqs

    Resolves *candidate* to an ssl ``PROTOCOL_*`` constant: ``None`` maps
    to :data:`PROTOCOL_SSLv23`, a string is looked up in :mod:`ssl`
    (with or without the ``PROTOCOL_`` prefix), anything else is returned
    unchanged.
    """
    if candidate is None:
        return PROTOCOL_SSLv23

    if not isinstance(candidate, str):
        return candidate

    resolved = getattr(ssl, candidate, None)
    if resolved is not None:
        return resolved
    return getattr(ssl, 'PROTOCOL_' + candidate)
158 |
159 |
def create_urllib3_context(ssl_version=None, cert_reqs=ssl.CERT_REQUIRED,
                           options=None, ciphers=None):
    """All arguments have the same meaning as ``ssl_wrap_socket``.

    By default, this function does a lot of the same work that
    ``ssl.create_default_context`` does on Python 3.4+. It:

    - Disables SSLv2, SSLv3, and compression
    - Sets a restricted set of server ciphers

    If you wish to enable SSLv3, you can do::

        from urllib3.util import ssl_
        context = ssl_.create_urllib3_context()
        context.options &= ~ssl_.OP_NO_SSLv3

    You can do the same to enable compression (substituting ``COMPRESSION``
    for ``SSLv3`` in the last line above).

    :param ssl_version:
        The desired protocol version to use. This will default to
        PROTOCOL_SSLv23 which will negotiate the highest protocol that both
        the server and your installation of OpenSSL support.
    :param cert_reqs:
        Whether to require the certificate verification. This defaults to
        ``ssl.CERT_REQUIRED``.
    :param options:
        Specific OpenSSL options. These default to ``ssl.OP_NO_SSLv2``,
        ``ssl.OP_NO_SSLv3``, ``ssl.OP_NO_COMPRESSION``.
    :param ciphers:
        Which cipher suites to allow the server to select.
    :returns:
        Constructed SSLContext object with specified options
    :rtype: SSLContext
    """
    context = SSLContext(ssl_version or ssl.PROTOCOL_SSLv23)

    if options is None:
        # Mirror ssl.create_default_context: drop SSLv2 (trivially broken),
        # SSLv3 (several known problems), and TLS compression (CRIME attack,
        # issue #309).
        options = OP_NO_SSLv2 | OP_NO_SSLv3 | OP_NO_COMPRESSION

    context.options |= options

    # The Python 2.6 shim context cannot change cipher suites at all.
    if getattr(context, 'supports_set_ciphers', True):  # Platform-specific: Python 2.6
        context.set_ciphers(ciphers or _DEFAULT_CIPHERS)

    context.verify_mode = cert_reqs
    if getattr(context, 'check_hostname', None) is not None:  # Platform-specific: Python 3.2
        # Hostname checking only makes sense when certificates are verified.
        context.check_hostname = (context.verify_mode == ssl.CERT_REQUIRED)
    return context
216 |
217 |
def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
                    ca_certs=None, server_hostname=None,
                    ssl_version=None, ciphers=None, ssl_context=None):
    """
    All arguments except for server_hostname and ssl_context have the same
    meaning as they do when using :func:`ssl.wrap_socket`.

    :param server_hostname:
        When SNI is supported, the expected hostname of the certificate
    :param ssl_context:
        A pre-made :class:`SSLContext` object. If none is provided, one will
        be created using :func:`create_urllib3_context`.
    :param ciphers:
        A string of ciphers we wish the client to support. This is not
        supported on Python 2.6 as the ssl module does not support it.
    """
    if ssl_context is None:
        context = create_urllib3_context(ssl_version, cert_reqs,
                                         ciphers=ciphers)
    else:
        context = ssl_context

    if ca_certs:
        try:
            context.load_verify_locations(ca_certs)
        except IOError as e:  # Platform-specific: Python 2.6, 2.7, 3.2
            raise SSLError(e)
        except OSError as e:  # Platform-specific: Python 3.3 and beyond
            # Py33's FileNotFoundError subclasses OSError; only a genuinely
            # missing CA file is an SSL configuration error — anything else
            # propagates unchanged.
            if e.errno == errno.ENOENT:
                raise SSLError(e)
            raise

    if certfile:
        context.load_cert_chain(certfile, keyfile)

    if HAS_SNI:  # Platform-specific: OpenSSL with enabled SNI
        return context.wrap_socket(sock, server_hostname=server_hostname)
    return context.wrap_socket(sock)
255 |
--------------------------------------------------------------------------------
/requests/packages/urllib3/util/url.py:
--------------------------------------------------------------------------------
1 | from collections import namedtuple
2 |
3 | from ..exceptions import LocationParseError
4 |
5 |
url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment']


class Url(namedtuple('Url', url_attrs)):
    """
    Datastructure for representing an HTTP URL. Used as a return value for
    :func:`parse_url`.
    """
    # BUG FIX: this was ``slots = ()`` — a plain, useless class attribute.
    # ``__slots__ = ()`` is required to actually suppress the per-instance
    # ``__dict__`` on this namedtuple subclass.
    __slots__ = ()

    def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None,
                query=None, fragment=None):
        # Default every field to None so partial urls can be constructed.
        return super(Url, cls).__new__(cls, scheme, auth, host, port, path,
                                       query, fragment)

    @property
    def hostname(self):
        """For backwards-compatibility with urlparse. We're nice like that."""
        return self.host

    @property
    def request_uri(self):
        """Absolute path including the query string."""
        uri = self.path or '/'

        if self.query is not None:
            uri += '?' + self.query

        return uri

    @property
    def netloc(self):
        """Network location including host and port"""
        if self.port:
            return '%s:%d' % (self.host, self.port)
        return self.host

    @property
    def url(self):
        """
        Convert self into a url

        This function should more or less round-trip with :func:`.parse_url`. The
        returned url may not be exactly the same as the url inputted to
        :func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls
        with a blank port will have : removed).

        Example: ::

            >>> U = parse_url('http://google.com/mail/')
            >>> U.url
            'http://google.com/mail/'
            >>> Url('http', 'username:password', 'host.com', 80,
            ... '/path', 'query', 'fragment').url
            'http://username:password@host.com:80/path?query#fragment'
        """
        scheme, auth, host, port, path, query, fragment = self
        url = ''

        # We use "is not None" because we want things to happen with empty
        # strings (or a 0 port).
        if scheme is not None:
            url += scheme + '://'
        if auth is not None:
            url += auth + '@'
        if host is not None:
            url += host
        if port is not None:
            url += ':' + str(port)
        if path is not None:
            url += path
        if query is not None:
            url += '?' + query
        if fragment is not None:
            url += '#' + fragment

        return url

    def __str__(self):
        return self.url
85 |
def split_first(s, delims):
    """
    Given a string and an iterable of delimiters, split on the first found
    delimiter. Return two split parts and the matched delimiter.

    If not found, then the first part is the full input string.

    Example::

        >>> split_first('foo/bar?baz', '?/=')
        ('foo', 'bar?baz', '/')
        >>> split_first('foo/bar?baz', '123')
        ('foo/bar?baz', '', None)

    Scales linearly with number of delims. Not ideal for large number of delims.
    """
    # Track the earliest match as a (position, delimiter) pair.
    best = None
    for delim in delims:
        pos = s.find(delim)
        if pos >= 0 and (best is None or pos < best[0]):
            best = (pos, delim)

    if best is None:
        # No delimiter present at all.
        return s, '', None

    pos, delim = best
    return s[:pos], s[pos + 1:], delim
117 |
118 |
def parse_url(url):
    """
    Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
    performed to parse incomplete urls. Fields not provided will be None.

    Partly backwards-compatible with :mod:`urlparse`.

    Example::

        >>> parse_url('http://google.com/mail/')
        Url(scheme='http', host='google.com', port=None, path='/mail/', ...)
        >>> parse_url('google.com:80')
        Url(scheme=None, host='google.com', port=80, path=None, ...)
        >>> parse_url('/foo?bar')
        Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)
    """

    # While this code has overlap with stdlib's urlparse, it is much
    # simplified for our needs and less annoying.
    # Additionally, this implementations does silly things to be optimal
    # on CPython.

    if not url:
        # Empty
        return Url()

    scheme = None
    auth = None
    host = None
    port = None
    path = None
    fragment = None
    query = None

    # Scheme
    if '://' in url:
        scheme, url = url.split('://', 1)

    # Find the earliest Authority Terminator
    # (http://tools.ietf.org/html/rfc3986#section-3.2)
    # After this, `url` holds just the authority component and `path_`
    # holds everything that followed the first '/', '?' or '#'.
    url, path_, delim = split_first(url, ['/', '?', '#'])

    if delim:
        # Reassemble the path, re-attaching the delimiter that split_first
        # consumed, so the fragment/query extraction below still works.
        path = delim + path_

    # Auth
    if '@' in url:
        # Last '@' denotes end of auth part
        auth, url = url.rsplit('@', 1)

    # IPv6
    if url and url[0] == '[':
        # Bracketed IPv6 literal: keep the brackets on the host so the ':'
        # characters inside are not mistaken for a port separator below.
        host, url = url.split(']', 1)
        host += ']'

    # Port
    if ':' in url:
        # For a bracketed IPv6 host, `url` is now just ':port' (or ':'),
        # so `_host` is empty and the host set above is preserved.
        _host, port = url.split(':', 1)

        if not host:
            host = _host

        if port:
            # If given, ports must be integers.
            if not port.isdigit():
                raise LocationParseError(url)
            port = int(port)
        else:
            # Blank ports are cool, too. (rfc3986#section-3.2.3)
            port = None

    elif not host and url:
        host = url

    if not path:
        # Authority-only url: query and fragment stay None.
        return Url(scheme, auth, host, port, path, query, fragment)

    # Fragment
    if '#' in path:
        path, fragment = path.split('#', 1)

    # Query
    if '?' in path:
        path, query = path.split('?', 1)

    return Url(scheme, auth, host, port, path, query, fragment)
206 |
def get_host(url):
    """
    Deprecated. Use :func:`.parse_url` instead.
    """
    parsed = parse_url(url)
    scheme = parsed.scheme or 'http'
    return scheme, parsed.hostname, parsed.port
213 |
--------------------------------------------------------------------------------
/requests/status_codes.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from .structures import LookupDict
4 |
# Map each HTTP status code to the tuple of attribute aliases that will be
# exposed on the ``codes`` lookup object below.
_codes = {

    # Informational.
    100: ('continue',),
    101: ('switching_protocols',),
    102: ('processing',),
    103: ('checkpoint',),
    122: ('uri_too_long', 'request_uri_too_long'),
    200: ('ok', 'okay', 'all_ok', 'all_okay', 'all_good', '\\o/', '✓'),
    201: ('created',),
    202: ('accepted',),
    203: ('non_authoritative_info', 'non_authoritative_information'),
    204: ('no_content',),
    205: ('reset_content', 'reset'),
    206: ('partial_content', 'partial'),
    207: ('multi_status', 'multiple_status', 'multi_stati', 'multiple_stati'),
    208: ('already_reported',),
    226: ('im_used',),

    # Redirection.
    300: ('multiple_choices',),
    301: ('moved_permanently', 'moved', '\\o-'),
    302: ('found',),
    303: ('see_other', 'other'),
    304: ('not_modified',),
    305: ('use_proxy',),
    306: ('switch_proxy',),
    307: ('temporary_redirect', 'temporary_moved', 'temporary'),
    308: ('permanent_redirect',
          'resume_incomplete', 'resume',),  # These 2 to be removed in 3.0

    # Client Error.
    400: ('bad_request', 'bad'),
    401: ('unauthorized',),
    402: ('payment_required', 'payment'),
    403: ('forbidden',),
    404: ('not_found', '-o-'),
    405: ('method_not_allowed', 'not_allowed'),
    406: ('not_acceptable',),
    407: ('proxy_authentication_required', 'proxy_auth', 'proxy_authentication'),
    408: ('request_timeout', 'timeout'),
    409: ('conflict',),
    410: ('gone',),
    411: ('length_required',),
    412: ('precondition_failed', 'precondition'),
    413: ('request_entity_too_large',),
    414: ('request_uri_too_large',),
    415: ('unsupported_media_type', 'unsupported_media', 'media_type'),
    416: ('requested_range_not_satisfiable', 'requested_range', 'range_not_satisfiable'),
    417: ('expectation_failed',),
    418: ('im_a_teapot', 'teapot', 'i_am_a_teapot'),
    422: ('unprocessable_entity', 'unprocessable'),
    423: ('locked',),
    424: ('failed_dependency', 'dependency'),
    425: ('unordered_collection', 'unordered'),
    426: ('upgrade_required', 'upgrade'),
    428: ('precondition_required', 'precondition'),
    429: ('too_many_requests', 'too_many'),
    431: ('header_fields_too_large', 'fields_too_large'),
    444: ('no_response', 'none'),
    449: ('retry_with', 'retry'),
    450: ('blocked_by_windows_parental_controls', 'parental_controls'),
    451: ('unavailable_for_legal_reasons', 'legal_reasons'),
    499: ('client_closed_request',),

    # Server Error.
    500: ('internal_server_error', 'server_error', '/o\\', '✗'),
    501: ('not_implemented',),
    502: ('bad_gateway',),
    503: ('service_unavailable', 'unavailable'),
    504: ('gateway_timeout',),
    505: ('http_version_not_supported', 'http_version'),
    506: ('variant_also_negotiates',),
    507: ('insufficient_storage',),
    509: ('bandwidth_limit_exceeded', 'bandwidth'),
    510: ('not_extended',),
}

# Lookup object so callers can write e.g. ``codes.ok`` or ``codes.NOT_FOUND``.
codes = LookupDict(name='status_codes')

# No mutation happens during iteration, so the defensive list() copy around
# _codes.items() was unnecessary and has been dropped.
for (code, titles) in _codes.items():
    for title in titles:
        setattr(codes, title, code)
        if not title.startswith('\\'):
            # Also expose an UPPERCASE alias, except for the backslash
            # "emoticon" names where uppercasing is meaningless.
            setattr(codes, title.upper(), code)
90 |
--------------------------------------------------------------------------------
/requests/structures.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | """
4 | requests.structures
5 | ~~~~~~~~~~~~~~~~~~~
6 |
7 | Data structures that power Requests.
8 |
9 | """
10 |
import collections

try:  # Python 3.3+: ABCs live in collections.abc (removed from collections in 3.10)
    from collections.abc import Mapping, MutableMapping
except ImportError:  # Python 2
    from collections import Mapping, MutableMapping
12 |
13 |
class CaseInsensitiveDict(MutableMapping):
    """
    A case-insensitive ``dict``-like object.

    Implements all methods and operations of
    ``MutableMapping`` as well as dict's ``copy``. Also
    provides ``lower_items``.

    All keys are expected to be strings. The structure remembers the
    case of the last key to be set, and ``iter(instance)``,
    ``keys()``, ``items()``, ``iterkeys()``, and ``iteritems()``
    will contain case-sensitive keys. However, querying and contains
    testing is case insensitive::

        cid = CaseInsensitiveDict()
        cid['Accept'] = 'application/json'
        cid['aCCEPT'] == 'application/json'  # True
        list(cid) == ['Accept']  # True

    For example, ``headers['content-encoding']`` will return the
    value of a ``'Content-Encoding'`` response header, regardless
    of how the header name was originally stored.

    If the constructor, ``.update``, or equality comparison
    operations are given keys that have equal ``.lower()``s, the
    behavior is undefined.

    """
    # NOTE: the base class was ``collections.MutableMapping``, which is
    # deprecated since Python 3.3 and removed in 3.10; the compat import at
    # the top of the module supplies ``MutableMapping``/``Mapping`` on both
    # Python 2 and 3.

    def __init__(self, data=None, **kwargs):
        # Maps lowercased key -> (original-cased key, value).
        self._store = dict()
        if data is None:
            data = {}
        self.update(data, **kwargs)

    def __setitem__(self, key, value):
        # Use the lowercased key for lookups, but store the actual
        # key alongside the value.
        self._store[key.lower()] = (key, value)

    def __getitem__(self, key):
        return self._store[key.lower()][1]

    def __delitem__(self, key):
        del self._store[key.lower()]

    def __iter__(self):
        # Yield the original-cased keys, not the lowercased store keys.
        return (casedkey for casedkey, mappedvalue in self._store.values())

    def __len__(self):
        return len(self._store)

    def lower_items(self):
        """Like iteritems(), but with all lowercase keys."""
        return (
            (lowerkey, keyval[1])
            for (lowerkey, keyval)
            in self._store.items()
        )

    def __eq__(self, other):
        if isinstance(other, Mapping):
            other = CaseInsensitiveDict(other)
        else:
            return NotImplemented
        # Compare insensitively
        return dict(self.lower_items()) == dict(other.lower_items())

    # Copy is required
    def copy(self):
        # _store.values() is an iterable of (cased key, value) pairs, which
        # the constructor accepts, so the original key casing is preserved.
        return CaseInsensitiveDict(self._store.values())

    def __repr__(self):
        return str(dict(self.items()))
87 |
class LookupDict(dict):
    """Dictionary lookup object.

    Values are stored as instance attributes (via ``setattr``); item access
    reads from the instance ``__dict__`` and returns ``None`` for missing
    keys instead of raising ``KeyError``.
    """

    def __init__(self, name=None):
        self.name = name
        super(LookupDict, self).__init__()

    def __repr__(self):
        # BUG FIX: the format string was empty (``'' % (self.name)`` raises
        # ``TypeError: not all arguments converted``); restore the
        # informative repr used by requests.
        return '<lookup \'%s\'>' % (self.name)

    def __getitem__(self, key):
        # We allow fall-through here, so values default to None

        return self.__dict__.get(key, None)

    def get(self, key, default=None):
        return self.__dict__.get(key, default)
105 |
--------------------------------------------------------------------------------
/wytorproxy.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 | # file: torproxy.py
4 | # mail: ringzero@0x557.org
5 |
6 | import sys
7 | import random
8 | import requests
9 | import json
10 |
# Tunable runtime configuration.
retrycnt = 3 # number of times a failed request is retried
timeout = 10 # per-request timeout, in seconds

# Proxy settings: leave empty to connect directly; user:password auth is supported.
proxies = {
    # "http": "http://user:pass@10.10.1.10:3128/",
    # "https": "http://10.10.1.10:1080",
    "http": "http://127.0.0.1:8118", # TOR onion router
}
result = {}
22 |
# Pick a random User-Agent header value.
def random_useragent():
    """Return one browser User-Agent string chosen uniformly at random."""
    ua_pool = [
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
        "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
        "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
        "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
        "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)",
        "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
        "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
        "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
        "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
        "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
        "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
        "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
    ]
    return random.choice(ua_pool)
44 |
45 | # 随机X-Forwarded-For,动态IP
46 | def random_x_forwarded_for():
47 | return '%d.%d.%d.%d' % (random.randint(1, 254),random.randint(1, 254),random.randint(1, 254),random.randint(1, 254))
48 |
def http_request_get(url, body_content_workflow=0):
    """GET *url* through the configured proxy, retrying on failure.

    :param url: target URL; also sent as the Referer header.
    :param body_content_workflow: when 1, request with ``stream=True`` so the
        response body is not downloaded until it is read.
    :returns: a ``requests.Response`` on success, or ``False`` after
        ``retrycnt`` consecutive failures.
    """
    trycnt = 0
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20',
        'Referer' : url,
        'Cookie': 'whoami=wytorproxy',
    }
    while True:
        try:
            # The two branches differed only in stream=; fold them into one
            # call. (Was `except Exception, e` — Python-2-only syntax with
            # an unused binding.)
            return requests.get(url, stream=(body_content_workflow == 1),
                                headers=headers, timeout=timeout,
                                proxies=proxies)
        except Exception:  # deliberately broad: any network error -> retry
            trycnt += 1
            if trycnt >= retrycnt:
                # Retries exhausted.
                return False
71 |
def http_request_post(url, payload, body_content_workflow=0):
    """POST *payload* to *url* through the configured proxy, retrying on failure.

    payload = {'key1': 'value1', 'key2': 'value2'}

    :param url: target URL; also sent as the Referer header.
    :param payload: form data dict passed as ``data=``.
    :param body_content_workflow: when 1, request with ``stream=True`` so the
        response body is not downloaded until it is read.
    :returns: a ``requests.Response`` on success, or ``False`` after
        ``retrycnt`` consecutive failures.
    """
    trycnt = 0
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20',
        'Referer' : url,
        'Cookie': 'whoami=wytorproxy',
    }
    while True:
        try:
            # The two branches differed only in stream=; fold them into one
            # call. (Was `except Exception, e` — Python-2-only syntax with
            # an unused binding.)
            return requests.post(url, data=payload, headers=headers,
                                 stream=(body_content_workflow == 1),
                                 timeout=timeout, proxies=proxies)
        except Exception:  # deliberately broad: any network error -> retry
            trycnt += 1
            if trycnt >= retrycnt:
                # Retries exhausted.
                return False
97 |
def check_website_status(url):
    """Probe *url* through the proxy and report whether it is reachable.

    :returns: dict with ``'status'`` (bool) and ``'info'`` (str).
    """
    result = http_request_get(url, body_content_workflow=1)
    # http_request_get returns the literal False on failure; test identity
    # rather than equality so a Response object is never coerced/compared.
    if result is False:
        # Server down or the request options were wrong.
        return {'status': False, 'info': 'server down or options error'}
    elif result.status_code != requests.codes.ok:
        # Non-200 response.
        result_info = 'status_code: %s != 200' % result.status_code
        return {'status': False, 'info': result_info}
    else:
        # Healthy response.
        return {'status': True, 'info': 'response ok'}
110 |
# Demo entry point: guard it so importing this module no longer fires a
# network request, and use the function form of print (valid on both
# Python 2 and 3 for a single argument).
if __name__ == '__main__':
    print(http_request_get('http://ip.taobao.com/service/getIpInfo2.php?ip=myip').text)
112 |
113 |
114 |
--------------------------------------------------------------------------------