├── README.md
├── pytorctl
├── GeoIPSupport.py
├── LICENSE
├── PathSupport.py
├── README
├── SQLSupport.py
├── ScanSupport.py
├── StatsSupport.py
├── TorCtl.py
├── TorUtil.py
├── __init__.py
└── example.py
├── requests
├── __init__.py
├── adapters.py
├── api.py
├── auth.py
├── cacert.pem
├── certs.py
├── compat.py
├── cookies.py
├── exceptions.py
├── hooks.py
├── models.py
├── packages
│ ├── __init__.py
│ ├── chardet
│ │ ├── __init__.py
│ │ ├── big5freq.py
│ │ ├── big5prober.py
│ │ ├── chardetect.py
│ │ ├── chardistribution.py
│ │ ├── charsetgroupprober.py
│ │ ├── charsetprober.py
│ │ ├── codingstatemachine.py
│ │ ├── compat.py
│ │ ├── constants.py
│ │ ├── cp949prober.py
│ │ ├── escprober.py
│ │ ├── escsm.py
│ │ ├── eucjpprober.py
│ │ ├── euckrfreq.py
│ │ ├── euckrprober.py
│ │ ├── euctwfreq.py
│ │ ├── euctwprober.py
│ │ ├── gb2312freq.py
│ │ ├── gb2312prober.py
│ │ ├── hebrewprober.py
│ │ ├── jisfreq.py
│ │ ├── jpcntx.py
│ │ ├── langbulgarianmodel.py
│ │ ├── langcyrillicmodel.py
│ │ ├── langgreekmodel.py
│ │ ├── langhebrewmodel.py
│ │ ├── langhungarianmodel.py
│ │ ├── langthaimodel.py
│ │ ├── latin1prober.py
│ │ ├── mbcharsetprober.py
│ │ ├── mbcsgroupprober.py
│ │ ├── mbcssm.py
│ │ ├── sbcharsetprober.py
│ │ ├── sbcsgroupprober.py
│ │ ├── sjisprober.py
│ │ ├── universaldetector.py
│ │ └── utf8prober.py
│ └── urllib3
│ │ ├── __init__.py
│ │ ├── _collections.py
│ │ ├── connection.py
│ │ ├── connectionpool.py
│ │ ├── contrib
│ │ ├── __init__.py
│ │ ├── ntlmpool.py
│ │ └── pyopenssl.py
│ │ ├── exceptions.py
│ │ ├── fields.py
│ │ ├── filepost.py
│ │ ├── packages
│ │ ├── __init__.py
│ │ ├── ordered_dict.py
│ │ ├── six.py
│ │ └── ssl_match_hostname
│ │ │ ├── __init__.py
│ │ │ └── _implementation.py
│ │ ├── poolmanager.py
│ │ ├── request.py
│ │ ├── response.py
│ │ └── util
│ │ ├── __init__.py
│ │ ├── connection.py
│ │ ├── request.py
│ │ ├── response.py
│ │ ├── retry.py
│ │ ├── ssl_.py
│ │ ├── timeout.py
│ │ └── url.py
├── sessions.py
├── status_codes.py
├── structures.py
└── utils.py
└── wytorproxy.py
/README.md:
--------------------------------------------------------------------------------
1 | # wytorproxy
2 | wyscan tor proxy lib & help doc
3 | (当你需要更多的IP,防止服务被屏蔽,写一个tor的控制脚本,就能每分钟刷新一下tor洋葱池的IP)
4 |
5 | BUG反馈
6 | -----------------------------------
7 | > 微博:http://weibo.com/ringzero
8 | > 邮箱:ringzero@0x557.org
9 |
10 | #### 使用实例
11 | > [root@10-8-11-221 wytorproxy]# python wytorproxy.py
12 | > {"code":0,"data":{"country":"美国","country_id":"US","area":"","area_id":"","region":"","region_id":"","city":"","city_id":"","county":"","county_id":"","isp":"","isp_id":"","ip":"96.47.226.20"}}
13 |
14 | 安装配置tor & privoxy
15 | -----------------------------------
16 | ### 安装tor
17 | yum -y install libevent libevent-devel libpcap-devel openssl-devel
18 | wget http://tor.hermetix.org/dist/tor-0.2.1.25.tar.gz
19 | tar zvxf tor-0.2.1.25.tar.gz
20 | cd tor-0.2.1.25
21 | ./configure && make && make install
22 | /* 后台启动 */
23 | nohup tor &
24 |
25 | ### 安装 privoxy 并配置与tor连接
26 | yum -y install privoxy
27 | /* 配置privoxy连接tor路由 */
28 | echo 'forward-socks5 / 127.0.0.1:9050 .' >> /etc/privoxy/config
29 | /* 启动privoxy服务 */
30 | service privoxy start
31 |
32 | 使用TOR代理
33 | -----------------------------------
34 | > 127.0.0.1:8118
35 | > 此时127.0.0.1的8118端口就接入tor的网络,享用匿名IP服务了
36 |
37 | ### 使用iptables做NAT转换,映射到外网IP上供更多的服务器使用
38 | (如果你想在外网使用的话,下面是实现方法)
39 |
40 | sed -i '/net.ipv4.ip_forward/ s/\(.*= \).*/\11/' /etc/sysctl.conf
41 | sysctl -p
42 | iptables -t nat -A PREROUTING -p tcp -i eth0 --dport 8778 -j DNAT --to 127.0.0.1:8118
43 | iptables -t nat -A POSTROUTING -j MASQUERADE
44 | service iptables save
45 | service iptables restart
46 |
--------------------------------------------------------------------------------
/pytorctl/GeoIPSupport.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # Copyright 2007 Johannes Renner and Mike Perry. See LICENSE file.
3 |
4 | import struct
5 | import socket
6 | import TorCtl
7 | import StatsSupport
8 |
9 | from TorUtil import plog
# Best-effort import of the optional GeoIP C extension.  The module still
# imports without it, but country lookups (get_country & friends) will fail
# at call time.
try:
    import GeoIP
    # GeoIP data object: choose database here
    geoip = GeoIP.new(GeoIP.GEOIP_STANDARD)
    #geoip = GeoIP.open("./GeoLiteCity.dat", GeoIP.GEOIP_STANDARD)
except Exception:
    # Was a bare "except:", which also swallows KeyboardInterrupt/SystemExit;
    # Exception is narrow enough for an optional-dependency probe.
    plog("NOTICE", "No GeoIP library. GeoIPSupport.py will not work correctly")
    # XXX: How do we bail entirely..
18 |
19 |
class Continent:
    """One continent, keyed by its two-letter code.

    The group attribute partitions the continents into groups so that the
    number of ocean crossings along a path can be determined; countries
    lists the two-letter country codes assigned to this continent.  Both
    are filled in by module-level setup code after construction.
    """
    def __init__(self, continent_code):
        self.code = continent_code
        self.group = None
        self.countries = []

    def contains(self, country_code):
        """Return True iff country_code is assigned to this continent."""
        return any(cc == country_code for cc in self.countries)
30 |
# Set countries to continents.  The group numbers below partition the
# continents for ocean-crossing counting (see Continent): Africa, Asia and
# Europe share group 1, Oceania is group 2, and the Americas share group 0.
africa = Continent("AF")
africa.group = 1
africa.countries = ["AO","BF","BI","BJ","BV","BW","CD","CF","CG","CI","CM",
    "CV","DJ","DZ","EG","EH","ER","ET","GA","GH","GM","GN","GQ","GW","HM","KE",
    "KM","LR","LS","LY","MA","MG","ML","MR","MU","MW","MZ","NA","NE","NG","RE",
    "RW","SC","SD","SH","SL","SN","SO","ST","SZ","TD","TF","TG","TN","TZ","UG",
    "YT","ZA","ZM","ZR","ZW"]

asia = Continent("AS")
asia.group = 1
asia.countries = ["AP","AE","AF","AM","AZ","BD","BH","BN","BT","CC","CN","CX",
    "CY","GE","HK","ID","IL","IN","IO","IQ","IR","JO","JP","KG","KH","KP","KR",
    "KW","KZ","LA","LB","LK","MM","MN","MO","MV","MY","NP","OM","PH","PK","PS",
    "QA","RU","SA","SG","SY","TH","TJ","TM","TP","TR","TW","UZ","VN","YE"]

europe = Continent("EU")
europe.group = 1
europe.countries = ["EU","AD","AL","AT","BA","BE","BG","BY","CH","CZ","DE",
    "DK","EE","ES","FI","FO","FR","FX","GB","GI","GR","HR","HU","IE","IS","IT",
    "LI","LT","LU","LV","MC","MD","MK","MT","NL","NO","PL","PT","RO","SE","SI",
    "SJ","SK","SM","UA","VA","YU"]

oceania = Continent("OC")
oceania.group = 2
oceania.countries = ["AS","AU","CK","FJ","FM","GU","KI","MH","MP","NC","NF",
    "NR","NU","NZ","PF","PG","PN","PW","SB","TK","TO","TV","UM","VU","WF","WS"]

north_america = Continent("NA")
north_america.group = 0
north_america.countries = ["CA","MX","US"]

# NOTE(review): this list mixes in Caribbean/Central American codes under the
# "SA" continent -- presumably intentional coarse grouping; verify if precise
# continent assignment ever matters.
south_america = Continent("SA")
south_america.group = 0
south_america.countries = ["AG","AI","AN","AR","AW","BB","BM","BO","BR","BS",
    "BZ","CL","CO","CR","CU","DM","DO","EC","FK","GD","GF","GL","GP","GS","GT",
    "GY","HN","HT","JM","KN","KY","LC","MQ","MS","NI","PA","PE","PM","PR","PY",
    "SA","SR","SV","TC","TT","UY","VC","VE","VG","VI"]

# List of continents, searched in order by get_continent()
continents = [africa, asia, europe, north_america, oceania, south_america]
72 |
def get_continent(country_code):
    """ Map a country code onto its Continent object (None if unknown) """
    found = next((cont for cont in continents if cont.contains(country_code)),
                 None)
    if found is None:
        plog("INFO", country_code + " is not on any continent")
    return found
80 |
def get_country(ip):
    """ Resolve a dotted-quad IP string to a country code via the library """
    country = geoip.country_code_by_addr(ip)
    return country
84 |
def get_country_from_record(ip):
    """ Get the country code out of a GeoLiteCity record (not used)

    Returns None when GeoIP has no record for this address.
    """
    record = geoip.record_by_addr(ip)
    # "is not None" instead of "!= None": identity is the correct way to
    # test for None; also make the miss-path return explicit.
    if record is not None:
        return record['country_code']
    return None
90 |
class GeoIPRouter(TorCtl.Router):
    # TODO: Its really shitty that this has to be a TorCtl.Router
    # and can't be a StatsRouter..
    """ Router class extended to GeoIP """
    def __init__(self, router):
        # Alias (not copy) the wrapped router's attribute dict so both
        # objects share state; the GeoIP attributes are bolted on top.
        self.__dict__ = router.__dict__
        self.country_code = get_country(self.get_ip_dotted())
        # Default both geo attributes so callers never hit AttributeError:
        # the original code left them unset when the country lookup
        # succeeded but the continent lookup failed (and left cont_group
        # unset when the country lookup failed).
        self.continent = None
        self.cont_group = None
        if self.country_code is not None:
            c = get_continent(self.country_code)
            if c is not None:
                self.continent = c.code
                self.cont_group = c.group
        else:
            plog("INFO", self.nickname + ": Country code not found")

    def get_ip_dotted(self):
        """ Convert long int back to dotted quad string """
        return socket.inet_ntoa(struct.pack('>I', self.ip))
110 |
class GeoIPConfig:
    """ Class to configure GeoIP-based path building """
    def __init__(self, unique_countries=None, continent_crossings=4,
       ocean_crossings=None, entry_country=None, middle_country=None,
       exit_country=None, excludes=None):
        # TODO: Somehow ensure validity of a configuration:
        # - continent_crossings >= ocean_crossings
        # - unique_countries=False --> continent_crossings!=None
        # - echelon? set entry_country to source and exit_country to None

        # Pin a specific country (single country code or None) to each of
        # the entry, middle and exit positions of a route.
        self.entry_country = entry_country
        self.middle_country = middle_country
        self.exit_country = exit_country

        # Countries that must never appear in a route
        # [(empty) list of country codes or None]
        self.excludes = excludes

        # Whether a country may be used at most once per route
        # [True --> unique, False --> same or None --> pass]
        self.unique_countries = unique_countries

        # Upper bounds on continent/ocean crossings within a single path
        # [integer number 0-n or None --> ContinentJumper/UniqueContinent]
        self.continent_crossings = continent_crossings
        self.ocean_crossings = ocean_crossings

        # Echelon mode (find an exit in the destination country, falling
        # back to exit_country when that country cannot be found) is off
        # by default.
        self.echelon = False
142 |
--------------------------------------------------------------------------------
/pytorctl/LICENSE:
--------------------------------------------------------------------------------
1 | ===============================================================================
2 | The Python Tor controller code is distributed under this license:
3 |
4 | Copyright 2005, Nick Mathewson, Roger Dingledine
5 | Copyright 2007-2010, Mike Perry
6 |
7 | Redistribution and use in source and binary forms, with or without
8 | modification, are permitted provided that the following conditions are
9 | met:
10 |
11 | * Redistributions of source code must retain the above copyright
12 | notice, this list of conditions and the following disclaimer.
13 |
14 | * Redistributions in binary form must reproduce the above
15 | copyright notice, this list of conditions and the following disclaimer
16 | in the documentation and/or other materials provided with the
17 | distribution.
18 |
19 | * Neither the names of the copyright owners nor the names of its
20 | contributors may be used to endorse or promote products derived from
21 | this software without specific prior written permission.
22 |
23 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 |
--------------------------------------------------------------------------------
/pytorctl/README:
--------------------------------------------------------------------------------
1 | Note: TorCtl is mostly unmaintained. It serves primarily as the support
2 | library for the Bandwidth Authorities, Exit Scanner, and other projects in
3 | TorFlow. For more actively maintained python libraries, you may consider using
4 | Stem or TxTorCon. See:
5 | https://stem.torproject.org/ and https://github.com/meejah/txtorcon
6 |
7 |
8 |
9 |
10 | TorCtl Python Bindings
11 |
12 |
13 | TorCtl is a python Tor controller with extensions to support path
14 | building and various constraints on node and path selection, as well as
15 | statistics gathering.
16 |
17 | Apps can hook into the TorCtl package at whatever level they wish.
18 |
19 | The lowest level of interaction is to use the TorCtl module
20 | (TorCtl/TorCtl.py). Typically this is done by importing TorCtl.TorCtl
21 | and creating a TorCtl.Connection and extending from TorCtl.EventHandler.
22 | This class receives Tor controller events packaged into python classes
23 | from a TorCtl.Connection.
24 |
25 | The next level up is to use the TorCtl.PathSupport module. This is done
26 | by importing TorCtl.PathSupport and instantiating or extending from
27 | PathSupport.PathBuilder, which itself extends from TorCtl.EventHandler.
28 | This class handles circuit construction and stream attachment subject to
29 | policies defined by PathSupport.NodeRestrictor and
30 | PathSupport.PathRestrictor implementations.
31 |
32 | If you are interested in gathering statistics, you can instead
33 | instantiate or extend from StatsSupport.StatsHandler, which is
34 | again an event handler with hooks to record statistics on circuit
35 | creation, stream bandwidth, and circuit failure information.
36 |
37 | All of these modules are pydoced. For more detailed information than
38 | the above overview, you can do:
39 |
40 | # pydoc TorCtl
41 | # pydoc PathSupport
42 | # pydoc StatsSupport
43 |
44 | There is a minimalistic example of usage of the basic TorCtl.Connection
45 | and TorCtl.EventHandler in run_example() in TorCtl.py in this directory.
46 | Other components also have unit tests at the end of their source files.
47 |
48 | For more extensive examples of the PathSupport and StatsSupport
49 | interfaces, see the TorFlow project at git url:
50 |
51 | git clone git://git.torproject.org/git/torflow.git
52 |
--------------------------------------------------------------------------------
/pytorctl/ScanSupport.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # Copyright 2009-2010 Mike Perry. See LICENSE file.
3 | import PathSupport
4 | import threading
5 | import copy
6 | import time
7 | import shutil
8 | import TorCtl
9 |
10 | from TorUtil import plog
11 |
# Placeholder for the lazily-imported SQLSupport module; SQLScanHandler
# performs the real import on first construction so that sqlalchemy is only
# required when SQL support is actually used.
SQLSupport = None
13 |
14 | # Note: be careful writing functions for this class. Remember that
15 | # the PathBuilder has its own thread that it receives events on
16 | # independent from your thread that calls into here.
class ScanHandler(PathSupport.PathBuilder):
    """Scanner-facing facade over PathSupport.PathBuilder.

    PathBuilder processes Tor controller events on its own thread, so the
    methods here hand a closure to that thread via schedule_selmgr()/
    schedule_low_prio()/schedule_immediate() and, when completion or a
    result is needed, block on a threading.Condition until the closure has
    run on the event thread.
    """
    def set_pct_rstr(self, percent_skip, percent_fast):
        # Fire-and-forget: update the selection manager's percentile window
        # from the event thread; no completion signal is required.
        def notlambda(sm):
            sm.percent_fast=percent_fast
            sm.percent_skip=percent_skip
        self.schedule_selmgr(notlambda)

    def reset_stats(self):
        # Fire-and-forget reset of the PathBuilder's accumulated stats.
        def notlambda(this):
            this.reset()
        self.schedule_low_prio(notlambda)

    def commit(self):
        """Block until all currently queued jobs have been run."""
        plog("INFO", "Scanner committing jobs...")
        cond = threading.Condition()
        def notlambda2(this):
            cond.acquire()
            this.run_all_jobs = False
            plog("INFO", "Commit done.")
            cond.notify()
            cond.release()

        def notlambda1(this):
            plog("INFO", "Committing jobs...")
            this.run_all_jobs = True
            # Queued from inside the event thread at low priority, so
            # notlambda2 only fires after the jobs released by
            # run_all_jobs have drained.
            self.schedule_low_prio(notlambda2)

        cond.acquire()
        self.schedule_immediate(notlambda1)

        cond.wait()
        cond.release()
        plog("INFO", "Scanner commit done.")

    def close_circuits(self):
        """Synchronously close all circuits on the event thread."""
        cond = threading.Condition()
        def notlambda(this):
            cond.acquire()
            this.close_all_circuits()
            cond.notify()
            cond.release()
        cond.acquire()
        self.schedule_low_prio(notlambda)
        cond.wait()
        cond.release()

    def close_streams(self, reason):
        """Synchronously close all streams with the given Tor reason code."""
        cond = threading.Condition()
        plog("NOTICE", "Wedged Tor stream. Closing all streams")
        def notlambda(this):
            cond.acquire()
            this.close_all_streams(reason)
            cond.notify()
            cond.release()
        cond.acquire()
        self.schedule_low_prio(notlambda)
        cond.wait()
        cond.release()

    def new_exit(self):
        """Request a fresh exit: set new_nym, clear bad restrictions, and
        flush Tor's DNS cache.  Blocks until done."""
        cond = threading.Condition()
        def notlambda(this):
            cond.acquire()
            this.new_nym = True
            if this.selmgr.bad_restrictions:
                plog("NOTICE", "Clearing bad restrictions with reconfigure..")
                this.selmgr.reconfigure(this.current_consensus())
            lines = this.c.sendAndRecv("SIGNAL CLEARDNSCACHE\r\n")
            for _,msg,more in lines:
                plog("DEBUG", msg)
            cond.notify()
            cond.release()
        cond.acquire()
        self.schedule_low_prio(notlambda)
        cond.wait()
        cond.release()

    def idhex_to_r(self, idhex):
        """Look up a Router by identity hex digest; None if unknown."""
        # The result is smuggled back on an ad-hoc attribute of the
        # Condition object.
        cond = threading.Condition()
        def notlambda(this):
            cond.acquire()
            # NOTE(review): reads self.routers rather than the `this`
            # callback argument -- presumably the same object; confirm.
            if idhex in self.routers:
                cond._result = self.routers[idhex]
            else:
                cond._result = None
            cond.notify()
            cond.release()
        cond.acquire()
        self.schedule_low_prio(notlambda)
        cond.wait()
        cond.release()
        return cond._result

    def name_to_idhex(self, nick):
        """Look up a router's identity hex digest by nickname; None if unknown."""
        cond = threading.Condition()
        def notlambda(this):
            cond.acquire()
            if nick in self.name_to_key:
                cond._result = self.name_to_key[nick]
            else:
                cond._result = None
            cond.notify()
            cond.release()
        cond.acquire()
        self.schedule_low_prio(notlambda)
        cond.wait()
        cond.release()
        return cond._result

    def rank_to_percent(self, rank):
        """Convert a bandwidth rank into a percentile of sorted_r."""
        cond = threading.Condition()
        def notlambda(this):
            cond.acquire()
            cond._pct = (100.0*rank)/len(this.sorted_r) # lol moar haxx
            cond.notify()
            cond.release()
        cond.acquire()
        self.schedule_low_prio(notlambda)
        cond.wait()
        cond.release()
        return cond._pct

    def percent_to_rank(self, pct):
        """Convert a percentile into a bandwidth rank into sorted_r."""
        cond = threading.Condition()
        def notlambda(this):
            cond.acquire()
            cond._rank = int(round((pct*len(this.sorted_r))/100.0,0)) # lol moar haxx
            cond.notify()
            cond.release()
        cond.acquire()
        self.schedule_low_prio(notlambda)
        cond.wait()
        cond.release()
        return cond._rank

    def get_exit_node(self):
        """Return a copy of the last exit used (or None)."""
        # Plain attribute read is safe from this thread under the GIL.
        ret = copy.copy(self.last_exit) # GIL FTW
        if ret:
            plog("DEBUG", "Got last exit of "+ret.idhex)
        else:
            plog("DEBUG", "No last exit.")
        return ret

    def set_exit_node(self, arg):
        """Pin the exit to `arg` (nickname/idhex); blocks until applied."""
        cond = threading.Condition()
        exit_name = arg
        plog("DEBUG", "Got Setexit: "+exit_name)
        def notlambda(sm):
            plog("DEBUG", "Job for setexit: "+exit_name)
            cond.acquire()
            # Clear last successful exit, we're running a new test
            self.last_exit = None
            sm.set_exit(exit_name)
            cond.notify()
            cond.release()
        cond.acquire()
        self.schedule_selmgr(notlambda)
        cond.wait()
        cond.release()
176 |
class SQLScanHandler(ScanHandler):
    """ScanHandler variant that mirrors scan results into a SQL database
    via the SQLSupport module (imported lazily to avoid a hard sqlalchemy
    dependency)."""
    def __init__(self, c, selmgr, RouterClass=TorCtl.Router,
                 strm_selector=PathSupport.StreamSelector):
        # Only require sqlalchemy if we really need it.
        global SQLSupport
        if SQLSupport is None:
            import SQLSupport
        ScanHandler.__init__(self, c, selmgr, RouterClass, strm_selector)

    def attach_sql_listener(self, db_uri):
        """Create/reset the database and register listeners that mirror
        consensus and stream events into it."""
        plog("DEBUG", "Got db: "+db_uri)
        SQLSupport.setup_db(db_uri, echo=False, drop=True)
        self.sql_consensus_listener = SQLSupport.ConsensusTrackerListener()
        self.add_event_listener(self.sql_consensus_listener)
        self.add_event_listener(SQLSupport.StreamListener())

    def write_sql_stats(self, rfilename=None, stats_filter=None):
        """Dump router stats to rfilename (default: timestamped file)."""
        if not rfilename:
            rfilename="./data/stats/sql-"+time.strftime("20%y-%m-%d-%H:%M:%S")
        cond = threading.Condition()
        def notlambda(h):
            cond.acquire()
            # NOTE(review): file() is Python-2-only and the handle is never
            # closed explicitly -- it is only reclaimed by the GC.
            SQLSupport.RouterStats.write_stats(file(rfilename, "w"),
                0, 100, order_by=SQLSupport.RouterStats.sbw,
                recompute=True, disp_clause=stats_filter)
            cond.notify()
            cond.release()
        cond.acquire()
        self.schedule_low_prio(notlambda)
        cond.wait()
        cond.release()

    def write_strm_bws(self, rfilename=None, slice_num=0, stats_filter=None):
        """Dump per-stream bandwidths for this slice to rfilename."""
        if not rfilename:
            rfilename="./data/stats/bws-"+time.strftime("20%y-%m-%d-%H:%M:%S")
        cond = threading.Condition()
        def notlambda(this):
            cond.acquire()
            f=file(rfilename, "w")
            f.write("slicenum="+str(slice_num)+"\n")
            SQLSupport.RouterStats.write_bws(f, 0, 100,
                order_by=SQLSupport.RouterStats.sbw,
                recompute=False, disp_clause=stats_filter)
            f.close()
            cond.notify()
            cond.release()
        cond.acquire()
        self.schedule_low_prio(notlambda)
        cond.wait()
        cond.release()

    def save_sql_file(self, sql_file, new_file):
        """Close the SQL session, copy the db file aside, and reset
        SQLSupport so a fresh session is created."""
        cond = threading.Condition()
        def notlambda(this):
            cond.acquire()
            SQLSupport.tc_session.close()
            try:
                shutil.copy(sql_file, new_file)
            except Exception,e:
                plog("WARN", "Error moving sql file: "+str(e))
            SQLSupport.reset_all()
            cond.notify()
            cond.release()
        cond.acquire()
        self.schedule_low_prio(notlambda)
        cond.wait()
        cond.release()

    def wait_for_consensus(self):
        """Block until the SQL consensus listener has fully processed the
        current consensus.  The closure reschedules itself on the event
        thread until CONSENSUS_DONE, then signals us."""
        cond = threading.Condition()
        def notlambda(this):
            if this.sql_consensus_listener.last_desc_at \
                    != SQLSupport.ConsensusTrackerListener.CONSENSUS_DONE:
                this.sql_consensus_listener.wait_for_signal = False
                plog("INFO", "Waiting on consensus result: "+str(this.run_all_jobs))
                this.schedule_low_prio(notlambda)
            else:
                cond.acquire()
                this.sql_consensus_listener.wait_for_signal = True
                cond.notify()
                cond.release()
        plog("DEBUG", "Checking for consensus")
        cond.acquire()
        self.schedule_low_prio(notlambda)
        cond.wait()
        cond.release()
        plog("INFO", "Consensus OK")

    def reset_stats(self):
        """Reset both the in-memory PathBuilder stats and the SQL tables,
        then rebuild rank history from the current consensus."""
        cond = threading.Condition()
        def notlambda(this):
            cond.acquire()
            ScanHandler.reset_stats(self)
            SQLSupport.reset_all()
            this.sql_consensus_listener.update_consensus()
            this.sql_consensus_listener._update_rank_history(this.sql_consensus_listener.consensus.ns_map.iterkeys())
            SQLSupport.refresh_all()
            cond.notify()
            cond.release()
        cond.acquire()
        self.schedule_low_prio(notlambda)
        cond.wait()
        cond.release()
280 |
--------------------------------------------------------------------------------
/pytorctl/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | TorCtl is a python Tor controller with extensions to support path
3 | building and various constraints on node and path selection, as well as
4 | statistics gathering.
5 |
6 | Apps can hook into the TorCtl package at whatever level they wish.
7 |
8 | The lowest level of interaction is to use the TorCtl module
9 | (TorCtl/TorCtl.py). Typically this is done by importing TorCtl.TorCtl
10 | and creating a TorCtl.Connection and extending from TorCtl.EventHandler.
11 | This class receives Tor controller events packaged into python classes
12 | from a TorCtl.Connection.
13 |
14 | The next level up is to use the TorCtl.PathSupport module. This is done
15 | by importing TorCtl.PathSupport and instantiating or extending from
16 | PathSupport.PathBuilder, which itself extends from TorCtl.EventHandler.
17 | This class handles circuit construction and stream attachment subject to
18 | policies defined by PathSupport.NodeRestrictor and
19 | PathSupport.PathRestrictor implementations.
20 |
21 | If you are interested in gathering statistics, you can instead
22 | instantiate or extend from StatsSupport.StatsHandler, which is
23 | again an event handler with hooks to record statistics on circuit
24 | creation, stream bandwidth, and circuit failure information.
25 | """
26 |
# Public submodules re-exported for "from TorCtl import *".
__all__ = ["TorUtil", "GeoIPSupport", "PathSupport", "TorCtl", "StatsSupport",
    "SQLSupport", "ScanSupport"]
29 |
--------------------------------------------------------------------------------
/pytorctl/example.py:
--------------------------------------------------------------------------------
1 | """
2 | The following is a simple example of TorCtl usage. This attaches a listener
3 | that prints the amount of traffic going over tor each second.
4 | """
5 |
6 | import time
7 | import TorCtl
8 |
class BandwidthListener(TorCtl.PostEventListener):
    """Event listener that prints each BW (bandwidth) event it receives."""
    def __init__(self):
        TorCtl.PostEventListener.__init__(self)

    def bandwidth_event(self, event):
        # Per the module docstring, BW events report the traffic going over
        # tor each second.
        print "tor read %i bytes and wrote %i bytes" % (event.read, event.written)
15 |
# Demo entry point: runs at import/exec time (no __main__ guard).
# constructs a listener that prints BW events
myListener = BandwidthListener()

# initiates a TorCtl connection, returning None if it was unsuccessful
conn = TorCtl.connect()

if conn:
    # tells tor to send us BW events
    conn.set_events(["BW"])

    # attaches the listener so it'll receive BW events
    conn.add_event_listener(myListener)

    # run until we get a keyboard interrupt
    try:
        while True:
            time.sleep(10)
    except KeyboardInterrupt: pass
34 |
35 |
--------------------------------------------------------------------------------
/requests/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # __
4 | # /__) _ _ _ _ _/ _
5 | # / ( (- (/ (/ (- _) / _)
6 | # /
7 |
8 | """
9 | requests HTTP library
10 | ~~~~~~~~~~~~~~~~~~~~~
11 |
12 | Requests is an HTTP library, written in Python, for human beings. Basic GET
13 | usage:
14 |
15 | >>> import requests
16 | >>> r = requests.get('https://www.python.org')
17 | >>> r.status_code
18 | 200
19 | >>> 'Python is a programming language' in r.content
20 | True
21 |
22 | ... or POST:
23 |
24 | >>> payload = dict(key1='value1', key2='value2')
25 | >>> r = requests.post('http://httpbin.org/post', data=payload)
26 | >>> print(r.text)
27 | {
28 | ...
29 | "form": {
30 | "key2": "value2",
31 | "key1": "value1"
32 | },
33 | ...
34 | }
35 |
36 | The other HTTP methods are supported - see `requests.api`. Full documentation
37 | is at <http://python-requests.org>.
38 |
39 | :copyright: (c) 2014 by Kenneth Reitz.
40 | :license: Apache 2.0, see LICENSE for more details.
41 |
42 | """
43 |
44 | __title__ = 'requests'
45 | __version__ = '2.5.1'
46 | __build__ = 0x020501
47 | __author__ = 'Kenneth Reitz'
48 | __license__ = 'Apache 2.0'
49 | __copyright__ = 'Copyright 2014 Kenneth Reitz'
50 |
51 | # Attempt to enable urllib3's SNI support, if possible
52 | try:
53 | from .packages.urllib3.contrib import pyopenssl
54 | pyopenssl.inject_into_urllib3()
55 | except ImportError:
56 | pass
57 |
58 | from . import utils
59 | from .models import Request, Response, PreparedRequest
60 | from .api import request, get, head, post, patch, put, delete, options
61 | from .sessions import session, Session
62 | from .status_codes import codes
63 | from .exceptions import (
64 | RequestException, Timeout, URLRequired,
65 | TooManyRedirects, HTTPError, ConnectionError
66 | )
67 |
68 | # Set default logging handler to avoid "No handler found" warnings.
69 | import logging
70 | try: # Python 2.7+
71 | from logging import NullHandler
72 | except ImportError:
73 | class NullHandler(logging.Handler):
74 | def emit(self, record):
75 | pass
76 |
77 | logging.getLogger(__name__).addHandler(NullHandler())
78 |
--------------------------------------------------------------------------------
/requests/api.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | """
4 | requests.api
5 | ~~~~~~~~~~~~
6 |
7 | This module implements the Requests API.
8 |
9 | :copyright: (c) 2012 by Kenneth Reitz.
10 | :license: Apache2, see LICENSE for more details.
11 |
12 | """
13 |
14 | from . import sessions
15 |
16 |
def request(method, url, **kwargs):
    """Constructs and sends a :class:`Request`.
    Returns :class:`Response` object.

    :param method: method for the new :class:`Request` object.
    :param url: URL for the new :class:`Request` object.
    :param params: (optional) Dictionary or bytes to be sent in the query string for the :class:`Request`.
    :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`.
    :param json: (optional) json data to send in the body of the :class:`Request`.
    :param headers: (optional) Dictionary of HTTP Headers to send with the :class:`Request`.
    :param cookies: (optional) Dict or CookieJar object to send with the :class:`Request`.
    :param files: (optional) Dictionary of ``'name': file-like-objects`` (or ``{'name': ('filename', fileobj)}``) for multipart encoding upload.
    :param auth: (optional) Auth tuple to enable Basic/Digest/Custom HTTP Auth.
    :param timeout: (optional) How long to wait for the server to send data
        before giving up, as a float, or a (connect timeout, read timeout) tuple.
    :type timeout: float or tuple
    :param allow_redirects: (optional) Boolean. Set to True if POST/PUT/DELETE redirect following is allowed.
    :type allow_redirects: bool
    :param proxies: (optional) Dictionary mapping protocol to the URL of the proxy.
    :param verify: (optional) if ``True``, the SSL cert will be verified. A CA_BUNDLE path can also be provided.
    :param stream: (optional) if ``False``, the response content will be immediately downloaded.
    :param cert: (optional) if String, path to ssl client cert file (.pem). If Tuple, ('cert', 'key') pair.

    Usage::

      >>> import requests
      >>> req = requests.request('GET', 'http://httpbin.org/get')

    """

    session = sessions.Session()
    try:
        return session.request(method=method, url=url, **kwargs)
    finally:
        # Close the session even when the request raises, so sockets are
        # never leaked.  Explicit close also avoids ResourceWarnings and
        # what looks like a memory leak on the success path.
        session.close()
55 |
56 |
def get(url, **kwargs):
    """Send a GET request and return the :class:`Response` object.

    :param url: URL for the new :class:`Request` object.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    """

    # Redirect following is on by default for GET.
    if 'allow_redirects' not in kwargs:
        kwargs['allow_redirects'] = True
    return request('get', url, **kwargs)
66 |
67 |
def options(url, **kwargs):
    """Send an OPTIONS request and return the :class:`Response` object.

    :param url: URL for the new :class:`Request` object.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    """

    # Redirect following is on by default for OPTIONS.
    if 'allow_redirects' not in kwargs:
        kwargs['allow_redirects'] = True
    return request('options', url, **kwargs)
77 |
78 |
def head(url, **kwargs):
    """Send a HEAD request and return the :class:`Response` object.

    :param url: URL for the new :class:`Request` object.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    """

    # Unlike the other verbs, HEAD does not follow redirects by default.
    if 'allow_redirects' not in kwargs:
        kwargs['allow_redirects'] = False
    return request('head', url, **kwargs)
88 |
89 |
def post(url, data=None, json=None, **kwargs):
    """Send a POST request and return the :class:`Response` object.

    :param url: URL for the new :class:`Request` object.
    :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`.
    :param json: (optional) json data to send in the body of the :class:`Request`.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    """

    response = request('post', url, data=data, json=json, **kwargs)
    return response
100 |
101 |
def put(url, data=None, **kwargs):
    """Send a PUT request and return the :class:`Response` object.

    :param url: URL for the new :class:`Request` object.
    :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    """

    response = request('put', url, data=data, **kwargs)
    return response
111 |
112 |
def patch(url, data=None, json=None, **kwargs):
    """Sends a PATCH request. Returns :class:`Response` object.

    :param url: URL for the new :class:`Request` object.
    :param data: (optional) Dictionary, bytes, or file-like object to send in the body of the :class:`Request`.
    :param json: (optional) json data to send in the body of the :class:`Request`.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    """

    # Accept ``json`` explicitly for parity with :func:`post`; previously
    # it could only reach ``request`` via ``**kwargs``. Backward compatible:
    # a caller passing json= as a keyword gets identical behavior.
    return request('patch', url, data=data, json=json, **kwargs)
122 |
123 |
def delete(url, **kwargs):
    """Sends a DELETE request. Returns :class:`Response` object.

    :param url: URL for the new :class:`Request` object.
    :param \*\*kwargs: Optional arguments that ``request`` takes.
    """

    # All options flow through ``kwargs`` to ``request``.
    return request('delete', url, **kwargs)
132 |
--------------------------------------------------------------------------------
/requests/auth.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | """
4 | requests.auth
5 | ~~~~~~~~~~~~~
6 |
7 | This module contains the authentication handlers for Requests.
8 | """
9 |
10 | import os
11 | import re
12 | import time
13 | import hashlib
14 |
15 | from base64 import b64encode
16 |
17 | from .compat import urlparse, str
18 | from .cookies import extract_cookies_to_jar
19 | from .utils import parse_dict_header, to_native_string
20 | from .status_codes import codes
21 |
22 | CONTENT_TYPE_FORM_URLENCODED = 'application/x-www-form-urlencoded'
23 | CONTENT_TYPE_MULTI_PART = 'multipart/form-data'
24 |
25 |
def _basic_auth_str(username, password):
    """Return the value for a Basic ``Authorization`` header."""

    # RFC 2617: base64 of "username:password", encoded as latin1.
    credentials = '%s:%s' % (username, password)
    token = b64encode(credentials.encode('latin1')).strip()
    return 'Basic ' + to_native_string(token)
34 |
35 |
class AuthBase(object):
    """Base class that all auth implementations derive from"""

    def __call__(self, r):
        # Subclasses implement __call__ to mutate the outgoing request
        # ``r`` (e.g. set an auth header) and return it.
        raise NotImplementedError('Auth hooks must be callable.')
41 |
42 |
class HTTPBasicAuth(AuthBase):
    """Attaches HTTP Basic Authentication to the given Request object."""

    def __init__(self, username, password):
        self.username = username
        self.password = password

    def __call__(self, r):
        # Recompute the header on each call so credential changes on the
        # instance are picked up.
        auth_value = _basic_auth_str(self.username, self.password)
        r.headers['Authorization'] = auth_value
        return r
52 |
53 |
class HTTPProxyAuth(HTTPBasicAuth):
    """Attaches HTTP Proxy Authentication to a given Request object."""

    def __call__(self, r):
        # Same credential encoding as basic auth, but aimed at the proxy.
        header_value = _basic_auth_str(self.username, self.password)
        r.headers['Proxy-Authorization'] = header_value
        return r
59 |
60 |
class HTTPDigestAuth(AuthBase):
    """Attaches HTTP Digest Authentication to the given Request object."""
    # NOTE(review): all of the state below is per-instance and mutated on
    # every challenge/response, so sharing one instance across threads
    # looks racy -- confirm before using concurrently.
    def __init__(self, username, password):
        self.username = username
        self.password = password
        # Nonce from the most recent challenge; a non-empty value lets
        # __call__ build an Authorization header pre-emptively.
        self.last_nonce = ''
        # How many requests have used the current nonce (the "nc" field).
        self.nonce_count = 0
        # Parsed www-authenticate challenge parameters.
        self.chal = {}
        # Saved file position of a seekable request body, used to rewind
        # before resending after a 401.
        self.pos = None
        self.num_401_calls = 1

    def build_digest_header(self, method, url):
        """Build the value of a ``Digest`` Authorization header from the
        stored challenge (``self.chal``), the request *method* and *url*.

        Returns ``None`` when the challenge's qop is unsupported.
        """

        realm = self.chal['realm']
        nonce = self.chal['nonce']
        qop = self.chal.get('qop')
        algorithm = self.chal.get('algorithm')
        opaque = self.chal.get('opaque')

        if algorithm is None:
            _algorithm = 'MD5'
        else:
            _algorithm = algorithm.upper()
        # lambdas assume digest modules are imported at the top level
        if _algorithm == 'MD5' or _algorithm == 'MD5-SESS':
            def md5_utf8(x):
                if isinstance(x, str):
                    x = x.encode('utf-8')
                return hashlib.md5(x).hexdigest()
            hash_utf8 = md5_utf8
        elif _algorithm == 'SHA':
            def sha_utf8(x):
                if isinstance(x, str):
                    x = x.encode('utf-8')
                return hashlib.sha1(x).hexdigest()
            hash_utf8 = sha_utf8

        KD = lambda s, d: hash_utf8("%s:%s" % (s, d))

        # NOTE(review): for an unrecognized algorithm, hash_utf8 is never
        # bound, so this check raises UnboundLocalError instead of
        # returning None -- confirm whether that is intended.
        if hash_utf8 is None:
            return None

        # XXX not implemented yet
        entdig = None
        p_parsed = urlparse(url)
        # digest-uri is path plus query string, without scheme/host.
        path = p_parsed.path
        if p_parsed.query:
            path += '?' + p_parsed.query

        # A1/A2 strings as defined by RFC 2617.
        A1 = '%s:%s:%s' % (self.username, realm, self.password)
        A2 = '%s:%s' % (method, path)

        HA1 = hash_utf8(A1)
        HA2 = hash_utf8(A2)

        # Track how many requests have used this server nonce.
        if nonce == self.last_nonce:
            self.nonce_count += 1
        else:
            self.nonce_count = 1
        ncvalue = '%08x' % self.nonce_count
        # Client nonce: hash of count + server nonce + time + random
        # bytes, truncated to 16 hex characters.
        s = str(self.nonce_count).encode('utf-8')
        s += nonce.encode('utf-8')
        s += time.ctime().encode('utf-8')
        s += os.urandom(8)

        cnonce = (hashlib.sha1(s).hexdigest()[:16])
        noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, HA2)
        if _algorithm == 'MD5-SESS':
            HA1 = hash_utf8('%s:%s:%s' % (HA1, nonce, cnonce))

        if qop is None:
            respdig = KD(HA1, "%s:%s" % (nonce, HA2))
        elif qop == 'auth' or 'auth' in qop.split(','):
            respdig = KD(HA1, noncebit)
        else:
            # XXX handle auth-int.
            return None

        self.last_nonce = nonce

        # XXX should the partial digests be encoded too?
        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (self.username, realm, nonce, path, respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if algorithm:
            base += ', algorithm="%s"' % algorithm
        if entdig:
            base += ', digest="%s"' % entdig
        if qop:
            base += ', qop="auth", nc=%s, cnonce="%s"' % (ncvalue, cnonce)

        return 'Digest %s' % (base)

    def handle_redirect(self, r, **kwargs):
        """Reset num_401_calls counter on redirects."""
        if r.is_redirect:
            self.num_401_calls = 1

    def handle_401(self, r, **kwargs):
        """Takes the given response and tries digest-auth, if needed."""

        if self.pos is not None:
            # Rewind the file position indicator of the body to where
            # it was to resend the request.
            r.request.body.seek(self.pos)
        num_401_calls = getattr(self, 'num_401_calls', 1)
        s_auth = r.headers.get('www-authenticate', '')

        # Only retry once per challenge, to avoid an infinite 401 loop
        # when the credentials are rejected.
        if 'digest' in s_auth.lower() and num_401_calls < 2:

            self.num_401_calls += 1
            # Strip the leading "Digest " token and parse the remaining
            # comma-separated challenge parameters into a dict.
            pat = re.compile(r'digest ', flags=re.IGNORECASE)
            self.chal = parse_dict_header(pat.sub('', s_auth, count=1))

            # Consume content and release the original connection
            # to allow our new request to reuse the same one.
            r.content
            r.raw.release_conn()
            prep = r.request.copy()
            extract_cookies_to_jar(prep._cookies, r.request, r.raw)
            prep.prepare_cookies(prep._cookies)

            prep.headers['Authorization'] = self.build_digest_header(
                prep.method, prep.url)
            _r = r.connection.send(prep, **kwargs)
            # Record the original 401 in the retried request's history.
            _r.history.append(r)
            _r.request = prep

            return _r

        self.num_401_calls = 1
        return r

    def __call__(self, r):
        # If we have a saved nonce, skip the 401
        if self.last_nonce:
            r.headers['Authorization'] = self.build_digest_header(r.method, r.url)
        try:
            self.pos = r.body.tell()
        except AttributeError:
            # In the case of HTTPDigestAuth being reused and the body of
            # the previous request was a file-like object, pos has the
            # file position of the previous body. Ensure it's set to
            # None.
            self.pos = None
        r.register_hook('response', self.handle_401)
        r.register_hook('response', self.handle_redirect)
        return r
210 |
--------------------------------------------------------------------------------
/requests/certs.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | certs.py
6 | ~~~~~~~~
7 |
8 | This module returns the preferred default CA certificate bundle.
9 |
10 | If you are packaging Requests, e.g., for a Linux distribution or a managed
11 | environment, you can change the definition of where() to return a separately
12 | packaged CA bundle.
13 | """
14 | import os.path
15 |
try:
    # Prefer certifi's bundle when the package is installed.
    from certifi import where
except ImportError:
    def where():
        """Return the preferred certificate bundle."""
        # vendored bundle inside Requests
        here = os.path.dirname(__file__)
        return os.path.join(here, 'cacert.pem')

if __name__ == '__main__':
    print(where())
26 |
--------------------------------------------------------------------------------
/requests/compat.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | """
4 | pythoncompat
5 | """
6 |
7 | from .packages import chardet
8 |
9 | import sys
10 |
11 | # -------
12 | # Pythons
13 | # -------
14 |
15 | # Syntax sugar.
16 | _ver = sys.version_info
17 |
18 | #: Python 2.x?
19 | is_py2 = (_ver[0] == 2)
20 |
21 | #: Python 3.x?
22 | is_py3 = (_ver[0] == 3)
23 |
24 | #: Python 3.0.x
25 | is_py30 = (is_py3 and _ver[1] == 0)
26 |
27 | #: Python 3.1.x
28 | is_py31 = (is_py3 and _ver[1] == 1)
29 |
30 | #: Python 3.2.x
31 | is_py32 = (is_py3 and _ver[1] == 2)
32 |
33 | #: Python 3.3.x
34 | is_py33 = (is_py3 and _ver[1] == 3)
35 |
36 | #: Python 3.4.x
37 | is_py34 = (is_py3 and _ver[1] == 4)
38 |
39 | #: Python 2.7.x
40 | is_py27 = (is_py2 and _ver[1] == 7)
41 |
42 | #: Python 2.6.x
43 | is_py26 = (is_py2 and _ver[1] == 6)
44 |
45 | #: Python 2.5.x
46 | is_py25 = (is_py2 and _ver[1] == 5)
47 |
48 | #: Python 2.4.x
49 | is_py24 = (is_py2 and _ver[1] == 4) # I'm assuming this is not by choice.
50 |
51 |
52 | # ---------
53 | # Platforms
54 | # ---------
55 |
56 |
57 | # Syntax sugar.
58 | _ver = sys.version.lower()
59 |
60 | is_pypy = ('pypy' in _ver)
61 | is_jython = ('jython' in _ver)
62 | is_ironpython = ('iron' in _ver)
63 |
64 | # Assume CPython, if nothing else.
65 | is_cpython = not any((is_pypy, is_jython, is_ironpython))
66 |
67 | # Windows-based system.
68 | is_windows = 'win32' in str(sys.platform).lower()
69 |
70 | # Standard Linux 2+ system.
71 | is_linux = ('linux' in str(sys.platform).lower())
72 | is_osx = ('darwin' in str(sys.platform).lower())
73 | is_hpux = ('hpux' in str(sys.platform).lower()) # Complete guess.
74 | is_solaris = ('solar==' in str(sys.platform).lower()) # Complete guess.
75 |
# Prefer the third-party ``simplejson`` package when it is importable;
# fall back to the stdlib ``json`` module otherwise.
try:
    import simplejson as json
except (ImportError, SyntaxError):
    # simplejson does not support Python 3.2, it throws a SyntaxError
    # because of u'...' Unicode literals.
    import json
82 |
83 | # ---------
84 | # Specifics
85 | # ---------
86 |
if is_py2:
    # Python 2: pull URL/cookie helpers from their 2.x stdlib homes.
    from urllib import quote, unquote, quote_plus, unquote_plus, urlencode, getproxies, proxy_bypass
    from urlparse import urlparse, urlunparse, urljoin, urlsplit, urldefrag
    from urllib2 import parse_http_list
    import cookielib
    from Cookie import Morsel
    from StringIO import StringIO
    from .packages.urllib3.packages.ordered_dict import OrderedDict

    # Normalise string/number types so the rest of the package can use
    # ``str`` for text, ``bytes`` for binary data and ``builtin_str`` for
    # the interpreter's native string type on both major versions.
    builtin_str = str
    bytes = str
    str = unicode
    basestring = basestring
    numeric_types = (int, long, float)


elif is_py3:
    # Python 3: the same helpers now live in urllib.parse/request,
    # http.cookiejar, http.cookies, io and collections.
    from urllib.parse import urlparse, urlunparse, urljoin, urlsplit, urlencode, quote, unquote, quote_plus, unquote_plus, urldefrag
    from urllib.request import parse_http_list, getproxies, proxy_bypass
    from http import cookiejar as cookielib
    from http.cookies import Morsel
    from io import StringIO
    from collections import OrderedDict

    builtin_str = str
    str = str
    bytes = bytes
    basestring = (str, bytes)
    numeric_types = (int, float)
116 |
--------------------------------------------------------------------------------
/requests/exceptions.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | """
4 | requests.exceptions
5 | ~~~~~~~~~~~~~~~~~~~
6 |
7 | This module contains the set of Requests' exceptions.
8 |
9 | """
10 | from .packages.urllib3.exceptions import HTTPError as BaseHTTPError
11 |
12 |
class RequestException(IOError):
    """There was an ambiguous exception that occurred while handling your
    request."""

    def __init__(self, *args, **kwargs):
        """
        Initialize RequestException with `request` and `response` objects.
        """
        self.response = kwargs.pop('response', None)
        self.request = kwargs.pop('request', None)
        # Fall back to the request attached to the response when the
        # caller did not pass a request explicitly.
        no_explicit_request = not self.request
        if (self.response is not None and no_explicit_request and
                hasattr(self.response, 'request')):
            self.request = self.response.request
        super(RequestException, self).__init__(*args, **kwargs)
28 |
29 |
class HTTPError(RequestException):
    """An HTTP error occurred."""


class ConnectionError(RequestException):
    """A Connection error occurred.

    Note: this shadows the builtin ``ConnectionError`` within this
    module's namespace.
    """


class ProxyError(ConnectionError):
    """A proxy error occurred."""


class SSLError(ConnectionError):
    """An SSL error occurred."""


class Timeout(RequestException):
    """The request timed out.

    Catching this error will catch both
    :exc:`~requests.exceptions.ConnectTimeout` and
    :exc:`~requests.exceptions.ReadTimeout` errors.
    """


class ConnectTimeout(ConnectionError, Timeout):
    """The request timed out while trying to connect to the remote server.

    Requests that produced this error are safe to retry.
    """


class ReadTimeout(Timeout):
    """The server did not send any data in the allotted amount of time."""


class URLRequired(RequestException):
    """A valid URL is required to make a request."""


class TooManyRedirects(RequestException):
    """Too many redirects."""


class MissingSchema(RequestException, ValueError):
    """The URL schema (e.g. http or https) is missing."""


class InvalidSchema(RequestException, ValueError):
    """The URL schema provided is invalid or unsupported.

    (The old docstring pointed at defaults.py, which no longer exists
    in this tree.)
    """


class InvalidURL(RequestException, ValueError):
    """ The URL provided was somehow invalid. """


class ChunkedEncodingError(RequestException):
    """The server declared chunked encoding but sent an invalid chunk."""


class ContentDecodingError(RequestException, BaseHTTPError):
    """Failed to decode response content"""


class StreamConsumedError(RequestException, TypeError):
    """The content for this response was already consumed"""


class RetryError(RequestException):
    """Custom retries logic failed"""
100 |
--------------------------------------------------------------------------------
/requests/hooks.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | """
4 | requests.hooks
5 | ~~~~~~~~~~~~~~
6 |
7 | This module provides the capabilities for the Requests hooks system.
8 |
9 | Available hooks:
10 |
11 | ``response``:
12 | The response generated from a Request.
13 |
14 | """
15 |
16 |
HOOKS = ['response']


def default_hooks():
    """Return a fresh hooks mapping with an empty list per hook event."""
    return dict((event, []) for event in HOOKS)
25 |
26 | # TODO: response is the only one
27 |
28 |
def dispatch_hook(key, hooks, hook_data, **kwargs):
    """Dispatches a hook dictionary on a given piece of data.

    :param key: the hook event name (e.g. ``'response'``).
    :param hooks: mapping of event name to a callable or list of callables,
        or a falsy value for no hooks.
    :param hook_data: the object passed through each hook.
    :return: ``hook_data``, possibly replaced by hook return values.
    """

    hooks = hooks or dict()
    # Look up only the hooks registered for *key*. Previously a missing
    # key left ``hooks`` bound to the whole dict, so the loop below would
    # iterate the dict's keys and try to call them as hooks.
    hooks = hooks.get(key, [])

    # A single bare callable is shorthand for a one-item list.
    if hasattr(hooks, '__call__'):
        hooks = [hooks]

    for hook in hooks:
        _hook_data = hook(hook_data, **kwargs)
        if _hook_data is not None:
            # A hook may replace the data by returning a new object.
            hook_data = _hook_data

    return hook_data
46 |
--------------------------------------------------------------------------------
/requests/packages/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 |
3 | from . import urllib3
4 |
--------------------------------------------------------------------------------
/requests/packages/chardet/__init__.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # This library is free software; you can redistribute it and/or
3 | # modify it under the terms of the GNU Lesser General Public
4 | # License as published by the Free Software Foundation; either
5 | # version 2.1 of the License, or (at your option) any later version.
6 | #
7 | # This library is distributed in the hope that it will be useful,
8 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
9 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10 | # Lesser General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU Lesser General Public
13 | # License along with this library; if not, write to the Free Software
14 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
15 | # 02110-1301 USA
16 | ######################### END LICENSE BLOCK #########################
17 |
18 | __version__ = "2.3.0"
19 | from sys import version_info
20 |
21 |
def detect(aBuf):
    """Run the universal detector over *aBuf* and return its result dict."""
    # The detector works on raw bytes only; reject text objects up front.
    is_py2 = version_info < (3, 0)
    if ((is_py2 and isinstance(aBuf, unicode)) or
            (not is_py2 and not isinstance(aBuf, bytes))):
        raise ValueError('Expected a bytes object, not a unicode object')

    from . import universaldetector
    detector = universaldetector.UniversalDetector()
    detector.reset()
    detector.feed(aBuf)
    detector.close()
    return detector.result
33 |
--------------------------------------------------------------------------------
/requests/packages/chardet/big5prober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is Mozilla Communicator client code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import Big5DistributionAnalysis
31 | from .mbcssm import Big5SMModel
32 |
33 |
class Big5Prober(MultiByteCharSetProber):
    """Multi-byte prober for the Big5 (Traditional Chinese) encoding.

    Pairs the Big5 coding state machine with the Big5 character
    distribution analyser.
    """
    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        self._mCodingSM = CodingStateMachine(Big5SMModel)
        self._mDistributionAnalyzer = Big5DistributionAnalysis()
        self.reset()

    def get_charset_name(self):
        # Canonical charset name reported for this prober.
        return "Big5"
43 |
--------------------------------------------------------------------------------
/requests/packages/chardet/chardetect.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """
3 | Script which takes one or more file paths and reports on their detected
4 | encodings
5 |
6 | Example::
7 |
8 | % chardetect somefile someotherfile
9 | somefile: windows-1252 with confidence 0.5
10 | someotherfile: ascii with confidence 1.0
11 |
12 | If no paths are provided, it takes its input from stdin.
13 |
14 | """
15 |
16 | from __future__ import absolute_import, print_function, unicode_literals
17 |
18 | import argparse
19 | import sys
20 | from io import open
21 |
22 | from chardet import __version__
23 | from chardet.universaldetector import UniversalDetector
24 |
25 |
def description_of(lines, name='stdin'):
    """
    Return a string describing the probable encoding of a file or
    list of strings.

    :param lines: The lines to get the encoding of.
    :type lines: Iterable of bytes
    :param name: Name of file or collection of lines
    :type name: str
    """
    detector = UniversalDetector()
    for line in lines:
        detector.feed(line)
    detector.close()
    result = detector.result
    # No confident encoding found -> report "no result".
    if not result['encoding']:
        return '{0}: no result'.format(name)
    return '{0}: {1} with confidence {2}'.format(name, result['encoding'],
                                                 result['confidence'])
46 |
47 |
def main(argv=None):
    '''
    Handles command line arguments and gets things started.

    :param argv: List of arguments, as if specified on the command-line.
                 If None, ``sys.argv[1:]`` is used instead.
    :type argv: list of str
    '''
    # Get command line arguments
    parser = argparse.ArgumentParser(
        description="Takes one or more file paths and reports their detected \
                     encodings",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        conflict_handler='resolve')
    parser.add_argument('input',
                        help='File whose encoding we would like to determine.',
                        type=argparse.FileType('rb'), nargs='*',
                        default=[sys.stdin])
    parser.add_argument('--version', action='version',
                        version='%(prog)s {0}'.format(__version__))
    args = parser.parse_args(argv)

    for f in args.input:
        if f.isatty():
            # Reading from an interactive terminal: explain how to end input.
            print("You are running chardetect interactively. Press " +
                  "CTRL-D twice at the start of a blank line to signal the " +
                  "end of your input. If you want help, run chardetect " +
                  "--help\n", file=sys.stderr)
        print(description_of(f, f.name))
77 |
78 |
79 | if __name__ == '__main__':
80 | main()
81 |
--------------------------------------------------------------------------------
/requests/packages/chardet/charsetgroupprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is Mozilla Communicator client code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from . import constants
29 | import sys
30 | from .charsetprober import CharSetProber
31 |
32 |
class CharSetGroupProber(CharSetProber):
    """Composite prober: feeds data to a group of child probers and
    reports the state/confidence of the best candidate among them."""
    def __init__(self):
        CharSetProber.__init__(self)
        # Number of child probers still in the running.
        self._mActiveNum = 0
        # Child probers; populated by subclasses.
        self._mProbers = []
        self._mBestGuessProber = None

    def reset(self):
        CharSetProber.reset(self)
        self._mActiveNum = 0
        for prober in self._mProbers:
            if prober:
                prober.reset()
                prober.active = True
                self._mActiveNum += 1
        self._mBestGuessProber = None

    def get_charset_name(self):
        # get_confidence() has the side effect of electing the best
        # child prober; run it if no election has happened yet.
        if not self._mBestGuessProber:
            self.get_confidence()
            if not self._mBestGuessProber:
                return None
            # self._mBestGuessProber = self._mProbers[0]
        return self._mBestGuessProber.get_charset_name()

    def feed(self, aBuf):
        for prober in self._mProbers:
            if not prober:
                continue
            if not prober.active:
                continue
            st = prober.feed(aBuf)
            if not st:
                continue
            if st == constants.eFoundIt:
                # A child is certain: remember it and stop searching.
                self._mBestGuessProber = prober
                return self.get_state()
            elif st == constants.eNotMe:
                # Child ruled itself out; drop it from the active set.
                prober.active = False
                self._mActiveNum -= 1
                if self._mActiveNum <= 0:
                    # Every child has given up, so the group gives up too.
                    self._mState = constants.eNotMe
                    return self.get_state()
        return self.get_state()

    def get_confidence(self):
        st = self.get_state()
        if st == constants.eFoundIt:
            return 0.99
        elif st == constants.eNotMe:
            return 0.01
        # Otherwise report the best child confidence, remembering which
        # child produced it.
        bestConf = 0.0
        self._mBestGuessProber = None
        for prober in self._mProbers:
            if not prober:
                continue
            if not prober.active:
                if constants._debug:
                    sys.stderr.write(prober.get_charset_name()
                                     + ' not active\n')
                continue
            cf = prober.get_confidence()
            if constants._debug:
                sys.stderr.write('%s confidence = %s\n' %
                                 (prober.get_charset_name(), cf))
            if bestConf < cf:
                bestConf = cf
                self._mBestGuessProber = prober
        if not self._mBestGuessProber:
            return 0.0
        return bestConf
        # else:
        #     self._mBestGuessProber = self._mProbers[0]
        #     return self._mBestGuessProber.get_confidence()
107 |
--------------------------------------------------------------------------------
/requests/packages/chardet/charsetprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is Mozilla Universal charset detector code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 2001
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | # Shy Shalom - original C code
12 | #
13 | # This library is free software; you can redistribute it and/or
14 | # modify it under the terms of the GNU Lesser General Public
15 | # License as published by the Free Software Foundation; either
16 | # version 2.1 of the License, or (at your option) any later version.
17 | #
18 | # This library is distributed in the hope that it will be useful,
19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 | # Lesser General Public License for more details.
22 | #
23 | # You should have received a copy of the GNU Lesser General Public
24 | # License along with this library; if not, write to the Free Software
25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26 | # 02110-1301 USA
27 | ######################### END LICENSE BLOCK #########################
28 |
29 | from . import constants
30 | import re
31 |
32 |
class CharSetProber:
    """Interface shared by all charset probers.

    Subclasses are fed byte buffers, track a detection state and report
    a confidence score between 0.0 and 1.0.
    """

    def __init__(self):
        pass

    def reset(self):
        # Back to the initial "still detecting" state.
        self._mState = constants.eDetecting

    def get_charset_name(self):
        return None

    def feed(self, aBuf):
        pass

    def get_state(self):
        return self._mState

    def get_confidence(self):
        return 0.0

    def filter_high_bit_only(self, aBuf):
        # Collapse each run of ASCII bytes into a single space, keeping
        # only high-bit bytes for analysis.
        return re.sub(b'([\x00-\x7F])+', b' ', aBuf)

    def filter_without_english_letters(self, aBuf):
        # Collapse each run of ASCII letters into a single space.
        return re.sub(b'([A-Za-z])+', b' ', aBuf)

    def filter_with_english_letters(self, aBuf):
        # TODO
        return aBuf
63 |
--------------------------------------------------------------------------------
/requests/packages/chardet/codingstatemachine.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from .constants import eStart
29 | from .compat import wrap_ord
30 |
31 |
class CodingStateMachine:
    """Drives a byte-class state machine described by an ``sm`` model
    dict (classTable, classFactor, stateTable, charLenTable, name)."""

    def __init__(self, sm):
        self._mModel = sm
        self._mCurrentBytePos = 0
        self._mCurrentCharLen = 0
        self.reset()

    def reset(self):
        self._mCurrentState = eStart

    def next_state(self, c):
        # for each byte we get its class
        # if it is first byte, we also get byte length
        # PY3K: aBuf is a byte stream, so c is an int, not a byte
        model = self._mModel
        byteCls = model['classTable'][wrap_ord(c)]
        if self._mCurrentState == eStart:
            self._mCurrentBytePos = 0
            self._mCurrentCharLen = model['charLenTable'][byteCls]
        # from byte's class and stateTable, we get its next state
        next_index = self._mCurrentState * model['classFactor'] + byteCls
        self._mCurrentState = model['stateTable'][next_index]
        self._mCurrentBytePos += 1
        return self._mCurrentState

    def get_current_charlen(self):
        return self._mCurrentCharLen

    def get_coding_state_machine(self):
        return self._mModel['name']
62 |
--------------------------------------------------------------------------------
/requests/packages/chardet/compat.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # Contributor(s):
3 | # Ian Cordasco - port to Python
4 | #
5 | # This library is free software; you can redistribute it and/or
6 | # modify it under the terms of the GNU Lesser General Public
7 | # License as published by the Free Software Foundation; either
8 | # version 2.1 of the License, or (at your option) any later version.
9 | #
10 | # This library is distributed in the hope that it will be useful,
11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 | # Lesser General Public License for more details.
14 | #
15 | # You should have received a copy of the GNU Lesser General Public
16 | # License along with this library; if not, write to the Free Software
17 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
18 | # 02110-1301 USA
19 | ######################### END LICENSE BLOCK #########################
20 |
21 | import sys
22 |
23 |
if sys.version_info >= (3, 0):
    base_str = (bytes, str)
else:
    base_str = (str, unicode)


def wrap_ord(a):
    """Return the numeric byte value of *a* in a Python-2/3 portable way.

    On Python 2, iterating a byte string yields length-1 strings, so the
    character is converted with ord(); on Python 3 (where iterating bytes
    already yields ints) the value is returned untouched.
    """
    if sys.version_info < (3, 0) and isinstance(a, base_str):
        return ord(a)
    return a
35 |
--------------------------------------------------------------------------------
/requests/packages/chardet/constants.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is Mozilla Universal charset detector code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 2001
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | # Shy Shalom - original C code
12 | #
13 | # This library is free software; you can redistribute it and/or
14 | # modify it under the terms of the GNU Lesser General Public
15 | # License as published by the Free Software Foundation; either
16 | # version 2.1 of the License, or (at your option) any later version.
17 | #
18 | # This library is distributed in the hope that it will be useful,
19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 | # Lesser General Public License for more details.
22 | #
23 | # You should have received a copy of the GNU Lesser General Public
24 | # License along with this library; if not, write to the Free Software
25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26 | # 02110-1301 USA
27 | ######################### END LICENSE BLOCK #########################
28 |
# Set non-zero to make probers write diagnostics to stderr.
_debug = 0

# Overall prober/detector result states.
eDetecting = 0
eFoundIt = 1
eNotMe = 2

# Per-byte coding state machine states.
eStart = 0
eError = 1
eItsMe = 2

# A prober whose confidence exceeds this threshold may end detection early.
SHORTCUT_THRESHOLD = 0.95
40 |
--------------------------------------------------------------------------------
/requests/packages/chardet/cp949prober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import EUCKRDistributionAnalysis
31 | from .mbcssm import CP949SMModel
32 |
33 |
class CP949Prober(MultiByteCharSetProber):
    """Prober for the CP949 (Unified Hangul Code) Korean encoding."""

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        # CP949 is a superset of EUC-KR, so the EUC-KR character
        # distribution analysis applies unchanged.
        self._mDistributionAnalyzer = EUCKRDistributionAnalysis()
        self._mCodingSM = CodingStateMachine(CP949SMModel)
        self.reset()

    def get_charset_name(self):
        """Canonical name of the charset this prober detects."""
        return "CP949"
45 |
--------------------------------------------------------------------------------
/requests/packages/chardet/escprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from . import constants
29 | from .escsm import (HZSMModel, ISO2022CNSMModel, ISO2022JPSMModel,
30 | ISO2022KRSMModel)
31 | from .charsetprober import CharSetProber
32 | from .codingstatemachine import CodingStateMachine
33 | from .compat import wrap_ord
34 |
35 |
class EscCharSetProber(CharSetProber):
    """Detect escape-based encodings (HZ and the ISO-2022 family).

    Runs several coding state machines in parallel over the input; the
    first machine to reach eItsMe decides the charset, and the prober
    gives up once every machine has rejected the stream.
    """

    def __init__(self):
        CharSetProber.__init__(self)
        self._mCodingSM = [
            CodingStateMachine(HZSMModel),
            CodingStateMachine(ISO2022CNSMModel),
            CodingStateMachine(ISO2022JPSMModel),
            CodingStateMachine(ISO2022KRSMModel)
        ]
        self.reset()

    def reset(self):
        """Reactivate every state machine and forget any prior verdict."""
        CharSetProber.reset(self)
        for machine in self._mCodingSM:
            if machine:
                machine.active = True
                machine.reset()
        self._mActiveSM = len(self._mCodingSM)
        self._mDetectedCharset = None

    def get_charset_name(self):
        return self._mDetectedCharset

    def get_confidence(self):
        # An escape-sequence match is treated as nearly conclusive.
        return 0.99 if self._mDetectedCharset else 0.00

    def feed(self, aBuf):
        """Feed bytes to every live machine; stop on a verdict."""
        for c in aBuf:
            # PY3K: aBuf is a byte array, so c is an int, not a byte
            for machine in self._mCodingSM:
                if not machine or not machine.active:
                    continue
                state = machine.next_state(wrap_ord(c))
                if state == constants.eError:
                    # This machine rejects the input; retire it.
                    machine.active = False
                    self._mActiveSM -= 1
                    if self._mActiveSM <= 0:
                        self._mState = constants.eNotMe
                        return self.get_state()
                elif state == constants.eItsMe:
                    self._mState = constants.eFoundIt
                    self._mDetectedCharset = machine.get_coding_state_machine()  # nopep8
                    return self.get_state()

        return self.get_state()
87 |
--------------------------------------------------------------------------------
/requests/packages/chardet/escsm.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from .constants import eStart, eError, eItsMe
29 |
# Each *_cls table maps every byte value (0-255) to a character class for
# one escape-based encoding; each *_st table is the matching state
# transition matrix, flattened row-major with 'classFactor' columns, over
# the eStart/eError/eItsMe states plus numbered intermediate states.

# HZ-GB-2312 (RFC 1843 "HZ" encoding) tables.
HZ_cls = (
1,0,0,0,0,0,0,0,  # 00 - 07
0,0,0,0,0,0,0,0,  # 08 - 0f
0,0,0,0,0,0,0,0,  # 10 - 17
0,0,0,1,0,0,0,0,  # 18 - 1f
0,0,0,0,0,0,0,0,  # 20 - 27
0,0,0,0,0,0,0,0,  # 28 - 2f
0,0,0,0,0,0,0,0,  # 30 - 37
0,0,0,0,0,0,0,0,  # 38 - 3f
0,0,0,0,0,0,0,0,  # 40 - 47
0,0,0,0,0,0,0,0,  # 48 - 4f
0,0,0,0,0,0,0,0,  # 50 - 57
0,0,0,0,0,0,0,0,  # 58 - 5f
0,0,0,0,0,0,0,0,  # 60 - 67
0,0,0,0,0,0,0,0,  # 68 - 6f
0,0,0,0,0,0,0,0,  # 70 - 77
0,0,0,4,0,5,2,0,  # 78 - 7f
1,1,1,1,1,1,1,1,  # 80 - 87
1,1,1,1,1,1,1,1,  # 88 - 8f
1,1,1,1,1,1,1,1,  # 90 - 97
1,1,1,1,1,1,1,1,  # 98 - 9f
1,1,1,1,1,1,1,1,  # a0 - a7
1,1,1,1,1,1,1,1,  # a8 - af
1,1,1,1,1,1,1,1,  # b0 - b7
1,1,1,1,1,1,1,1,  # b8 - bf
1,1,1,1,1,1,1,1,  # c0 - c7
1,1,1,1,1,1,1,1,  # c8 - cf
1,1,1,1,1,1,1,1,  # d0 - d7
1,1,1,1,1,1,1,1,  # d8 - df
1,1,1,1,1,1,1,1,  # e0 - e7
1,1,1,1,1,1,1,1,  # e8 - ef
1,1,1,1,1,1,1,1,  # f0 - f7
1,1,1,1,1,1,1,1,  # f8 - ff
)

HZ_st = (
eStart,eError,     3,eStart,eStart,eStart,eError,eError,# 00-07
eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f
eItsMe,eItsMe,eError,eError,eStart,eStart,     4,eError,# 10-17
     5,eError,     6,eError,     5,     5,     4,eError,# 18-1f
     4,eError,     4,     4,     4,eError,     4,eError,# 20-27
     4,eItsMe,eStart,eStart,eStart,eStart,eStart,eStart,# 28-2f
)

HZCharLenTable = (0, 0, 0, 0, 0, 0)

HZSMModel = {'classTable': HZ_cls,
             'classFactor': 6,
             'stateTable': HZ_st,
             'charLenTable': HZCharLenTable,
             'name': "HZ-GB-2312"}

# ISO-2022-CN tables.
ISO2022CN_cls = (
2,0,0,0,0,0,0,0,  # 00 - 07
0,0,0,0,0,0,0,0,  # 08 - 0f
0,0,0,0,0,0,0,0,  # 10 - 17
0,0,0,1,0,0,0,0,  # 18 - 1f
0,0,0,0,0,0,0,0,  # 20 - 27
0,3,0,0,0,0,0,0,  # 28 - 2f
0,0,0,0,0,0,0,0,  # 30 - 37
0,0,0,0,0,0,0,0,  # 38 - 3f
0,0,0,4,0,0,0,0,  # 40 - 47
0,0,0,0,0,0,0,0,  # 48 - 4f
0,0,0,0,0,0,0,0,  # 50 - 57
0,0,0,0,0,0,0,0,  # 58 - 5f
0,0,0,0,0,0,0,0,  # 60 - 67
0,0,0,0,0,0,0,0,  # 68 - 6f
0,0,0,0,0,0,0,0,  # 70 - 77
0,0,0,0,0,0,0,0,  # 78 - 7f
2,2,2,2,2,2,2,2,  # 80 - 87
2,2,2,2,2,2,2,2,  # 88 - 8f
2,2,2,2,2,2,2,2,  # 90 - 97
2,2,2,2,2,2,2,2,  # 98 - 9f
2,2,2,2,2,2,2,2,  # a0 - a7
2,2,2,2,2,2,2,2,  # a8 - af
2,2,2,2,2,2,2,2,  # b0 - b7
2,2,2,2,2,2,2,2,  # b8 - bf
2,2,2,2,2,2,2,2,  # c0 - c7
2,2,2,2,2,2,2,2,  # c8 - cf
2,2,2,2,2,2,2,2,  # d0 - d7
2,2,2,2,2,2,2,2,  # d8 - df
2,2,2,2,2,2,2,2,  # e0 - e7
2,2,2,2,2,2,2,2,  # e8 - ef
2,2,2,2,2,2,2,2,  # f0 - f7
2,2,2,2,2,2,2,2,  # f8 - ff
)

ISO2022CN_st = (
eStart,     3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07
eStart,eError,eError,eError,eError,eError,eError,eError,# 08-0f
eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17
eItsMe,eItsMe,eItsMe,eError,eError,eError,     4,eError,# 18-1f
eError,eError,eError,eItsMe,eError,eError,eError,eError,# 20-27
     5,     6,eError,eError,eError,eError,eError,eError,# 28-2f
eError,eError,eError,eItsMe,eError,eError,eError,eError,# 30-37
eError,eError,eError,eError,eError,eItsMe,eError,eStart,# 38-3f
)

ISO2022CNCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0)

ISO2022CNSMModel = {'classTable': ISO2022CN_cls,
                    'classFactor': 9,
                    'stateTable': ISO2022CN_st,
                    'charLenTable': ISO2022CNCharLenTable,
                    'name': "ISO-2022-CN"}

# ISO-2022-JP tables.
ISO2022JP_cls = (
2,0,0,0,0,0,0,0,  # 00 - 07
0,0,0,0,0,0,2,2,  # 08 - 0f
0,0,0,0,0,0,0,0,  # 10 - 17
0,0,0,1,0,0,0,0,  # 18 - 1f
0,0,0,0,7,0,0,0,  # 20 - 27
3,0,0,0,0,0,0,0,  # 28 - 2f
0,0,0,0,0,0,0,0,  # 30 - 37
0,0,0,0,0,0,0,0,  # 38 - 3f
6,0,4,0,8,0,0,0,  # 40 - 47
0,9,5,0,0,0,0,0,  # 48 - 4f
0,0,0,0,0,0,0,0,  # 50 - 57
0,0,0,0,0,0,0,0,  # 58 - 5f
0,0,0,0,0,0,0,0,  # 60 - 67
0,0,0,0,0,0,0,0,  # 68 - 6f
0,0,0,0,0,0,0,0,  # 70 - 77
0,0,0,0,0,0,0,0,  # 78 - 7f
2,2,2,2,2,2,2,2,  # 80 - 87
2,2,2,2,2,2,2,2,  # 88 - 8f
2,2,2,2,2,2,2,2,  # 90 - 97
2,2,2,2,2,2,2,2,  # 98 - 9f
2,2,2,2,2,2,2,2,  # a0 - a7
2,2,2,2,2,2,2,2,  # a8 - af
2,2,2,2,2,2,2,2,  # b0 - b7
2,2,2,2,2,2,2,2,  # b8 - bf
2,2,2,2,2,2,2,2,  # c0 - c7
2,2,2,2,2,2,2,2,  # c8 - cf
2,2,2,2,2,2,2,2,  # d0 - d7
2,2,2,2,2,2,2,2,  # d8 - df
2,2,2,2,2,2,2,2,  # e0 - e7
2,2,2,2,2,2,2,2,  # e8 - ef
2,2,2,2,2,2,2,2,  # f0 - f7
2,2,2,2,2,2,2,2,  # f8 - ff
)

ISO2022JP_st = (
eStart,     3,eError,eStart,eStart,eStart,eStart,eStart,# 00-07
eStart,eStart,eError,eError,eError,eError,eError,eError,# 08-0f
eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 10-17
eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eError,eError,# 18-1f
eError,     5,eError,eError,eError,     4,eError,eError,# 20-27
eError,eError,eError,     6,eItsMe,eError,eItsMe,eError,# 28-2f
eError,eError,eError,eError,eError,eError,eItsMe,eItsMe,# 30-37
eError,eError,eError,eItsMe,eError,eError,eError,eError,# 38-3f
eError,eError,eError,eError,eItsMe,eError,eStart,eStart,# 40-47
)

ISO2022JPCharLenTable = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)

ISO2022JPSMModel = {'classTable': ISO2022JP_cls,
                    'classFactor': 10,
                    'stateTable': ISO2022JP_st,
                    'charLenTable': ISO2022JPCharLenTable,
                    'name': "ISO-2022-JP"}

# ISO-2022-KR tables.
ISO2022KR_cls = (
2,0,0,0,0,0,0,0,  # 00 - 07
0,0,0,0,0,0,0,0,  # 08 - 0f
0,0,0,0,0,0,0,0,  # 10 - 17
0,0,0,1,0,0,0,0,  # 18 - 1f
0,0,0,0,3,0,0,0,  # 20 - 27
0,4,0,0,0,0,0,0,  # 28 - 2f
0,0,0,0,0,0,0,0,  # 30 - 37
0,0,0,0,0,0,0,0,  # 38 - 3f
0,0,0,5,0,0,0,0,  # 40 - 47
0,0,0,0,0,0,0,0,  # 48 - 4f
0,0,0,0,0,0,0,0,  # 50 - 57
0,0,0,0,0,0,0,0,  # 58 - 5f
0,0,0,0,0,0,0,0,  # 60 - 67
0,0,0,0,0,0,0,0,  # 68 - 6f
0,0,0,0,0,0,0,0,  # 70 - 77
0,0,0,0,0,0,0,0,  # 78 - 7f
2,2,2,2,2,2,2,2,  # 80 - 87
2,2,2,2,2,2,2,2,  # 88 - 8f
2,2,2,2,2,2,2,2,  # 90 - 97
2,2,2,2,2,2,2,2,  # 98 - 9f
2,2,2,2,2,2,2,2,  # a0 - a7
2,2,2,2,2,2,2,2,  # a8 - af
2,2,2,2,2,2,2,2,  # b0 - b7
2,2,2,2,2,2,2,2,  # b8 - bf
2,2,2,2,2,2,2,2,  # c0 - c7
2,2,2,2,2,2,2,2,  # c8 - cf
2,2,2,2,2,2,2,2,  # d0 - d7
2,2,2,2,2,2,2,2,  # d8 - df
2,2,2,2,2,2,2,2,  # e0 - e7
2,2,2,2,2,2,2,2,  # e8 - ef
2,2,2,2,2,2,2,2,  # f0 - f7
2,2,2,2,2,2,2,2,  # f8 - ff
)

ISO2022KR_st = (
eStart,     3,eError,eStart,eStart,eStart,eError,eError,# 00-07
eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,# 08-0f
eItsMe,eItsMe,eError,eError,eError,     4,eError,eError,# 10-17
eError,eError,eError,eError,     5,eError,eError,eError,# 18-1f
eError,eError,eError,eItsMe,eStart,eStart,eStart,eStart,# 20-27
)

ISO2022KRCharLenTable = (0, 0, 0, 0, 0, 0)

ISO2022KRSMModel = {'classTable': ISO2022KR_cls,
                    'classFactor': 6,
                    'stateTable': ISO2022KR_st,
                    'charLenTable': ISO2022KRCharLenTable,
                    'name': "ISO-2022-KR"}
241 |
242 | # flake8: noqa
243 |
--------------------------------------------------------------------------------
/requests/packages/chardet/eucjpprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | import sys
29 | from . import constants
30 | from .mbcharsetprober import MultiByteCharSetProber
31 | from .codingstatemachine import CodingStateMachine
32 | from .chardistribution import EUCJPDistributionAnalysis
33 | from .jpcntx import EUCJPContextAnalysis
34 | from .mbcssm import EUCJPSMModel
35 |
36 |
class EUCJPProber(MultiByteCharSetProber):
    """Prober for the EUC-JP (Japanese) encoding.

    Combines state-machine byte validation with both a character
    distribution analysis and a Japanese context analysis; the reported
    confidence is the higher of the two scores.
    """

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        self._mCodingSM = CodingStateMachine(EUCJPSMModel)
        self._mDistributionAnalyzer = EUCJPDistributionAnalysis()
        self._mContextAnalyzer = EUCJPContextAnalysis()
        self.reset()

    def reset(self):
        MultiByteCharSetProber.reset(self)
        self._mContextAnalyzer.reset()

    def get_charset_name(self):
        return "EUC-JP"

    def feed(self, aBuf):
        """Run aBuf through the state machine, feeding each completed
        character pair to the context and distribution analyzers."""
        aLen = len(aBuf)
        for i in range(0, aLen):
            # PY3K: aBuf is a byte array, so aBuf[i] is an int, not a byte
            codingState = self._mCodingSM.next_state(aBuf[i])
            if codingState == constants.eError:
                if constants._debug:
                    sys.stderr.write(self.get_charset_name()
                                     + ' prober hit error at byte ' + str(i)
                                     + '\n')
                self._mState = constants.eNotMe
                break
            elif codingState == constants.eItsMe:
                self._mState = constants.eFoundIt
                break
            elif codingState == constants.eStart:
                charLen = self._mCodingSM.get_current_charlen()
                if i == 0:
                    # The first byte completes a character begun in the
                    # previous feed() call; pair it with the saved byte.
                    self._mLastChar[1] = aBuf[0]
                    self._mContextAnalyzer.feed(self._mLastChar, charLen)
                    self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
                else:
                    self._mContextAnalyzer.feed(aBuf[i - 1:i + 1], charLen)
                    self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
                                                     charLen)

        # Remember the final byte in case a character straddles buffers.
        self._mLastChar[0] = aBuf[aLen - 1]

        if self.get_state() == constants.eDetecting:
            # Shortcut: stop early once the evidence is strong enough.
            if (self._mContextAnalyzer.got_enough_data() and
                    (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
                self._mState = constants.eFoundIt

        return self.get_state()

    def get_confidence(self):
        """Return the better of the context and distribution scores."""
        contxtCf = self._mContextAnalyzer.get_confidence()
        distribCf = self._mDistributionAnalyzer.get_confidence()
        return max(contxtCf, distribCf)
91 |
--------------------------------------------------------------------------------
/requests/packages/chardet/euckrprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import EUCKRDistributionAnalysis
31 | from .mbcssm import EUCKRSMModel
32 |
33 |
class EUCKRProber(MultiByteCharSetProber):
    """Prober for the EUC-KR (Korean) encoding."""

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        self._mDistributionAnalyzer = EUCKRDistributionAnalysis()
        self._mCodingSM = CodingStateMachine(EUCKRSMModel)
        self.reset()

    def get_charset_name(self):
        """Canonical name of the charset this prober detects."""
        return "EUC-KR"
43 |
--------------------------------------------------------------------------------
/requests/packages/chardet/euctwprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import EUCTWDistributionAnalysis
31 | from .mbcssm import EUCTWSMModel
32 |
class EUCTWProber(MultiByteCharSetProber):
    """Prober for the EUC-TW (Taiwanese) encoding."""

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        self._mDistributionAnalyzer = EUCTWDistributionAnalysis()
        self._mCodingSM = CodingStateMachine(EUCTWSMModel)
        self.reset()

    def get_charset_name(self):
        """Canonical name of the charset this prober detects."""
        return "EUC-TW"
42 |
--------------------------------------------------------------------------------
/requests/packages/chardet/gb2312prober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from .mbcharsetprober import MultiByteCharSetProber
29 | from .codingstatemachine import CodingStateMachine
30 | from .chardistribution import GB2312DistributionAnalysis
31 | from .mbcssm import GB2312SMModel
32 |
class GB2312Prober(MultiByteCharSetProber):
    """Prober for the GB2312 (Simplified Chinese) encoding."""

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        self._mDistributionAnalyzer = GB2312DistributionAnalysis()
        self._mCodingSM = CodingStateMachine(GB2312SMModel)
        self.reset()

    def get_charset_name(self):
        """Canonical name of the charset this prober detects."""
        return "GB2312"
42 |
--------------------------------------------------------------------------------
/requests/packages/chardet/latin1prober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is Mozilla Universal charset detector code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 2001
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | # Shy Shalom - original C code
12 | #
13 | # This library is free software; you can redistribute it and/or
14 | # modify it under the terms of the GNU Lesser General Public
15 | # License as published by the Free Software Foundation; either
16 | # version 2.1 of the License, or (at your option) any later version.
17 | #
18 | # This library is distributed in the hope that it will be useful,
19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 | # Lesser General Public License for more details.
22 | #
23 | # You should have received a copy of the GNU Lesser General Public
24 | # License along with this library; if not, write to the Free Software
25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26 | # 02110-1301 USA
27 | ######################### END LICENSE BLOCK #########################
28 |
29 | from .charsetprober import CharSetProber
30 | from .constants import eNotMe
31 | from .compat import wrap_ord
32 |
# Number of frequency categories tracked by Latin1Prober._mFreqCounter.
FREQ_CAT_NUM = 4

# Character classes for Latin-1 / windows-1252 bytes.
UDF = 0  # undefined
OTH = 1  # other
ASC = 2  # ascii capital letter
ASS = 3  # ascii small letter
ACV = 4  # accent capital vowel
ACO = 5  # accent capital other
ASV = 6  # accent small vowel
ASO = 7  # accent small other
CLASS_NUM = 8  # total classes

# Maps every byte value (0-255) to one of the classes above.
Latin1_CharToClass = (
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 00 - 07
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 08 - 0F
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 10 - 17
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 18 - 1F
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 20 - 27
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 28 - 2F
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 30 - 37
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 38 - 3F
    OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC,  # 40 - 47
    ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,  # 48 - 4F
    ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC,  # 50 - 57
    ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH,  # 58 - 5F
    OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS,  # 60 - 67
    ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS,  # 68 - 6F
    ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS,  # 70 - 77
    ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH,  # 78 - 7F
    OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH,  # 80 - 87
    OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF,  # 88 - 8F
    UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # 90 - 97
    OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO,  # 98 - 9F
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # A0 - A7
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # A8 - AF
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # B0 - B7
    OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH,  # B8 - BF
    ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO,  # C0 - C7
    ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV,  # C8 - CF
    ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH,  # D0 - D7
    ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO,  # D8 - DF
    ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO,  # E0 - E7
    ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV,  # E8 - EF
    ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH,  # F0 - F7
    ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO,  # F8 - FF
)

# Bigram plausibility for (previous class, current class), flattened
# row-major with CLASS_NUM columns.
# 0 : illegal
# 1 : very unlikely
# 2 : normal
# 3 : very likely
Latin1ClassModel = (
    # UDF OTH ASC ASS ACV ACO ASV ASO
    0,  0,  0,  0,  0,  0,  0,  0,  # UDF
    0,  3,  3,  3,  3,  3,  3,  3,  # OTH
    0,  3,  3,  3,  3,  3,  3,  3,  # ASC
    0,  3,  3,  3,  1,  1,  3,  3,  # ASS
    0,  3,  3,  3,  1,  2,  1,  2,  # ACV
    0,  3,  3,  3,  3,  3,  3,  3,  # ACO
    0,  3,  1,  3,  1,  1,  1,  3,  # ASV
    0,  3,  1,  3,  1,  1,  3,  3,  # ASO
)
95 |
96 |
class Latin1Prober(CharSetProber):
    """Fallback prober for windows-1252 / Latin-1 text.

    Scores the input by how plausible each adjacent pair of character
    classes is, according to Latin1ClassModel.
    """

    def __init__(self):
        CharSetProber.__init__(self)
        self.reset()

    def reset(self):
        self._mLastCharClass = OTH
        self._mFreqCounter = [0] * FREQ_CAT_NUM
        CharSetProber.reset(self)

    def get_charset_name(self):
        return "windows-1252"

    def feed(self, aBuf):
        """Score each (previous, current) character-class bigram."""
        aBuf = self.filter_with_english_letters(aBuf)
        for byte in aBuf:
            cur_class = Latin1_CharToClass[wrap_ord(byte)]
            freq = Latin1ClassModel[(self._mLastCharClass * CLASS_NUM)
                                    + cur_class]
            if freq == 0:
                # Illegal pair: this cannot be Latin-1 text.
                self._mState = eNotMe
                break
            self._mFreqCounter[freq] += 1
            self._mLastCharClass = cur_class

        return self.get_state()

    def get_confidence(self):
        if self.get_state() == eNotMe:
            return 0.01

        total = sum(self._mFreqCounter)
        if total < 0.01:
            confidence = 0.0
        else:
            # "Very likely" pairs raise the score; "very unlikely" pairs
            # are heavily penalized.
            confidence = ((self._mFreqCounter[3]
                           - self._mFreqCounter[1] * 20.0) / total)
        confidence = max(confidence, 0.0)
        # Deliberately lowered so more specific probers win ties.
        return confidence * 0.73
140 |
--------------------------------------------------------------------------------
/requests/packages/chardet/mbcharsetprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is Mozilla Universal charset detector code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 2001
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | # Shy Shalom - original C code
12 | # Proofpoint, Inc.
13 | #
14 | # This library is free software; you can redistribute it and/or
15 | # modify it under the terms of the GNU Lesser General Public
16 | # License as published by the Free Software Foundation; either
17 | # version 2.1 of the License, or (at your option) any later version.
18 | #
19 | # This library is distributed in the hope that it will be useful,
20 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
21 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 | # Lesser General Public License for more details.
23 | #
24 | # You should have received a copy of the GNU Lesser General Public
25 | # License along with this library; if not, write to the Free Software
26 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
27 | # 02110-1301 USA
28 | ######################### END LICENSE BLOCK #########################
29 |
30 | import sys
31 | from . import constants
32 | from .charsetprober import CharSetProber
33 |
34 |
class MultiByteCharSetProber(CharSetProber):
    """Shared driver for the multi-byte charset probers.

    Concrete subclasses install a coding state machine and a character
    distribution analyzer; this base class pumps each input byte through
    both and tracks the detection state.
    """

    def __init__(self):
        CharSetProber.__init__(self)
        self._mDistributionAnalyzer = None
        self._mCodingSM = None
        # Two-byte carry buffer bridging chunk boundaries.
        self._mLastChar = [0, 0]

    def reset(self):
        CharSetProber.reset(self)
        if self._mCodingSM:
            self._mCodingSM.reset()
        if self._mDistributionAnalyzer:
            self._mDistributionAnalyzer.reset()
        self._mLastChar = [0, 0]

    def get_charset_name(self):
        # Subclasses supply the concrete charset name.
        pass

    def feed(self, aBuf):
        for pos in range(len(aBuf)):
            state = self._mCodingSM.next_state(aBuf[pos])
            if state == constants.eError:
                if constants._debug:
                    sys.stderr.write(self.get_charset_name()
                                     + ' prober hit error at byte ' + str(pos)
                                     + '\n')
                self._mState = constants.eNotMe
                break
            if state == constants.eItsMe:
                self._mState = constants.eFoundIt
                break
            if state == constants.eStart:
                char_len = self._mCodingSM.get_current_charlen()
                if pos:
                    self._mDistributionAnalyzer.feed(aBuf[pos - 1:pos + 1],
                                                     char_len)
                else:
                    # First byte of the chunk: pair it with the carried
                    # tail of the previous chunk.
                    self._mLastChar[1] = aBuf[0]
                    self._mDistributionAnalyzer.feed(self._mLastChar,
                                                     char_len)

        self._mLastChar[0] = aBuf[len(aBuf) - 1]

        if self.get_state() == constants.eDetecting:
            if (self._mDistributionAnalyzer.got_enough_data() and
                    (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
                self._mState = constants.eFoundIt

        return self.get_state()

    def get_confidence(self):
        return self._mDistributionAnalyzer.get_confidence()
87 |
--------------------------------------------------------------------------------
/requests/packages/chardet/mbcsgroupprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is Mozilla Universal charset detector code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 2001
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | # Shy Shalom - original C code
12 | # Proofpoint, Inc.
13 | #
14 | # This library is free software; you can redistribute it and/or
15 | # modify it under the terms of the GNU Lesser General Public
16 | # License as published by the Free Software Foundation; either
17 | # version 2.1 of the License, or (at your option) any later version.
18 | #
19 | # This library is distributed in the hope that it will be useful,
20 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
21 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 | # Lesser General Public License for more details.
23 | #
24 | # You should have received a copy of the GNU Lesser General Public
25 | # License along with this library; if not, write to the Free Software
26 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
27 | # 02110-1301 USA
28 | ######################### END LICENSE BLOCK #########################
29 |
30 | from .charsetgroupprober import CharSetGroupProber
31 | from .utf8prober import UTF8Prober
32 | from .sjisprober import SJISProber
33 | from .eucjpprober import EUCJPProber
34 | from .gb2312prober import GB2312Prober
35 | from .euckrprober import EUCKRProber
36 | from .cp949prober import CP949Prober
37 | from .big5prober import Big5Prober
38 | from .euctwprober import EUCTWProber
39 |
40 |
class MBCSGroupProber(CharSetGroupProber):
    """Group prober bundling every multi-byte charset detector."""

    def __init__(self):
        CharSetGroupProber.__init__(self)
        # Order matters: it is the order the group tries the charsets in.
        prober_classes = (
            UTF8Prober,
            SJISProber,
            EUCJPProber,
            GB2312Prober,
            EUCKRProber,
            CP949Prober,
            Big5Prober,
            EUCTWProber,
        )
        self._mProbers = [cls() for cls in prober_classes]
        self.reset()
55 |
--------------------------------------------------------------------------------
/requests/packages/chardet/sbcharsetprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is Mozilla Universal charset detector code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 2001
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | # Shy Shalom - original C code
12 | #
13 | # This library is free software; you can redistribute it and/or
14 | # modify it under the terms of the GNU Lesser General Public
15 | # License as published by the Free Software Foundation; either
16 | # version 2.1 of the License, or (at your option) any later version.
17 | #
18 | # This library is distributed in the hope that it will be useful,
19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 | # Lesser General Public License for more details.
22 | #
23 | # You should have received a copy of the GNU Lesser General Public
24 | # License along with this library; if not, write to the Free Software
25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26 | # 02110-1301 USA
27 | ######################### END LICENSE BLOCK #########################
28 |
29 | import sys
30 | from . import constants
31 | from .charsetprober import CharSetProber
32 | from .compat import wrap_ord
33 |
34 | SAMPLE_SIZE = 64
35 | SB_ENOUGH_REL_THRESHOLD = 1024
36 | POSITIVE_SHORTCUT_THRESHOLD = 0.95
37 | NEGATIVE_SHORTCUT_THRESHOLD = 0.05
38 | SYMBOL_CAT_ORDER = 250
39 | NUMBER_OF_SEQ_CAT = 4
40 | POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1
41 | #NEGATIVE_CAT = 0
42 |
43 |
class SingleByteCharSetProber(CharSetProber):
    """Prober for a single-byte charset described by a language model.

    ``model`` is a dict providing ``charToOrderMap``, ``precedenceMatrix``,
    ``mTypicalPositiveRatio``, ``keepEnglishLetter`` and ``charsetName``.
    Confidence is based on how often frequent-letter pairs occur in the
    order the model predicts.
    """

    def __init__(self, model, reversed=False, nameProber=None):
        CharSetProber.__init__(self)
        self._mModel = model
        # TRUE if we need to reverse every pair in the model lookup
        self._mReversed = reversed
        # Optional auxiliary prober for name decision
        self._mNameProber = nameProber
        self.reset()

    def reset(self):
        CharSetProber.reset(self)
        # char order of last character (255 = none seen yet)
        self._mLastOrder = 255
        self._mSeqCounters = [0] * NUMBER_OF_SEQ_CAT
        self._mTotalSeqs = 0
        self._mTotalChar = 0
        # characters that fall in our sampling range
        self._mFreqChar = 0

    def get_charset_name(self):
        # Defer to the name prober (e.g. HebrewProber) when one is attached.
        if self._mNameProber:
            return self._mNameProber.get_charset_name()
        else:
            return self._mModel['charsetName']

    def feed(self, aBuf):
        """Feed a chunk of bytes; returns the detection state."""
        if not self._mModel['keepEnglishLetter']:
            aBuf = self.filter_without_english_letters(aBuf)
        aLen = len(aBuf)
        if not aLen:
            return self.get_state()
        for c in aBuf:
            order = self._mModel['charToOrderMap'][wrap_ord(c)]
            if order < SYMBOL_CAT_ORDER:
                self._mTotalChar += 1
            if order < SAMPLE_SIZE:
                self._mFreqChar += 1
                if self._mLastOrder < SAMPLE_SIZE:
                    self._mTotalSeqs += 1
                    if not self._mReversed:
                        i = (self._mLastOrder * SAMPLE_SIZE) + order
                        model = self._mModel['precedenceMatrix'][i]
                    else:  # reverse the order of the letters in the lookup
                        i = (order * SAMPLE_SIZE) + self._mLastOrder
                        model = self._mModel['precedenceMatrix'][i]
                    self._mSeqCounters[model] += 1
            self._mLastOrder = order

        if self.get_state() == constants.eDetecting:
            if self._mTotalSeqs > SB_ENOUGH_REL_THRESHOLD:
                cf = self.get_confidence()
                if cf > POSITIVE_SHORTCUT_THRESHOLD:
                    # Fixed: the two literals previously concatenated
                    # without a space ("we have awinner").
                    if constants._debug:
                        sys.stderr.write('%s confidence = %s, we have a '
                                         'winner\n' %
                                         (self._mModel['charsetName'], cf))
                    self._mState = constants.eFoundIt
                elif cf < NEGATIVE_SHORTCUT_THRESHOLD:
                    # Fixed: missing space and "threshhold" typo in the
                    # concatenated debug message.
                    if constants._debug:
                        sys.stderr.write('%s confidence = %s, below negative '
                                         'shortcut threshold %s\n' %
                                         (self._mModel['charsetName'], cf,
                                          NEGATIVE_SHORTCUT_THRESHOLD))
                    self._mState = constants.eNotMe

        return self.get_state()

    def get_confidence(self):
        """Ratio of observed to typical positive sequences, scaled by the
        share of frequent characters; clamped just below 1.0."""
        r = 0.01
        if self._mTotalSeqs > 0:
            r = ((1.0 * self._mSeqCounters[POSITIVE_CAT]) / self._mTotalSeqs
                 / self._mModel['mTypicalPositiveRatio'])
            r = r * self._mFreqChar / self._mTotalChar
            if r >= 1.0:
                r = 0.99
        return r
121 |
--------------------------------------------------------------------------------
/requests/packages/chardet/sbcsgroupprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is Mozilla Universal charset detector code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 2001
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | # Shy Shalom - original C code
12 | #
13 | # This library is free software; you can redistribute it and/or
14 | # modify it under the terms of the GNU Lesser General Public
15 | # License as published by the Free Software Foundation; either
16 | # version 2.1 of the License, or (at your option) any later version.
17 | #
18 | # This library is distributed in the hope that it will be useful,
19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 | # Lesser General Public License for more details.
22 | #
23 | # You should have received a copy of the GNU Lesser General Public
24 | # License along with this library; if not, write to the Free Software
25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26 | # 02110-1301 USA
27 | ######################### END LICENSE BLOCK #########################
28 |
29 | from .charsetgroupprober import CharSetGroupProber
30 | from .sbcharsetprober import SingleByteCharSetProber
31 | from .langcyrillicmodel import (Win1251CyrillicModel, Koi8rModel,
32 | Latin5CyrillicModel, MacCyrillicModel,
33 | Ibm866Model, Ibm855Model)
34 | from .langgreekmodel import Latin7GreekModel, Win1253GreekModel
35 | from .langbulgarianmodel import Latin5BulgarianModel, Win1251BulgarianModel
36 | from .langhungarianmodel import Latin2HungarianModel, Win1250HungarianModel
37 | from .langthaimodel import TIS620ThaiModel
38 | from .langhebrewmodel import Win1255HebrewModel
39 | from .hebrewprober import HebrewProber
40 |
41 |
class SBCSGroupProber(CharSetGroupProber):
    """Group prober bundling every single-byte charset detector."""

    def __init__(self):
        CharSetGroupProber.__init__(self)
        # One plain prober per language model, in priority order.
        models = (
            Win1251CyrillicModel,
            Koi8rModel,
            Latin5CyrillicModel,
            MacCyrillicModel,
            Ibm866Model,
            Ibm855Model,
            Latin7GreekModel,
            Win1253GreekModel,
            Latin5BulgarianModel,
            Win1251BulgarianModel,
            Latin2HungarianModel,
            Win1250HungarianModel,
            TIS620ThaiModel,
        )
        self._mProbers = [SingleByteCharSetProber(m) for m in models]

        # Hebrew needs a coordinating prober that arbitrates between the
        # logical and the visual byte order of windows-1255.
        hebrew_prober = HebrewProber()
        logical_hebrew = SingleByteCharSetProber(Win1255HebrewModel,
                                                 False, hebrew_prober)
        visual_hebrew = SingleByteCharSetProber(Win1255HebrewModel, True,
                                                hebrew_prober)
        hebrew_prober.set_model_probers(logical_hebrew, visual_hebrew)
        self._mProbers.extend([hebrew_prober, logical_hebrew, visual_hebrew])

        self.reset()
70 |
--------------------------------------------------------------------------------
/requests/packages/chardet/sjisprober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | import sys
29 | from .mbcharsetprober import MultiByteCharSetProber
30 | from .codingstatemachine import CodingStateMachine
31 | from .chardistribution import SJISDistributionAnalysis
32 | from .jpcntx import SJISContextAnalysis
33 | from .mbcssm import SJISSMModel
34 | from . import constants
35 |
36 |
class SJISProber(MultiByteCharSetProber):
    """Prober for Shift_JIS.

    Combines the inherited coding state machine / byte-distribution
    analysis with a Japanese context analyzer that also decides the
    reported charset name (Shift_JIS vs. its CP932 variant).
    """

    def __init__(self):
        MultiByteCharSetProber.__init__(self)
        self._mCodingSM = CodingStateMachine(SJISSMModel)
        self._mDistributionAnalyzer = SJISDistributionAnalysis()
        self._mContextAnalyzer = SJISContextAnalysis()
        self.reset()

    def reset(self):
        MultiByteCharSetProber.reset(self)
        self._mContextAnalyzer.reset()

    def get_charset_name(self):
        # The context analyzer owns the name decision.
        return self._mContextAnalyzer.get_charset_name()

    def feed(self, aBuf):
        """Feed a chunk of bytes; returns the detection state."""
        aLen = len(aBuf)
        for i in range(0, aLen):
            codingState = self._mCodingSM.next_state(aBuf[i])
            if codingState == constants.eError:
                if constants._debug:
                    sys.stderr.write(self.get_charset_name()
                                     + ' prober hit error at byte ' + str(i)
                                     + '\n')
                self._mState = constants.eNotMe
                break
            elif codingState == constants.eItsMe:
                self._mState = constants.eFoundIt
                break
            elif codingState == constants.eStart:
                # eStart means a complete character just finished decoding.
                charLen = self._mCodingSM.get_current_charlen()
                if i == 0:
                    # First byte of this chunk completes a character begun
                    # in the previous chunk, carried in self._mLastChar.
                    self._mLastChar[1] = aBuf[0]
                    self._mContextAnalyzer.feed(self._mLastChar[2 - charLen:],
                                                charLen)
                    self._mDistributionAnalyzer.feed(self._mLastChar, charLen)
                else:
                    # Context analyzer gets the character's lead byte slice;
                    # distribution analyzer gets the trailing byte pair.
                    self._mContextAnalyzer.feed(aBuf[i + 1 - charLen:i + 3
                                                     - charLen], charLen)
                    self._mDistributionAnalyzer.feed(aBuf[i - 1:i + 1],
                                                     charLen)

        # Carry the final byte over to the next feed() call.
        self._mLastChar[0] = aBuf[aLen - 1]

        if self.get_state() == constants.eDetecting:
            if (self._mContextAnalyzer.got_enough_data() and
                    (self.get_confidence() > constants.SHORTCUT_THRESHOLD)):
                self._mState = constants.eFoundIt

        return self.get_state()

    def get_confidence(self):
        # Whichever analyzer is more certain wins.
        contxtCf = self._mContextAnalyzer.get_confidence()
        distribCf = self._mDistributionAnalyzer.get_confidence()
        return max(contxtCf, distribCf)
92 |
--------------------------------------------------------------------------------
/requests/packages/chardet/universaldetector.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is Mozilla Universal charset detector code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 2001
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | # Shy Shalom - original C code
12 | #
13 | # This library is free software; you can redistribute it and/or
14 | # modify it under the terms of the GNU Lesser General Public
15 | # License as published by the Free Software Foundation; either
16 | # version 2.1 of the License, or (at your option) any later version.
17 | #
18 | # This library is distributed in the hope that it will be useful,
19 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
20 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 | # Lesser General Public License for more details.
22 | #
23 | # You should have received a copy of the GNU Lesser General Public
24 | # License along with this library; if not, write to the Free Software
25 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
26 | # 02110-1301 USA
27 | ######################### END LICENSE BLOCK #########################
28 |
29 | from . import constants
30 | import sys
31 | import codecs
32 | from .latin1prober import Latin1Prober # windows-1252
33 | from .mbcsgroupprober import MBCSGroupProber # multi-byte character sets
34 | from .sbcsgroupprober import SBCSGroupProber # single-byte character sets
35 | from .escprober import EscCharSetProber # ISO-2122, etc.
36 | import re
37 |
MINIMUM_THRESHOLD = 0.20
# Input-classifier states: plain ASCII, escape-sequence encodings,
# and anything containing bytes >= 0x80.
ePureAscii = 0
eEscAscii = 1
eHighbyte = 2


class UniversalDetector:
    """Incremental character-encoding detector.

    Feed successive byte chunks with ``feed`` and call ``close`` when the
    stream ends; the verdict lives in ``self.result``, a dict with
    ``encoding`` and ``confidence`` keys.
    """

    def __init__(self):
        # Any byte >= 0x80 means the input cannot be plain ASCII.
        self._highBitDetector = re.compile(b'[\x80-\xFF]')
        # ESC or '~{' suggests an escape-based encoding (ISO-2022, HZ).
        self._escDetector = re.compile(b'(\033|~{)')
        self._mEscCharSetProber = None
        self._mCharSetProbers = []
        self.reset()

    def reset(self):
        """Reset all state so the detector can be reused for a new stream."""
        self.result = {'encoding': None, 'confidence': 0.0}
        self.done = False
        self._mStart = True
        self._mGotData = False
        self._mInputState = ePureAscii
        self._mLastChar = b''
        if self._mEscCharSetProber:
            self._mEscCharSetProber.reset()
        for prober in self._mCharSetProbers:
            prober.reset()

    def feed(self, aBuf):
        """Consume a chunk of bytes, updating ``self.result``/``self.done``."""
        if self.done:
            return

        aLen = len(aBuf)
        if not aLen:
            return

        if not self._mGotData:
            # If the data starts with BOM, we know it is UTF.
            # NOTE: the 4-byte UTF-32 checks must come before the 2-byte
            # UTF-16 ones because BOM_UTF32_LE starts with BOM_LE.
            if aBuf[:3] == codecs.BOM_UTF8:
                # EF BB BF  UTF-8 with BOM
                self.result = {'encoding': "UTF-8-SIG", 'confidence': 1.0}
            elif aBuf[:4] == codecs.BOM_UTF32_LE:
                # FF FE 00 00  UTF-32, little-endian BOM
                self.result = {'encoding': "UTF-32LE", 'confidence': 1.0}
            elif aBuf[:4] == codecs.BOM_UTF32_BE:
                # 00 00 FE FF  UTF-32, big-endian BOM
                self.result = {'encoding': "UTF-32BE", 'confidence': 1.0}
            elif aBuf[:4] == b'\xFE\xFF\x00\x00':
                # FE FF 00 00  UCS-4, unusual octet order BOM (3412)
                self.result = {
                    'encoding': "X-ISO-10646-UCS-4-3412",
                    'confidence': 1.0
                }
            elif aBuf[:4] == b'\x00\x00\xFF\xFE':
                # 00 00 FF FE  UCS-4, unusual octet order BOM (2143)
                self.result = {
                    'encoding': "X-ISO-10646-UCS-4-2143",
                    'confidence': 1.0
                }
            elif aBuf[:2] == codecs.BOM_LE:
                # FF FE  UTF-16, little endian BOM
                self.result = {'encoding': "UTF-16LE", 'confidence': 1.0}
            elif aBuf[:2] == codecs.BOM_BE:
                # FE FF  UTF-16, big endian BOM
                self.result = {'encoding': "UTF-16BE", 'confidence': 1.0}

            self._mGotData = True
            if self.result['encoding'] and (self.result['confidence'] > 0.0):
                self.done = True
                return

        # Escalate the input state; it only ever moves away from pure ASCII.
        if self._mInputState == ePureAscii:
            if self._highBitDetector.search(aBuf):
                self._mInputState = eHighbyte
            elif ((self._mInputState == ePureAscii) and
                    self._escDetector.search(self._mLastChar + aBuf)):
                self._mInputState = eEscAscii

        # Keep the final byte so escape sequences split across chunks are seen.
        self._mLastChar = aBuf[-1:]

        if self._mInputState == eEscAscii:
            if not self._mEscCharSetProber:
                self._mEscCharSetProber = EscCharSetProber()
            if self._mEscCharSetProber.feed(aBuf) == constants.eFoundIt:
                self.result = {'encoding': self._mEscCharSetProber.get_charset_name(),
                               'confidence': self._mEscCharSetProber.get_confidence()}
                self.done = True
        elif self._mInputState == eHighbyte:
            if not self._mCharSetProbers:
                self._mCharSetProbers = [MBCSGroupProber(), SBCSGroupProber(),
                                         Latin1Prober()]
            for prober in self._mCharSetProbers:
                if prober.feed(aBuf) == constants.eFoundIt:
                    self.result = {'encoding': prober.get_charset_name(),
                                   'confidence': prober.get_confidence()}
                    self.done = True
                    break

    def close(self):
        """Finish detection and return ``self.result`` (or None if no
        high-byte verdict could be reached)."""
        if self.done:
            return
        if not self._mGotData:
            if constants._debug:
                sys.stderr.write('no data received!\n')
            return
        self.done = True

        if self._mInputState == ePureAscii:
            self.result = {'encoding': 'ascii', 'confidence': 1.0}
            return self.result

        if self._mInputState == eHighbyte:
            proberConfidence = None
            maxProberConfidence = 0.0
            maxProber = None
            for prober in self._mCharSetProbers:
                if not prober:
                    continue
                proberConfidence = prober.get_confidence()
                if proberConfidence > maxProberConfidence:
                    maxProberConfidence = proberConfidence
                    maxProber = prober
            if maxProber and (maxProberConfidence > MINIMUM_THRESHOLD):
                self.result = {'encoding': maxProber.get_charset_name(),
                               'confidence': maxProber.get_confidence()}
                return self.result

        if constants._debug:
            sys.stderr.write('no probers hit minimum threshhold\n')
            # Fixed: group probers keep their children in '_mProbers'
            # (see MBCSGroupProber.__init__); the old '.mProbers' raised
            # AttributeError whenever this debug branch ran.
            for prober in self._mCharSetProbers[0]._mProbers:
                if not prober:
                    continue
                sys.stderr.write('%s confidence = %s\n' %
                                 (prober.get_charset_name(),
                                  prober.get_confidence()))
171 |
--------------------------------------------------------------------------------
/requests/packages/chardet/utf8prober.py:
--------------------------------------------------------------------------------
1 | ######################## BEGIN LICENSE BLOCK ########################
2 | # The Original Code is mozilla.org code.
3 | #
4 | # The Initial Developer of the Original Code is
5 | # Netscape Communications Corporation.
6 | # Portions created by the Initial Developer are Copyright (C) 1998
7 | # the Initial Developer. All Rights Reserved.
8 | #
9 | # Contributor(s):
10 | # Mark Pilgrim - port to Python
11 | #
12 | # This library is free software; you can redistribute it and/or
13 | # modify it under the terms of the GNU Lesser General Public
14 | # License as published by the Free Software Foundation; either
15 | # version 2.1 of the License, or (at your option) any later version.
16 | #
17 | # This library is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 | # Lesser General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Lesser General Public
23 | # License along with this library; if not, write to the Free Software
24 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
25 | # 02110-1301 USA
26 | ######################### END LICENSE BLOCK #########################
27 |
28 | from . import constants
29 | from .charsetprober import CharSetProber
30 | from .codingstatemachine import CodingStateMachine
31 | from .mbcssm import UTF8SMModel
32 |
33 | ONE_CHAR_PROB = 0.5
34 |
35 |
class UTF8Prober(CharSetProber):
    """Prober that recognizes UTF-8 via its coding state machine."""

    def __init__(self):
        CharSetProber.__init__(self)
        self._mCodingSM = CodingStateMachine(UTF8SMModel)
        self.reset()

    def reset(self):
        CharSetProber.reset(self)
        self._mCodingSM.reset()
        # Count of complete multi-byte sequences seen so far.
        self._mNumOfMBChar = 0

    def get_charset_name(self):
        return "utf-8"

    def feed(self, aBuf):
        for byte in aBuf:
            state = self._mCodingSM.next_state(byte)
            if state == constants.eError:
                self._mState = constants.eNotMe
                break
            if state == constants.eItsMe:
                self._mState = constants.eFoundIt
                break
            if state == constants.eStart:
                # A full character was decoded; remember multi-byte ones.
                if self._mCodingSM.get_current_charlen() >= 2:
                    self._mNumOfMBChar += 1

        if self.get_state() == constants.eDetecting:
            if self.get_confidence() > constants.SHORTCUT_THRESHOLD:
                self._mState = constants.eFoundIt

        return self.get_state()

    def get_confidence(self):
        # Each multi-byte character halves the odds this is NOT UTF-8.
        unlike = 0.99
        if self._mNumOfMBChar >= 6:
            return unlike
        return 1.0 - unlike * (ONE_CHAR_PROB ** self._mNumOfMBChar)
77 |
--------------------------------------------------------------------------------
/requests/packages/urllib3/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | urllib3 - Thread-safe connection pooling and re-using.
3 | """
4 |
5 | __author__ = 'Andrey Petrov (andrey.petrov@shazow.net)'
6 | __license__ = 'MIT'
7 | __version__ = 'dev'
8 |
9 |
10 | from .connectionpool import (
11 | HTTPConnectionPool,
12 | HTTPSConnectionPool,
13 | connection_from_url
14 | )
15 |
16 | from . import exceptions
17 | from .filepost import encode_multipart_formdata
18 | from .poolmanager import PoolManager, ProxyManager, proxy_from_url
19 | from .response import HTTPResponse
20 | from .util.request import make_headers
21 | from .util.url import get_host
22 | from .util.timeout import Timeout
23 | from .util.retry import Retry
24 |
25 |
26 | # Set default logging handler to avoid "No handler found" warnings.
27 | import logging
# Use the stdlib NullHandler when available; otherwise install a minimal
# stand-in so importing urllib3 never triggers "No handler found" warnings.
try: # Python 2.7+
    from logging import NullHandler
except ImportError:
    class NullHandler(logging.Handler):
        def emit(self, record):
            pass

logging.getLogger(__name__).addHandler(NullHandler())
36 |
def add_stderr_logger(level=logging.DEBUG):
    """Attach a StreamHandler to the urllib3 logger for quick debugging.

    Returns the freshly configured handler so callers can remove or
    adjust it later.
    """
    # Living in this __init__.py keeps __name__ correct even when urllib3
    # is vendored inside another package.
    fmt = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    handler = logging.StreamHandler()
    handler.setFormatter(fmt)
    logger = logging.getLogger(__name__)
    logger.addHandler(handler)
    logger.setLevel(level)
    logger.debug('Added a stderr logging handler to logger: %s' % __name__)
    return handler
53 |
54 | # ... Clean up.
55 | del NullHandler
56 |
57 |
58 | # Set security warning to only go off once by default.
59 | import warnings
60 | warnings.simplefilter('always', exceptions.SecurityWarning)
61 |
def disable_warnings(category=exceptions.HTTPWarning):
    """
    Helper for quickly disabling all urllib3 warnings.

    :param category: warning class to silence; defaults to
        ``exceptions.HTTPWarning``, the root of urllib3's warning
        hierarchy, so every urllib3 warning is covered.
    """
    warnings.simplefilter('ignore', category)
67 |
--------------------------------------------------------------------------------
/requests/packages/urllib3/_collections.py:
--------------------------------------------------------------------------------
1 | from collections import Mapping, MutableMapping
try:
    from threading import RLock
except ImportError:  # Platform-specific: No threads available
    # Without threads there is nothing to lock against, so substitute a
    # do-nothing object exposing the same ``with`` interface.
    class RLock:
        def __enter__(self):
            pass

        def __exit__(self, exc_type, exc_value, traceback):
            pass
11 |
12 |
13 | try: # Python 2.7+
14 | from collections import OrderedDict
15 | except ImportError:
16 | from .packages.ordered_dict import OrderedDict
17 | from .packages.six import iterkeys, itervalues
18 |
19 |
20 | __all__ = ['RecentlyUsedContainer', 'HTTPHeaderDict']
21 |
22 |
# Sentinel distinguishing "no value" from a stored None.
_Null = object()


class RecentlyUsedContainer(MutableMapping):
    """
    Provides a thread-safe dict-like container which maintains up to
    ``maxsize`` keys while throwing away the least-recently-used keys beyond
    ``maxsize``.

    :param maxsize:
        Maximum number of recent elements to retain.

    :param dispose_func:
        Every time an item is evicted from the container,
        ``dispose_func(value)`` is called. Callback which will get called
    """

    ContainerCls = OrderedDict

    def __init__(self, maxsize=10, dispose_func=None):
        self._maxsize = maxsize
        self.dispose_func = dispose_func
        self._container = self.ContainerCls()
        self.lock = RLock()

    def __getitem__(self, key):
        with self.lock:
            # Pop and re-insert so the key moves to the most-recent end.
            value = self._container.pop(key)
            self._container[key] = value
        return value

    def __setitem__(self, key, value):
        evicted = _Null
        with self.lock:
            # Replacing an existing key evicts its previous value.
            evicted = self._container.get(key, _Null)
            self._container[key] = value

            # Otherwise, the insertion may push the least-recently-used
            # entry out of the front of the container.
            if len(self._container) > self._maxsize:
                _, evicted = self._container.popitem(last=False)

        # Dispose outside the lock to keep the critical section short.
        if self.dispose_func and evicted is not _Null:
            self.dispose_func(evicted)

    def __delitem__(self, key):
        with self.lock:
            removed = self._container.pop(key)

        if self.dispose_func:
            self.dispose_func(removed)

    def __len__(self):
        with self.lock:
            return len(self._container)

    def __iter__(self):
        raise NotImplementedError('Iteration over this class is unlikely to be threadsafe.')

    def clear(self):
        with self.lock:
            # Grab every value, then wipe the mapping in one go.
            doomed = list(itervalues(self._container))
            self._container.clear()

        if self.dispose_func:
            for value in doomed:
                self.dispose_func(value)

    def keys(self):
        with self.lock:
            return list(iterkeys(self._container))
99 |
class HTTPHeaderDict(MutableMapping):
    """
    A ``dict``-like container for HTTP headers with case-insensitive
    field names, per RFC 7230.

    :param headers:
        An iterable of field-value pairs. Must not contain multiple field
        names when compared case-insensitively.

    :param kwargs:
        Additional field-value pairs to pass in to ``dict.update``.

    Iteration yields the first case-sensitive spelling seen for each
    case-insensitive field.  ``__setitem__`` overwrites any field that
    compares equal case-insensitively (matching ``dict`` semantics); to
    accumulate several values under one field, use ``add`` instead.
    Passing multiple case-insensitively-equal fields to the constructor
    or ``update`` is undefined and some may be lost.

    >>> headers = HTTPHeaderDict()
    >>> headers.add('Set-Cookie', 'foo=bar')
    >>> headers.add('set-cookie', 'baz=quxx')
    >>> headers['content-length'] = '7'
    >>> headers['SET-cookie']
    'foo=bar, baz=quxx'
    >>> headers['Content-Length']
    '7'

    For debugging, the private ``._data`` attribute is a plain ``dict``
    mapping each lowercased field name to a list of
    (case-sensitive-original-name, value) tuples, e.g.::

        {'set-cookie': [('Set-Cookie', 'foo=bar'), ('set-cookie', 'baz=quxx')],
         'content-length': [('content-length', '7')]}
    """

    def __init__(self, headers=None, **kwargs):
        self._data = {}
        self.update({} if headers is None else headers, **kwargs)

    def add(self, key, value):
        """Adds a (name, value) pair, doesn't overwrite the value if it already
        exists.

        >>> headers = HTTPHeaderDict(foo='bar')
        >>> headers.add('Foo', 'baz')
        >>> headers['foo']
        'bar, baz'
        """
        field = key.lower()
        if field not in self._data:
            self._data[field] = []
        self._data[field].append((key, value))

    def getlist(self, key):
        """Returns a list of all the values for the named field. Returns an
        empty list if the key doesn't exist."""
        if key not in self:
            return []
        return self[key].split(', ')

    def copy(self):
        # Rebuild through add() so every original spelling survives.
        clone = HTTPHeaderDict()
        for field in self._data:
            for rawkey, value in self._data[field]:
                clone.add(rawkey, value)
        return clone

    def __eq__(self, other):
        if not isinstance(other, Mapping):
            return False
        other = HTTPHeaderDict(other)
        mine = dict((k, self[k]) for k in self._data)
        theirs = dict((k, other[k]) for k in other._data)
        return mine == theirs

    def __getitem__(self, key):
        # Repeated header fields combine into one comma-separated value.
        pairs = self._data[key.lower()]
        return ', '.join(pair[1] for pair in pairs)

    def __setitem__(self, key, value):
        self._data[key.lower()] = [(key, value)]

    def __delitem__(self, key):
        del self._data[key.lower()]

    def __len__(self):
        return len(self._data)

    def __iter__(self):
        # Yield the first-seen original spelling of each field.
        for pairs in itervalues(self._data):
            yield pairs[0][0]

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, dict(self.items()))
199 |
--------------------------------------------------------------------------------
/requests/packages/urllib3/connection.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | import sys
3 | import socket
4 | from socket import timeout as SocketTimeout
5 | import warnings
6 | from .packages import six
7 |
8 | try: # Python 3
9 | from http.client import HTTPConnection as _HTTPConnection, HTTPException
10 | except ImportError:
11 | from httplib import HTTPConnection as _HTTPConnection, HTTPException
12 |
13 |
# Sentinel class assigned to HTTPSConnection below before attempting the
# ssl import, so callers can recognise a missing-SSL build.
class DummyConnection(object):
    "Used to detect a failed ConnectionCls import."
    pass
17 |
18 |
try:  # Compiled with SSL?
    # Pre-set the sentinel; only replaced further down when ssl imported
    # successfully and the verified class is defined.
    HTTPSConnection = DummyConnection
    import ssl
    BaseSSLError = ssl.SSLError
except (ImportError, AttributeError):  # Platform-specific: No SSL.
    ssl = None

    # Stand-in so `except BaseSSLError` clauses elsewhere still compile
    # when the interpreter has no ssl support.
    class BaseSSLError(BaseException):
        pass


try:  # Python 3:
    # Not a no-op, we're adding this to the namespace so it can be imported.
    ConnectionError = ConnectionError
except NameError:  # Python 2:
    class ConnectionError(Exception):
        pass
36 |
37 |
38 | from .exceptions import (
39 | ConnectTimeoutError,
40 | SystemTimeWarning,
41 | SecurityWarning,
42 | )
43 | from .packages.ssl_match_hostname import match_hostname
44 |
45 | from .util.ssl_ import (
46 | resolve_cert_reqs,
47 | resolve_ssl_version,
48 | ssl_wrap_socket,
49 | assert_fingerprint,
50 | )
51 |
52 |
53 | from .util import connection
54 |
#: Default port for each supported URL scheme.
port_by_scheme = {
    'http': 80,
    'https': 443,
}

#: Sanity threshold for the local clock; VerifiedHTTPSConnection.connect()
#: warns when today() is earlier than this date.
RECENT_DATE = datetime.date(2014, 1, 1)
61 |
62 |
class HTTPConnection(_HTTPConnection, object):
    """
    An ``httplib.HTTPConnection`` subclass that papers over constructor
    differences between older and newer Pythons.

    Additional keyword parameters configure the connection:

    - ``strict``: See the documentation on :class:`urllib3.connectionpool.HTTPConnectionPool`
    - ``source_address``: Set the source address for the current connection.

      .. note:: This is ignored for Python 2.6. It is only applied for 2.7 and 3.x

    - ``socket_options``: Set specific options on the underlying socket. If not specified, then
      defaults are loaded from ``HTTPConnection.default_socket_options`` which includes disabling
      Nagle's algorithm (sets TCP_NODELAY to 1) unless the connection is behind a proxy.

      For example, to enable TCP Keep Alive in addition to the defaults,
      pass::

          HTTPConnection.default_socket_options + [
              (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1),
          ]

      An empty list (``[]``) disables the defaults entirely.
    """

    default_port = port_by_scheme['http']

    #: Disable Nagle's algorithm by default.
    #: ``[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]``
    default_socket_options = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]

    #: Whether this connection verifies the host's certificate.
    is_verified = False

    def __init__(self, *args, **kw):
        # Python 3's http.client no longer accepts 'strict'.
        if six.PY3:
            kw.pop('strict', None)

        # Save source_address before it may be popped; Python 2.6's
        # constructor does not understand the keyword.
        self.source_address = kw.get('source_address')

        if sys.version_info < (2, 7):  # Python 2.6
            kw.pop('source_address', None)

        #: Socket options supplied by the caller; defaults when omitted.
        self.socket_options = kw.pop('socket_options', self.default_socket_options)

        # The superclass re-assigns self.source_address on Python 2.7+.
        _HTTPConnection.__init__(self, *args, **kw)

    def _new_conn(self):
        """Open a fresh socket to ``self.host``, honouring the configured
        source address and socket options.

        :return: New socket connection.
        """
        connect_kw = {}
        if self.source_address:
            connect_kw['source_address'] = self.source_address
        if self.socket_options:
            connect_kw['socket_options'] = self.socket_options

        try:
            sock = connection.create_connection(
                (self.host, self.port), self.timeout, **connect_kw)
        except SocketTimeout:
            raise ConnectTimeoutError(
                self, "Connection to %s timed out. (connect timeout=%s)" %
                (self.host, self.timeout))

        return sock

    def _prepare_conn(self, conn):
        """Attach *conn* as this connection's socket, tunnelling if needed."""
        self.sock = conn
        # _tunnel_host appeared in Python 2.6.3
        # (http://hg.python.org/cpython/rev/0f57b30a152f), so guard for
        # pythons 2.6.0-2 which lack it.
        if getattr(self, '_tunnel_host', None):
            # TODO: Fix tunnel so it doesn't depend on self.sock state.
            self._tunnel()
            # A tunnelled connection must not be transparently reopened.
            self.auto_open = 0

    def connect(self):
        self._prepare_conn(self._new_conn())
157 |
158 |
class HTTPSConnection(HTTPConnection):
    """HTTPConnection whose socket is wrapped with (unverified) SSL."""

    default_port = port_by_scheme['https']

    def __init__(self, host, port=None, key_file=None, cert_file=None,
                 strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, **kw):
        HTTPConnection.__init__(self, host, port, strict=strict,
                                timeout=timeout, **kw)

        self.key_file = key_file
        self.cert_file = cert_file

        # Required property for Google AppEngine 1.9.0 which otherwise causes
        # HTTPS requests to go out as HTTP. (See Issue #356)
        self._protocol = 'https'

    def connect(self):
        sock = self._new_conn()
        self._prepare_conn(sock)
        # No certificate verification is performed here.
        self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file)
179 |
180 |
class VerifiedHTTPSConnection(HTTPSConnection):
    """
    Based on httplib.HTTPSConnection but wraps the socket with
    SSL certification.
    """
    # TLS verification settings; normally filled in via set_cert()
    # before connect() is called.
    cert_reqs = None
    ca_certs = None
    ssl_version = None
    assert_fingerprint = None

    def set_cert(self, key_file=None, cert_file=None,
                 cert_reqs=None, ca_certs=None,
                 assert_hostname=None, assert_fingerprint=None):
        # Record the verification parameters; connect() consumes them.
        self.key_file = key_file
        self.cert_file = cert_file
        self.cert_reqs = cert_reqs
        self.ca_certs = ca_certs
        self.assert_hostname = assert_hostname
        self.assert_fingerprint = assert_fingerprint

    def connect(self):
        # Add certificate verification
        conn = self._new_conn()

        resolved_cert_reqs = resolve_cert_reqs(self.cert_reqs)
        resolved_ssl_version = resolve_ssl_version(self.ssl_version)

        hostname = self.host
        if getattr(self, '_tunnel_host', None):
            # _tunnel_host was added in Python 2.6.3
            # (See: http://hg.python.org/cpython/rev/0f57b30a152f)

            self.sock = conn
            # Calls self._set_hostport(), so self.host is
            # self._tunnel_host below.
            self._tunnel()
            # Mark this connection as not reusable
            self.auto_open = 0

            # Override the host with the one we're requesting data from.
            hostname = self._tunnel_host

        # A clock earlier than RECENT_DATE almost certainly means the
        # system time is wrong, which breaks certificate validity checks.
        is_time_off = datetime.date.today() < RECENT_DATE
        if is_time_off:
            warnings.warn((
                'System time is way off (before {0}). This will probably '
                'lead to SSL verification errors').format(RECENT_DATE),
                SystemTimeWarning
            )

        # Wrap socket using verification with the root certs in
        # trusted_root_certs
        self.sock = ssl_wrap_socket(conn, self.key_file, self.cert_file,
                                    cert_reqs=resolved_cert_reqs,
                                    ca_certs=self.ca_certs,
                                    server_hostname=hostname,
                                    ssl_version=resolved_ssl_version)

        if self.assert_fingerprint:
            # Pin by certificate fingerprint when one was supplied.
            assert_fingerprint(self.sock.getpeercert(binary_form=True),
                               self.assert_fingerprint)
        elif resolved_cert_reqs != ssl.CERT_NONE \
                and self.assert_hostname is not False:
            # Otherwise verify the hostname against the certificate, unless
            # the caller explicitly disabled that with assert_hostname=False.
            cert = self.sock.getpeercert()
            if not cert.get('subjectAltName', ()):
                warnings.warn((
                    'Certificate has no `subjectAltName`, falling back to check for a `commonName` for now. '
                    'This feature is being removed by major browsers and deprecated by RFC 2818. '
                    '(See https://github.com/shazow/urllib3/issues/497 for details.)'),
                    SecurityWarning
                )
            match_hostname(cert, self.assert_hostname or hostname)

        self.is_verified = (resolved_cert_reqs == ssl.CERT_REQUIRED
                            or self.assert_fingerprint is not None)
257 |
258 |
if ssl:
    # Make a copy for testing.
    UnverifiedHTTPSConnection = HTTPSConnection
    # With SSL available, the exported HTTPSConnection is the verifying one.
    HTTPSConnection = VerifiedHTTPSConnection
263 |
--------------------------------------------------------------------------------
/requests/packages/urllib3/contrib/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ring04h/wytorproxy/0e61fd5cebd55231a915e5d633904582fddcf77f/requests/packages/urllib3/contrib/__init__.py
--------------------------------------------------------------------------------
/requests/packages/urllib3/contrib/ntlmpool.py:
--------------------------------------------------------------------------------
1 | """
2 | NTLM authenticating pool, contributed by erikcederstran
3 |
4 | Issue #10, see: http://code.google.com/p/urllib3/issues/detail?id=10
5 | """
6 |
7 | try:
8 | from http.client import HTTPSConnection
9 | except ImportError:
10 | from httplib import HTTPSConnection
11 | from logging import getLogger
12 | from ntlm import ntlm
13 |
14 | from urllib3 import HTTPSConnectionPool
15 |
16 |
17 | log = getLogger(__name__)
18 |
19 |
class NTLMConnectionPool(HTTPSConnectionPool):
    """
    Implements an NTLM authentication version of an urllib3 connection pool
    """

    scheme = 'https'

    def __init__(self, user, pw, authurl, *args, **kwargs):
        """
        authurl is a random URL on the server that is protected by NTLM.
        user is the Windows user, probably in the DOMAIN\\username format.
        pw is the password for the user.
        """
        super(NTLMConnectionPool, self).__init__(*args, **kwargs)
        self.authurl = authurl
        self.rawuser = user
        # Split 'DOMAIN\\user' at the first backslash; raises IndexError
        # below if no backslash is present.
        user_parts = user.split('\\', 1)
        self.domain = user_parts[0].upper()
        self.user = user_parts[1]
        self.pw = pw

    def _new_conn(self):
        # Performs the NTLM handshake that secures the connection. The socket
        # must be kept open while requests are performed.
        self.num_connections += 1
        log.debug('Starting NTLM HTTPS connection no. %d: https://%s%s' %
                  (self.num_connections, self.host, self.authurl))

        headers = {}
        headers['Connection'] = 'Keep-Alive'
        req_header = 'Authorization'
        resp_header = 'www-authenticate'

        conn = HTTPSConnection(host=self.host, port=self.port)

        # Send negotiation message
        headers[req_header] = (
            'NTLM %s' % ntlm.create_NTLM_NEGOTIATE_MESSAGE(self.rawuser))
        log.debug('Request headers: %s' % headers)
        conn.request('GET', self.authurl, None, headers)
        res = conn.getresponse()
        reshdr = dict(res.getheaders())
        log.debug('Response status: %s %s' % (res.status, res.reason))
        log.debug('Response headers: %s' % reshdr)
        # NOTE(review): only the first 100 bytes are read here; presumably
        # the challenge response body is irrelevant -- confirm the rest
        # needn't be drained before reusing the socket.
        log.debug('Response data: %s [...]' % res.read(100))

        # Remove the reference to the socket, so that it can not be closed by
        # the response object (we want to keep the socket open)
        res.fp = None

        # Server should respond with a challenge message
        auth_header_values = reshdr[resp_header].split(', ')
        auth_header_value = None
        for s in auth_header_values:
            if s[:5] == 'NTLM ':
                auth_header_value = s[5:]
        if auth_header_value is None:
            raise Exception('Unexpected %s response header: %s' %
                            (resp_header, reshdr[resp_header]))

        # Send authentication message
        ServerChallenge, NegotiateFlags = \
            ntlm.parse_NTLM_CHALLENGE_MESSAGE(auth_header_value)
        auth_msg = ntlm.create_NTLM_AUTHENTICATE_MESSAGE(ServerChallenge,
                                                         self.user,
                                                         self.domain,
                                                         self.pw,
                                                         NegotiateFlags)
        headers[req_header] = 'NTLM %s' % auth_msg
        log.debug('Request headers: %s' % headers)
        conn.request('GET', self.authurl, None, headers)
        res = conn.getresponse()
        log.debug('Response status: %s %s' % (res.status, res.reason))
        log.debug('Response headers: %s' % dict(res.getheaders()))
        log.debug('Response data: %s [...]' % res.read()[:100])
        if res.status != 200:
            if res.status == 401:
                raise Exception('Server rejected request: wrong '
                                'username or password')
            raise Exception('Wrong server response: %s %s' %
                            (res.status, res.reason))

        # Again detach the socket from the response so it stays open.
        res.fp = None
        log.debug('Connection established')
        return conn

    def urlopen(self, method, url, body=None, headers=None, retries=3,
                redirect=True, assert_same_host=True):
        # Force Keep-Alive so the NTLM-authenticated socket is reused.
        if headers is None:
            headers = {}
        headers['Connection'] = 'Keep-Alive'
        return super(NTLMConnectionPool, self).urlopen(method, url, body,
                                                       headers, retries,
                                                       redirect,
                                                       assert_same_host)
115 |
--------------------------------------------------------------------------------
/requests/packages/urllib3/exceptions.py:
--------------------------------------------------------------------------------
1 |
2 | ## Base Exceptions
3 |
class HTTPError(Exception):
    """Base exception used by this module."""
7 |
class HTTPWarning(Warning):
    """Base warning used by this module."""
11 |
12 |
13 |
class PoolError(HTTPError):
    """Base exception for errors caused within a pool."""

    def __init__(self, pool, message):
        self.pool = pool
        HTTPError.__init__(self, "%s: %s" % (pool, message))

    def __reduce__(self):
        # Pickling support: pools aren't picklable, so rebuild empty.
        return self.__class__, (None, None)
23 |
24 |
class RequestError(PoolError):
    """Base exception for PoolErrors that have associated URLs."""

    def __init__(self, pool, url, message):
        self.url = url
        PoolError.__init__(self, pool, message)

    def __reduce__(self):
        # Pickling support: keep the URL, drop the unpicklable pool.
        return self.__class__, (None, self.url, None)
34 |
35 |
class SSLError(HTTPError):
    """Raised when SSL certificate fails in an HTTPS connection."""
39 |
40 |
class ProxyError(HTTPError):
    """Raised when the connection to a proxy fails."""
44 |
45 |
class DecodeError(HTTPError):
    """Raised when automatic decoding based on Content-Type fails."""
49 |
50 |
class ProtocolError(HTTPError):
    """Raised when something unexpected happens mid-request/response."""
54 |
55 |
#: Renamed to ProtocolError but aliased for backwards compatibility.
#: NOTE: within this module the name shadows Python 3's builtin
#: ConnectionError.
ConnectionError = ProtocolError
58 |
59 |
60 | ## Leaf Exceptions
61 |
class MaxRetryError(RequestError):
    """Raised when the maximum number of retries is exceeded.

    :param pool: The connection pool
    :type pool: :class:`~urllib3.connectionpool.HTTPConnectionPool`
    :param string url: The requested Url
    :param exceptions.Exception reason: The underlying error

    """

    def __init__(self, pool, url, reason=None):
        self.reason = reason
        detail = "Max retries exceeded with url: %s (Caused by %r)" % (
            url, reason)
        RequestError.__init__(self, pool, url, detail)
79 |
80 |
class HostChangedError(RequestError):
    """Raised when an existing pool gets a request for a foreign host."""

    def __init__(self, pool, url, retries=3):
        RequestError.__init__(
            self, pool, url, "Tried to open a foreign host with url: %s" % url)
        self.retries = retries
88 |
89 |
class TimeoutStateError(HTTPError):
    """Raised when passing an invalid state to a timeout."""
93 |
94 |
class TimeoutError(HTTPError):
    """ Raised when a socket timeout error occurs.

    Catching this error will catch both :exc:`ReadTimeoutErrors
    <ReadTimeoutError>` and :exc:`ConnectTimeoutErrors <ConnectTimeoutError>`.
    """
    pass
102 |
103 |
class ReadTimeoutError(TimeoutError, RequestError):
    """Raised when a socket timeout occurs while receiving data from a server."""
107 |
108 |
# This timeout error does not have a URL attached and needs to inherit from the
# base HTTPError
class ConnectTimeoutError(TimeoutError):
    """Raised when a socket timeout occurs while connecting to a server."""
114 |
115 |
class EmptyPoolError(PoolError):
    """Raised when a pool runs out of connections and no more are allowed."""
119 |
120 |
class ClosedPoolError(PoolError):
    """Raised when a request enters a pool after the pool has been closed."""
124 |
125 |
class LocationValueError(ValueError, HTTPError):
    """Raised when there is something wrong with a given URL input."""
129 |
130 |
class LocationParseError(LocationValueError):
    """Raised when get_host or similar fails to parse the URL input."""

    def __init__(self, location):
        # Bypasses LocationValueError and hands the formatted message
        # straight to HTTPError, as the original did.
        HTTPError.__init__(self, "Failed to parse: %s" % location)
        self.location = location
139 |
140 |
class ResponseError(HTTPError):
    """Used as a container for an error reason supplied in a MaxRetryError."""

    GENERIC_ERROR = 'too many error responses'
    SPECIFIC_ERROR = 'too many {status_code} error responses'
145 |
146 |
class SecurityWarning(HTTPWarning):
    "Warned when performing security reducing actions"
    pass
150 |
151 |
class InsecureRequestWarning(SecurityWarning):
    """Warned when making an unverified HTTPS request."""
155 |
156 |
class SystemTimeWarning(SecurityWarning):
    """Warned when system time is suspected to be wrong."""
160 |
--------------------------------------------------------------------------------
/requests/packages/urllib3/fields.py:
--------------------------------------------------------------------------------
1 | import email.utils
2 | import mimetypes
3 |
4 | from .packages import six
5 |
6 |
def guess_content_type(filename, default='application/octet-stream'):
    """
    Guess the "Content-Type" of a file.

    :param filename:
        The filename to guess the "Content-Type" of using :mod:`mimetypes`.
    :param default:
        If no "Content-Type" can be guessed, default to `default`.
    """
    if not filename:
        return default
    guessed = mimetypes.guess_type(filename)[0]
    return guessed or default
19 |
20 |
def format_header_param(name, value):
    """
    Format and quote a single header parameter.

    Particularly useful for header parameters which might contain
    non-ASCII values, like file names. This follows RFC 2231, as
    suggested by RFC 2388 Section 4.4.

    :param name:
        The name of the parameter, a string expected to be ASCII only.
    :param value:
        The value of the parameter, provided as a unicode string.
    """
    # Fast path: plain quoting works unless the value contains characters
    # that would break the quoted form or is not pure ASCII.
    if not any(ch in value for ch in '"\\\r\n'):
        candidate = '%s="%s"' % (name, value)
        try:
            candidate.encode('ascii')
        except UnicodeEncodeError:
            pass
        else:
            return candidate
    # Fall back to the RFC 2231 extended-parameter encoding.
    if not six.PY3:  # Python 2:
        value = value.encode('utf-8')
    encoded = email.utils.encode_rfc2231(value, 'utf-8')
    return '%s*=%s' % (name, encoded)
47 |
48 |
class RequestField(object):
    """
    A data container for request body parameters.

    :param name:
        The name of this request field.
    :param data:
        The data/value body.
    :param filename:
        An optional filename of the request field.
    :param headers:
        An optional dict-like object of headers to initially use for the field.
    """
    def __init__(self, name, data, filename=None, headers=None):
        self._name = name
        self._filename = filename
        self.data = data
        self.headers = dict(headers) if headers else {}

    @classmethod
    def from_tuples(cls, fieldname, value):
        """
        A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters.

        Supports constructing :class:`~urllib3.fields.RequestField` from
        parameter of key/value strings AND key/filetuple. A filetuple is a
        (filename, data, MIME type) tuple where the MIME type is optional.
        For example::

            'foo': 'bar',
            'fakefile': ('foofile.txt', 'contents of foofile'),
            'realfile': ('barfile.txt', open('realfile').read()),
            'typedfile': ('bazfile.bin', open('bazfile').read(), 'image/jpeg'),
            'nonamefile': 'contents of nonamefile field',

        Field names and filenames must be unicode.
        """
        if not isinstance(value, tuple):
            # A bare value: no filename, content type guessed later as None.
            filename, content_type, data = None, None, value
        elif len(value) == 3:
            filename, data, content_type = value
        else:
            filename, data = value
            content_type = guess_content_type(filename)

        field = cls(fieldname, data, filename=filename)
        field.make_multipart(content_type=content_type)
        return field

    def _render_part(self, name, value):
        """
        Overridable hook to format a single header parameter; delegates to
        :func:`format_header_param`.

        :param name:
            The name of the parameter, a string expected to be ASCII only.
        :param value:
            The value of the parameter, provided as a unicode string.
        """
        return format_header_param(name, value)

    def _render_parts(self, header_parts):
        """
        Format multiple ``k="v"`` parameters and join them with ``'; '``,
        skipping parameters whose value is falsy. Useful for composite
        headers such as 'Content-Disposition'.

        :param header_parts:
            A sequence of (k, v) tuples or a :class:`dict` of (k, v) to format
            as `k1="v1"; k2="v2"; ...`.
        """
        if isinstance(header_parts, dict):
            header_parts = header_parts.items()

        rendered = [self._render_part(name, value)
                    for name, value in header_parts if value]
        return '; '.join(rendered)

    def render_headers(self):
        """
        Render this field's headers as a CRLF-separated block: the
        well-known multipart headers first, in a fixed order, then any
        remaining headers, then a blank line terminator.
        """
        lines = []

        sort_keys = ['Content-Disposition', 'Content-Type', 'Content-Location']
        for sort_key in sort_keys:
            if self.headers.get(sort_key, False):
                lines.append('%s: %s' % (sort_key, self.headers[sort_key]))

        for header_name, header_value in self.headers.items():
            if header_name not in sort_keys and header_value:
                lines.append('%s: %s' % (header_name, header_value))

        lines.append('\r\n')
        return '\r\n'.join(lines)

    def make_multipart(self, content_disposition=None, content_type=None,
                       content_location=None):
        """
        Makes this request field into a multipart request field.

        This method overrides "Content-Disposition", "Content-Type" and
        "Content-Location" headers to the request parameter.

        :param content_type:
            The 'Content-Type' of the request body.
        :param content_location:
            The 'Content-Location' of the request body.

        """
        disposition = content_disposition or 'form-data'
        disposition += '; '.join([
            '', self._render_parts(
                (('name', self._name), ('filename', self._filename))
            )
        ])
        self.headers['Content-Disposition'] = disposition
        self.headers['Content-Type'] = content_type
        self.headers['Content-Location'] = content_location
178 |
--------------------------------------------------------------------------------
/requests/packages/urllib3/filepost.py:
--------------------------------------------------------------------------------
1 | import codecs
2 |
3 | from uuid import uuid4
4 | from io import BytesIO
5 |
6 | from .packages import six
7 | from .packages.six import b
8 | from .fields import RequestField
9 |
10 | writer = codecs.lookup('utf-8')[3]
11 |
12 |
def choose_boundary():
    """
    Return a random hex string usable as a multipart boundary
    (our embarrassingly-simple replacement for mimetools.choose_boundary).
    """
    boundary = uuid4()
    return boundary.hex
18 |
19 |
def iter_field_objects(fields):
    """
    Iterate over fields.

    Supports list of (k, v) tuples and dicts, and lists of
    :class:`~urllib3.fields.RequestField`.

    """
    if isinstance(fields, dict):
        source = six.iteritems(fields)
    else:
        source = iter(fields)

    for item in source:
        if isinstance(item, RequestField):
            yield item
        else:
            # Old-style (key, value) tuples are promoted to RequestField.
            yield RequestField.from_tuples(*item)
38 |
39 |
def iter_fields(fields):
    """
    .. deprecated:: 1.6

    Iterate over fields.

    The addition of :class:`~urllib3.fields.RequestField` makes this function
    obsolete. Instead, use :func:`iter_field_objects`, which returns
    :class:`~urllib3.fields.RequestField` objects.

    Supports list of (k, v) tuples and dicts.
    """
    if isinstance(fields, dict):
        return ((name, value) for name, value in six.iteritems(fields))
    return ((name, value) for name, value in fields)
56 |
57 |
def encode_multipart_formdata(fields, boundary=None):
    """
    Encode a dictionary of ``fields`` using the multipart/form-data MIME format.

    :param fields:
        Dictionary of fields or list of (key, :class:`~urllib3.fields.RequestField`).

    :param boundary:
        If not specified, then a random boundary will be generated using
        :func:`mimetools.choose_boundary`.
    """
    if boundary is None:
        boundary = choose_boundary()

    body = BytesIO()
    for field in iter_field_objects(fields):
        # Boundary marker, then the field's rendered headers (text goes
        # through the utf-8 stream writer).
        body.write(b('--%s\r\n' % (boundary)))
        writer(body).write(field.render_headers())

        data = field.data
        if isinstance(data, int):
            data = str(data)  # Backwards compatibility

        if isinstance(data, six.text_type):
            writer(body).write(data)
        else:
            body.write(data)

        body.write(b'\r\n')

    # Closing boundary marker.
    body.write(b('--%s--\r\n' % (boundary)))

    content_type = str('multipart/form-data; boundary=%s' % boundary)
    return body.getvalue(), content_type
94 |
--------------------------------------------------------------------------------
/requests/packages/urllib3/packages/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 |
3 | from . import ssl_match_hostname
4 |
5 |
--------------------------------------------------------------------------------
/requests/packages/urllib3/packages/ordered_dict.py:
--------------------------------------------------------------------------------
1 | # Backport of OrderedDict() class that runs on Python 2.4, 2.5, 2.6, 2.7 and pypy.
2 | # Passes Python2.7's test suite and incorporates all the latest updates.
3 | # Copyright 2009 Raymond Hettinger, released under the MIT License.
4 | # http://code.activestate.com/recipes/576693/
5 | try:
6 | from thread import get_ident as _get_ident
7 | except ImportError:
8 | from dummy_thread import get_ident as _get_ident
9 |
10 | try:
11 | from _abcoll import KeysView, ValuesView, ItemsView
12 | except ImportError:
13 | pass
14 |
15 |
16 | class OrderedDict(dict):
17 | 'Dictionary that remembers insertion order'
18 | # An inherited dict maps keys to values.
19 | # The inherited dict provides __getitem__, __len__, __contains__, and get.
20 | # The remaining methods are order-aware.
21 | # Big-O running times for all methods are the same as for regular dictionaries.
22 |
23 | # The internal self.__map dictionary maps keys to links in a doubly linked list.
24 | # The circular doubly linked list starts and ends with a sentinel element.
25 | # The sentinel element never gets deleted (this simplifies the algorithm).
26 | # Each link is stored as a list of length three: [PREV, NEXT, KEY].
27 |
    def __init__(self, *args, **kwds):
        '''Initialize an ordered dictionary. Signature is the same as for
        regular dictionaries, but keyword arguments are not recommended
        because their insertion order is arbitrary.

        '''
        if len(args) > 1:
            raise TypeError('expected at most 1 arguments, got %d' % len(args))
        try:
            # If __init__ runs twice, keep the existing linked list intact.
            self.__root
        except AttributeError:
            # Circular doubly linked list: the sentinel is its own
            # predecessor and successor while the dict is empty.
            self.__root = root = []  # sentinel node
            root[:] = [root, root, None]
            self.__map = {}
        # __update is an alias bound later in the class body (outside this
        # view) -- presumably MutableMapping.update; TODO confirm.
        self.__update(*args, **kwds)
43 |
    def __setitem__(self, key, value, dict_setitem=dict.__setitem__):
        'od.__setitem__(i, y) <==> od[i]=y'
        # Setting a new item creates a new link which goes at the end of the linked
        # list, and the inherited dictionary is updated with the new key/value pair.
        if key not in self:
            root = self.__root
            last = root[0]
            # Splice the new link in before the sentinel: it becomes the
            # successor of the old tail and the predecessor of root.
            last[1] = root[0] = self.__map[key] = [last, root, key]
        dict_setitem(self, key, value)
53 |
    def __delitem__(self, key, dict_delitem=dict.__delitem__):
        'od.__delitem__(y) <==> del od[y]'
        # Deleting an existing item uses self.__map to find the link which is
        # then removed by updating the links in the predecessor and successor nodes.
        dict_delitem(self, key)
        link_prev, link_next, key = self.__map.pop(key)
        # Bypass the removed link so its neighbours point at each other.
        link_prev[1] = link_next
        link_next[0] = link_prev
62 |
63 | def __iter__(self):
64 | 'od.__iter__() <==> iter(od)'
65 | root = self.__root
66 | curr = root[1]
67 | while curr is not root:
68 | yield curr[2]
69 | curr = curr[1]
70 |
71 | def __reversed__(self):
72 | 'od.__reversed__() <==> reversed(od)'
73 | root = self.__root
74 | curr = root[0]
75 | while curr is not root:
76 | yield curr[2]
77 | curr = curr[0]
78 |
79 | def clear(self):
80 | 'od.clear() -> None. Remove all items from od.'
81 | try:
82 | for node in self.__map.itervalues():
83 | del node[:]
84 | root = self.__root
85 | root[:] = [root, root, None]
86 | self.__map.clear()
87 | except AttributeError:
88 | pass
89 | dict.clear(self)
90 |
91 | def popitem(self, last=True):
92 | '''od.popitem() -> (k, v), return and remove a (key, value) pair.
93 | Pairs are returned in LIFO order if last is true or FIFO order if false.
94 |
95 | '''
96 | if not self:
97 | raise KeyError('dictionary is empty')
98 | root = self.__root
99 | if last:
100 | link = root[0]
101 | link_prev = link[0]
102 | link_prev[1] = root
103 | root[0] = link_prev
104 | else:
105 | link = root[1]
106 | link_next = link[1]
107 | root[1] = link_next
108 | link_next[0] = root
109 | key = link[2]
110 | del self.__map[key]
111 | value = dict.pop(self, key)
112 | return key, value
113 |
114 | # -- the following methods do not depend on the internal structure --
115 |
116 | def keys(self):
117 | 'od.keys() -> list of keys in od'
118 | return list(self)
119 |
120 | def values(self):
121 | 'od.values() -> list of values in od'
122 | return [self[key] for key in self]
123 |
124 | def items(self):
125 | 'od.items() -> list of (key, value) pairs in od'
126 | return [(key, self[key]) for key in self]
127 |
128 | def iterkeys(self):
129 | 'od.iterkeys() -> an iterator over the keys in od'
130 | return iter(self)
131 |
132 | def itervalues(self):
133 | 'od.itervalues -> an iterator over the values in od'
134 | for k in self:
135 | yield self[k]
136 |
137 | def iteritems(self):
138 | 'od.iteritems -> an iterator over the (key, value) items in od'
139 | for k in self:
140 | yield (k, self[k])
141 |
142 | def update(*args, **kwds):
143 | '''od.update(E, **F) -> None. Update od from dict/iterable E and F.
144 |
145 | If E is a dict instance, does: for k in E: od[k] = E[k]
146 | If E has a .keys() method, does: for k in E.keys(): od[k] = E[k]
147 | Or if E is an iterable of items, does: for k, v in E: od[k] = v
148 | In either case, this is followed by: for k, v in F.items(): od[k] = v
149 |
150 | '''
151 | if len(args) > 2:
152 | raise TypeError('update() takes at most 2 positional '
153 | 'arguments (%d given)' % (len(args),))
154 | elif not args:
155 | raise TypeError('update() takes at least 1 argument (0 given)')
156 | self = args[0]
157 | # Make progressively weaker assumptions about "other"
158 | other = ()
159 | if len(args) == 2:
160 | other = args[1]
161 | if isinstance(other, dict):
162 | for key in other:
163 | self[key] = other[key]
164 | elif hasattr(other, 'keys'):
165 | for key in other.keys():
166 | self[key] = other[key]
167 | else:
168 | for key, value in other:
169 | self[key] = value
170 | for key, value in kwds.items():
171 | self[key] = value
172 |
173 | __update = update # let subclasses override update without breaking __init__
174 |
175 | __marker = object()
176 |
177 | def pop(self, key, default=__marker):
178 | '''od.pop(k[,d]) -> v, remove specified key and return the corresponding value.
179 | If key is not found, d is returned if given, otherwise KeyError is raised.
180 |
181 | '''
182 | if key in self:
183 | result = self[key]
184 | del self[key]
185 | return result
186 | if default is self.__marker:
187 | raise KeyError(key)
188 | return default
189 |
190 | def setdefault(self, key, default=None):
191 | 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
192 | if key in self:
193 | return self[key]
194 | self[key] = default
195 | return default
196 |
197 | def __repr__(self, _repr_running={}):
198 | 'od.__repr__() <==> repr(od)'
199 | call_key = id(self), _get_ident()
200 | if call_key in _repr_running:
201 | return '...'
202 | _repr_running[call_key] = 1
203 | try:
204 | if not self:
205 | return '%s()' % (self.__class__.__name__,)
206 | return '%s(%r)' % (self.__class__.__name__, self.items())
207 | finally:
208 | del _repr_running[call_key]
209 |
210 | def __reduce__(self):
211 | 'Return state information for pickling'
212 | items = [[k, self[k]] for k in self]
213 | inst_dict = vars(self).copy()
214 | for k in vars(OrderedDict()):
215 | inst_dict.pop(k, None)
216 | if inst_dict:
217 | return (self.__class__, (items,), inst_dict)
218 | return self.__class__, (items,)
219 |
220 | def copy(self):
221 | 'od.copy() -> a shallow copy of od'
222 | return self.__class__(self)
223 |
224 | @classmethod
225 | def fromkeys(cls, iterable, value=None):
226 | '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
227 | and values equal to v (which defaults to None).
228 |
229 | '''
230 | d = cls()
231 | for key in iterable:
232 | d[key] = value
233 | return d
234 |
235 | def __eq__(self, other):
236 | '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
237 | while comparison to a regular mapping is order-insensitive.
238 |
239 | '''
240 | if isinstance(other, OrderedDict):
241 | return len(self)==len(other) and self.items() == other.items()
242 | return dict.__eq__(self, other)
243 |
244 | def __ne__(self, other):
245 | return not self == other
246 |
247 | # -- the following methods are only used in Python 2.7 --
248 |
249 | def viewkeys(self):
250 | "od.viewkeys() -> a set-like object providing a view on od's keys"
251 | return KeysView(self)
252 |
253 | def viewvalues(self):
254 | "od.viewvalues() -> an object providing a view on od's values"
255 | return ValuesView(self)
256 |
257 | def viewitems(self):
258 | "od.viewitems() -> a set-like object providing a view on od's items"
259 | return ItemsView(self)
260 |
--------------------------------------------------------------------------------
/requests/packages/urllib3/packages/ssl_match_hostname/__init__.py:
--------------------------------------------------------------------------------
1 | try:
2 | # Python 3.2+
3 | from ssl import CertificateError, match_hostname
4 | except ImportError:
5 | try:
6 | # Backport of the function from a pypi module
7 | from backports.ssl_match_hostname import CertificateError, match_hostname
8 | except ImportError:
9 | # Our vendored copy
10 | from ._implementation import CertificateError, match_hostname
11 |
12 | # Not needed, but documenting what we provide.
13 | __all__ = ('CertificateError', 'match_hostname')
14 |
--------------------------------------------------------------------------------
/requests/packages/urllib3/packages/ssl_match_hostname/_implementation.py:
--------------------------------------------------------------------------------
1 | """The match_hostname() function from Python 3.3.3, essential when using SSL."""
2 |
3 | # Note: This file is under the PSF license as the code comes from the python
4 | # stdlib. http://docs.python.org/3/license.html
5 |
6 | import re
7 |
8 | __version__ = '3.4.0.2'
9 |
class CertificateError(ValueError):
    """Raised by match_hostname() when a certificate does not match the
    expected hostname (or when no usable name fields are present)."""
    pass
12 |
13 |
14 | def _dnsname_match(dn, hostname, max_wildcards=1):
15 | """Matching according to RFC 6125, section 6.4.3
16 |
17 | http://tools.ietf.org/html/rfc6125#section-6.4.3
18 | """
19 | pats = []
20 | if not dn:
21 | return False
22 |
23 | # Ported from python3-syntax:
24 | # leftmost, *remainder = dn.split(r'.')
25 | parts = dn.split(r'.')
26 | leftmost = parts[0]
27 | remainder = parts[1:]
28 |
29 | wildcards = leftmost.count('*')
30 | if wildcards > max_wildcards:
31 | # Issue #17980: avoid denials of service by refusing more
32 | # than one wildcard per fragment. A survey of established
33 | # policy among SSL implementations showed it to be a
34 | # reasonable choice.
35 | raise CertificateError(
36 | "too many wildcards in certificate DNS name: " + repr(dn))
37 |
38 | # speed up common case w/o wildcards
39 | if not wildcards:
40 | return dn.lower() == hostname.lower()
41 |
42 | # RFC 6125, section 6.4.3, subitem 1.
43 | # The client SHOULD NOT attempt to match a presented identifier in which
44 | # the wildcard character comprises a label other than the left-most label.
45 | if leftmost == '*':
46 | # When '*' is a fragment by itself, it matches a non-empty dotless
47 | # fragment.
48 | pats.append('[^.]+')
49 | elif leftmost.startswith('xn--') or hostname.startswith('xn--'):
50 | # RFC 6125, section 6.4.3, subitem 3.
51 | # The client SHOULD NOT attempt to match a presented identifier
52 | # where the wildcard character is embedded within an A-label or
53 | # U-label of an internationalized domain name.
54 | pats.append(re.escape(leftmost))
55 | else:
56 | # Otherwise, '*' matches any dotless string, e.g. www*
57 | pats.append(re.escape(leftmost).replace(r'\*', '[^.]*'))
58 |
59 | # add the remaining fragments, ignore any wildcards
60 | for frag in remainder:
61 | pats.append(re.escape(frag))
62 |
63 | pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
64 | return pat.match(hostname)
65 |
66 |
def match_hostname(cert, hostname):
    """Verify that *cert* (in decoded format as returned by
    SSLSocket.getpeercert()) matches the *hostname*.  RFC 2818 and RFC 6125
    rules are followed, but IP addresses are not accepted for *hostname*.

    CertificateError is raised on failure. On success, the function
    returns nothing.
    """
    if not cert:
        raise ValueError("empty or no certificate")

    dnsnames = []
    for key, value in cert.get('subjectAltName', ()):
        if key == 'DNS':
            if _dnsname_match(value, hostname):
                return
            dnsnames.append(value)

    if not dnsnames:
        # The subject commonName is consulted only when subjectAltName
        # carries no dNSName entries at all.
        for sub in cert.get('subject', ()):
            for key, value in sub:
                # XXX according to RFC 2818, the most specific Common Name
                # must be used.
                if key == 'commonName':
                    if _dnsname_match(value, hostname):
                        return
                    dnsnames.append(value)

    # Nothing matched — raise with whatever candidate names we collected.
    if len(dnsnames) > 1:
        raise CertificateError("hostname %r "
            "doesn't match either of %s"
            % (hostname, ', '.join(map(repr, dnsnames))))
    elif len(dnsnames) == 1:
        raise CertificateError("hostname %r "
            "doesn't match %r"
            % (hostname, dnsnames[0]))
    else:
        raise CertificateError("no appropriate commonName or "
            "subjectAltName fields were found")
106 |
--------------------------------------------------------------------------------
/requests/packages/urllib3/request.py:
--------------------------------------------------------------------------------
1 | try:
2 | from urllib.parse import urlencode
3 | except ImportError:
4 | from urllib import urlencode
5 |
6 | from .filepost import encode_multipart_formdata
7 |
8 |
9 | __all__ = ['RequestMethods']
10 |
11 |
class RequestMethods(object):
    """
    Convenience mixin for classes who implement a :meth:`urlopen` method, such
    as :class:`~urllib3.connectionpool.HTTPConnectionPool` and
    :class:`~urllib3.poolmanager.PoolManager`.

    Provides behavior for making common types of HTTP request methods and
    decides which type of request field encoding to use.

    Specifically,

    :meth:`.request_encode_url` is for sending requests whose fields are
    encoded in the URL (such as GET, HEAD, DELETE).

    :meth:`.request_encode_body` is for sending requests whose fields are
    encoded in the *body* of the request using multipart or www-form-urlencoded
    (such as for POST, PUT, PATCH).

    :meth:`.request` is for making any kind of request, it will look up the
    appropriate encoding format and use one of the above two methods to make
    the request.

    Initializer parameters:

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.
    """

    # Methods whose fields belong in the query string rather than the body.
    _encode_url_methods = set(['DELETE', 'GET', 'HEAD', 'OPTIONS'])

    def __init__(self, headers=None):
        self.headers = headers or {}

    def urlopen(self, method, url, body=None, headers=None,
                encode_multipart=True, multipart_boundary=None,
                **kw):  # Abstract
        # Bug fix: the original raised the ``NotImplemented`` singleton, which
        # is a comparison sentinel and not callable — attempting it produced a
        # confusing TypeError. ``NotImplementedError`` is the intended
        # exception for an abstract method.
        raise NotImplementedError("Classes extending RequestMethods must implement "
                                  "their own ``urlopen`` method.")

    def request(self, method, url, fields=None, headers=None, **urlopen_kw):
        """
        Make a request using :meth:`urlopen` with the appropriate encoding of
        ``fields`` based on the ``method`` used.

        This is a convenience method that requires the least amount of manual
        effort. It can be used in most situations, while still having the
        option to drop down to more specific methods when necessary, such as
        :meth:`request_encode_url`, :meth:`request_encode_body`,
        or even the lowest level :meth:`urlopen`.
        """
        method = method.upper()

        if method in self._encode_url_methods:
            return self.request_encode_url(method, url, fields=fields,
                                           headers=headers,
                                           **urlopen_kw)
        else:
            return self.request_encode_body(method, url, fields=fields,
                                            headers=headers,
                                            **urlopen_kw)

    def request_encode_url(self, method, url, fields=None, **urlopen_kw):
        """
        Make a request using :meth:`urlopen` with the ``fields`` encoded in
        the url. This is useful for request methods like GET, HEAD, DELETE, etc.
        """
        if fields:
            url += '?' + urlencode(fields)
        return self.urlopen(method, url, **urlopen_kw)

    def request_encode_body(self, method, url, fields=None, headers=None,
                            encode_multipart=True, multipart_boundary=None,
                            **urlopen_kw):
        """
        Make a request using :meth:`urlopen` with the ``fields`` encoded in
        the body. This is useful for request methods like POST, PUT, PATCH, etc.

        When ``encode_multipart=True`` (default), then
        :meth:`urllib3.filepost.encode_multipart_formdata` is used to encode
        the payload with the appropriate content type. Otherwise
        :meth:`urllib.urlencode` is used with the
        'application/x-www-form-urlencoded' content type.

        Multipart encoding must be used when posting files, and it's reasonably
        safe to use it in other times too. However, it may break request
        signing, such as with OAuth.

        Supports an optional ``fields`` parameter of key/value strings AND
        key/filetuple. A filetuple is a (filename, data, MIME type) tuple where
        the MIME type is optional. For example::

            fields = {
                'foo': 'bar',
                'fakefile': ('foofile.txt', 'contents of foofile'),
                'realfile': ('barfile.txt', open('realfile').read()),
                'typedfile': ('bazfile.bin', open('bazfile').read(),
                              'image/jpeg'),
                'nonamefile': 'contents of nonamefile field',
            }

        When uploading a file, providing a filename (the first parameter of the
        tuple) is optional but recommended to best mimick behavior of browsers.

        Note that if ``headers`` are supplied, the 'Content-Type' header will
        be overwritten because it depends on the dynamic random boundary string
        which is used to compose the body of the request. The random boundary
        string can be explicitly set with the ``multipart_boundary`` parameter.
        """
        if headers is None:
            headers = self.headers

        extra_kw = {'headers': {}}

        if fields:
            # ``fields`` and ``body`` are mutually exclusive ways to supply a
            # payload; accepting both silently would drop one of them.
            if 'body' in urlopen_kw:
                raise TypeError('request got values for both \'fields\' and \'body\', can only specify one.')

            if encode_multipart:
                body, content_type = encode_multipart_formdata(fields, boundary=multipart_boundary)
            else:
                body, content_type = urlencode(fields), 'application/x-www-form-urlencoded'

            extra_kw['body'] = body
            extra_kw['headers'] = {'Content-Type': content_type}

        # Caller-supplied headers and urlopen kwargs win over the defaults
        # computed above (except that fields always set the Content-Type).
        extra_kw['headers'].update(headers)
        extra_kw.update(urlopen_kw)

        return self.urlopen(method, url, **extra_kw)
142 |
--------------------------------------------------------------------------------
/requests/packages/urllib3/util/__init__.py:
--------------------------------------------------------------------------------
1 | # For backwards compatibility, provide imports that used to be here.
2 | from .connection import is_connection_dropped
3 | from .request import make_headers
4 | from .response import is_fp_closed
5 | from .ssl_ import (
6 | SSLContext,
7 | HAS_SNI,
8 | assert_fingerprint,
9 | resolve_cert_reqs,
10 | resolve_ssl_version,
11 | ssl_wrap_socket,
12 | )
13 | from .timeout import (
14 | current_time,
15 | Timeout,
16 | )
17 |
18 | from .retry import Retry
19 | from .url import (
20 | get_host,
21 | parse_url,
22 | split_first,
23 | Url,
24 | )
25 |
--------------------------------------------------------------------------------
/requests/packages/urllib3/util/connection.py:
--------------------------------------------------------------------------------
1 | import socket
2 | try:
3 | from select import poll, POLLIN
4 | except ImportError: # `poll` doesn't exist on OSX and other platforms
5 | poll = False
6 | try:
7 | from select import select
8 | except ImportError: # `select` doesn't exist on AppEngine.
9 | select = False
10 |
11 |
def is_connection_dropped(conn):  # Platform-specific
    """
    Returns True if the connection is dropped and should be closed.

    :param conn:
        :class:`httplib.HTTPConnection` object.

    Note: For platforms like AppEngine, this will always return ``False`` to
    let the platform handle connection recycling transparently for us.
    """
    sock = getattr(conn, 'sock', False)
    if sock is False:  # Platform-specific: AppEngine exposes no socket.
        return False
    if sock is None:  # httplib already closed this connection itself.
        return True

    if poll:
        # Preferred path: a zero-timeout poll() tells us whether the peer
        # hung up, or left data buffered (either way the connection is
        # unusable for a fresh request).
        watcher = poll()
        watcher.register(sock, POLLIN)
        for fd, _flags in watcher.poll(0.0):
            if fd == sock.fileno():
                return True
        return None

    if not select:  # Platform-specific: AppEngine has neither poll nor select.
        return False

    try:
        # Readability with nothing outstanding means EOF or stale data.
        return select([sock], [], [], 0.0)[0]
    except socket.error:
        return True
45 |
# This function is copied from socket.py in the Python 2.7 standard
# library test suite. Added to its signature is only `socket_options`.
def create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
                      source_address=None, socket_options=None):
    """Connect to *address* and return the socket object.

    Convenience function.  Connect to *address* (a 2-tuple ``(host,
    port)``) and return the socket object.  Passing the optional
    *timeout* parameter will set the timeout on the socket instance
    before attempting to connect.  If no *timeout* is supplied, the
    global default timeout setting returned by :func:`getdefaulttimeout`
    is used.  If *source_address* is set it must be a tuple of (host, port)
    for the socket to bind as a source address before making the connection.
    An host of '' or port 0 tells the OS to use the default.
    """

    host, port = address
    err = None
    # Try each address family/socket type getaddrinfo offers (IPv4 and
    # IPv6) until one connects; remember the last error for the failure case.
    for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
        af, socktype, proto, canonname, sa = res
        sock = None
        try:
            sock = socket.socket(af, socktype, proto)

            # If provided, set socket level options before connecting.
            # This is the only addition urllib3 makes to this function.
            _set_socket_options(sock, socket_options)

            if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                sock.settimeout(timeout)
            if source_address:
                sock.bind(source_address)
            sock.connect(sa)
            return sock

        except socket.error as _:
            err = _
            # Close the half-constructed socket before trying the next
            # candidate so file descriptors are not leaked.
            if sock is not None:
                sock.close()

    if err is not None:
        raise err
    else:
        raise socket.error("getaddrinfo returns an empty list")
90 |
91 |
92 | def _set_socket_options(sock, options):
93 | if options is None:
94 | return
95 |
96 | for opt in options:
97 | sock.setsockopt(*opt)
98 |
--------------------------------------------------------------------------------
/requests/packages/urllib3/util/request.py:
--------------------------------------------------------------------------------
1 | from base64 import b64encode
2 |
3 | from ..packages.six import b
4 |
5 | ACCEPT_ENCODING = 'gzip,deflate'
6 |
7 |
def make_headers(keep_alive=None, accept_encoding=None, user_agent=None,
                 basic_auth=None, proxy_basic_auth=None, disable_cache=None):
    """
    Shortcuts for generating request headers.

    :param keep_alive:
        If ``True``, adds 'connection: keep-alive' header.

    :param accept_encoding:
        Can be a boolean, list, or string.
        ``True`` translates to 'gzip,deflate'.
        List will get joined by comma.
        String will be used as provided.

    :param user_agent:
        String representing the user-agent you want, such as
        "python-urllib3/0.6"

    :param basic_auth:
        Colon-separated username:password string for 'authorization: basic ...'
        auth header.

    :param proxy_basic_auth:
        Colon-separated username:password string for 'proxy-authorization: basic ...'
        auth header.

    :param disable_cache:
        If ``True``, adds 'cache-control: no-cache' header.

    Example::

        >>> make_headers(keep_alive=True, user_agent="Batman/1.0")
        {'connection': 'keep-alive', 'user-agent': 'Batman/1.0'}
        >>> make_headers(accept_encoding=True)
        {'accept-encoding': 'gzip,deflate'}
    """
    headers = {}

    if accept_encoding:
        # Normalize: lists are comma-joined, strings pass through, and any
        # other truthy value (e.g. True) selects the default encodings.
        if isinstance(accept_encoding, list):
            accept_encoding = ','.join(accept_encoding)
        elif not isinstance(accept_encoding, str):
            accept_encoding = ACCEPT_ENCODING
        headers['accept-encoding'] = accept_encoding

    if user_agent:
        headers['user-agent'] = user_agent

    if keep_alive:
        headers['connection'] = 'keep-alive'

    if basic_auth:
        headers['authorization'] = ('Basic ' +
                                    b64encode(b(basic_auth)).decode('utf-8'))

    if proxy_basic_auth:
        headers['proxy-authorization'] = ('Basic ' +
                                          b64encode(b(proxy_basic_auth)).decode('utf-8'))

    if disable_cache:
        headers['cache-control'] = 'no-cache'

    return headers
72 |
--------------------------------------------------------------------------------
/requests/packages/urllib3/util/response.py:
--------------------------------------------------------------------------------
def is_fp_closed(obj):
    """
    Checks whether a given file-like object is closed.

    :param obj:
        The file-like object to check.

    :raises ValueError:
        If the object exposes neither a ``closed`` attribute nor an ``fp``
        attribute, so its state cannot be determined.
    """
    # Honest file objects advertise their state through ``closed``.
    if hasattr(obj, 'closed'):
        return obj.closed

    # Containers such as httplib's HTTPResponse drop their inner ``fp``
    # once the wrapped stream is exhausted, so a missing fp means closed.
    if hasattr(obj, 'fp'):
        return obj.fp is None

    raise ValueError("Unable to determine whether fp is closed.")
23 |
--------------------------------------------------------------------------------
/requests/packages/urllib3/util/ssl_.py:
--------------------------------------------------------------------------------
1 | from binascii import hexlify, unhexlify
2 | from hashlib import md5, sha1
3 |
4 | from ..exceptions import SSLError
5 |
6 |
7 | SSLContext = None
8 | HAS_SNI = False
9 | create_default_context = None
10 |
11 | import errno
12 | import ssl
13 |
14 | try: # Test for SSL features
15 | from ssl import wrap_socket, CERT_NONE, PROTOCOL_SSLv23
16 | from ssl import HAS_SNI # Has SNI?
17 | except ImportError:
18 | pass
19 |
20 |
21 | try:
22 | from ssl import OP_NO_SSLv2, OP_NO_SSLv3, OP_NO_COMPRESSION
23 | except ImportError:
24 | OP_NO_SSLv2, OP_NO_SSLv3 = 0x1000000, 0x2000000
25 | OP_NO_COMPRESSION = 0x20000
26 |
27 | try:
28 | from ssl import _DEFAULT_CIPHERS
29 | except ImportError:
30 | _DEFAULT_CIPHERS = (
31 | 'ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES:ECDH+HIGH:'
32 | 'DH+HIGH:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+HIGH:RSA+3DES:ECDH+RC4:'
33 | 'DH+RC4:RSA+RC4:!aNULL:!eNULL:!MD5'
34 | )
35 |
try:
    from ssl import SSLContext  # Modern SSL?
except ImportError:
    import sys

    # Minimal stand-in for ssl.SSLContext on Python 2.6/3.1: it only
    # records the configuration and replays it as keyword arguments to
    # ssl.wrap_socket() when the socket is finally wrapped.
    class SSLContext(object):  # Platform-specific: Python 2 & 3.1
        # ssl.wrap_socket() grew the ``ciphers`` argument in Python 2.7.
        supports_set_ciphers = sys.version_info >= (2, 7)

        def __init__(self, protocol_version):
            self.protocol = protocol_version
            # Use default values from a real SSLContext
            self.check_hostname = False
            self.verify_mode = ssl.CERT_NONE
            self.ca_certs = None
            self.options = 0
            self.certfile = None
            self.keyfile = None
            self.ciphers = None

        def load_cert_chain(self, certfile, keyfile):
            # Stored for later; applied in wrap_socket().
            self.certfile = certfile
            self.keyfile = keyfile

        def load_verify_locations(self, location):
            # Stored for later; applied in wrap_socket().
            self.ca_certs = location

        def set_ciphers(self, cipher_suite):
            if not self.supports_set_ciphers:
                raise TypeError(
                    'Your version of Python does not support setting '
                    'a custom cipher suite. Please upgrade to Python '
                    '2.7, 3.2, or later if you need this functionality.'
                )
            self.ciphers = cipher_suite

        def wrap_socket(self, socket, server_hostname=None):
            # server_hostname is accepted for interface parity but cannot
            # be honored: this fallback exists only where SNI is missing.
            kwargs = {
                'keyfile': self.keyfile,
                'certfile': self.certfile,
                'ca_certs': self.ca_certs,
                'cert_reqs': self.verify_mode,
                'ssl_version': self.protocol,
            }
            if self.supports_set_ciphers:  # Platform-specific: Python 2.7+
                return wrap_socket(socket, ciphers=self.ciphers, **kwargs)
            else:  # Platform-specific: Python 2.6
                return wrap_socket(socket, **kwargs)
83 |
84 |
def assert_fingerprint(cert, fingerprint):
    """
    Checks if given fingerprint matches the supplied certificate.

    :param cert:
        Certificate as bytes object.
    :param fingerprint:
        Fingerprint as string of hexdigits, can be interspersed by colons.
    """
    # The digest length (in bytes) identifies which hash produced the
    # fingerprint: 16 -> MD5, 20 -> SHA-1.
    hashfunc_map = {16: md5, 20: sha1}

    normalized = fingerprint.replace(':', '').lower()
    digest_length, odd = divmod(len(normalized), 2)
    if odd or digest_length not in hashfunc_map:
        raise SSLError('Fingerprint is of invalid length.')

    # We need encode() here for py32; works on py2 and p33.
    expected = unhexlify(normalized.encode())

    actual = hashfunc_map[digest_length](cert).digest()

    if actual != expected:
        raise SSLError('Fingerprints did not match. Expected "{0}", got "{1}".'
                       .format(hexlify(expected), hexlify(actual)))
119 |
120 |
def resolve_cert_reqs(candidate):
    """
    Resolves the argument to a numeric constant, which can be passed to
    the wrap_socket function/method from the ssl module.
    Defaults to :data:`ssl.CERT_NONE`.
    If given a string it is assumed to be the name of the constant in the
    :mod:`ssl` module or its abbrevation.
    (So you can specify `REQUIRED` instead of `CERT_REQUIRED`.
    If it's neither `None` nor a string we assume it is already the numeric
    constant which can directly be passed to wrap_socket.
    """
    if candidate is None:
        return CERT_NONE

    # Anything that is not a string is assumed to already be numeric.
    if not isinstance(candidate, str):
        return candidate

    # Try the exact constant name first, then the 'CERT_'-less shorthand.
    resolved = getattr(ssl, candidate, None)
    if resolved is not None:
        return resolved
    return getattr(ssl, 'CERT_' + candidate)
142 |
143 |
def resolve_ssl_version(candidate):
    """
    like resolve_cert_reqs

    Resolves *candidate* to an ssl ``PROTOCOL_*`` constant: ``None`` maps
    to :data:`PROTOCOL_SSLv23`, a string is looked up in :mod:`ssl`
    (with or without the ``PROTOCOL_`` prefix), anything else is returned
    unchanged.
    """
    if candidate is None:
        return PROTOCOL_SSLv23

    if not isinstance(candidate, str):
        return candidate

    resolved = getattr(ssl, candidate, None)
    if resolved is not None:
        return resolved
    return getattr(ssl, 'PROTOCOL_' + candidate)
158 |
159 |
def create_urllib3_context(ssl_version=None, cert_reqs=ssl.CERT_REQUIRED,
                           options=None, ciphers=None):
    """All arguments have the same meaning as ``ssl_wrap_socket``.

    By default, this function does a lot of the same work that
    ``ssl.create_default_context`` does on Python 3.4+. It:

    - Disables SSLv2, SSLv3, and compression
    - Sets a restricted set of server ciphers

    If you wish to enable SSLv3, you can do::

        from urllib3.util import ssl_
        context = ssl_.create_urllib3_context()
        context.options &= ~ssl_.OP_NO_SSLv3

    You can do the same to enable compression (substituting ``COMPRESSION``
    for ``SSLv3`` in the last line above).

    :param ssl_version:
        The desired protocol version to use. This will default to
        PROTOCOL_SSLv23 which will negotiate the highest protocol that both
        the server and your installation of OpenSSL support.
    :param cert_reqs:
        Whether to require the certificate verification. This defaults to
        ``ssl.CERT_REQUIRED``.
    :param options:
        Specific OpenSSL options. These default to ``ssl.OP_NO_SSLv2``,
        ``ssl.OP_NO_SSLv3``, ``ssl.OP_NO_COMPRESSION``.
    :param ciphers:
        Which cipher suites to allow the server to select.
    :returns:
        Constructed SSLContext object with specified options
    :rtype: SSLContext
    """
    context = SSLContext(ssl_version or ssl.PROTOCOL_SSLv23)

    if options is None:
        # Mirror ssl.create_default_context: drop SSLv2 (trivially broken),
        # SSLv3 (several known problems), and TLS compression (CRIME attack,
        # issue #309).
        options = OP_NO_SSLv2 | OP_NO_SSLv3 | OP_NO_COMPRESSION

    context.options |= options

    # The Python 2.6 shim context cannot change cipher suites at all.
    if getattr(context, 'supports_set_ciphers', True):  # Platform-specific: Python 2.6
        context.set_ciphers(ciphers or _DEFAULT_CIPHERS)

    context.verify_mode = cert_reqs
    if getattr(context, 'check_hostname', None) is not None:  # Platform-specific: Python 3.2
        # Hostname checking only makes sense when certificates are verified.
        context.check_hostname = (context.verify_mode == ssl.CERT_REQUIRED)
    return context
216 |
217 |
def ssl_wrap_socket(sock, keyfile=None, certfile=None, cert_reqs=None,
                    ca_certs=None, server_hostname=None,
                    ssl_version=None, ciphers=None, ssl_context=None):
    """
    All arguments except for server_hostname and ssl_context have the same
    meaning as they do when using :func:`ssl.wrap_socket`.

    :param server_hostname:
        When SNI is supported, the expected hostname of the certificate
    :param ssl_context:
        A pre-made :class:`SSLContext` object. If none is provided, one will
        be created using :func:`create_urllib3_context`.
    :param ciphers:
        A string of ciphers we wish the client to support. This is not
        supported on Python 2.6 as the ssl module does not support it.
    """
    if ssl_context is None:
        context = create_urllib3_context(ssl_version, cert_reqs,
                                         ciphers=ciphers)
    else:
        context = ssl_context

    if ca_certs:
        try:
            context.load_verify_locations(ca_certs)
        except IOError as e:  # Platform-specific: Python 2.6, 2.7, 3.2
            raise SSLError(e)
        except OSError as e:  # Platform-specific: Python 3.3 and beyond
            # Py33's FileNotFoundError subclasses OSError; only a genuinely
            # missing CA file is an SSL configuration error — anything else
            # propagates unchanged.
            if e.errno == errno.ENOENT:
                raise SSLError(e)
            raise

    if certfile:
        context.load_cert_chain(certfile, keyfile)

    if HAS_SNI:  # Platform-specific: OpenSSL with enabled SNI
        return context.wrap_socket(sock, server_hostname=server_hostname)
    return context.wrap_socket(sock)
255 |
--------------------------------------------------------------------------------
/requests/packages/urllib3/util/url.py:
--------------------------------------------------------------------------------
1 | from collections import namedtuple
2 |
3 | from ..exceptions import LocationParseError
4 |
5 |
url_attrs = ['scheme', 'auth', 'host', 'port', 'path', 'query', 'fragment']


class Url(namedtuple('Url', url_attrs)):
    """
    Datastructure for representing an HTTP URL. Used as a return value for
    :func:`parse_url`.
    """
    # BUG FIX: this was ``slots = ()`` — a plain, useless class attribute.
    # ``__slots__ = ()`` is required to actually suppress the per-instance
    # ``__dict__`` on this namedtuple subclass.
    __slots__ = ()

    def __new__(cls, scheme=None, auth=None, host=None, port=None, path=None,
                query=None, fragment=None):
        # Default every field to None so partial urls can be constructed.
        return super(Url, cls).__new__(cls, scheme, auth, host, port, path,
                                       query, fragment)

    @property
    def hostname(self):
        """For backwards-compatibility with urlparse. We're nice like that."""
        return self.host

    @property
    def request_uri(self):
        """Absolute path including the query string."""
        uri = self.path or '/'

        if self.query is not None:
            uri += '?' + self.query

        return uri

    @property
    def netloc(self):
        """Network location including host and port"""
        if self.port:
            return '%s:%d' % (self.host, self.port)
        return self.host

    @property
    def url(self):
        """
        Convert self into a url

        This function should more or less round-trip with :func:`.parse_url`. The
        returned url may not be exactly the same as the url inputted to
        :func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls
        with a blank port will have : removed).

        Example: ::

            >>> U = parse_url('http://google.com/mail/')
            >>> U.url
            'http://google.com/mail/'
            >>> Url('http', 'username:password', 'host.com', 80,
            ... '/path', 'query', 'fragment').url
            'http://username:password@host.com:80/path?query#fragment'
        """
        scheme, auth, host, port, path, query, fragment = self
        url = ''

        # We use "is not None" because we want things to happen with empty
        # strings (or a 0 port).
        if scheme is not None:
            url += scheme + '://'
        if auth is not None:
            url += auth + '@'
        if host is not None:
            url += host
        if port is not None:
            url += ':' + str(port)
        if path is not None:
            url += path
        if query is not None:
            url += '?' + query
        if fragment is not None:
            url += '#' + fragment

        return url

    def __str__(self):
        return self.url
85 |
def split_first(s, delims):
    """
    Given a string and an iterable of delimiters, split on the first found
    delimiter. Return two split parts and the matched delimiter.

    If not found, then the first part is the full input string.

    Example::

        >>> split_first('foo/bar?baz', '?/=')
        ('foo', 'bar?baz', '/')
        >>> split_first('foo/bar?baz', '123')
        ('foo/bar?baz', '', None)

    Scales linearly with number of delims. Not ideal for large number of delims.
    """
    # Track the earliest match as a (position, delimiter) pair.
    best = None
    for delim in delims:
        pos = s.find(delim)
        if pos >= 0 and (best is None or pos < best[0]):
            best = (pos, delim)

    if best is None:
        # No delimiter present at all.
        return s, '', None

    pos, delim = best
    return s[:pos], s[pos + 1:], delim
117 |
118 |
def parse_url(url):
    """
    Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
    performed to parse incomplete urls. Fields not provided will be None.

    Partly backwards-compatible with :mod:`urlparse`.

    Example::

        >>> parse_url('http://google.com/mail/')
        Url(scheme='http', host='google.com', port=None, path='/mail/', ...)
        >>> parse_url('google.com:80')
        Url(scheme=None, host='google.com', port=80, path=None, ...)
        >>> parse_url('/foo?bar')
        Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)
    """

    # While this code has overlap with stdlib's urlparse, it is much
    # simplified for our needs and less annoying.
    # Additionally, this implementations does silly things to be optimal
    # on CPython.

    if not url:
        # Empty
        return Url()

    scheme = None
    auth = None
    host = None
    port = None
    path = None
    fragment = None
    query = None

    # Scheme
    if '://' in url:
        scheme, url = url.split('://', 1)

    # Find the earliest Authority Terminator
    # (http://tools.ietf.org/html/rfc3986#section-3.2)
    # After this, `url` holds just the authority component and `path_`
    # holds everything that followed the first '/', '?' or '#'.
    url, path_, delim = split_first(url, ['/', '?', '#'])

    if delim:
        # Reassemble the path, re-attaching the delimiter that split_first
        # consumed, so the fragment/query extraction below still works.
        path = delim + path_

    # Auth
    if '@' in url:
        # Last '@' denotes end of auth part
        auth, url = url.rsplit('@', 1)

    # IPv6
    if url and url[0] == '[':
        # Bracketed IPv6 literal: keep the brackets on the host so the ':'
        # characters inside are not mistaken for a port separator below.
        host, url = url.split(']', 1)
        host += ']'

    # Port
    if ':' in url:
        # For a bracketed IPv6 host, `url` is now just ':port' (or ':'),
        # so `_host` is empty and the host set above is preserved.
        _host, port = url.split(':', 1)

        if not host:
            host = _host

        if port:
            # If given, ports must be integers.
            if not port.isdigit():
                raise LocationParseError(url)
            port = int(port)
        else:
            # Blank ports are cool, too. (rfc3986#section-3.2.3)
            port = None

    elif not host and url:
        host = url

    if not path:
        # Authority-only url: query and fragment stay None.
        return Url(scheme, auth, host, port, path, query, fragment)

    # Fragment
    if '#' in path:
        path, fragment = path.split('#', 1)

    # Query
    if '?' in path:
        path, query = path.split('?', 1)

    return Url(scheme, auth, host, port, path, query, fragment)
206 |
def get_host(url):
    """
    Deprecated. Use :func:`.parse_url` instead.
    """
    parsed = parse_url(url)
    scheme = parsed.scheme or 'http'
    return scheme, parsed.hostname, parsed.port
213 |
--------------------------------------------------------------------------------
/requests/status_codes.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from .structures import LookupDict
4 |
# Map each HTTP status code to the tuple of attribute aliases that will be
# exposed on the ``codes`` lookup object below.
_codes = {

    # Informational.
    100: ('continue',),
    101: ('switching_protocols',),
    102: ('processing',),
    103: ('checkpoint',),
    122: ('uri_too_long', 'request_uri_too_long'),
    200: ('ok', 'okay', 'all_ok', 'all_okay', 'all_good', '\\o/', '✓'),
    201: ('created',),
    202: ('accepted',),
    203: ('non_authoritative_info', 'non_authoritative_information'),
    204: ('no_content',),
    205: ('reset_content', 'reset'),
    206: ('partial_content', 'partial'),
    207: ('multi_status', 'multiple_status', 'multi_stati', 'multiple_stati'),
    208: ('already_reported',),
    226: ('im_used',),

    # Redirection.
    300: ('multiple_choices',),
    301: ('moved_permanently', 'moved', '\\o-'),
    302: ('found',),
    303: ('see_other', 'other'),
    304: ('not_modified',),
    305: ('use_proxy',),
    306: ('switch_proxy',),
    307: ('temporary_redirect', 'temporary_moved', 'temporary'),
    308: ('permanent_redirect',
          'resume_incomplete', 'resume',),  # These 2 to be removed in 3.0

    # Client Error.
    400: ('bad_request', 'bad'),
    401: ('unauthorized',),
    402: ('payment_required', 'payment'),
    403: ('forbidden',),
    404: ('not_found', '-o-'),
    405: ('method_not_allowed', 'not_allowed'),
    406: ('not_acceptable',),
    407: ('proxy_authentication_required', 'proxy_auth', 'proxy_authentication'),
    408: ('request_timeout', 'timeout'),
    409: ('conflict',),
    410: ('gone',),
    411: ('length_required',),
    412: ('precondition_failed', 'precondition'),
    413: ('request_entity_too_large',),
    414: ('request_uri_too_large',),
    415: ('unsupported_media_type', 'unsupported_media', 'media_type'),
    416: ('requested_range_not_satisfiable', 'requested_range', 'range_not_satisfiable'),
    417: ('expectation_failed',),
    418: ('im_a_teapot', 'teapot', 'i_am_a_teapot'),
    422: ('unprocessable_entity', 'unprocessable'),
    423: ('locked',),
    424: ('failed_dependency', 'dependency'),
    425: ('unordered_collection', 'unordered'),
    426: ('upgrade_required', 'upgrade'),
    428: ('precondition_required', 'precondition'),
    429: ('too_many_requests', 'too_many'),
    431: ('header_fields_too_large', 'fields_too_large'),
    444: ('no_response', 'none'),
    449: ('retry_with', 'retry'),
    450: ('blocked_by_windows_parental_controls', 'parental_controls'),
    451: ('unavailable_for_legal_reasons', 'legal_reasons'),
    499: ('client_closed_request',),

    # Server Error.
    500: ('internal_server_error', 'server_error', '/o\\', '✗'),
    501: ('not_implemented',),
    502: ('bad_gateway',),
    503: ('service_unavailable', 'unavailable'),
    504: ('gateway_timeout',),
    505: ('http_version_not_supported', 'http_version'),
    506: ('variant_also_negotiates',),
    507: ('insufficient_storage',),
    509: ('bandwidth_limit_exceeded', 'bandwidth'),
    510: ('not_extended',),
}

# Lookup object so callers can write e.g. ``codes.ok`` or ``codes.NOT_FOUND``.
codes = LookupDict(name='status_codes')

# No mutation happens during iteration, so the defensive list() copy around
# _codes.items() was unnecessary and has been dropped.
for (code, titles) in _codes.items():
    for title in titles:
        setattr(codes, title, code)
        if not title.startswith('\\'):
            # Also expose an UPPERCASE alias, except for the backslash
            # "emoticon" names where uppercasing is meaningless.
            setattr(codes, title.upper(), code)
90 |
--------------------------------------------------------------------------------
/requests/structures.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | """
4 | requests.structures
5 | ~~~~~~~~~~~~~~~~~~~
6 |
7 | Data structures that power Requests.
8 |
9 | """
10 |
import collections

try:  # Python 3.3+: ABCs live in collections.abc (removed from collections in 3.10)
    from collections.abc import Mapping, MutableMapping
except ImportError:  # Python 2
    from collections import Mapping, MutableMapping
12 |
13 |
class CaseInsensitiveDict(MutableMapping):
    """
    A case-insensitive ``dict``-like object.

    Implements all methods and operations of
    ``MutableMapping`` as well as dict's ``copy``. Also
    provides ``lower_items``.

    All keys are expected to be strings. The structure remembers the
    case of the last key to be set, and ``iter(instance)``,
    ``keys()``, ``items()``, ``iterkeys()``, and ``iteritems()``
    will contain case-sensitive keys. However, querying and contains
    testing is case insensitive::

        cid = CaseInsensitiveDict()
        cid['Accept'] = 'application/json'
        cid['aCCEPT'] == 'application/json'  # True
        list(cid) == ['Accept']  # True

    For example, ``headers['content-encoding']`` will return the
    value of a ``'Content-Encoding'`` response header, regardless
    of how the header name was originally stored.

    If the constructor, ``.update``, or equality comparison
    operations are given keys that have equal ``.lower()``s, the
    behavior is undefined.

    """
    # NOTE: the base class was ``collections.MutableMapping``, which is
    # deprecated since Python 3.3 and removed in 3.10; the compat import at
    # the top of the module supplies ``MutableMapping``/``Mapping`` on both
    # Python 2 and 3.

    def __init__(self, data=None, **kwargs):
        # Maps lowercased key -> (original-cased key, value).
        self._store = dict()
        if data is None:
            data = {}
        self.update(data, **kwargs)

    def __setitem__(self, key, value):
        # Use the lowercased key for lookups, but store the actual
        # key alongside the value.
        self._store[key.lower()] = (key, value)

    def __getitem__(self, key):
        return self._store[key.lower()][1]

    def __delitem__(self, key):
        del self._store[key.lower()]

    def __iter__(self):
        # Yield the original-cased keys, not the lowercased store keys.
        return (casedkey for casedkey, mappedvalue in self._store.values())

    def __len__(self):
        return len(self._store)

    def lower_items(self):
        """Like iteritems(), but with all lowercase keys."""
        return (
            (lowerkey, keyval[1])
            for (lowerkey, keyval)
            in self._store.items()
        )

    def __eq__(self, other):
        if isinstance(other, Mapping):
            other = CaseInsensitiveDict(other)
        else:
            return NotImplemented
        # Compare insensitively
        return dict(self.lower_items()) == dict(other.lower_items())

    # Copy is required
    def copy(self):
        # _store.values() is an iterable of (cased key, value) pairs, which
        # the constructor accepts, so the original key casing is preserved.
        return CaseInsensitiveDict(self._store.values())

    def __repr__(self):
        return str(dict(self.items()))
87 |
class LookupDict(dict):
    """Dictionary lookup object.

    Values are stored as instance attributes (via ``setattr``); item access
    reads from the instance ``__dict__`` and returns ``None`` for missing
    keys instead of raising ``KeyError``.
    """

    def __init__(self, name=None):
        self.name = name
        super(LookupDict, self).__init__()

    def __repr__(self):
        # BUG FIX: the format string was empty (``'' % (self.name)`` raises
        # ``TypeError: not all arguments converted``); restore the
        # informative repr used by requests.
        return '<lookup \'%s\'>' % (self.name)

    def __getitem__(self, key):
        # We allow fall-through here, so values default to None

        return self.__dict__.get(key, None)

    def get(self, key, default=None):
        return self.__dict__.get(key, default)
105 |
--------------------------------------------------------------------------------
/wytorproxy.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 | # file: torproxy.py
4 | # mail: ringzero@0x557.org
5 |
6 | import sys
7 | import random
8 | import requests
9 | import json
10 |
# Tunable runtime configuration.
retrycnt = 3 # number of times a failed request is retried
timeout = 10 # per-request timeout, in seconds

# Proxy settings: leave empty to connect directly; user:password auth is supported.
proxies = {
    # "http": "http://user:pass@10.10.1.10:3128/",
    # "https": "http://10.10.1.10:1080",
    "http": "http://127.0.0.1:8118", # TOR onion router
}
result = {}
22 |
# Pick a random User-Agent header value.
def random_useragent():
    """Return one browser User-Agent string chosen uniformly at random."""
    ua_pool = [
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; AcooBrowser; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Acoo Browser; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.04506)",
        "Mozilla/4.0 (compatible; MSIE 7.0; AOL 9.5; AOLBuild 4337.35; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
        "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)",
        "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)",
        "Mozilla/4.0 (compatible; MSIE 7.0b; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.2; .NET CLR 3.0.04506.30)",
        "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)",
        "Mozilla/5.0 (X11; U; Linux; en-US) AppleWebKit/527+ (KHTML, like Gecko, Safari/419.3) Arora/0.6",
        "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2pre) Gecko/20070215 K-Ninja/2.1.1",
        "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9) Gecko/20080705 Firefox/3.0 Kapiko/3.0",
        "Mozilla/5.0 (X11; Linux i686; U;) Gecko/20070322 Kazehakase/0.4.5",
        "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.8) Gecko Fedora/1.9.0.8-1.fc10 Kazehakase/0.5.6",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20",
        "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
    ]
    return random.choice(ua_pool)
44 |
45 | # 随机X-Forwarded-For,动态IP
46 | def random_x_forwarded_for():
47 | return '%d.%d.%d.%d' % (random.randint(1, 254),random.randint(1, 254),random.randint(1, 254),random.randint(1, 254))
48 |
def http_request_get(url, body_content_workflow=0):
    """GET *url* through the configured proxy, retrying on failure.

    :param url: target URL; also sent as the Referer header.
    :param body_content_workflow: when 1, request with ``stream=True`` so the
        response body is not downloaded until it is read.
    :returns: a ``requests.Response`` on success, or ``False`` after
        ``retrycnt`` consecutive failures.
    """
    trycnt = 0
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20',
        'Referer' : url,
        'Cookie': 'whoami=wytorproxy',
    }
    while True:
        try:
            # The two branches differed only in stream=; fold them into one
            # call. (Was `except Exception, e` — Python-2-only syntax with
            # an unused binding.)
            return requests.get(url, stream=(body_content_workflow == 1),
                                headers=headers, timeout=timeout,
                                proxies=proxies)
        except Exception:  # deliberately broad: any network error -> retry
            trycnt += 1
            if trycnt >= retrycnt:
                # Retries exhausted.
                return False
71 |
def http_request_post(url, payload, body_content_workflow=0):
    """POST *payload* to *url* through the configured proxy, retrying on failure.

    payload = {'key1': 'value1', 'key2': 'value2'}

    :param url: target URL; also sent as the Referer header.
    :param payload: form data dict passed as ``data=``.
    :param body_content_workflow: when 1, request with ``stream=True`` so the
        response body is not downloaded until it is read.
    :returns: a ``requests.Response`` on success, or ``False`` after
        ``retrycnt`` consecutive failures.
    """
    trycnt = 0
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.20 (KHTML, like Gecko) Chrome/19.0.1036.7 Safari/535.20',
        'Referer' : url,
        'Cookie': 'whoami=wytorproxy',
    }
    while True:
        try:
            # The two branches differed only in stream=; fold them into one
            # call. (Was `except Exception, e` — Python-2-only syntax with
            # an unused binding.)
            return requests.post(url, data=payload, headers=headers,
                                 stream=(body_content_workflow == 1),
                                 timeout=timeout, proxies=proxies)
        except Exception:  # deliberately broad: any network error -> retry
            trycnt += 1
            if trycnt >= retrycnt:
                # Retries exhausted.
                return False
97 |
def check_website_status(url):
    """Probe *url* through the proxy and report whether it is reachable.

    :returns: dict with ``'status'`` (bool) and ``'info'`` (str).
    """
    result = http_request_get(url, body_content_workflow=1)
    # http_request_get returns the literal False on failure; test identity
    # rather than equality so a Response object is never coerced/compared.
    if result is False:
        # Server down or the request options were wrong.
        return {'status': False, 'info': 'server down or options error'}
    elif result.status_code != requests.codes.ok:
        # Non-200 response.
        result_info = 'status_code: %s != 200' % result.status_code
        return {'status': False, 'info': result_info}
    else:
        # Healthy response.
        return {'status': True, 'info': 'response ok'}
110 |
# Demo entry point: guard it so importing this module no longer fires a
# network request, and use the function form of print (valid on both
# Python 2 and 3 for a single argument).
if __name__ == '__main__':
    print(http_request_get('http://ip.taobao.com/service/getIpInfo2.php?ip=myip').text)
112 |
113 |
114 |
--------------------------------------------------------------------------------