├── .gitignore ├── LICENSE ├── README.md └── proxy.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | 55 | # Sphinx documentation 56 | docs/_build/ 57 | 58 | # PyBuilder 59 | target/ 60 | 61 | #Ipython Notebook 62 | .ipynb_checkpoints 63 | .ropeproject/ 64 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 ~\(≧▽≦)/~ 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be 
included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # proxy_pool 2 | python 代理池 3 | 4 | ### TODO 5 | 6 | 慢慢实现 7 | -------------------------------------------------------------------------------- /proxy.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # vim:fenc=utf-8 4 | # 5 | # Copyright © 2016 mapleray 6 | # 7 | # Distributed under terms of the MIT license. 

import base64
import requests
from multiprocessing.dummy import Pool as ThreadPool


class Proxy(object):
    """Scrape proxies from proxy.peuland.com and keep the ones that respond.

    ``proxies`` collects every candidate parsed from the listing and
    ``checked_proxies`` collects the candidates that answered a probe
    request. Both lists are only ever appended to, so calling
    ``get_proxy()`` more than once on the same instance accumulates entries.
    """

    def __init__(self, max_page=1):
        """Prepare the HTTP session used to query the proxy listing.

        :param max_page: number of listing pages to fetch, starting at page 1.
        """
        self.max_page = max_page
        self.proxies = []
        self.checked_proxies = []
        self.s = requests.Session()
        self.headers = {
            'Host': 'proxy.peuland.com',
            'Origin': 'https://proxy.peuland.com',
            'Referer': 'https://proxy.peuland.com/proxy_list_by_category.htm',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2692.0 Safari/537.36',
            'X-Requested-With': 'XMLHttpRequest',
            'Cookie': 'peuland_id=35fefe23fedc52da9283ac5ed131cbab;PHPSESSID=pkm7b65es5ojb8oerc7a9i0q31; peuland_md5=ca1f57155f5638ade3c28a900fbdbd55;w_h=800; w_w=1280; w_cd=24; w_a_h=773; w_a_w=1280; php_id=1792520643'
        }
        self.s.headers.update(self.headers)
        self.url = 'https://proxy.peuland.com/proxy/search_proxy.php'

    @staticmethod
    def _decode(value):
        """Return the base64-encoded *value* decoded to text.

        ``base64.b64decode`` yields bytes on Python 3; decoding once here
        lets callers concatenate the result with ordinary strings.
        """
        return base64.b64decode(value).decode('utf-8')

    def _parse_proxy(self):
        """Fetch pages 1..max_page of the listing and fill ``self.proxies``.

        Each kept entry is stored as ``{proxy_type: 'ip:port'}``. Entries
        whose decoded ``time_downloadspeed`` is 7 or less are skipped.
        """
        for page in range(1, self.max_page + 1):
            payload = {
                'type': '',
                'country_code': 'CN',
                'is_clusters': '',
                'is_https': '',
                'level_type': 'anonymous',
                'search_type': 'all',
                'page': str(page),
            }
            r = self.s.post(self.url, data=payload)
            data = r.json()['data']
            for line in data:
                rate = int(self._decode(line['time_downloadspeed']))
                if rate <= 7:
                    continue
                proxy_type = self._decode(line['type'])
                ip = self._decode(line['ip'])
                port = self._decode(line['port'])
                self.proxies.append({proxy_type: ip + ':' + port})

    def _check_proxy(self, proxy, anonymous=False):
        """Probe *proxy* and record it in ``self.checked_proxies`` if usable.

        :param proxy: a ``{proxy_type: 'ip:port'}`` dict, passed to
            ``requests.get`` as its ``proxies`` argument.
        :param anonymous: when true, keep the proxy only if the origin
            reported by httpbin equals the proxy's own IP.

        Any exception raised while probing is printed and the proxy is
        dropped, so one dead proxy does not abort the whole check run.
        """
        try:
            r = requests.get('http://httpbin.org/ip', proxies=proxy, timeout=10)
            data = r.json()
            # High-anonymity check: the origin seen by the remote end must be
            # the proxy's address, otherwise the proxy is rejected.
            if anonymous:
                proxy_ip = next(iter(proxy.values())).split(':')[0]
                if data['origin'] != proxy_ip:
                    return
            # Append exactly once; the previous code appended unconditionally
            # after the anonymity branch, which defeated the filter and
            # duplicated matching proxies.
            self.checked_proxies.append(proxy)
        except Exception as e:
            print(e)

    def get_proxy(self):
        """Scrape the listing, probe every candidate and return the survivors.

        :return: ``self.checked_proxies``, the list of proxies that answered
            the probe request.
        """
        self._parse_proxy()
        # Probing is network-bound, so a small thread pool overlaps the waits.
        pool = ThreadPool(8)
        pool.map(self._check_proxy, self.proxies)
        pool.close()
        pool.join()
        return self.checked_proxies


if __name__ == '__main__':
    ins = Proxy()
    print(ins.get_proxy())