├── .gitignore ├── LICENSE ├── Proxies ├── Proxies.py └── __init__.py ├── README.md ├── README.rst └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Yorking Yuan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Proxies/Proxies.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import requests, math 3 | import gevent 4 | from gevent.queue import Queue 5 | from gevent import monkey; monkey.patch_all() 6 | from pyquery import PyQuery 7 | 8 | class Proxies(): 9 | def __init__(self): 10 | self.domestic_gn_url = 'http://www.kuaidaili.com/free/inha/{0}/' 11 | self.domestic_pt_url = 'http://www.kuaidaili.com/free/intr/{0}/' 12 | self.abroad_gn_url = 'http://www.kuaidaili.com/free/outha/{0}/' 13 | self.abroad_pt_url = 'http://www.kuaidaili.com/free/outtr/{0}/' 14 | self.result_arr = [] 15 | self.s = requests 16 | self.headers = { 17 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36', 18 | 'Referer': 'http://www.kuaidaili.com/' 19 | } 20 | 21 | def fetch_urls(self, queue, quantity): 22 | while not queue.empty(): 23 | url = queue.get() 24 | html = self.s.get(url, headers=self.headers).text 25 | pq = PyQuery(html) 26 | size = pq.find('tbody tr').size() 27 | for index in range(size): 28 | item = pq.find('tbody tr').eq(index) 29 | ip = item.find('td').eq(0).text() 30 | port = item.find('td').eq(1).text() 31 | _type = item.find('td').eq(3).text() 32 | self.result_arr.append({ 33 | str(_type).lower(): '{0}://{1}:{2}'.format(str(_type).lower(), ip, port) 34 | }) 35 | if len(self.result_arr) >= quantity: 36 | break 37 | 38 | def get_proxies(self, quantity, type): 39 | ''' 40 | quantity: 数量 41 | type: 类型 42 | 1.国内高匿代理 43 | 2.国内普通代理 44 | 3.国外高匿代理 45 | 4.国外普通代理 46 | ''' 47 | url_queue = Queue() 48 | need_pages = int(math.ceil(quantity/15)) 49 | # 判断类型 50 | if type == 1: 51 | # 国内高匿代理 52 | base_url = self.domestic_gn_url 53 | elif type == 2: 54 | # 国内普通代理 55 | base_url = self.domestic_pt_url 56 | elif type == 3: 57 | # 国外高匿代理 58 | base_url = self.abroad_gn_url 59 | elif type == 4: 60 | # 国外普通代理 61 | base_url = self.abroad_pt_url 62 | # 获取所需要的页面URL 63 | for index in range(need_pages): 64 | url = base_url.format(index+1) 65 | url_queue.put(url) 66 | # 处理所有URL,开启2个协程 67 | gevent_list = [] 68 | gevent_list.append( 69 | gevent.spawn(self.fetch_urls, url_queue, quantity) 70 | ) 71 | gevent.joinall(gevent_list) 72 | 73 | def get_result(self): 74 | return self.result_arr 75 | 76 | if __name__ == '__main__': 77 | p = Proxies() 78 | p.get_proxies(17, 1) 79 | result = p.get_result() 80 | print(result) -------------------------------------------------------------------------------- /Proxies/__init__.py: -------------------------------------------------------------------------------- 1 | from .Proxies import Proxies 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![PyPI](https://img.shields.io/pypi/v/proxies.svg?maxAge=2592000)](https://pypi.python.org/pypi/proxies) 2 | [![PyPID](https://img.shields.io/pypi/dm/proxies.svg?maxAge=2592000)](https://pypi.python.org/pypi/proxies) 3 | [![PyPIV](https://img.shields.io/pypi/pyversions/proxies.svg?maxAge=2592000)](https://pypi.python.org/pypi/proxies) 4 | [![PyPIL](https://img.shields.io/pypi/l/proxies.svg?maxAge=2592000)](https://pypi.python.org/pypi/proxies) 5 | [![PyPIS](https://img.shields.io/pypi/status/proxies.svg?maxAge=2592000)](https://pypi.python.org/pypi/proxies) 6 | 7 | ## 获取最新的HTTP代理 8 | #### 安装 9 | ``` 10 | pip install proxies 11 | ``` 12 | 13 | > OR: 14 | 15 | ``` 16 | git clone https://github.com/MyFaith/proxies 17 | python setup.py install 18 | ``` 19 | 20 | #### 使用 21 | ``` 22 | from Proxies import Proxies 23 | 24 | p = Proxies() 25 | p.get_proxies(20, 1) 26 | # quantity: 数量 27 | # type: 类型 (1.国内高匿代理 2.国内普通代理 3.国外高匿代 4.国外普通代理) 28 | result = p.get_result() 29 | print(result) 30 | ``` 31 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | |PyPI| |PyPID| |PyPIV| |PyPIL| |PyPIS| 2 | 3 | Get latest http proxies 4 | ------------------ 5 | 6 | Install 7 | ^^^^ 8 | 9 | :: 10 | 11 | pip install proxies 12 | 13 | OR: 14 | 15 | :: 16 | 17 | git clone https://github.com/MyFaith/proxies 18 | python setup.py install 19 | 20 | Usage 21 | ^^^^ 22 | 23 | :: 24 | 25 | from Proxies import Proxies 26 | 27 | p = Proxies() 28 | p.get_proxies(20, 1) 29 | result = p.get_result() 30 | print(result) 31 | 32 | .. |PyPI| image:: https://img.shields.io/pypi/v/proxies.svg?maxAge=2592000 33 | :target: https://pypi.python.org/pypi/proxies 34 | .. |PyPID| image:: https://img.shields.io/pypi/dm/proxies.svg?maxAge=2592000 35 | :target: https://pypi.python.org/pypi/proxies 36 | .. |PyPIV| image:: https://img.shields.io/pypi/pyversions/proxies.svg?maxAge=2592000 37 | :target: https://pypi.python.org/pypi/proxies 38 | .. |PyPIL| image:: https://img.shields.io/pypi/l/proxies.svg?maxAge=2592000 39 | :target: https://pypi.python.org/pypi/proxies 40 | .. |PyPIS| image:: https://img.shields.io/pypi/status/proxies.svg?maxAge=2592000 41 | :target: https://pypi.python.org/pypi/proxies 42 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name='proxies', 5 | version='1.6', 6 | keywords=('proxy', 'proxies', 'requests'), 7 | description='Get latest http proxies.', 8 | # long_description=read('README.rst'), 9 | author='MyFaith', 10 | author_email='faith0725@outlook.com', 11 | url='https://github.com/MyFaith/proxies', 12 | license='MIT', 13 | packages=find_packages(), 14 | classifiers=[ 15 | 'Development Status :: 3 - Alpha', 16 | 17 | 'Intended Audience :: Developers', 18 | 'Topic :: Software Development :: Libraries :: Python Modules', 19 | 20 | 'License :: OSI Approved :: MIT License', 21 | 22 | 'Programming Language :: Python :: 3', 23 | 'Programming Language :: Python :: 3.3', 24 | 'Programming Language :: Python :: 3.4', 25 | 'Programming Language :: Python :: 3.5', 26 | ], 27 | install_requires=['requests', 'pyquery', 'gevent'] 28 | ) 29 | --------------------------------------------------------------------------------