├── .gitignore ├── LICENSE ├── README.md ├── api-sdk ├── README.md ├── README.rst ├── examples │ ├── __init__.py │ ├── use_dps.py │ ├── use_kps.py │ ├── use_ops.py │ ├── use_tool.py │ └── use_tps.py ├── kdl │ ├── __init__.py │ ├── auth.py │ ├── client.py │ ├── endpoint.py │ ├── exceptions.py │ └── utils.py ├── setup.py └── test.py └── examples ├── README.md ├── api ├── py2_urllib2.py ├── py3_requests.py └── py3_urllib.py ├── http_proxy ├── phantomjs_demo.py ├── proxy_pool.py ├── py2_requests.py ├── py2_urllib2.py ├── py3_aiohttp.py ├── py3_feapder.py ├── py3_httpx.py ├── py3_playwright.py ├── py3_pyppeteer.py ├── py3_requests.py ├── py3_scrapy │ ├── scrapy.cfg │ └── tutorial │ │ ├── __init__.py │ │ ├── items.py │ │ ├── middlewares.py │ │ ├── myextend.py │ │ ├── pipelines.py │ │ ├── settings.py │ │ └── spiders │ │ ├── __init__.py │ │ └── kdl_spiders.py ├── py3_urllib.py ├── py3_websocket.py ├── py3_websocket_short.py ├── selenium_chrome_username_password.py ├── selenium_chrome_whitelist.py ├── selenium_firefox_username_password.py └── selenium_firefox_whitelist.py ├── http_proxy_tunnel ├── py2_requests.py ├── py2_urllib2.py ├── py3_aiohttp.py ├── py3_feapder.py ├── py3_httpx.py ├── py3_pyppeteer.py ├── py3_requests.py ├── py3_scrapy │ ├── scrapy.cfg │ └── tutorial │ │ ├── __init__.py │ │ ├── items.py │ │ ├── middlewares.py │ │ ├── pipelines.py │ │ ├── settings.py │ │ └── spiders │ │ ├── __init__.py │ │ └── kdl_spider.py ├── py3_socket.py ├── py3_urllib.py ├── selenium_chrome_username_password.py ├── selenium_chrome_whitelist.py ├── selenium_firefox_username_password.py └── selenium_firefox_whitelist.py └── socks_proxy ├── phantomjs_demo.py ├── py2_requests.py ├── py3_requests.py └── selenium_chrome_whitelist.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | /venv 3 | .idea 4 | *.bat 5 | *.log 6 | .secret -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2019, Kuaidaili 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 快代理API SDK - Python 2 | 通过 SDK 可快速调用 API 接口,[查看详情](https://github.com/kuaidaili/python-sdk/tree/master/api-sdk) 3 | 4 | # 快代理官方代码样例 - Python 5 | 6 | ## 调用 API 7 | * 调用 API 8 | * [urllib2](./examples/api/py2_urllib2.py) 9 | * [urllib](./examples/api/py3_urllib.py) 10 | * [requests](./examples/api/py3_requests.py) 11 | 12 | ## HTTP 代理 13 | * Python2 14 | * [urllib2](./examples/http_proxy/py2_urllib2.py) 15 | * [requests](./examples/http_proxy/py2_requests.py) 16 | * Python3 17 | * [urllib](./examples/http_proxy/py3_urllib.py) 18 | * [requests](./examples/http_proxy/py3_requests.py) 19 | * [aiohttp](./examples/http_proxy/py3_aiohttp.py) 20 | * [httpx](./examples/http_proxy/py3_httpx.py) 21 | * [websocket 长连接](./examples/http_proxy/py3_websocket.py) 22 | * [websocket 短连接](./examples/http_proxy/py3_websocket_short.py) 23 | * [scrapy](./examples/http_proxy/py3_scrapy) 24 | * [feapder](./examples/http_proxy/py3_feapder.py) 25 | * [pyppeteer](./examples/http_proxy/py3_pyppeteer.py) 26 | * Selenium 27 | * [selenium_chrome 白名单验证](./examples/http_proxy/selenium_chrome_whitelist.py) 28 | * [selenium_chrome 用户名密码验证](./examples/http_proxy/selenium_chrome_username_password.py) 29 | * [selenium_firefox 白名单验证](./examples/http_proxy/selenium_firefox_whitelist.py) 30 | * [selenium_firefox 用户名密码验证](./examples/http_proxy/selenium_firefox_username_password.py) 31 | * [selenium_phantomjs 用户名密码验证](./examples/http_proxy/phantomjs_demo.py) 32 | * ProxyPool 33 | * [ProxyPool](./examples/http_proxy/proxy_pool.py) 34 | 35 | ## HTTP 隧道 36 | 37 | * Python2 38 | * [urllib2](./examples/http_proxy_tunnel/py2_urllib2.py) 39 | * [requests](./examples/http_proxy_tunnel/py2_requests.py) 40 | * Python3 41 | * [urllib](./examples/http_proxy_tunnel/py3_urllib.py) 42 | * [requests](./examples/http_proxy_tunnel/py3_requests.py) 43 | * [aiohttp](./examples/http_proxy_tunnel/py3_aiohttp.py) 44 | * [httpx](./examples/http_proxy_tunnel/py3_httpx.py) 45 | * [socket](./examples/http_proxy_tunnel/py3_socket.py) 46 | * [scrapy](./examples/http_proxy_tunnel/py3_scrapy) 47 | * [feapder](./examples/http_proxy_tunnel/py3_feapder.py) 48 | * [pyppeteer](./examples/http_proxy_tunnel/py3_pyppeteer.py) 49 | * Selenium 50 | * [selenium_chrome 白名单验证](./examples/http_proxy_tunnel/selenium_chrome_whitelist.py) 51 | * [selenium_chrome 用户名密码验证](./examples/http_proxy_tunnel/selenium_chrome_username_password.py) 52 | * [selenium_firefox 白名单验证](./examples/http_proxy_tunnel/selenium_firefox_whitelist.py) 53 | * [selenium_firefox 用户名密码验证](./examples/http_proxy_tunnel/selenium_firefox_username_password.py) 54 | 55 | ## Socks 56 | * Python2 57 | * [requests](./examples/socks_proxy/py2_requests.py) 58 | * Python3 59 | * [requests](./examples/socks_proxy/py3_requests.py) 60 | * Selenium 61 | * [selenium_chrome 白名单验证](./examples/socks_proxy/selenium_chrome_whitelist.py) 62 | * [selenium_phantomjs 用户名密码验证](./examples/socks_proxy/phantomjs_demo.py) 63 | 64 | 65 | # 技术支持 66 | 67 | 如果您发现代码有任何问题, 请提交 `Issue`。 68 | 69 | 欢迎提交 `Pull request` 以使代码样例更加完善。 70 | 71 | 获取更多关于调用 API 和代理服务器使用的资料,请参考[快代理文档中心](https://www.kuaidaili.com/helpcenter/)。 72 | 73 | * 技术支持微信:kuaidaili 74 | * 技术支持QQ:800849628 75 | -------------------------------------------------------------------------------- /api-sdk/README.md: -------------------------------------------------------------------------------- 1 | # 简介 2 | 
快代理api SDK 3 | 4 | # 依赖环境 5 | 1. python2.7 到 python3.7 6 | 2. 从[快代理](https://www.kuaidaili.com)购买相应产品 7 | 3. [获取订单的`secret_id`和`secret_key`](https://www.kuaidaili.com/usercenter/api/secret/) 8 | 9 | # 获取安装 10 | 安装 Python SDK 前,请先获取订单对应的`secret_id`和`secret_key`,请严格保管,避免泄露。 11 | 12 | ## 通过pip安装(推荐) 13 | 您可以通过`pip`将SDK安装到您的项目中: 14 | ``` 15 | pip install kdl 16 | ``` 17 | 18 | 如果您的项目环境尚未安装`pip`,可参考: 19 | * Ubuntu/Debian安装pip:`apt-get install python-setuptools` 20 | * CentOS安装pip:`yum install python-setuptools` 21 | * MacOS安装pip: `curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py; python get-pip.py` 22 | 23 | 您还可以参考[pip官网](https://pip.pypa.io/en/stable/installing/?spm=a3c0i.o32026zh.a3.6.74134958lLSo6o)进行安装。 24 | 25 | ## 通过源码包安装 26 | 前往 [Github 代码托管地址](https://github.com/kuaidaili/python-sdk/tree/master/api-sdk) 下载最新代码,解压后 27 | 28 | ``` 29 | $ cd api-sdk 30 | $ python setup.py install 31 | ``` 32 | 33 | ## 示例 34 | 以私密代理订单使用为例 35 | ``` python 36 | # -*- coding: utf-8 -*- 37 | 38 | """私密代理使用示例 39 | 接口鉴权说明: 40 | 目前支持的鉴权方式有 "token" 和 "hmacsha1" 两种,默认使用 "token"鉴权。 41 | 所有方法均可添加关键字参数sign_type修改鉴权方式。 42 | """ 43 | 44 | import kdl 45 | 46 | auth = kdl.Auth("secret_id", "secret_key") 47 | client = kdl.Client(auth, timeout=(8, 12), max_retries=3) 48 | 49 | # 获取订单到期时间, 返回时间字符串 50 | expire_time = client.get_order_expire_time() 51 | print("expire time", expire_time) 52 | 53 | 54 | 55 | # 获取ip白名单, 返回ip列表 56 | ip_whitelist = client.get_ip_whitelist() 57 | print("ip whitelist", ip_whitelist) 58 | 59 | # 设置ip白名单,参数类型为字符串或列表或元组 60 | # 成功则返回True, 否则抛出异常 61 | client.set_ip_whitelist([]) 62 | client.set_ip_whitelist("171.113.244.40,171.113.244.41") 63 | print(client.get_ip_whitelist()) 64 | 65 | 66 | 67 | client.set_ip_whitelist(tuple()) 68 | 69 | # 提取私密代理ip, 第一个参数为提取的数量, 其他参数以关键字参数的形式传入(不需要传入signature和timestamp) 70 | # 具体有哪些参数请参考帮助中心: "https://www.kuaidaili.com/doc/api/getdps/" 71 | # 返回ip列表 72 | # 注意:若您使用的是python2, 且在终端调用,或在文件中调用且没有加 "# -*- coding: utf-8 -*-" 的话 73 | # 传入area参数时,请传入unicode类型,如 area=u'北京,上海' 74 | ips = client.get_dps(2, sign_type='hmacsha1', format='json', pt=2, area='北京,上海,广东') 75 | print("dps proxy: ", ips) 76 | 77 | 78 | # 检测私密代理有效性: 返回 ip: true/false 组成的dict 79 | ips = client.get_dps(2, sign_type='simple', format='json') 80 | valids = client.check_dps_valid(ips) 81 | print("valids: ", valids) 82 | 83 | # 获取私密代理剩余时间: 返回 ip: seconds(剩余秒数) 组成的dict 84 | ips = client.get_dps(5, format='json') 85 | seconds = client.get_dps_valid_time(ips) 86 | print("seconds: ", seconds) 87 | 88 | 89 | # 获取计数版ip余额(仅私密代理计数版) 90 | balance = client.get_ip_balance(sign_type='hmacsha1') 91 | print("balance: ", balance) 92 | 93 | # 获取代理鉴权信息 94 | # 获取指定订单访问代理IP的鉴权信息。 95 | # 鉴权信息包含用户名密码,用于请求私密代理/独享代理/隧道代理时进行身份验证。 96 | # plain_text 为1 表示明文显示用户名和密码 97 | # 具体请看:https://www.kuaidaili.com/doc/api/getproxyauthorization/ 98 | proxyauthorization = client.get_proxy_authorization(plain_text=1,sign_type='simple') 99 | print("proxyauthorization: ", proxyauthorization) 100 | ``` 101 | 您可以在examples目录下找到更详细的示例 102 | 103 | ## 参考资料 104 | 105 | * [查看API列表](https://www.kuaidaili.com/doc/api/) 106 | * [了解API鉴权](https://www.kuaidaili.com/doc/api/auth/) 107 | -------------------------------------------------------------------------------- /api-sdk/README.rst: -------------------------------------------------------------------------------- 1 | =============== 2 | 快代理api SDK 3 | =============== 4 | 5 | ========== 6 | 依赖环境 7 | ========== 8 | 9 | 1. python2.7 到 python3.7 10 | 2. 从 `快代理 `_ 购买相应产品 11 | 3. 
`获取订单的secret_id和secret_key `_ 12 | 13 | ========= 14 | 获取安装: 15 | ========= 16 | 安装 Python SDK 前,请先获取订单对应的`secret_id`和`secret_key`,请严格保管,避免泄露。 17 | 18 | 通过pip安装(推荐) 19 | =================== 20 | 您可以通过 ``pip`` 将SDK安装到您的项目中: 21 | | ``pip install kdl`` 22 | 23 | 如果您的项目环境尚未安装 ``pip`` ,可参考: 24 | * Ubuntu/Debian安装pip:``apt-get install python-setuptools`` 25 | * CentOS安装pip:``yum install python-setuptools`` 26 | * MacOS安装pip: ``curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py; python get-pip.py`` 27 | 28 | 您还可以参考 `pip官网 `_ 进行安装。 29 | 30 | 31 | 通过源码包安装 32 | ============== 33 | 前往 `Github 代码托管地址 `_ 下载最新代码,解压后: 34 | 35 | .. code-block:: console 36 | 37 | $ cd api-sdk 38 | $ python setup.py install 39 | 40 | 41 | 示例 42 | ==== 43 | 以私密代理订单使用为例: 44 | 45 | .. code-block:: python 46 | 47 | # -*- coding: utf-8 -*- 48 | 49 | """ 50 | 私密代理使用示例 51 | 接口鉴权说明: 52 | 目前支持的鉴权方式有 "token" 和 "hmacsha1" 两种,默认使用 "token"鉴权。 53 | 所有方法均可添加关键字参数sign_type修改鉴权方式。 54 | """ 55 | 56 | import kdl 57 | 58 | auth = kdl.Auth("secret_id", "secret_key") 59 | client = kdl.Client(auth) 60 | 61 | # 获取订单到期时间, 返回时间字符串 62 | expire_time = client.get_order_expire_time() 63 | print("expire time", expire_time) 64 | 65 | # 获取ip白名单, 返回ip列表 66 | ip_whitelist = client.get_ip_whitelist() 67 | print("ip whitelist", ip_whitelist) 68 | 69 | # 设置ip白名单,参数类型为字符串或列表或元组 70 | # 成功则返回True, 否则抛出异常 71 | client.set_ip_whitelist([]) 72 | client.set_ip_whitelist("127.0.0.1, 192.168.0.139") 73 | print(client.get_ip_whitelist()) 74 | client.set_ip_whitelist(tuple()) 75 | 76 | # 提取私密代理ip, 第一个参数为提取的数量, 其他参数以关键字参数的形式传入(不需要传入signature和timestamp) 77 | # 具体有哪些参数请参考帮助中心: "https://help.kuaidaili.com/api/getdps/" 78 | # 返回ip列表 79 | # 注意:若您使用的是python2, 且在终端调用,或在文件中调用且没有加 "# -*- coding: utf-8 -*-" 的话 80 | # 传入area参数时,请传入unicode类型,如 area=u'北京,上海' 81 | ips = client.get_dps(2, sign_type='hmacsha1', format='json', pt=2, area='北京,上海,广东') 82 | print("dps proxy: ", ips) 83 | 84 | 85 | # 检测私密代理有效性: 返回 ip: true/false 组成的dict 86 | ips = client.get_dps(2, sign_type='token', format='json') 87 | valids = client.check_dps_valid(ips) 88 | print("valids: ", valids) 89 | 90 | 91 | # 获取计数版ip余额(仅私密代理计数版) 92 | balance = client.get_ip_balance(sign_type='hmacsha1') 93 | print("balance: ", balance) 94 | 95 | 您可以在examples目录下找到更详细的示例 96 | 97 | 参考资料 98 | ========== 99 | 100 | * `查看API列表 `_ 101 | * `了解API鉴权 `_ -------------------------------------------------------------------------------- /api-sdk/examples/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | -------------------------------------------------------------------------------- /api-sdk/examples/use_dps.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """私密代理使用示例 4 | 接口鉴权说明: 5 | 目前支持的鉴权方式有 "simple" 和 "hmacsha1" 两种,默认使用 "simple"鉴权。 6 | 所有方法均可添加关键字参数sign_type修改鉴权方式。 7 | """ 8 | 9 | import kdl 10 | 11 | auth = kdl.Auth("secret_id", "secret_key") 12 | client = kdl.Client(auth) 13 | 14 | # 获取订单到期时间, 返回时间字符串 15 | expire_time = client.get_order_expire_time() 16 | print("expire time", expire_time) 17 | 18 | 19 | 20 | # 获取ip白名单, 返回ip列表 21 | ip_whitelist = client.get_ip_whitelist() 22 | print("ip whitelist", ip_whitelist) 23 | 24 | # 设置ip白名单,参数类型为字符串或列表或元组 25 | # 成功则返回True, 否则抛出异常 26 | client.set_ip_whitelist([]) 27 | client.set_ip_whitelist("171.113.244.40,171.113.244.41") 28 | print(client.get_ip_whitelist()) 29 | 30 | 31 | 32 | client.set_ip_whitelist(tuple()) 33 | 34 | # 提取私密代理ip, 第一个参数为提取的数量, 
其他参数以关键字参数的形式传入(不需要传入signature和timestamp)
35 | # 具体有哪些参数请参考帮助中心: "https://www.kuaidaili.com/doc/api/getdps/"
36 | # 返回ip列表
37 | # 注意:若您使用的是python2, 且在终端调用,或在文件中调用且没有加 "# -*- coding: utf-8 -*-" 的话
38 | # 传入area参数时,请传入unicode类型,如 area=u'北京,上海'
39 | ips = client.get_dps(2, sign_type='hmacsha1', format='json', pt=2, area='北京,上海,广东')
40 | print("dps proxy: ", ips)
41 | 
42 | 
43 | # 检测私密代理有效性: 返回 ip: true/false 组成的dict
44 | ips = client.get_dps(2, sign_type='token', format='json')
45 | valids = client.check_dps_valid(ips)
46 | print("valids: ", valids)
47 | 
48 | # 获取私密代理剩余时间: 返回 ip: seconds(剩余秒数) 组成的dict
49 | ips = client.get_dps(5, format='json')
50 | seconds = client.get_dps_valid_time(ips)
51 | print("seconds: ", seconds)
52 | 
53 | 
54 | # 获取计数版ip余额(仅私密代理计数版)
55 | balance = client.get_ip_balance(sign_type='hmacsha1')
56 | print("balance: ", balance)
57 | 
58 | # 获取代理鉴权信息
59 | # 获取指定订单访问代理IP的鉴权信息。
60 | # 鉴权信息包含用户名密码,用于请求私密代理/独享代理/隧道代理时进行身份验证。
61 | # plain_text 为1 表示明文显示用户名和密码
62 | # 具体请看:https://www.kuaidaili.com/doc/api/getproxyauthorization/
63 | proxyauthorization = client.get_proxy_authorization(plain_text=1, sign_type='token')
64 | print("proxyauthorization: ", proxyauthorization)
-------------------------------------------------------------------------------- /api-sdk/examples/use_kps.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | """独享代理使用示例
4 | 接口鉴权说明:
5 | 目前支持的鉴权方式有 "token" 和 "hmacsha1" 两种,默认使用 "token"鉴权。
6 | 所有方法均可添加关键字参数sign_type修改鉴权方式。
7 | """
8 | 
9 | import kdl
10 | 
11 | auth = kdl.Auth("secret_id", "secret_key")
12 | client = kdl.Client(auth)
13 | 
14 | # 获取订单到期时间, 返回时间字符串
15 | expire_time = client.get_order_expire_time()
16 | print("expire time", expire_time)
17 | 
18 | # 获取ip白名单, 返回ip列表
19 | ip_whitelist = client.get_ip_whitelist()
20 | print("ip whitelist", ip_whitelist)
21 | 
22 | # 设置ip白名单,参数类型为字符串或列表或元组
23 | # 成功则返回True, 否则抛出异常
24 | client.set_ip_whitelist([])
25 | client.set_ip_whitelist("127.0.0.1, 192.168.0.139")
26 | print(client.get_ip_whitelist())
27 | client.set_ip_whitelist(tuple())
28 | 
29 | # 提取独享代理ip, 第一个参数为提取的数量, 其他参数以关键字参数的形式传入(不需要传入signature和timestamp)
30 | # 具体有哪些参数请参考帮助中心: "https://www.kuaidaili.com/doc/api/getdps/"
31 | # 返回ip列表
32 | # 注意:若您使用的是python2, 且在终端调用,或在文件中调用且没有加 "# -*- coding: utf-8 -*-" 的话
33 | # 传入area参数时,请传入unicode类型,如 area=u'北京,上海'
34 | ips = client.get_kps(2, sign_type='hmacsha1', format='json', pt=2, area='北京,上海,广东')
35 | print("kps proxy: ", ips)
36 | 
37 | # 获取代理鉴权信息
38 | # 获取指定订单访问代理IP的鉴权信息。
39 | # 鉴权信息包含用户名密码,用于请求私密代理/独享代理/隧道代理时进行身份验证。
40 | # plain_text 为1 表示明文显示用户名和密码
41 | # 具体请看:https://www.kuaidaili.com/doc/api/getproxyauthorization/
42 | proxyauthorization = client.get_proxy_authorization(plain_text=1, sign_type='token')
43 | print("proxyauthorization: ", proxyauthorization)
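# ----------------------------------------------------------------------
# 补充示意(新增示例,非本仓库源码):提取到的代理如何配合 requests 使用,
# 写法与 examples/api/py3_requests.py 一致。username/password 为占位符,
# 需替换为订单的实际用户名密码。
# ----------------------------------------------------------------------
# import kdl, requests
# client = kdl.Client(kdl.Auth("secret_id", "secret_key"))
# ip = client.get_dps(1, format='json')[0]                    # 如 "113.120.61.166:22989"
# proxies = {"http": "http://username:password@" + ip,
#            "https": "http://username:password@" + ip}
# print(requests.get("https://dev.kdlapi.com/testproxy", proxies=proxies).status_code)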
-------------------------------------------------------------------------------- /api-sdk/examples/use_ops.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | """开放代理使用示例
4 | 接口鉴权说明:
5 | 目前支持的鉴权方式有 "token" 和 "hmacsha1" 两种,默认使用 "token"鉴权。
6 | 所有方法均可添加关键字参数sign_type修改鉴权方式。
7 | """
8 | 
9 | import kdl
10 | 
11 | auth = kdl.Auth("secret_id", "secret_key")
12 | client = kdl.Client(auth)
13 | 
14 | # 获取订单到期时间, 返回时间字符串
15 | expire_time = client.get_order_expire_time()
16 | print("expire time", expire_time)
17 | 
18 | 
19 | # 提取开放代理ip, 第一个参数为提取的数量, 其他参数以关键字参数的形式传入(不需要传入signature和timestamp)
20 | # 具体有哪些参数请参考帮助中心: "https://help.kuaidaili.com/api/getdps/"
21 | # 返回ip列表
22 | # 注意:若您使用的是python2, 且在终端调用,或在文件中调用且没有加 "# -*- coding: utf-8 -*-" 的话
23 | # 传入area参数时,请传入unicode类型,如 area=u'北京,上海'
24 | # 若您是开放代理svip订单,请传入order_level='svip', 若您是开放代理专业版订单,请传入order_level='ent'
25 | ips = client.get_proxy(4, sign_type='token', order_level='svip', format='json', pt=2, area='北京,上海,广东')
26 | print("ops proxy: ", ips)
27 | 
28 | 
29 | # 检测开放代理有效性
30 | ips = client.get_proxy(4, sign_type='token', order_level='svip', format='json', pt=2, area='北京,上海,广东')
31 | valids = client.check_ops_valid(ips)
32 | print("valids: ", valids)
-------------------------------------------------------------------------------- /api-sdk/examples/use_tool.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | from kdl.client import Client
4 | from kdl.auth import Auth
5 | 
6 | auth = Auth("secret_id", "secret_key")
7 | client = Client(auth)
8 | 
9 | # 提取User Agent 第一个参数为提取的数量, 其他参数以关键字参数的形式传入(不需要传入signature和timestamp)
10 | # 具体有哪些参数请参考帮助中心: "https://www.kuaidaili.com/doc/api/getua/"
11 | # 返回user agent列表
12 | ua = client.get_ua(10, browser="weixin")
13 | print("ua:", ua)
-------------------------------------------------------------------------------- /api-sdk/examples/use_tps.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | """隧道代理使用示例
4 | 接口鉴权说明:
5 | 目前支持的鉴权方式有 "token" 和 "hmacsha1" 两种,默认使用 "token"鉴权。
6 | 所有方法均可添加关键字参数sign_type修改鉴权方式。
7 | """
8 | 
9 | import kdl
10 | auth = kdl.Auth("secret_id", "secret_key")
11 | client = kdl.Client(auth)
12 | 
13 | expire_time = client.get_order_expire_time()
14 | print("expire time:", expire_time)
15 | 
16 | # 获取ip白名单, 返回ip列表
17 | ip_whitelist = client.get_ip_whitelist()
18 | print("ip whitelist:", ip_whitelist)
19 | 
20 | # 设置ip白名单,参数类型为字符串或列表或元组
21 | # 成功则返回True, 否则抛出异常
22 | client.set_ip_whitelist([])
23 | client.set_ip_whitelist("171.113.244.40")
24 | print(client.get_ip_whitelist())
25 | 
26 | # 显示隧道代理当前的ip
27 | ip = client.tps_current_ip()
28 | print("current_ip:", ip)
29 | 
30 | 
31 | # 改变当前隧道ip
32 | new_ip = client.change_tps_ip()
33 | print("new_ip:", new_ip)
34 | 
35 | # 获取代理鉴权信息
36 | # 获取指定订单访问代理IP的鉴权信息。
37 | # 鉴权信息包含用户名密码,用于请求私密代理/独享代理/隧道代理时进行身份验证。
38 | # plain_text 为1 表示明文显示用户名和密码
39 | # 具体请看:https://www.kuaidaili.com/doc/api/getproxyauthorization/
40 | proxyauthorization = client.get_proxy_authorization(plain_text=1, sign_type='token')
41 | print("proxyauthorization: ", proxyauthorization)
42 | 
43 | # 获取隧道代理IP
44 | # 获取订单对应的隧道代理IP。
45 | # 具体参数请查看:https://www.kuaidaili.com/doc/api/gettps/
46 | tps_list = client.get_tps(2, sign_type='hmacsha1', format='json')
47 | print(tps_list)
-------------------------------------------------------------------------------- /api-sdk/kdl/__init__.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | __version__ = "0.2.21"
4 | 
5 | from .client import Client
6 | from .auth import Auth
-------------------------------------------------------------------------------- /api-sdk/kdl/auth.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | """封装auth对象
4 | 用于保存用户secret_id、secret_key,以及计算签名
5 | """
6 | 
7 | import base64
8 | import hashlib
9 | import hmac
10 | 
11 | class Auth(object):
12 | 
"""用于保存用户secret_id、secret_key以及计算签名的对象。""" 13 | 14 | def __init__(self, secret_id, secret_key): 15 | self.secret_id = secret_id 16 | self.secret_key = secret_key 17 | 18 | @classmethod 19 | def get_string_to_sign(cls, method, endpoint, params): 20 | """ 生成签名原文字符串 """ 21 | cls.clear_req_params(params) 22 | s = method + endpoint.split('.com')[1] + '?' 23 | query_str = '&'.join("%s=%s" % (k, params[k]) for k in sorted(params)) 24 | return s + query_str 25 | 26 | @classmethod 27 | def clear_req_params(cls, params): 28 | if 'timeout' in params: 29 | del params['timeout'] 30 | if 'max_retries' in params: 31 | del params['max_retries'] 32 | 33 | def sign_str(self, raw_str, method=hashlib.sha1): 34 | """ 生成签名串 """ 35 | try: 36 | hmac_str = hmac.new(self.secret_key.encode('utf8'), raw_str.encode('utf8'), method).digest() 37 | except UnicodeDecodeError as e: 38 | hmac_str = hmac.new(self.secret_key.encode('utf8'), raw_str, method).digest() 39 | return base64.b64encode(hmac_str) 40 | 41 | -------------------------------------------------------------------------------- /api-sdk/kdl/client.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """将快代理所有api接口封装到Client 4 | """ 5 | 6 | import json 7 | import os 8 | import time 9 | import requests 10 | from requests.adapters import HTTPAdapter 11 | 12 | from kdl.endpoint import EndPoint 13 | from kdl.exceptions import KdlException, KdlNameError, KdlTypeError, KdlStatusError 14 | from kdl.utils import OpsOrderLevel 15 | 16 | 17 | SECRET_PATH = './.secret' 18 | 19 | 20 | class Client: 21 | def __init__(self, auth, timeout=None, max_retries=None): 22 | self.auth = auth 23 | self.session = requests.Session() 24 | self.timeout = timeout or (6, 8) # default (connect_timeout, read_timeout) 25 | self.max_retries = max_retries 26 | 27 | def get_order_expire_time(self, sign_type="token", timeout=None, max_retries=None): 28 | """获取订单到期时间, 强制签名验证 29 | :return 订单过期时间字符串 30 | """ 31 | 32 | endpoint = EndPoint.GetOrderExpireTime.value 33 | params = self._get_params(endpoint, sign_type=sign_type, timeout=timeout, max_retries=max_retries) 34 | res = self._get_base_res("GET", endpoint, params) 35 | 36 | if isinstance(res, dict): 37 | return res['data']['expire_time'] 38 | return res 39 | 40 | def get_proxy_authorization(self, plain_text=0, sign_type="token", timeout=None, max_retries=None): 41 | """获取指定订单访问代理IP的鉴权信息。 42 | 鉴权信息包含用户名密码,用于请求私密代理/独享代理/隧道代理时进行身份验证。 43 | :return 返回信息的字典 44 | """ 45 | endpoint = EndPoint.GetProxyAuthorization.value 46 | params = self._get_params(endpoint, plaintext=plain_text, sign_type=sign_type, timeout=timeout, max_retries=max_retries) 47 | res = self._get_base_res("GET", endpoint, params) 48 | if isinstance(res, dict): 49 | return res['data'] 50 | return res 51 | 52 | def get_ip_whitelist(self, sign_type="token", timeout=None, max_retries=None): 53 | """获取订单的ip白名单, 强制签名验证 54 | :return ip白名单列表 55 | """ 56 | endpoint = EndPoint.GetIpWhitelist.value 57 | params = self._get_params(endpoint, sign_type=sign_type, timeout=timeout, max_retries=max_retries) 58 | res = self._get_base_res("GET", endpoint, params) 59 | if isinstance(res, dict): 60 | return res['data']['ipwhitelist'] 61 | return res 62 | 63 | def set_ip_whitelist(self, iplist=None, sign_type="token", timeout=None, max_retries=None): 64 | """设置订单的ip白名单, 强制签名验证 65 | :param iplist参数类型为 str 或 list 或 tuple 66 | 如果为字符串则ip之间用逗号隔开 67 | :return 成功则返回True, 否则抛出异常 68 | """ 69 | 70 | if iplist is None: 71 | raise KdlNameError("miss 
param: iplist") 72 | if not (isinstance(iplist, list) or isinstance(iplist, tuple) or isinstance(iplist, str)): 73 | raise KdlTypeError("iplist type error, should be a instance of list or tuple or str") 74 | if isinstance(iplist, list) or isinstance(iplist, tuple): 75 | iplist = ','.join(iplist) 76 | endpoint = EndPoint.SetIpWhitelist.value 77 | params = self._get_params(endpoint, iplist=iplist, sign_type=sign_type, timeout=timeout, max_retries=max_retries) 78 | self._get_base_res("POST", endpoint, params) 79 | return True 80 | 81 | def tps_current_ip(self, sign_type="token", timeout=None, max_retries=None): 82 | """仅支持支持换IP周期>=1分钟的隧道代理订单 83 | 获取隧道当前的IP,默认“token”鉴权 84 | :param sign_type:默认token 85 | :return:返回ip地址。 86 | """ 87 | endpoint = EndPoint.TpsCurrentIp.value 88 | params = self._get_params(endpoint, sign_type=sign_type, timeout=timeout, max_retries=max_retries) 89 | res = self._get_base_res("GET", endpoint, params) 90 | return res['data']['current_ip'] 91 | 92 | def change_tps_ip(self, sign_type="token", timeout=None, max_retries=None): 93 | """仅支持支持换IP周期>=1分钟的隧道代理订单 94 | :param sign_type: 默认token 95 | :return: 返回新的IP地址 96 | """ 97 | endpoint = EndPoint.ChangeTpsIp.value 98 | params = self._get_params(endpoint, sign_type=sign_type, timeout=timeout, max_retries=max_retries) 99 | res = self._get_base_res("GET", endpoint, params) 100 | return res['data']['new_ip'] 101 | 102 | def get_tps(self, num=None, sign_type="token", **kwargs): 103 | """获取隧道代理IP, 默认"token"鉴权 https://www.kuaidaili.com/doc/api/gettps/ 104 | :param num : 提取数量,int类型 105 | :param kwargs: 其他关键字参数,具体有那些参数请查看帮助中心api说明 106 | :return 若为json格式, 则返回data中proxy_list部分, 即proxy列表, 否则原样返回 107 | """ 108 | if num is None: 109 | raise KdlNameError("miss param: num") 110 | if not isinstance(num, int): 111 | KdlTypeError("num should be a integer") 112 | endpoint = EndPoint.GetTps.value 113 | params = self._get_params(endpoint, num=num, sign_type=sign_type, **kwargs) 114 | res = self._get_base_res("GET", endpoint, params) 115 | if isinstance(res, dict): 116 | return res['data']['proxy_list'] 117 | return res 118 | 119 | def get_dps_valid_time(self, proxy=None, sign_type="token", **kwargs): 120 | """获取私密代理ip有效时间 121 | :param proxy: 私密代理列表, 格式: IP:PORT, eg: 113.120.61.166:22989,122.4.44.132:21808 122 | :param sign_type: 认证方式 123 | :return: 返回data部分, 格式为由'proxy: seconds(剩余秒数)'组成的列表 124 | """ 125 | if not proxy: 126 | raise KdlNameError("miss param: proxy") 127 | if not (isinstance(proxy, list) or isinstance(proxy, tuple) or isinstance(proxy, str)): 128 | raise KdlTypeError("proxy should be a instance of list or tuple or str") 129 | if isinstance(proxy, list) or isinstance(proxy, tuple): 130 | proxy = ','.join(proxy) 131 | endpoint = EndPoint.GetDpsValidTime.value 132 | params = self._get_params(endpoint, proxy=proxy, sign_type=sign_type) 133 | res = self._get_base_res("GET", endpoint, params) 134 | if isinstance(res, dict): 135 | return res['data'] 136 | return res 137 | 138 | def get_dps(self, num=None, sign_type="token", **kwargs): 139 | """获取私密代理, 默认"token"鉴权 140 | :param num: 提取数量, int类型 141 | :param kwargs: 其他关键字参数,具体有那些参数请查看帮助中心api说明 142 | :return 若为json格式, 则返回data中proxy_list部分, 即proxy列表, 否则原样返回 143 | """ 144 | if num is None: 145 | raise KdlNameError("miss param: num") 146 | if not isinstance(num, int): 147 | KdlTypeError("num should be a integer") 148 | endpoint = EndPoint.GetDpsProxy.value 149 | params = self._get_params(endpoint, num=num, sign_type=sign_type, **kwargs) 150 | res = self._get_base_res("GET", endpoint, params) 151 
155 | def check_dps_valid(self, proxy=None, sign_type="token", **kwargs):
156 | """检测私密代理有效性, 强制签名验证
157 | :return 返回data部分, 格式为由'proxy: True/False'组成的dict
158 | """
159 | if not proxy:
160 | raise KdlNameError("miss param: proxy")
161 | if not (isinstance(proxy, list) or isinstance(proxy, tuple) or isinstance(proxy, str)):
162 | raise KdlTypeError("proxy should be an instance of list or tuple or str")
163 | if isinstance(proxy, list) or isinstance(proxy, tuple):
164 | proxy = ','.join(proxy)
165 | endpoint = EndPoint.CheckDpsValid.value
166 | params = self._get_params(endpoint, proxy=proxy, sign_type=sign_type)
167 | res = self._get_base_res("GET", endpoint, params)
168 | if isinstance(res, dict):
169 | return res['data']
170 | return res
171 | 
172 | def get_ip_balance(self, sign_type="token", timeout=None, max_retries=None):
173 | """获取计数版订单ip余额, 强制签名验证,
174 | 此接口只对按量付费订单和包年包月的集中提取型订单有效
175 | :return 返回data中的balance字段, int类型
176 | """
177 | endpoint = EndPoint.GetIpBalance.value
178 | params = self._get_params(endpoint, sign_type=sign_type, timeout=timeout, max_retries=max_retries)
179 | res = self._get_base_res("GET", endpoint, params)
180 | if isinstance(res, dict):
181 | return res['data']['balance']
182 | return res
183 | 
184 | def get_kps(self, num=None, sign_type="token", **kwargs):
185 | """获取独享代理, 默认"token"鉴权
186 | :param num: 提取数量, sign_type: 鉴权方式
187 | :param kwargs: 其他关键字参数,具体有哪些参数请查看帮助中心api说明
188 | :return 若为json格式, 则返回data中proxy_list部分, 即proxy列表, 否则原样返回
189 | """
190 | if num is None:
191 | raise KdlNameError("miss param: num")
192 | if not isinstance(num, int):
193 | raise KdlTypeError("num should be an integer")
194 | endpoint = EndPoint.GetKpsProxy.value
195 | params = self._get_params(endpoint, num=num, sign_type=sign_type, **kwargs)
196 | res = self._get_base_res("GET", endpoint, params)
197 | if isinstance(res, dict):
198 | return res['data']['proxy_list']
199 | return res
200 | 
201 | def get_proxy(self, num=None, order_level=OpsOrderLevel.NORMAL, sign_type="token", **kwargs):
202 | """获取开放代理, 默认"token"鉴权
203 | :param num: 提取数量, sign_type: 鉴权方式, order_level: 开放代理订单类型
204 | :param kwargs: 其他关键字参数,具体有哪些参数请查看帮助中心api说明
205 | :return 若为json格式, 则返回data中proxy_list部分, 即proxy列表, 否则原样返回
206 | """
207 | if num is None:
208 | raise KdlNameError("miss param: num")
209 | if not isinstance(num, int):
210 | raise KdlTypeError("num should be an integer")
211 | endpoint = EndPoint.GetOpsProxyNormalOrVip.value
212 | if order_level == OpsOrderLevel.SVIP:
213 | endpoint = EndPoint.GetOpsProxySvip.value
214 | if order_level == OpsOrderLevel.PRO:
215 | endpoint = EndPoint.GetOpsProxyEnt.value
216 | 
217 | params = self._get_params(endpoint, num=num, sign_type=sign_type, **kwargs)
218 | res = self._get_base_res("GET", endpoint, params)
219 | if isinstance(res, dict):
220 | return res['data']['proxy_list']
221 | return res
222 | 
223 | def check_ops_valid(self, proxy=None, sign_type="token", **kwargs):
224 | """检测开放代理有效性, 强制签名验证
225 | :return 返回data部分, 格式为由'proxy: True/False'组成的dict
226 | """
227 | if not proxy:
228 | raise KdlNameError("miss param: proxy")
229 | if not (isinstance(proxy, list) or isinstance(proxy, tuple) or isinstance(proxy, str)):
230 | raise KdlTypeError("proxy should be an instance of list or tuple or str")
231 | if isinstance(proxy, list) or isinstance(proxy, tuple):
232 | proxy = ','.join(proxy)
233 | endpoint = EndPoint.CheckOpsValid.value
234 | params = 
self._get_params(endpoint, proxy=proxy, sign_type=sign_type) 235 | res = self._get_base_res("GET", endpoint, params) 236 | if isinstance(res, dict): 237 | return res['data'] 238 | return res 239 | 240 | def get_ua(self, num=1, **kwargs): 241 | """获取User Agent 242 | :return 若为json格式, 则返回data中ua_list部分, 即user agent列表, 否则原样返回 243 | """ 244 | endPoint = EndPoint.GetUA.value 245 | params = self._get_params(endPoint, num=num, sign_type="token", **kwargs) 246 | res = self._get_base_res("GET", endPoint, params) 247 | if isinstance(res, dict): 248 | return res['data']["ua_list"] 249 | return res 250 | 251 | def get_area_code(self, area, **kwargs): 252 | """获取指定地区编码 253 | :return: 254 | """ 255 | endpoint = EndPoint.GetAreaCode.value 256 | params = self._get_params(endpoint, area=area, sign_type="token", **kwargs) 257 | res = self._get_base_res("GET", endpoint, params) 258 | if isinstance(res, dict): 259 | return res['data'] 260 | return res 261 | 262 | def get_account_balance(self, **kwargs): 263 | """获取账户余额 264 | :return: 265 | """ 266 | endpoint = EndPoint.GetAccountBalance.value 267 | params = self._get_params(endpoint, sign_type="token", **kwargs) 268 | res = self._get_base_res("GET", endpoint, params) 269 | if isinstance(res, dict): 270 | return res['data'] 271 | return res 272 | 273 | def create_order(self, product, pay_type, **kwargs): 274 | """创建订单,自动从账户余额里结算费用 275 | :return: 276 | """ 277 | if not (product and pay_type): 278 | raise KdlNameError('miss param: product or pay_type') 279 | endpoint = EndPoint.CreateOrder.value 280 | params = self._get_params(endpoint,product=product, pay_type=pay_type, sign_type="hmacsha1", **kwargs) 281 | res = self._get_base_res("GET", endpoint, params) 282 | return res 283 | 284 | def get_order_info(self, **kwargs): 285 | """获取订单的详细信息 286 | :return: 287 | """ 288 | endpoint = EndPoint.GetOrderInfo.value 289 | params = self._get_params(endpoint, sign_type="hmacsha1", **kwargs) 290 | res = self._get_base_res("GET", endpoint, params) 291 | return res 292 | 293 | def set_auto_renew(self, autorenew, **kwargs): 294 | """开启/关闭自动续费 295 | :return: 296 | """ 297 | if not autorenew: 298 | raise KdlNameError('miss param: autorenew') 299 | endpoint = EndPoint.SetAutoRenew.value 300 | params = self._get_params(endpoint, autorenew=autorenew, sign_type="hmacsha1", **kwargs) 301 | res = self._get_base_res("GET", endpoint, params) 302 | return res 303 | 304 | def close_order(self, **kwargs): 305 | """关闭指定订单, 此接口只对按量付费(后付费)订单有效 306 | :return: 307 | """ 308 | endpoint = EndPoint.CloseOrder.value 309 | params = self._get_params(endpoint, sign_type="hmacsha1", **kwargs) 310 | res = self._get_base_res("GET", endpoint, params) 311 | return res 312 | 313 | def query_kps_city(self, serie, **kwargs): 314 | """查询独享代理有哪些城市可供开通。对于IP共享型还可查询到每个城市可开通的IP数量。 315 | :return: 316 | """ 317 | if not serie: 318 | raise KdlNameError('miss params: serie') 319 | endpoint = EndPoint.QueryKpsCity.value 320 | params = self._get_params(endpoint, serie=serie, sign_type="hmacsha1", **kwargs) 321 | res = self._get_base_res("GET", endpoint, params) 322 | return res 323 | 324 | def _get_secret_token(self, timeout=None, max_retries=None): 325 | try: 326 | timeout = timeout or self.timeout 327 | max_retries = max_retries or self.max_retries 328 | self.session.mount('http://', HTTPAdapter(max_retries=max_retries)) 329 | self.session.mount('https://', HTTPAdapter(max_retries=max_retries)) 330 | r = self.session.post(url='https://' + EndPoint.GetSecretToken.value, 331 | data={'secret_id': self.auth.secret_id, 
'secret_key': self.auth.secret_key}, 332 | timeout=timeout) 333 | if r.status_code != 200: 334 | raise KdlStatusError(r.status_code, r.content.decode('utf8')) 335 | except requests.exceptions.RequestException as e: 336 | pass # TODO: 重试后失败 处理 337 | raise e 338 | 339 | res = json.loads(r.content.decode('utf8')) 340 | code, msg = res['code'], res['msg'] 341 | if code != 0: 342 | raise KdlException(code, msg) 343 | secret_token = res['data']['secret_token'] 344 | expire = str(res['data']['expire']) 345 | _time = '%.6f' % time.time() 346 | return secret_token, expire, _time 347 | 348 | def _read_secret_token(self): 349 | with open(SECRET_PATH, 'r') as f: 350 | token_info = f.read() 351 | secret_token, expire, _time = token_info.split('|') 352 | if float(_time) + float(expire) - 3 * 60 < time.time(): # 还有3分钟过期时更新 353 | secret_token, expire, _time = self._get_secret_token() 354 | with open(SECRET_PATH, 'w') as f: 355 | f.write(secret_token + '|' + expire + '|' + _time) 356 | return secret_token 357 | 358 | def get_secret_token(self): 359 | if os.path.exists(SECRET_PATH): 360 | secret_token = self._read_secret_token() 361 | else: 362 | secret_token, expire, _time = self._get_secret_token() 363 | with open(SECRET_PATH, 'w') as f: 364 | f.write(secret_token + '|' + expire + '|' + _time) 365 | return secret_token 366 | 367 | def _get_params(self, endpoint, **kwargs): 368 | """构造请求参数""" 369 | params = dict(secret_id=self.auth.secret_id) 370 | params.update(kwargs) 371 | 372 | sign_type = kwargs.get('sign_type', None) 373 | if not sign_type: 374 | return params 375 | 376 | if not self.auth.secret_key: 377 | raise KdlNameError("secret_key is required for signature") 378 | 379 | if sign_type == "hmacsha1": 380 | params['timestamp'] = int(time.time()) 381 | if endpoint == EndPoint.SetIpWhitelist.value: 382 | raw_str = self.auth.get_string_to_sign("POST", endpoint, params.copy()) 383 | else: 384 | raw_str = self.auth.get_string_to_sign("GET", endpoint, params.copy()) 385 | params["signature"] = self.auth.sign_str(raw_str) 386 | elif sign_type == "token": 387 | secret_token = self.get_secret_token() 388 | params['signature'] = secret_token 389 | else: 390 | raise KdlNameError("unknown sign_type {}".format(sign_type)) 391 | 392 | return params 393 | 394 | def _get_base_res(self, method, endpoint, params): 395 | """处理基础请求, 396 | 若响应为json格式则返回请求结果dict 397 | 否则直接返回原格式 398 | """ 399 | try: 400 | r = None 401 | timeout = params.get('timeout', '') or self.timeout 402 | max_retries = params.get('max_retries', '') or self.max_retries 403 | self.session.mount('http://', HTTPAdapter(max_retries=max_retries)) 404 | self.session.mount('https://', HTTPAdapter(max_retries=max_retries)) 405 | self.auth.clear_req_params(params) 406 | 407 | if method == "GET": 408 | r = requests.get("https://" + endpoint, params=params, timeout=timeout) 409 | elif method == "POST": 410 | r = requests.post("https://" + endpoint, data=params, headers={"Content-Type": "application/x-www-form-urlencoded"}, timeout=timeout) 411 | if r.status_code != 200: 412 | raise KdlStatusError(r.status_code, r.content.decode('utf8')) 413 | try: 414 | res = json.loads(r.content.decode('utf8')) 415 | code, msg = res['code'], res['msg'] 416 | if code != 0: 417 | raise KdlException(code, msg) 418 | return res 419 | 420 | except ValueError as e: 421 | # 返回结果不是json格式, 直接返回 422 | if r.content.decode('utf8').strip().startswith("ERROR"): 423 | raise KdlException(-3, r.content) 424 | return r.content.decode('utf8') 425 | except Exception as e: 426 | raise e 427 | 
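# ----------------------------------------------------------------------
# 补充示意(新增代码,非原始文件内容):演示 _get_params 中 "hmacsha1"
# 签名的计算过程,仅使用上文已定义的 Auth 与 EndPoint;secret 均为占位符。
# ----------------------------------------------------------------------
if __name__ == "__main__":
    from kdl.auth import Auth

    demo_auth = Auth("secret_id", "secret_key")
    demo_params = {"secret_id": "secret_id", "num": 2, "timestamp": int(time.time())}
    # 签名原文 = 请求方法 + 路径 + '?' + 按key排序的查询串,
    # 形如 GET/api/getdps?num=2&secret_id=secret_id&timestamp=1700000000
    raw = Auth.get_string_to_sign("GET", EndPoint.GetDpsProxy.value, demo_params)
    demo_params["signature"] = demo_auth.sign_str(raw)  # base64编码的HMAC-SHA1摘要(bytes)
    print(raw)
    print(demo_params["signature"])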
-------------------------------------------------------------------------------- /api-sdk/kdl/endpoint.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """枚举各个api的主机+路径 4 | """ 5 | 6 | from enum import Enum, unique 7 | 8 | 9 | @unique 10 | class EndPoint(Enum): 11 | """ 各个api的主机+路径 """ 12 | GetOrderExpireTime = "dev.kdlapi.com/api/getorderexpiretime" 13 | GetIpWhitelist = "dev.kdlapi.com/api/getipwhitelist" # 获取IP白名单 14 | SetIpWhitelist = "dev.kdlapi.com/api/setipwhitelist" # 设置IP白名单 15 | GetKpsProxy = "kps.kdlapi.com/api/getkps" 16 | GetDpsProxy = "dps.kdlapi.com/api/getdps" 17 | GetOpsProxyNormalOrVip = "dev.kdlapi.com/api/getproxy" 18 | GetOpsProxySvip = "svip.kdlapi.com/api/getproxy" 19 | GetOpsProxyEnt = "ent.kdlapi.com/api/getproxy" 20 | CheckDpsValid = "dps.kdlapi.com/api/checkdpsvalid" 21 | CheckOpsValid = "dev.kdlapi.com/api/checkopsvalid" 22 | GetIpBalance = "dps.kdlapi.com/api/getipbalance" 23 | GetDpsValidTime = "dps.kdlapi.com/api/getdpsvalidtime" 24 | TpsCurrentIp = "tps.kdlapi.com/api/tpscurrentip" # 获取当前隧道代理IP 25 | ChangeTpsIp = "tps.kdlapi.com/api/changetpsip" # 更改当前隧道代理IP 26 | GetTps = "tps.kdlapi.com/api/gettps" # 获取隧道代理IP 27 | GetProxyAuthorization = "dev.kdlapi.com/api/getproxyauthorization" # 获取代理鉴权信息 28 | 29 | # 工具接口 30 | GetUA = "www.kuaidaili.com/api/getua" # 获取User Agent 31 | GetAreaCode = "dev.kdlapi.com/api/getareacode" # 获取指定地区编码 32 | GetAccountBalance = "dev.kdlapi.com/api/getaccountbalance" # 获取账户余额 33 | 34 | # 订单相关接口 35 | CreateOrder = "dev.kdlapi.com/api/createorder" # 创建订单 36 | GetOrderInfo = "dev.kdlapi.com/api/getorderinfo" # 获取订单信息 37 | SetAutoRenew = "dev.kdlapi.com/api/setautorenew" # 开启/关闭自动续费 38 | CloseOrder = "dev.kdlapi.com/api/closeorder" # 关闭订单 39 | QueryKpsCity = "dev.kdlapi.com/api/querykpscity" # 查询独享代理城市信息 40 | 41 | GetSecretToken = "auth.kdlapi.com/api/get_secret_token" # 获取token 42 | -------------------------------------------------------------------------------- /api-sdk/kdl/exceptions.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """快代理自定义异常""" 4 | 5 | import sys 6 | 7 | 8 | class KdlException(Exception): 9 | """异常类""" 10 | 11 | def __init__(self, code=None, message=None): 12 | self.code = code 13 | if sys.version_info[0] < 3 and isinstance(message, unicode): 14 | message = message.encode("utf8") 15 | self.message = message 16 | self._hint_message = "[KdlException] code: {} message: {}".format(self.code, self.message) 17 | 18 | @property 19 | def hint_message(self): 20 | return self._hint_message 21 | 22 | @hint_message.setter 23 | def hint_message(self, value): 24 | self._hint_message = value 25 | 26 | def __str__(self): 27 | if sys.version_info[0] < 3 and isinstance(self.hint_message, unicode): 28 | self.hint_message = self.hint_message.encode("utf8") 29 | return self.hint_message 30 | 31 | 32 | class KdlStatusError(KdlException): 33 | """状态码异常类""" 34 | def __init__(self, code, message): 35 | super(KdlStatusError, self).__init__(code, message) 36 | self.hint_message = "[KdlStatusError] status_code: {}, message: {}".format(self.code, self.message) 37 | 38 | 39 | class KdlNameError(KdlException): 40 | """参数异常类""" 41 | def __init__(self, message, code=-2): 42 | super(KdlNameError, self).__init__(code, message) 43 | self.hint_message = "[KdlNameError] message: {}".format(self.message) 44 | 45 | 46 | class KdlTypeError(KdlException): 47 | """类型异常类""" 48 | def __init__(self, message, code=-1): 49 
| super(KdlTypeError, self).__init__(code, message) 50 | self.hint_message = "[KdlTypeError] message: {}".format(self.message) 51 | -------------------------------------------------------------------------------- /api-sdk/kdl/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """枚举开放代理订单级别 4 | """ 5 | 6 | class OpsOrderLevel(object): 7 | """开放代理订单级别""" 8 | NORMAL = "dev" # 普通 9 | VIP = "dev" # vip 10 | SVIP = "svip" # svip 11 | PRO = "ent" # 专业版 12 | -------------------------------------------------------------------------------- /api-sdk/setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import sys 4 | from setuptools import setup 5 | 6 | if sys.version_info < (3,0): 7 | long_description_file = open('README.rst').read() 8 | else: 9 | long_description_file=open('README.rst', encoding='UTF-8').read() 10 | 11 | setup( 12 | name='kdl', 13 | version='0.2.21', 14 | description=( 15 | 'kuaidaili api sdk python, site: https://www.kuaidaili.com' 16 | ), 17 | 18 | long_description=long_description_file, 19 | author='kuaidaili-dev', 20 | author_email='service@kuaidaili.com', 21 | license='BSD License', 22 | packages= [ 23 | 'kdl', 24 | ], 25 | platforms='any', 26 | install_requires=[ 27 | 'requests' 28 | ], 29 | url='https://github.com/kuaidaili/python-sdk/api-sdk', 30 | classifiers=[ 31 | 'Development Status :: 4 - Beta', 32 | 'Operating System :: OS Independent', 33 | 'Intended Audience :: Developers', 34 | 'License :: OSI Approved :: BSD License', 35 | 'Programming Language :: Python', 36 | 'Programming Language :: Python :: Implementation', 37 | 'Programming Language :: Python :: 2', 38 | 'Programming Language :: Python :: 2.7', 39 | 'Programming Language :: Python :: 3', 40 | 'Programming Language :: Python :: 3.4', 41 | 'Programming Language :: Python :: 3.5', 42 | 'Programming Language :: Python :: 3.6', 43 | 'Programming Language :: Python :: 3.7', 44 | 'Topic :: Software Development :: Libraries' 45 | ], 46 | ) 47 | -------------------------------------------------------------------------------- /api-sdk/test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """python api sdk单元测试 4 | """ 5 | 6 | from kdl import Auth, Client 7 | 8 | import unittest 9 | 10 | from kdl.exceptions import KdlException 11 | import re 12 | 13 | 14 | secret_id = "" 15 | secret_key = "" 16 | 17 | 18 | 19 | ip_pattern = "^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$" 20 | ip_port_pattern = "^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5]):\d{2,5}$" 21 | time_pattern = "(((01[0-9]{2}|0[2-9][0-9]{2}|[1-9][0-9]{3})-(0?[13578]|1[02])-(0?[1-9]|[12]\\d|3[01]))|((01[0-9]{2}|0[2-9][0-9]{2}|[1-9][0-9]{3})-(0?[13456789]|1[012])-(0?[1-9]|[12]\\d|30))|((01[0-9]{2}|0[2-9][0-9]{2}|[1-9][0-9]{3})-0?2-(0?[1-9]|1\\d|2[0-8]))|(((1[6-9]|[2-9]\\d)(0[48]|[2468][048]|[13579][26])|((04|08|12|16|[2468][048]|[3579][26])00))-0?2-29)) (20|21|22|23|[0-1]?\\d):[0-5]?\\d:[0-5]?\\d" 22 | 23 | 24 | def is_valid_str(str, pattern): 25 | """判断格式是否正确""" 26 | if str and re.match(pattern, str): 27 | return True 28 | return False 29 | 30 | 31 | def is_valid_ip_list(lis, pattern): 32 | """判断返回的ip列表或者ip加端口列表是否格式正确""" 33 | if not lis: 34 | return True 35 | 36 | for i in lis: 37 | flag = is_valid_str(i, pattern) 38 | if not 
flag : 39 | return False 40 | return True 41 | 42 | 43 | class TestBase(unittest.TestCase): 44 | """单元测试基类,所有单元测试类从此类继承""" 45 | 46 | name = "单元测试" 47 | 48 | @classmethod 49 | def setUpClass(cls): 50 | print('%s测试开始' % cls.name) 51 | 52 | @classmethod 53 | def tearDownClass(cls): 54 | print('%s测试结束' % cls.name) 55 | 56 | def setUp(self): 57 | self.auth = Auth(secret_id, secret_key) 58 | self.client = Client(self.auth, timeout=(5, 6), max_retries=3) 59 | 60 | def test_get_expire_time(self): 61 | """ 获取订单过期时间 """ 62 | expire_time = self.client.get_order_expire_time(sign_type='hmacsha1') 63 | # assert isinstance(expire_time, unicode) or isinstance(expire_time, str) 64 | print(expire_time) 65 | assert isinstance(expire_time, str) and is_valid_str(expire_time,time_pattern) 66 | 67 | 68 | class TestBase2(TestBase): 69 | """具有获取IP白名单api和设置IP白名单的api,获取鉴权信息api的类, 70 | 目前只有私密代理,独享代理,隧道代理""" 71 | 72 | def test_get_ip_whitelist(self): 73 | """ 获取ip白名单 """ 74 | ip_whitelist = self.client.get_ip_whitelist() 75 | print(ip_whitelist) 76 | assert isinstance(ip_whitelist, list) and is_valid_ip_list(ip_whitelist,ip_pattern) 77 | 78 | def test_set_ip_whitelist(self): 79 | """ 设置ip白名单 """ 80 | self.client.set_ip_whitelist([]) 81 | ip_whitelist = self.client.get_ip_whitelist() 82 | assert len(ip_whitelist) == 0 83 | set_ip_list = ["171.113.144.44", "171.113.244.41"] 84 | self.client.set_ip_whitelist(set_ip_list) 85 | ip_whitelist = self.client.get_ip_whitelist() 86 | set_ip_list.reverse() 87 | assert len(ip_whitelist) == 2 and isinstance(ip_whitelist, list) and is_valid_ip_list(ip_whitelist,ip_pattern) and ip_whitelist == set_ip_list 88 | self.client.set_ip_whitelist([]) 89 | 90 | def test_get_proxy_authorization(self): 91 | data = self.client.get_proxy_authorization(plain_text=1, sign_type='token') 92 | assert isinstance(data, dict) 93 | print(data) 94 | 95 | 96 | class TestDpsOrder(TestBase2): 97 | """ 私密代理 """ 98 | 99 | name = "私密代理测试" 100 | 101 | def test_get_proxy(self): 102 | """ 获取私密代理 """ 103 | ips = self.client.get_dps(2, sign_type='hmacsha1', format='text', area='云南,广东', pt=2, f_citycode=1, ) 104 | # ips = self.client.get_dps(2, format='text') 105 | print(ips) 106 | assert isinstance(ips, list) or isinstance(ips, str) or isinstance(ips.encode('utf8'), str) or isinstance(ips.encode('utf8'), bytes) and is_valid_ip_list(ips,ip_port_pattern) 107 | 108 | def test_check_dps_valid(self): 109 | """检测是否有效""" 110 | ips = self.client.get_dps(2, format='json', area='北京,上海') 111 | print(ips) 112 | is_valid = self.client.check_dps_valid(ips) 113 | assert isinstance(is_valid, dict) 114 | 115 | def test_get_ip_balance(self): 116 | """检测还剩多少ip地址可以提取""" 117 | balance = self.client.get_ip_balance() 118 | assert isinstance(balance, int) 119 | 120 | def test_get_dps_valid_time(self): 121 | ips = self.client.get_dps(5, format='json', sign_type="hmacsha1") 122 | print("ips: ", ips) 123 | seconds = self.client.get_dps_valid_time(ips, sign_type="hmacsha1") 124 | print("seconds: ", seconds) 125 | assert isinstance(seconds, dict) 126 | 127 | def test_get_secret_token(self): 128 | secret_token = self.client.get_secret_token() 129 | print(secret_token) 130 | 131 | 132 | class TestKpsOrder(TestBase): 133 | """ 独享代理 """ 134 | name = '独享代理' 135 | 136 | def test_get_proxy(self): 137 | """ 获取私密代理 """ 138 | ips = self.client.get_kps(2, sign_type='token', format='json', area='云南,广东', pt=2, f_citycode=1) 139 | assert isinstance(ips, list) or isinstance(ips, str) or isinstance(ips.encode('utf8'), str) or isinstance(ips.encode('utf8'), 
bytes) and is_valid_ip_list(ips,ip_port_pattern) 140 | 141 | def test_check_dps_valid(self): 142 | """检测是否有效""" 143 | ips = self.client.get_kps(2, format='json', area='北京', pt=2) 144 | with self.assertRaises(KdlException): 145 | self.client.check_dps_valid(ips) 146 | 147 | def test_get_ip_balance(self): 148 | """检测还剩多少ip地址可以提取""" 149 | with self.assertRaises(KdlException): 150 | self.client.get_ip_balance() 151 | 152 | class TestOpsOrder(TestBase): 153 | """ 开放代理 """ 154 | 155 | name = '开放代理' 156 | 157 | def test_get_ip_whitelist(self): 158 | """ 获取ip白名单 """ 159 | with self.assertRaises(KdlException): 160 | self.client.get_ip_whitelist() 161 | 162 | def test_get_proxy(self): 163 | """ 获取私密代理 """ 164 | ips = self.client.get_proxy(2, order_level='vip', sign_type='hmacsha1', format='json', area='云南,广东', pt=2, 165 | f_citycode=1) 166 | assert isinstance(ips, list) or isinstance(ips, str) or isinstance(ips.encode('utf8'), str) or isinstance(ips.encode('utf8'), bytes) and is_valid_ip_list(ips,ip_port_pattern) 167 | 168 | def test_check_ops_valid(self): 169 | """检测是否有效""" 170 | ips = self.client.get_proxy(2, format='json', area='北京', pt=2) 171 | is_valid = self.client.check_ops_valid(ips) 172 | assert isinstance(is_valid, dict) 173 | 174 | def test_get_ip_balance(self): 175 | """检测还剩多少ip地址可以提取""" 176 | with self.assertRaises(KdlException): 177 | self.client.get_ip_balance() 178 | 179 | class TestTpsOrder(TestBase2): 180 | name = "隧道代理" 181 | 182 | def test_get_tps_ip(self): 183 | """获取当前隧道ip""" 184 | current_ip = self.client.tps_current_ip(sign_type='hmacsha1') 185 | assert len(current_ip) == 0 or (len(current_ip.split('.')) == 4 and is_valid_str(current_ip,ip_pattern)) 186 | 187 | def test_change_tcp_ip(self): 188 | """立即改变隧道ip""" 189 | new_ip = self.client.change_tps_ip() 190 | assert len(new_ip.split('.')) == 4 and is_valid_str(new_ip,ip_pattern) 191 | 192 | def test_get_tps(self): 193 | tps_list = self.client.get_tps(2,sign_type='hmacsha1', format='json') 194 | assert isinstance(tps_list, list) 195 | 196 | 197 | 198 | class TestExpiredKpsOrder(unittest.TestCase): 199 | """ 过期订单 """ 200 | 201 | name = "过期订单" 202 | 203 | def test_get_expire_time(self): 204 | with self.assertRaises(KdlException): 205 | self.client.get_order_expire_time() 206 | 207 | def test_get_ip_whitelist(self): 208 | with self.assertRaises(KdlException): 209 | self.client.get_ip_whitelist() 210 | 211 | def test_set_ip_whitelist(self): 212 | with self.assertRaises(KdlException): 213 | self.client.set_ip_whitelist("127.0.0.1") 214 | 215 | def test_get_proxy(self): 216 | with self.assertRaises(KdlException): 217 | self.client.get_kps(1) 218 | 219 | 220 | class TestNoApiKeyOrder(unittest.TestCase): 221 | """ 不提供apiKey,仅能成功调用不需要signature的api """ 222 | name = "不提供apiKey,仅能成功调用不需要signature的api " 223 | 224 | def test_get_ip_whitelist(self): 225 | """ 获取ip白名单 """ 226 | with self.assertRaises(NameError): 227 | self.client.get_ip_whitelist() 228 | 229 | def test_get_proxy(self): 230 | """ 获取私密代理 """ 231 | with self.assertRaises(NameError): 232 | self.client.get_proxy(2, order_level='vip', sign_type='hmacsha1', format='json', area='云南,广东', pt=2, 233 | f_citycode=1) 234 | with self.assertRaises(NameError): 235 | self.client.get_proxy(2, order_level='vip', sign_type='simple', format='json', area='云南,广东', pt=2, 236 | f_citycode=1) 237 | ips = self.client.get_proxy(2, order_level='vip', format='json', area='云南,广东', pt=2, 238 | f_citycode=1) 239 | 240 | assert isinstance(ips, list) and is_valid_ip_list(ips,ip_port_pattern) 241 | 242 | 243 | 
def test_check_ops_valid(self): 244 | ips = self.client.get_proxy(2, format='json', area='北京', pt=2) 245 | with self.assertRaises(NameError): 246 | is_valid = self.client.check_ops_valid(ips) 247 | assert isinstance(is_valid, dict) 248 | 249 | def test_get_ip_balance(self): 250 | with self.assertRaises(NameError): 251 | self.client.get_ip_balance() 252 | 253 | 254 | if __name__ == '__main__': 255 | suite = unittest.TestLoader().loadTestsFromTestCase(TestTpsOrder) 256 | unittest.TextTestRunner(verbosity=2).run(suite) 257 | 258 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # 快代理SDK - Python 2 | 3 | * [调用API](#调用api) 4 | * [python/api_urllib2.py](#pythonapi_urllib2py) 5 | * [python/api_urllib.py](#pythonapi_urllibpy) 6 | * [python/api_requests.py](#pythonapi_requestspy) 7 | * [Http代理-Python2部分:](#http代理-python2部分) 8 | * [python/proxy_urllib2.py](#pythonproxy_urllib2py) 9 | * [python/proxy_requests.py](#pythonproxy_requestspy) 10 | * [python/selenium_chrome_http.py](#pythonselenium_chrome_httppy) 11 | * [python/selenium_phantomjs_http.py](#pythonselenium_phantomjs_httppy) 12 | * [python/selenium_phantomjs_http_auth.py](#pythonselenium_phantomjs_http_authpy) 13 | * [Http代理-Python3部分:](#http代理-python3部分) 14 | * [python3/http_urllib.py](#python3http_urllibpy) 15 | * [python3/http_requests.py](#python3http_requestspy) 16 | * [Http代理-Scrapy部分:](#http代理-scrapy部分) 17 | * [scrapy/scrapy_proxy/scrapy_proxy/middlewares.py](#scrapyscrapy_proxyscrapy_proxymiddlewarespy) 18 | * [scrapy/scrapy_proxy/scrapy_proxy/settings.py](#scrapyscrapy_proxyscrapy_proxysettingspy) 19 | * [scrapy/scrapy_proxy/scrapy_proxy/spiders/main.py](#scrapyscrapy_proxyscrapy_proxyspidersmainpy) 20 | * [Socks代理-Python2部分:](#socks代理-python2部分) 21 | * [python/socks_requests.py](#pythonsocks_requestspy) 22 | * [python/socks_urllib2.py](#pythonsocks_urllib2py) 23 | * [python/selenium_chrome_sock5.py](#pythonselenium_chrome_sock5py) 24 | * [python/selenium_phantomjs_sock5.py](#pythonselenium_phantomjs_sock5py) 25 | * [python/selenium_phantomjs_sock5_auth.py](#pythonselenium_phantomjs_sock5_authpy) 26 | * [Socks代理-Python3部分:](#socks代理-python3部分) 27 | * [python3/proxy_requests_socks.py](#python3proxy_requests_sockspy) 28 | * [python3/proxy_urllib.py](#python3proxy_urllibpy) 29 | * [隧道代理-Python2部分:](#隧道代理-Python2部分) 30 | * [python/tps_proxy_urllib2.py](#pythontps_proxy_urllib2py) 31 | * [python/tps_proxy_request.py](#pythontps_proxy_requestpy) 32 | * [隧道代理-python3部分](#隧道代理-python3部分) 33 | * [python3/tps_proxy_request.py](#python3tps_proxy_requestpy) 34 | * [python3/tps_proxy_urllib.py](#python3tps_proxy_urllibpy) 35 | * [隧道代理-Scrapy部分](#隧道代理-Scrapy部分) 36 | * [scrapy_proxy/scrapy_proxy/middlewares.py](#scrapy_proxyscrapy_proxymiddlewarespy) 37 | * [scrapy_proxy/scrapy_proxy/settings.py](#scrapy_proxyscrapy_proxysettingspy) 38 | * [scrapy_proxy/scrapy_proxy/spiders/main.py](#scrapy_proxyscrapy_proxyspidersmainpy) 39 | 40 | * [技术支持](#技术支持) 41 | 42 | 43 | ## 调用API 44 | 45 | ### python/api_urllib2.py 46 | 使用urllib2调用api示例 47 | ``` 48 | 使用提示: 运行环境要求 python2.6/2.7 49 | ``` 50 | 51 | ### python/api_urllib.py 52 | 使用urllib调用api示例 53 | ``` 54 | 使用提示: 运行环境要求 python3.x 55 | ``` 56 | 57 | ### python/api_requests.py 58 | 使用requests库调用api示例 59 | ``` 60 | 使用提示: 61 | * 此样例支持 python 2.6—2.7以及3.3—3.7 62 | * requests不是python原生库,需要安装才能使用: pip install requests 63 | ``` 64 | 65 | ## Http代理-Python2部分: 66 | 67 | ### 
python/proxy_urllib2.py 68 | 使用urllib2请求Http代理服务器, 支持访问http和https网页, 推荐使用 69 | ``` 70 | 使用提示: 运行环境要求 python2.6/2.7 71 | ``` 72 | 73 | ### python/proxy_requests.py 74 | 使用requests请求Http代理服务器, 支持使用白名单访问http和https网页, 使用用户名密码不支持访问https网页 75 | ``` 76 | 使用提示: requests不是python原生库, 需要安装才能使用: pip install requests 77 | ``` 78 | 79 | ### python/selenium_chrome_http.py 80 | 以`白名单`认证形式使用selenium库和Chrome驱动请求Http代理服务器 81 | ``` 82 | 使用提示: 83 | * 基于白名单的http/https代理Chrome 84 | * 运行环境要求`python2.x + selenium + Chrome + Chromedriver + xvfb` 85 | * 安装xvfb:`pip install xvfbwrapper` 86 | * Ubuntu下开发环境配置参考: https://christopher.su/2015/selenium-chromedriver-ubuntu/ 87 | ``` 88 | 89 | ### python/selenium_phantomjs_http.py 90 | 以`白名单`认证形式使用selenium库和PhantomJS驱动请求Http代理服务器 91 | ``` 92 | 使用提示: 93 | * 基于白名单的http/https代理PhantomJS 94 | * 运行环境要求`python2.x + selenium + PhantomJS` 95 | * `selenium + PhantomJS` 可以直接使用pip安装 96 | ``` 97 | 98 | ### python/selenium_phantomjs_http_auth.py 99 | 以`用户名密码`认证形式使用selenium库和PhantomJS驱动请求Http代理服务器 100 | ``` 101 | 使用提示: 102 | * 基于密码认证的http/https代理PhantomJS 103 | * 运行环境要求`python2.x + selenium + PhantomJS` 104 | * `selenium + PhantomJS` 可以直接使用pip安装 105 | ``` 106 | 107 | ## Http代理-Python3部分: 108 | 109 | ### python3/http_urllib.py 110 | 使用`urllib`库请求Http代理服务器, 支持访问http和https网页 111 | ``` 112 | 使用提示: 113 | * 基于urllib的代码样例同时支持访问http和https网页,推荐使用 114 | * 运行环境要求 python3.x 115 | ``` 116 | 117 | ### python3/http_requests.py 118 | 使用`requests`库请求Http代理服务器, 支持使用白名单访问http,https网页, 使用用户名密码不支持访问https网页 119 | ``` 120 | 使用提示: 121 | * 基于requests的代码样例支持使用白名单访问http,https网页,使用用户名密码不支持访问https网页 122 | * requests不是python原生库,需要安装才能使用: pip install requests 123 | ``` 124 | 125 | ## Http代理-Scrapy部分: 126 | scrapy项目标准目录结构如下: 127 | 128 | ![scrapy项目结构](https://help.kuaidaili.com/dev/img/scrapy.jpg) 129 | 130 | ### scrapy/scrapy_proxy/scrapy_proxy/middlewares.py 131 | 设置代理 132 | 133 | ### scrapy/scrapy_proxy/scrapy_proxy/settings.py 134 | 使代理生效 135 | 136 | ### scrapy/scrapy_proxy/scrapy_proxy/spiders/main.py 137 | 使用代理 138 | ``` 139 | 使用提示: 140 | * http/https网页均可适用 141 | * scrapy不是python原生库,需要安装才能使用: pip install scrapy 142 | * 在第一级scrapy_proxy目录下运行如下命令查看结果:scrapy crawl main 143 | ``` 144 | 145 | ## Socks代理-Python2部分: 146 | 147 | ### python/socks_requests.py 148 | 使用`requests`库请求Socks代理服务器 149 | ``` 150 | 使用提示: 151 | * http/https网页均可适用 152 | * 运行环境要求: requests >= 2.10.0 153 | * socks支持是`requests`的额外特性,需要安装才能使用: pip install requests[socks] 154 | ``` 155 | 156 | ### python/socks_urllib2.py 157 | 使用`urllib2`库请求Socks代理服务器 158 | ``` 159 | 使用提示: 160 | * 运行环境要求 python2.6 / 2.7 161 | * http/https网页均可适用 162 | * 使用此样例需要安装PySocks:pip install PySocks 163 | ``` 164 | 165 | ### python/selenium_chrome_sock5.py 166 | 以`白名单`认证形式使用selenium库和Chrome驱动请求Socks代理服务器 167 | ``` 168 | 使用提示: 169 | * 运行环境要求 python2.x + selenium + chrome + chrome driver + xvfb 170 | * socks5代理网页均可适用 171 | * 安装xvfb:pip install xvfbwrapper 172 | * 开发环境配置参考: https://christopher.su/2015/selenium-chromedriver-ubuntu/ 173 | ``` 174 | 175 | ### python/selenium_phantomjs_sock5.py 176 | 以`白名单`认证形式使用selenium库和PhantomJS驱动请求Socks代理服务器 177 | ``` 178 | 使用提示: 179 | * 运行环境要求: python2.x 180 | * socks5代理网页均可适用 181 | * 使用此样例需要安装 selenium、PhantomJS 182 | * PhantomJS 可以直接使用pip安装 183 | ``` 184 | 185 | ### python/selenium_phantomjs_sock5_auth.py 186 | 以`用户名密码`认证形式使用selenium库和PhantomJS驱动请求Socks代理服务器 187 | ``` 188 | 使用提示: 189 | * 运行环境要求 python2.x 190 | * socks5代理http/https网页均可适用 191 | * 使用此样例需要安装 selenium、PhantomJS 192 | * PhantomJS 可以直接使用pip安装 193 | ``` 194 | 195 | ## Socks代理-Python3部分: 196 | 
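作为参考,requests 使用 SOCKS5 代理的最小示意如下(具体以本节样例文件为准;host、port 与用户名密码均为占位符):

```python
import requests

# 需先安装socks支持: pip install requests[socks]
# 使用 socks5h:// 可让域名解析也经由代理完成
proxies = {
    "http": "socks5://username:password@host:port",
    "https": "socks5://username:password@host:port",
}
print(requests.get("https://dev.kdlapi.com/testproxy", proxies=proxies).text)
```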
197 | ### python3/proxy_requests_socks.py
198 | 使用`requests`库请求Socks代理服务器, http/https网页均适用
199 | ```
200 | 使用提示:
201 | * http/https网页均可适用
202 | * 运行环境要求:requests >= 2.10.0
203 | * socks支持是requests的额外特性,需要安装才能使用: pip install requests[socks]
204 | ```
205 |
206 | ### python3/proxy_urllib.py
207 | 使用`urllib`库请求Socks代理服务器, http/https网页均适用
208 | ```
209 | 使用提示:
210 | * http/https网页均可适用
211 | * 请先安装socks: pip install pysocks
212 | ```
213 |
214 | ## 隧道代理-Python2部分
215 | ### python/tps_proxy_urllib2.py
216 | 使用urllib2请求隧道代理服务器, 支持访问http和https网页, 推荐使用
217 | ```
218 | 使用提示: 运行环境要求 python2.6/2.7
219 | ```
220 |
221 | ### python/tps_proxy_request.py
222 | 使用requests请求隧道代理服务器, 支持访问http和https网页, 推荐使用
223 | ```
224 | 使用提示:
225 | * 基于requests的代码样例支持访问http和https网页,推荐使用
226 | * requests不是python原生库,需要安装才能使用: pip install requests
227 | ```
228 |
229 | ## 隧道代理-Python3部分
230 | ### python3/tps_proxy_request.py
231 | 使用requests请求隧道代理服务器, 支持访问http和https网页, 推荐使用
232 | ```
233 | 使用提示:
234 | * 基于requests的代码样例支持访问http和https网页,推荐使用
235 | * requests不是python原生库,需要安装才能使用: pip install requests
236 | ```
237 |
238 | ### python3/tps_proxy_urllib.py
239 | 使用urllib请求隧道代理服务器, 支持访问http和https网页, 推荐使用
240 | ```
241 | 使用提示: 运行环境要求 python3
242 | ```
243 |
244 | ## 隧道代理-Scrapy部分:
245 | scrapy项目标准目录结构如下:
246 |
247 | ![scrapy项目结构](https://help.kuaidaili.com/dev/img/scrapy.jpg)
248 |
249 | ### scrapy_proxy/scrapy_proxy/middlewares.py
250 | 设置代理
251 |
252 | ### scrapy_proxy/scrapy_proxy/settings.py
253 | 使代理生效
254 |
255 | ### scrapy_proxy/scrapy_proxy/spiders/main.py
256 | 使用代理
257 |
258 | ```
259 | 使用提示:
260 | * http/https网页均可适用
261 | * scrapy不是python原生库,需要安装才能使用: pip install scrapy
262 | * 在第一级scrapy_proxy目录下运行如下命令查看结果:scrapy crawl main
263 | ```
264 |
265 |
266 |
267 |
268 | ## 技术支持
269 |
270 | 如果您发现代码有任何问题, 请提交`Issue`。
271 |
272 | 欢迎提交`Pull request`以使代码样例更加完善。
273 |
274 | 获取更多关于调用API和代理服务器使用的资料,请参考[开发者指南](https://help.kuaidaili.com/dev/api/)。
275 |
276 | * 技术支持微信:kuaidaili
277 | * 技术支持QQ:800849628
278 |
-------------------------------------------------------------------------------- /examples/api/py2_urllib2.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #-*- coding: utf-8 -*-
3 |
4 | """使用urllib2调用API接口
5 | """
6 |
7 | import urllib2
8 | import zlib
9 |
10 | #api链接
11 | api_url = "http://dev.kdlapi.com/api/getproxy/?secret_id=o1fjh1re9o28876h7c08&signature=xxxxx&num=100&protocol=1&method=2&an_ha=1&sep=1"
12 |
13 | req = urllib2.Request(api_url)
14 | req.add_header("Accept-Encoding", "Gzip")  #使用gzip压缩传输数据让访问更快
15 | r = urllib2.urlopen(req)
16 |
17 | print r.code  #获取Response的返回码
18 | content_encoding = r.headers.getheader("Content-Encoding")
19 | if content_encoding and "gzip" in content_encoding:
20 |     print zlib.decompress(r.read(), 16+zlib.MAX_WBITS)  #获取页面内容
21 | else:
22 |     print r.read()  #获取页面内容
-------------------------------------------------------------------------------- /examples/api/py3_requests.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """使用requests请求代理服务器
5 | 请求http和https网页均适用
6 | """
7 |
8 | import requests
9 | import random
10 |
11 | page_url = "http://dev.kdlapi.com/testproxy"  # 要访问的目标网页
12 | # API接口,返回格式为json
13 | api_url = "http://dps.kdlapi.com/api/getdps?secret_id=o1fjh1re9o28876h7c08&signature=xxxxx&num=10&format=json&sep=1"
14 |
15 | # API接口返回的ip
16 | proxy_ip = requests.get(api_url).json()['data']['proxy_list']
17 |
18 | # 用户名密码认证(私密代理/独享代理)
19 | username = "username"
20 | password = "password"
"password" 21 | 22 | proxies = { 23 | "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {'user': username, 'pwd': password, 'proxy': random.choice(proxy_ip)}, 24 | "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {'user': username, 'pwd': password, 'proxy': random.choice(proxy_ip)} 25 | } 26 | headers = { 27 | "Accept-Encoding": "Gzip", # 使用gzip压缩传输数据让访问更快 28 | } 29 | r = requests.get(page_url, proxies=proxies, headers=headers) 30 | print(r.status_code) # 获取Response的返回码 31 | 32 | if r.status_code == 200: 33 | r.enconding = "utf-8" # 设置返回内容的编码 34 | print(r.content) # 获取页面内容 35 | -------------------------------------------------------------------------------- /examples/api/py3_urllib.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """使用urllib.request调用API接口(在python3中urllib2被改为urllib.request) 5 | """ 6 | 7 | import urllib.request 8 | import zlib 9 | 10 | #api链接 11 | api_url = "http://dev.kdlapi.com/api/getproxy/?secret_id=o1fjh1re9o28876h7c08&signature=xxxxx&num=100&protocol=1&method=2&an_ha=1&sep=1" 12 | 13 | headers = {"Accept-Encoding": "Gzip"} #使用gzip压缩传输数据让访问更快 14 | 15 | req = urllib.request.Request(url=api_url, headers=headers) 16 | 17 | # 请求api链接 18 | res = urllib.request.urlopen(req) 19 | 20 | print(res.code) # 获取Reponse的返回码 21 | content_encoding = res.headers.get('Content-Encoding') 22 | if content_encoding and "gzip" in content_encoding: 23 | print(zlib.decompress(res.read(), 16 + zlib.MAX_WBITS).decode('utf-8')) #获取页面内容 24 | else: 25 | print(res.read().decode('utf-8')) #获取页面内容 -------------------------------------------------------------------------------- /examples/http_proxy/phantomjs_demo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | from selenium import webdriver 5 | import time 6 | 7 | #先下载phantomjs包文件,再填入phantomjs.exe的路径 (路径不要包含中文, 下载地址:https://mirrors.huaweicloud.com/phantomjs/) 8 | executable_path = '${executable_path}' 9 | service_args=[ 10 | '--proxy=host:port', #此处替换您的代理ip,如59.38.241.25:23918 11 | '--proxy-type=http', 12 | '--proxy-auth=username:password' #用户名密码 13 | ] 14 | driver=webdriver.PhantomJS(service_args=service_args,executable_path=executable_path) 15 | driver.get('https://dev.kdlapi.com/testproxy') 16 | 17 | print(driver.page_source) 18 | time.sleep(3) 19 | driver.close() -------------------------------------------------------------------------------- /examples/http_proxy/proxy_pool.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | 5 | import time 6 | import random 7 | import threading 8 | 9 | import requests 10 | 11 | 12 | class ProxyPool(): 13 | 14 | def __init__(self, secret_id, secret_token, proxy_count): 15 | self.secret_id = secret_id 16 | self.signature = secret_token 17 | self.proxy_count = proxy_count if proxy_count < 50 else 50 # 池子维护的IP总数,建议一般不要超过50 18 | self.alive_proxy_list = [] # 活跃IP列表 19 | 20 | def _fetch_proxy_list(self, count): 21 | """调用快代理API获取代理IP列表""" 22 | try: 23 | res = requests.get("http://dps.kdlapi.com/api/getdps/?secret_id=%s&signature=%s&num=%s&pt=1&sep=1&f_et=1&format=json" % (self.secret_id, self.signature, count)) 24 | return [proxy.split(',') for proxy in res.json().get('data').get('proxy_list')] 25 | except: 26 | print("API获取IP异常,请检查订单") 27 | return [] 28 | 29 | def _init_proxy(self): 30 | """初始化IP池""" 31 | self.alive_proxy_list = 
32 |
33 |     def add_alive_proxy(self, add_count):
34 |         """导入新的IP, 参数为新增IP数"""
35 |         self.alive_proxy_list.extend(self._fetch_proxy_list(add_count))
36 |
37 |     def get_proxy(self):
38 |         """从IP池中获取IP"""
39 |         return random.choice(self.alive_proxy_list)[0] if self.alive_proxy_list else ""
40 |
41 |     def run(self):
42 |         sleep_seconds = 1
43 |         self._init_proxy()
44 |         while True:
45 |             for proxy in self.alive_proxy_list[:]:  # 遍历副本,避免边遍历边删除导致漏删
46 |                 proxy[1] = float(proxy[1]) - sleep_seconds  # proxy[1]代表此IP的剩余可用时间
47 |                 if proxy[1] <= 3:
48 |                     self.alive_proxy_list.remove(proxy)  # IP还剩3s时丢弃此IP
49 |             if len(self.alive_proxy_list) < self.proxy_count:
50 |                 self.add_alive_proxy(self.proxy_count - len(self.alive_proxy_list))
51 |             time.sleep(sleep_seconds)
52 |
53 |     def start(self):
54 |         """开启子线程更新IP池"""
55 |         t = threading.Thread(target=self.run)
56 |         t.daemon = True  # 将子线程设为守护线程,主线程不会等待子线程结束,主线程结束子线程立刻结束
57 |         t.start()
58 |
59 |
60 | def parse_url(proxy):
61 |     # 用户名密码认证(私密代理/独享代理)
62 |     username = "username"
63 |     password = "password"
64 |     proxies = {
65 |         "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": proxy},
66 |         "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": proxy}
67 |     }
68 |
69 |     # 白名单方式(需提前设置白名单)
70 |     # proxies = {
71 |     #     "http": "http://%(proxy)s/" % {"proxy": proxy},
72 |     #     "https": "http://%(proxy)s/" % {"proxy": proxy}
73 |     # }
74 |
75 |     # 要访问的目标网页
76 |     target_url = "https://dev.kdlapi.com/testproxy"
77 |     # 使用代理IP发送请求
78 |     response = requests.get(target_url, proxies=proxies)
79 |     # 获取页面内容
80 |     if response.status_code == 200:
81 |         print(response.text)
82 |
83 |
84 | if __name__ == '__main__':
85 |     proxy_pool = ProxyPool('o1fjh1re9o28876h7c08', 'xxxxxx', 30)  # 订单SecretId, 签名(secret_token), 池子中维护的IP数
86 |     proxy_pool.start()
87 |     time.sleep(1)  # 等待IP池初始化
88 |
89 |     proxy = proxy_pool.get_proxy()  # 从IP池中提取IP
90 |     if proxy:
91 |         parse_url(proxy)
-------------------------------------------------------------------------------- /examples/http_proxy/py2_requests.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | 使用requests请求代理服务器
6 | 请求http和https网页均适用
7 | """
8 |
9 | import requests
10 |
11 | # 提取代理API接口,获取1个代理IP
12 | api_url = "http://dps.kdlapi.com/api/getdps/?secret_id=o1fjh1re9o28876h7c08&signature=xxxxx&num=1&pt=1&sep=1"
13 |
14 | # 获取API接口返回的代理IP
15 | proxy_ip = requests.get(api_url).text
16 |
17 | # 用户名密码认证(私密代理/独享代理)
18 | username = "username"
19 | password = "password"
20 | proxies = {
21 |     "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": proxy_ip},
22 |     "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": proxy_ip}
23 | }
24 |
25 | # 白名单方式(需提前设置白名单)
26 | # proxies = {
27 | #     "http": "http://%(proxy)s/" % {"proxy": proxy_ip},
28 | #     "https": "http://%(proxy)s/" % {"proxy": proxy_ip}
29 | # }
30 |
31 | # 要访问的目标网页
32 | target_url = "https://dev.kdlapi.com/testproxy"
33 |
34 | # 使用代理IP发送请求
35 | response = requests.get(target_url, proxies=proxies)
36 |
37 | # 获取页面内容
38 | if response.status_code == 200:
39 |     print response.text
-------------------------------------------------------------------------------- /examples/http_proxy/py2_urllib2.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | 使用urllib2请求代理服务器
6 |
请求http和https网页均适用 7 | """ 8 | 9 | import urllib2 10 | import ssl 11 | 12 | # 全局取消证书验证,避免访问https网页报错 13 | ssl._create_default_https_context = ssl._create_unverified_context 14 | 15 | # 提取代理API接口,获取1个代理IP 16 | api_url = "http://dps.kdlapi.com/api/getdps/?secret_id=o1fjh1re9o28876h7c08&signature=xxxxx&num=1&pt=1&sep=1" 17 | 18 | # 获取API接口返回的IP 19 | proxy_ip = urllib2.urlopen(api_url).read() 20 | 21 | # 用户名密码认证(私密代理/独享代理) 22 | username = "username" 23 | password = "password" 24 | proxies = { 25 | "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": proxy_ip}, 26 | "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": proxy_ip} 27 | } 28 | 29 | # 白名单方式(需提前设置白名单) 30 | # proxies = { 31 | # "http": "http://%(proxy)s/" % {"proxy": proxy_ip}, 32 | # "https": "http://%(proxy)s/" % {"proxy": proxy_ip} 33 | # } 34 | 35 | # 要访问的目标网页 36 | target_url = "https://dev.kdlapi.com/testproxy" 37 | 38 | # 使用代理IP发送请求 39 | proxy_support = urllib2.ProxyHandler(proxies) 40 | opener = urllib2.build_opener(proxy_support) 41 | urllib2.install_opener(opener) 42 | response = urllib2.urlopen(target_url) 43 | 44 | # 获取页面内容 45 | if response.code == 200: 46 | print response.read() -------------------------------------------------------------------------------- /examples/http_proxy/py3_aiohttp.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | 使用aiohttp请求代理服务器 6 | 请求http和https网页均适用 7 | 8 | """ 9 | import random 10 | import asyncio 11 | 12 | 13 | import aiohttp 14 | import requests 15 | 16 | page_url = "http://icanhazip.com/" # 要访问的目标网页 17 | 18 | # API接口,返回格式为json 19 | api_url = "http://dps.kdlapi.com/api/getdps/?secret_id=o1fjh1re9o28876h7c08&signature=xxxxx&num=5&pt=1&format=json&sep=1" # API接口 20 | 21 | # API接口返回的proxy_list 22 | proxy_list = requests.get(api_url).json().get('data').get('proxy_list') 23 | 24 | # 用户名密码认证(私密代理/独享代理) 25 | username = "username" 26 | password = "password" 27 | 28 | proxy_auth = aiohttp.BasicAuth(username, password) 29 | 30 | 31 | async def fetch(url): 32 | async with aiohttp.ClientSession() as session: 33 | async with session.get(url, proxy="http://" + random.choice(proxy_list), proxy_auth=proxy_auth) as resp: 34 | content = await resp.read() 35 | print(f"status_code: {resp.status}, content: {content}") 36 | 37 | 38 | def run(): 39 | loop = asyncio.get_event_loop() 40 | # 异步发出5次请求 41 | tasks = [fetch(page_url) for _ in range(5)] 42 | loop.run_until_complete(asyncio.wait(tasks)) 43 | 44 | 45 | if __name__ == '__main__': 46 | run() -------------------------------------------------------------------------------- /examples/http_proxy/py3_feapder.py: -------------------------------------------------------------------------------- 1 | import feapder 2 | 3 | 4 | class Py3Feapder(feapder.AirSpider): 5 | def start_requests(self): 6 | yield feapder.Request("https://dev.kdlapi.com/testproxy") 7 | 8 | def download_midware(self, request): 9 | # 提取代理API接口,获取1个代理IP 10 | api_url = "http://dps.kdlapi.com/api/getdps/?secret_id=o1fjh1re9o28876h7c08&signature=xxxxx&num=1&pt=1&sep=1" 11 | 12 | # 获取API接口返回的代理IP 13 | proxy_ip = feapder.Request(api_url).get_response().text 14 | 15 | # 用户名密码认证(私密代理/独享代理) 16 | username = "username" 17 | password = "password" 18 | proxies = { 19 | "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": proxy_ip}, 20 | "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": 
username, "pwd": password, "proxy": proxy_ip}
21 |         }
22 |
23 |         # 白名单认证(需提前设置白名单)
24 |         # proxies = {
25 |         #     "http": "http://%(proxy)s/" % {"proxy": proxy_ip},
26 |         #     "https": "http://%(proxy)s/" % {"proxy": proxy_ip}
27 |         # }
28 |
29 |         request.proxies = proxies
30 |         return request
31 |
32 |     def parse(self, request, response):
33 |         print(response.text)
34 |
35 |
36 | if __name__ == "__main__":
37 |     Py3Feapder().start()
38 |
-------------------------------------------------------------------------------- /examples/http_proxy/py3_httpx.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | 使用httpx请求代理服务器
6 | 请求http和https网页均适用
7 | """
8 |
9 | import random
10 | import asyncio
11 |
12 | import httpx
13 | import requests
14 |
15 | page_url = "http://icanhazip.com/"  # 要访问的目标网页
16 |
17 | # API接口,返回格式为json
18 | api_url = "http://dps.kdlapi.com/api/getdps/?secret_id=o1fjh1re9o28876h7c08&signature=xxxxx&num=10&pt=1&format=json&sep=1"  # API接口
19 |
20 | # API接口返回的proxy_list
21 | proxy_list = requests.get(api_url).json().get('data').get('proxy_list')
22 |
23 | # 用户名密码认证(私密代理/独享代理)
24 | username = "username"
25 | password = "password"
26 |
27 |
28 | async def fetch(url):
29 |     proxies = {
30 |         "http": f"http://{username}:{password}@{random.choice(proxy_list)}",
31 |         "https": f"http://{username}:{password}@{random.choice(proxy_list)}",
32 |     }
33 |     async with httpx.AsyncClient(proxies=proxies, timeout=10) as client:
34 |         resp = await client.get(url)
35 |         print(f"status_code: {resp.status_code}, content: {resp.content}")
36 |
37 |
38 | def run():
39 |     loop = asyncio.get_event_loop()
40 |     # 异步发出5次请求
41 |     tasks = [fetch(page_url) for _ in range(5)]
42 |     loop.run_until_complete(asyncio.wait(tasks))
43 |
44 |
45 | if __name__ == '__main__':
46 |     run()
-------------------------------------------------------------------------------- /examples/http_proxy/py3_playwright.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding: utf-8 -*-
3 | """
4 | 通过API提取代理IP
5 | 使用Playwright添加IP代理
6 | """
7 | import requests
8 | from playwright.sync_api import sync_playwright
9 |
10 |
11 | # 通过API获取代理
12 | def get_proxy(api, params):
13 |     r = requests.get(api, params=params)
14 |     if r.status_code == 200:
15 |         return r.text
16 |     else:
17 |         return None
18 |
19 |
20 | # 使用Playwright添加私密代理
21 | def playwright_use_proxy(proxy_server):
22 |     if not proxy_server:
23 |         print('获取代理失败')
24 |         return
25 |     with sync_playwright() as p:
26 |         browser = p.chromium.launch(proxy={"server": f'http://{proxy_server}'})
27 |         page = browser.new_page()
28 |         page.goto("https://dev.kdlapi.com/testproxy")
29 |         content = page.content()
30 |         browser.close()
31 |     return content
32 |
33 |
34 | def main():
35 |     # 定义API配置
36 |     params = {
37 |         'num': 1,
38 |         'pt': 1,
39 |         'sep': 1,
40 |         'secret_id': 'your secret_id',
41 |         'signature': 'your signature',
42 |     }
43 |     api = 'https://dps.kdlapi.com/api/getdps/'
44 |     proxy = get_proxy(api, params)
45 |     content = playwright_use_proxy(proxy)
46 |     print(content)
47 |
48 |
49 | if __name__ == '__main__':
50 |     main()
51 |
-------------------------------------------------------------------------------- /examples/http_proxy/py3_pyppeteer.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding: utf-8 -*-
3 |
4 | """
5 | 请求http和https网页均适用
6 | """
7 |
8 | import asyncio
9 |
10 |
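# 补充说明(编者添加的注释): 本样例的代理流程为——先通过API提取一个代理IP,
# 再以 '--proxy-server=IP:PORT' 参数启动Chromium,
# 最后用 page.authenticate() 提交用户名密码完成代理认证(白名单模式可注释该行)。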
import requests 11 | from pyppeteer import launch 12 | 13 | # 提取代理API接口,获取1个代理IP 14 | api_url = "http://dps.kdlapi.com/api/getdps/?secret_id=o1fjh1re9o28876h7c08&signature=xxxxx&num=1&pt=1&sep=1" 15 | # 获取API接口返回的代理IP 16 | proxy_ip = requests.get(api_url).text 17 | proxy = "http://" + proxy_ip 18 | 19 | 20 | def accounts(): 21 | # 用户名密码认证(私密代理/独享代理) 22 | username = "username" 23 | password = "password" 24 | account = {"username": username, "password": password} 25 | return account 26 | 27 | 28 | async def main(): 29 | # 要访问的目标网页 30 | target_url = "https://dev.kdlapi.com/testproxy" 31 | 32 | browser = await launch({'headless': False, 'args': ['--disable-infobars', '--proxy-server=' + proxy]}) 33 | page = await browser.newPage() 34 | await page.authenticate(accounts()) # 白名单方式,注释本行(需提前设置白名单) 35 | await page.setViewport({'width': 1920, 'height': 1080}) 36 | # 使用代理IP发送请求 37 | await page.goto(target_url) 38 | await asyncio.sleep(209) 39 | await browser.close() 40 | 41 | asyncio.get_event_loop().run_until_complete(main()) 42 | -------------------------------------------------------------------------------- /examples/http_proxy/py3_requests.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | 使用requests请求代理服务器 6 | 请求http和https网页均适用 7 | """ 8 | 9 | import requests 10 | 11 | # 提取代理API接口,获取1个代理IP 12 | api_url = "http://dps.kdlapi.com/api/getdps/?secret_id=o1fjh1re9o28876h7c08&signature=xxxxx&num=1&pt=1&sep=1" 13 | 14 | # 获取API接口返回的代理IP 15 | proxy_ip = requests.get(api_url).text 16 | 17 | # 用户名密码认证(私密代理/独享代理) 18 | username = "username" 19 | password = "password" 20 | proxies = { 21 | "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": proxy_ip}, 22 | "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": proxy_ip} 23 | } 24 | 25 | # 白名单方式(需提前设置白名单) 26 | # proxies = { 27 | # "http": "http://%(proxy)s/" % {"proxy": proxy_ip}, 28 | # "https": "http://%(proxy)s/" % {"proxy": proxy_ip} 29 | # } 30 | 31 | # 要访问的目标网页 32 | target_url = "https://dev.kdlapi.com/testproxy" 33 | 34 | # 使用代理IP发送请求 35 | response = requests.get(target_url, proxies=proxies) 36 | 37 | # 获取页面内容 38 | if response.status_code == 200: 39 | print(response.text) -------------------------------------------------------------------------------- /examples/http_proxy/py3_scrapy/scrapy.cfg: -------------------------------------------------------------------------------- 1 | # Automatically created by: scrapy startproject 2 | # 3 | # For more information about the [deploy] section see: 4 | # https://scrapyd.readthedocs.io/en/latest/deploy.html 5 | 6 | [settings] 7 | default = tutorial.settings 8 | 9 | [deploy] 10 | #url = http://localhost:6800/ 11 | project = tutorial 12 | -------------------------------------------------------------------------------- /examples/http_proxy/py3_scrapy/tutorial/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuaidaili/python-sdk/87d895b68c3ec1aed905d524d02f842ae6426468/examples/http_proxy/py3_scrapy/tutorial/__init__.py -------------------------------------------------------------------------------- /examples/http_proxy/py3_scrapy/tutorial/items.py: -------------------------------------------------------------------------------- 1 | # Define here the models for your scraped items 2 | # 3 | # See documentation in: 4 | # 
https://docs.scrapy.org/en/latest/topics/items.html 5 | 6 | import scrapy 7 | 8 | 9 | class TutorialItem(scrapy.Item): 10 | # define the fields for your item here like: 11 | # name = scrapy.Field() 12 | pass 13 | -------------------------------------------------------------------------------- /examples/http_proxy/py3_scrapy/tutorial/middlewares.py: -------------------------------------------------------------------------------- 1 | # Define here the models for your spider middleware 2 | # 3 | # See documentation in: 4 | # https://docs.scrapy.org/en/latest/topics/spider-middleware.html 5 | 6 | from scrapy import signals 7 | from .myextend import pro 8 | import random 9 | # useful for handling different item types with a single interface 10 | from itemadapter import is_item, ItemAdapter 11 | 12 | 13 | class TutorialSpiderMiddleware: 14 | # Not all methods need to be defined. If a method is not defined, 15 | # scrapy acts as if the spider middleware does not modify the 16 | # passed objects. 17 | 18 | @classmethod 19 | def from_crawler(cls, crawler): 20 | # This method is used by Scrapy to create your spiders. 21 | s = cls() 22 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) 23 | return s 24 | 25 | def process_spider_input(self, response, spider): 26 | # Called for each response that goes through the spider 27 | # middleware and into the spider. 28 | 29 | # Should return None or raise an exception. 30 | return None 31 | 32 | def process_spider_output(self, response, result, spider): 33 | # Called with the results returned from the Spider, after 34 | # it has processed the response. 35 | 36 | # Must return an iterable of Request, or item objects. 37 | for i in result: 38 | yield i 39 | 40 | def process_spider_exception(self, response, exception, spider): 41 | # Called when a spider or process_spider_input() method 42 | # (from other spider middleware) raises an exception. 43 | 44 | # Should return either None or an iterable of Request or item objects. 45 | pass 46 | 47 | def process_start_requests(self, start_requests, spider): 48 | # Called with the start requests of the spider, and works 49 | # similarly to the process_spider_output() method, except 50 | # that it doesn’t have a response associated. 51 | 52 | # Must return only requests (not items). 53 | for r in start_requests: 54 | yield r 55 | 56 | def spider_opened(self, spider): 57 | spider.logger.info('Spider opened: %s' % spider.name) 58 | 59 | 60 | class TutorialDownloaderMiddleware: 61 | # Not all methods need to be defined. If a method is not defined, 62 | # scrapy acts as if the downloader middleware does not modify the 63 | # passed objects. 64 | 65 | @classmethod 66 | def from_crawler(cls, crawler): 67 | # This method is used by Scrapy to create your spiders. 68 | s = cls() 69 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) 70 | return s 71 | 72 | def process_request(self, request, spider): 73 | # Called for each request that goes through the downloader 74 | # middleware. 75 | 76 | # Must either: 77 | # - return None: continue processing this request 78 | # - or return a Response object 79 | # - or return a Request object 80 | # - or raise IgnoreRequest: process_exception() methods of 81 | # installed downloader middleware will be called 82 | return None 83 | 84 | def process_response(self, request, response, spider): 85 | # Called with the response returned from the downloader. 
86 | 87 | # Must either; 88 | # - return a Response object 89 | # - return a Request object 90 | # - or raise IgnoreRequest 91 | return response 92 | 93 | def process_exception(self, request, exception, spider): 94 | # Called when a download handler or a process_request() 95 | # (from other downloader middleware) raises an exception. 96 | 97 | # Must either: 98 | # - return None: continue processing this exception 99 | # - return a Response object: stops process_exception() chain 100 | # - return a Request object: stops process_exception() chain 101 | pass 102 | 103 | def spider_opened(self, spider): 104 | spider.logger.info('Spider opened: %s' % spider.name) 105 | 106 | 107 | class ProxyDownloaderMiddleware: 108 | 109 | def process_request(self, request, spider): 110 | proxy = random.choice(pro.proxy_list) 111 | 112 | # 用户名密码认证(私密代理/独享代理) 113 | username = "username" 114 | password = "password" 115 | request.meta['proxy'] = "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": proxy} 116 | 117 | # 白名单认证(私密代理/独享代理) 118 | # request.meta['proxy'] = "http://%(proxy)s/" % {"proxy": proxy} 119 | return None 120 | 121 | -------------------------------------------------------------------------------- /examples/http_proxy/py3_scrapy/tutorial/myextend.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -- coding: utf-8 -- 3 | import time 4 | import threading 5 | 6 | import requests 7 | from scrapy import signals 8 | 9 | # 提取代理IP的api 10 | api_url = 'http://dps.kdlapi.com/api/getdps/?secret_id=o1fjh1re9o28876h7c08&signature=xxxxx&num=10&pt=1&format=json&sep=1' 11 | foo = True 12 | 13 | class Proxy: 14 | 15 | def __init__(self, ): 16 | self._proxy_list = requests.get(api_url).json().get('data').get('proxy_list') 17 | 18 | @property 19 | def proxy_list(self): 20 | return self._proxy_list 21 | 22 | @proxy_list.setter 23 | def proxy_list(self, list): 24 | self._proxy_list = list 25 | 26 | 27 | pro = Proxy() 28 | print(pro.proxy_list) 29 | 30 | 31 | class MyExtend: 32 | 33 | def __init__(self, crawler): 34 | self.crawler = crawler 35 | # 将自定义方法绑定到scrapy信号上,使程序与spider引擎同步启动与关闭 36 | # scrapy信号文档: https://www.osgeo.cn/scrapy/topics/signals.html 37 | # scrapy自定义拓展文档: https://www.osgeo.cn/scrapy/topics/extensions.html 38 | crawler.signals.connect(self.start, signals.engine_started) 39 | crawler.signals.connect(self.close, signals.spider_closed) 40 | 41 | @classmethod 42 | def from_crawler(cls, crawler): 43 | return cls(crawler) 44 | 45 | def start(self): 46 | t = threading.Thread(target=self.extract_proxy) 47 | t.start() 48 | 49 | def extract_proxy(self): 50 | while foo: 51 | pro.proxy_list = requests.get(api_url).json().get('data').get('proxy_list') 52 | #设置每15秒提取一次ip 53 | time.sleep(15) 54 | 55 | def close(self): 56 | global foo 57 | foo = False -------------------------------------------------------------------------------- /examples/http_proxy/py3_scrapy/tutorial/pipelines.py: -------------------------------------------------------------------------------- 1 | # Define your item pipelines here 2 | # 3 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting 4 | # See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html 5 | 6 | 7 | # useful for handling different item types with a single interface 8 | from itemadapter import ItemAdapter 9 | 10 | 11 | class TutorialPipeline: 12 | def process_item(self, item, spider): 13 | return item 14 | 
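上面的 myextend.py 借助scrapy信号在引擎启动时开启子线程,每15秒刷新一次全局代理列表 pro.proxy_list。同样的"后台定时刷新"思路也可以脱离scrapy单独验证,下面是一个最小示意(其中api_url与刷新间隔均为示例假设):
```
# 最小示意: 后台守护线程定时刷新代理列表(api_url与间隔秒数均为示例假设)
import time
import threading

import requests

api_url = "http://dps.kdlapi.com/api/getdps/?secret_id=xxx&signature=xxx&num=10&pt=1&format=json&sep=1"  # 示例假设
proxy_list = []

def refresh_forever(interval=15):
    global proxy_list
    while True:
        try:
            proxy_list = requests.get(api_url).json().get('data').get('proxy_list')
        except Exception:
            pass  # 提取失败时保留旧列表,下个周期重试
        time.sleep(interval)

t = threading.Thread(target=refresh_forever, daemon=True)
t.start()
```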
-------------------------------------------------------------------------------- /examples/http_proxy/py3_scrapy/tutorial/settings.py: -------------------------------------------------------------------------------- 1 | # Scrapy settings for tutorial project 2 | # 3 | # For simplicity, this file contains only settings considered important or 4 | # commonly used. You can find more settings consulting the documentation: 5 | # 6 | # https://docs.scrapy.org/en/latest/topics/settings.html 7 | # https://docs.scrapy.org/en/latest/topics/downloader-middleware.html 8 | # https://docs.scrapy.org/en/latest/topics/spider-middleware.html 9 | 10 | BOT_NAME = 'tutorial' 11 | 12 | SPIDER_MODULES = ['tutorial.spiders'] 13 | NEWSPIDER_MODULE = 'tutorial.spiders' 14 | 15 | 16 | # Crawl responsibly by identifying yourself (and your website) on the user-agent 17 | #USER_AGENT = 'tutorial (+http://www.yourdomain.com)' 18 | 19 | # Obey robots.txt rules 20 | ROBOTSTXT_OBEY = False 21 | 22 | # Configure maximum concurrent requests performed by Scrapy (default: 16) 23 | #CONCURRENT_REQUESTS = 32 24 | 25 | # Configure a delay for requests for the same website (default: 0) 26 | # See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay 27 | # See also autothrottle settings and docs 28 | #DOWNLOAD_DELAY = 3 29 | # The download delay setting will honor only one of: 30 | #CONCURRENT_REQUESTS_PER_DOMAIN = 16 31 | #CONCURRENT_REQUESTS_PER_IP = 16 32 | 33 | # Disable cookies (enabled by default) 34 | #COOKIES_ENABLED = False 35 | 36 | # Disable Telnet Console (enabled by default) 37 | #TELNETCONSOLE_ENABLED = False 38 | 39 | # Override the default request headers: 40 | #DEFAULT_REQUEST_HEADERS = { 41 | # 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 42 | # 'Accept-Language': 'en', 43 | #} 44 | 45 | # Enable or disable spider middlewares 46 | # See https://docs.scrapy.org/en/latest/topics/spider-middleware.html 47 | #SPIDER_MIDDLEWARES = { 48 | # 'tutorial.middlewares.TutorialSpiderMiddleware': 543, 49 | #} 50 | 51 | # Enable or disable downloader middlewares 52 | # See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html 53 | DOWNLOADER_MIDDLEWARES = { 54 | 'tutorial.middlewares.ProxyDownloaderMiddleware': 100, 55 | } 56 | LOG_LEVEL = 'WARNING' 57 | # Enable or disable extensions 58 | # See https://docs.scrapy.org/en/latest/topics/extensions.html 59 | EXTENSIONS = { 60 | 'tutorial.myextend.MyExtend': 300, 61 | } 62 | 63 | # Configure item pipelines 64 | # See https://docs.scrapy.org/en/latest/topics/item-pipeline.html 65 | #ITEM_PIPELINES = { 66 | # 'tutorial.pipelines.TutorialPipeline': 300, 67 | #} 68 | 69 | # Enable and configure the AutoThrottle extension (disabled by default) 70 | # See https://docs.scrapy.org/en/latest/topics/autothrottle.html 71 | #AUTOTHROTTLE_ENABLED = True 72 | # The initial download delay 73 | #AUTOTHROTTLE_START_DELAY = 5 74 | # The maximum download delay to be set in case of high latencies 75 | #AUTOTHROTTLE_MAX_DELAY = 60 76 | # The average number of requests Scrapy should be sending in parallel to 77 | # each remote server 78 | #AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0 79 | # Enable showing throttling stats for every response received: 80 | #AUTOTHROTTLE_DEBUG = False 81 | 82 | # Enable and configure HTTP caching (disabled by default) 83 | # See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings 84 | #HTTPCACHE_ENABLED = True 85 | #HTTPCACHE_EXPIRATION_SECS = 0 86 | #HTTPCACHE_DIR = 
'httpcache' 87 | #HTTPCACHE_IGNORE_HTTP_CODES = [] 88 | #HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage' 89 | -------------------------------------------------------------------------------- /examples/http_proxy/py3_scrapy/tutorial/spiders/__init__.py: -------------------------------------------------------------------------------- 1 | # This package will contain the spiders of your Scrapy project 2 | # 3 | # Please refer to the documentation for information on how to create and manage 4 | # your spiders. 5 | -------------------------------------------------------------------------------- /examples/http_proxy/py3_scrapy/tutorial/spiders/kdl_spiders.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -- coding: utf-8 -- 3 | import scrapy 4 | 5 | class KdlSpider(scrapy.spiders.Spider): 6 | name = "kdl" 7 | 8 | def start_requests(self): 9 | url = "https://dev.kdlapi.com/testproxy" 10 | yield scrapy.Request(url, callback=self.parse) 11 | 12 | def parse(self, response): 13 | print(response.status) -------------------------------------------------------------------------------- /examples/http_proxy/py3_urllib.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | 使用urllib请求代理服务器 6 | 请求http和https网页均适用 7 | """ 8 | 9 | import urllib.request 10 | import ssl 11 | 12 | # 全局取消证书验证,避免访问https网页报错 13 | ssl._create_default_https_context = ssl._create_unverified_context 14 | 15 | # 提取代理API接口,获取1个代理IP 16 | api_url = "http://dps.kdlapi.com/api/getdps/?secret_id=o1fjh1re9o28876h7c08&signature=xxxxx&num=1&pt=1&sep=1" 17 | 18 | # 获取API接口返回的IP 19 | proxy_ip = urllib.request.urlopen(api_url).read().decode('utf-8') 20 | 21 | # 用户名密码认证(私密代理/独享代理) 22 | username = "username" 23 | password = "password" 24 | proxies = { 25 | "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": proxy_ip}, 26 | "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": proxy_ip} 27 | } 28 | 29 | # 白名单方式(需提前设置白名单) 30 | # proxies = { 31 | # "http": "http://%(proxy)s/" % {"proxy": proxy_ip}, 32 | # "https": "http://%(proxy)s/" % {"proxy": proxy_ip} 33 | # } 34 | 35 | # 要访问的目标网页 36 | target_url = "https://dev.kdlapi.com/testproxy" 37 | 38 | # 使用代理IP发送请求 39 | proxy_support = urllib.request.ProxyHandler(proxies) 40 | opener = urllib.request.build_opener(proxy_support) 41 | urllib.request.install_opener(opener) 42 | response = urllib.request.urlopen(target_url) 43 | 44 | # 获取页面内容 45 | if response.code == 200: 46 | print(response.read().decode('utf-8')) -------------------------------------------------------------------------------- /examples/http_proxy/py3_websocket.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | 使用HTTP代理发送websocket请求 6 | """ 7 | import gzip 8 | import zlib 9 | 10 | import websocket 11 | 12 | OPCODE_DATA = (websocket.ABNF.OPCODE_TEXT, websocket.ABNF.OPCODE_BINARY) 13 | 14 | url = "ws://echo.websocket.org/" 15 | 16 | proxies = { 17 | "http_proxy_host": "59.38.241.25", 18 | "http_proxy_port": 23916, 19 | "http_proxy_auth": ("username", "password"), 20 | } 21 | 22 | ws = websocket.create_connection(url, **proxies) 23 | 24 | 25 | def recv(): 26 | try: 27 | frame = ws.recv_frame() 28 | except websocket.WebSocketException: 29 | return websocket.ABNF.OPCODE_CLOSE, None 30 
| if not frame:
31 |         raise websocket.WebSocketException("Not a valid frame %s" % frame)
32 |     elif frame.opcode in OPCODE_DATA:
33 |         return frame.opcode, frame.data
34 |     elif frame.opcode == websocket.ABNF.OPCODE_CLOSE:
35 |         ws.send_close()
36 |         return frame.opcode, None
37 |     elif frame.opcode == websocket.ABNF.OPCODE_PING:
38 |         ws.pong(frame.data)
39 |         return frame.opcode, frame.data
40 |
41 |     return frame.opcode, frame.data
42 |
43 |
44 | def recv_ws():
45 |     opcode, data = recv()
46 |     if opcode == websocket.ABNF.OPCODE_CLOSE:
47 |         return
48 |     if opcode == websocket.ABNF.OPCODE_TEXT and isinstance(data, bytes):
49 |         data = str(data, "utf-8")
50 |     if isinstance(data, bytes) and len(data) > 2 and data[:2] == b'\037\213':  # gzip magick
51 |         try:
52 |             data = "[gzip] " + str(gzip.decompress(data), "utf-8")
53 |         except Exception:
54 |             pass
55 |     elif isinstance(data, bytes):
56 |         try:
57 |             data = "[zlib] " + str(zlib.decompress(data, -zlib.MAX_WBITS), "utf-8")
58 |         except Exception:
59 |             pass
60 |     if isinstance(data, bytes):
61 |         data = repr(data)
62 |
63 |     print("< " + data)
64 |
65 |
66 | def main():
67 |     print("Press Ctrl+C to quit")
68 |     while True:
69 |         message = input("> ")
70 |         ws.send(message)
71 |         recv_ws()
72 |
73 |
74 | if __name__ == "__main__":
75 |     try:
76 |         main()
77 |     except KeyboardInterrupt:
78 |         print('\nbye')
79 |     except Exception as e:
80 |         print(e)
-------------------------------------------------------------------------------- /examples/http_proxy/py3_websocket_short.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding: utf-8 -*-
3 |
4 |
5 |
6 |
7 | import ssl
8 | import websocket
9 |
10 |
11 | def on_message(ws, message):
12 |     print(message)
13 |
14 |
15 | def on_error(ws, error):
16 |     print(error)
17 |
18 |
19 | def on_open(ws):
20 |     data = '{}'  # 此处填入您需要传给目标网站的json格式参数,如{"type":"web","data":{"_id":"xxxx"}}
21 |     ws.send(data)
22 |
23 |
24 | def on_close(*args):
25 |     print("### closed ###")
26 |
27 |
28 | proxies = {
29 |     "http_proxy_host": "59.38.241.25",
30 |     "http_proxy_port": 23916,
31 |     "http_proxy_auth": ("username", "password"),
32 | }
33 |
34 |
35 | def start():
36 |     websocket.enableTrace(True)
37 |     target_url = 'ws://127.0.0.1:5000/socket.io/?EIO=4&transport=websocket'  # 此处替换您的目标网站
38 |     ws = websocket.WebSocketApp(
39 |         url = target_url,
40 |         header = [
41 |             "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36"
42 |         ],
43 |         on_message=on_message,
44 |         on_error=on_error,
45 |         on_close=on_close,
46 |     )
47 |     ws.on_open = on_open
48 |     ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE}, **proxies)
49 |
50 |
51 | if __name__ == "__main__":
52 |     start()
53 |
-------------------------------------------------------------------------------- /examples/http_proxy/selenium_chrome_username_password.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 |
4 | from selenium import webdriver
5 | import string
6 | import zipfile
7 | import time
8 |
9 |
10 | def create_proxyauth_extension(proxy_host, proxy_port, proxy_username, proxy_password, scheme='http', plugin_path=None):
11 |     """代理认证插件
12 |
13 |     args:
14 |         proxy_host (str): 你的代理地址或者域名(str类型)
15 |         proxy_port (int): 代理端口号(int类型)
16 |         # 用户名密码认证(私密代理/独享代理)
17 |         proxy_username (str):用户名(字符串)
18 |         proxy_password (str): 密码 (字符串)
19 |     kwargs:
20 |
scheme (str): 代理方式 默认http
21 |         plugin_path (str): 扩展的绝对路径
22 |
23 |     return str -> plugin_path
24 |     """
25 |
26 |     if plugin_path is None:
27 |         plugin_path = 'vimm_chrome_proxyauth_plugin.zip'
28 |
29 |     manifest_json = """
30 |     {
31 |         "version": "1.0.0",
32 |         "manifest_version": 2,
33 |         "name": "Chrome Proxy",
34 |         "permissions": [
35 |             "proxy",
36 |             "tabs",
37 |             "unlimitedStorage",
38 |             "storage",
39 |             "<all_urls>",
40 |             "webRequest",
41 |             "webRequestBlocking"
42 |         ],
43 |         "background": {
44 |             "scripts": ["background.js"]
45 |         },
46 |         "minimum_chrome_version":"22.0.0"
47 |     }
48 |     """
49 |
50 |     background_js = string.Template(
51 |         """
52 |         var config = {
53 |             mode: "fixed_servers",
54 |             rules: {
55 |                 singleProxy: {
56 |                     scheme: "${scheme}",
57 |                     host: "${host}",
58 |                     port: parseInt(${port})
59 |                 },
60 |                 bypassList: ["foobar.com"]
61 |             }
62 |         };
63 |
64 |         chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});
65 |
66 |         function callbackFn(details) {
67 |             return {
68 |                 authCredentials: {
69 |                     username: "${username}",
70 |                     password: "${password}"
71 |                 }
72 |             };
73 |         }
74 |
75 |         chrome.webRequest.onAuthRequired.addListener(
76 |             callbackFn,
77 |             {urls: ["<all_urls>"]},
78 |             ['blocking']
79 |         );
80 |         """
81 |     ).substitute(
82 |         host=proxy_host,
83 |         port=proxy_port,
84 |         username=proxy_username,
85 |         password=proxy_password,
86 |         scheme=scheme,
87 |     )
88 |     with zipfile.ZipFile(plugin_path, 'w') as zp:
89 |         zp.writestr("manifest.json", manifest_json)
90 |         zp.writestr("background.js", background_js)
91 |     return plugin_path
92 |
93 |
94 | proxyauth_plugin_path = create_proxyauth_extension(
95 |     proxy_host="${proxy_ip}",  # 代理IP
96 |     proxy_port="${proxy_port}",  # 端口号
97 |     # 用户名密码(私密代理/独享代理)
98 |     proxy_username="${username}",
99 |     proxy_password="${password}"
100 | )
101 |
102 |
103 | options = webdriver.ChromeOptions()
104 | options.add_extension(proxyauth_plugin_path)
105 | # ${chromedriver_path}: chromedriver驱动存放路径
106 | driver = webdriver.Chrome(executable_path="${chromedriver_path}", options=options)
107 | driver.get("https://dev.kdlapi.com/testproxy")
108 |
109 | # 获取页面内容
110 | print(driver.page_source)
111 |
112 | # 延迟3秒后关闭当前窗口,如果是最后一个窗口则退出
113 | time.sleep(3)
114 | driver.close()
-------------------------------------------------------------------------------- /examples/http_proxy/selenium_chrome_whitelist.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 |
4 | from selenium import webdriver
5 | import time
6 |
7 | options = webdriver.ChromeOptions()
8 | options.add_argument('--proxy-server=http://${ip:port}')  # 代理IP:端口号
9 | # ${chromedriver_path}: chromedriver驱动存放路径
10 | driver = webdriver.Chrome(executable_path="${chromedriver_path}", options=options)
11 | driver.get("https://dev.kdlapi.com/testproxy")
12 |
13 | # 获取页面内容
14 | print(driver.page_source)
15 |
16 | # 延迟3秒后关闭当前窗口,如果是最后一个窗口则退出
17 | time.sleep(3)
18 | driver.close()
-------------------------------------------------------------------------------- /examples/http_proxy/selenium_firefox_username_password.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 |
4 | import time
5 |
6 | from seleniumwire import webdriver  # pip install selenium-wire
7 |
8 | username = 'username'  # 请替换您的用户名和密码
9 | password = 'password'
10 | proxy_ip = '59.38.241.25:23916'  # 请替换您提取到的代理ip
11 | options = {
12 |     'proxy': {
13 |         'http': "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username,
"pwd": password, "proxy": proxy_ip}, 14 | 'https': "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": proxy_ip} 15 | } 16 | } 17 | 18 | driver = webdriver.Firefox(seleniumwire_options=options,executable_path="${geckodriver_path}") 19 | 20 | driver.get('https://dev.kdlapi.com/testproxy') 21 | 22 | # 获取页面内容 23 | print(driver.page_source) 24 | 25 | # 延迟3秒后关闭当前窗口,如果是最后一个窗口则退出 26 | time.sleep(3) 27 | driver.close() -------------------------------------------------------------------------------- /examples/http_proxy/selenium_firefox_whitelist.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | from selenium import webdriver 5 | import time 6 | 7 | fp = webdriver.FirefoxProfile() 8 | proxy = '${ip:port}' 9 | ip, port = proxy.split(":") 10 | port = int(port) 11 | 12 | # 设置代理配置 13 | fp.set_preference('network.proxy.type', 1) 14 | fp.set_preference('network.proxy.http', ip) 15 | fp.set_preference('network.proxy.http_port', port) 16 | fp.set_preference('network.proxy.ssl', ip) 17 | fp.set_preference('network.proxy.ssl_port', port) 18 | 19 | driver = webdriver.Firefox(executable_path="${geckodriver_path}", firefox_profile=fp) 20 | driver.get('https://dev.kdlapi.com/testproxy') 21 | 22 | # 获取页面内容 23 | print(driver.page_source) 24 | 25 | # 延迟3秒后关闭当前窗口,如果是最后一个窗口则退出 26 | time.sleep(3) 27 | driver.close() -------------------------------------------------------------------------------- /examples/http_proxy_tunnel/py2_requests.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | 使用requests请求隧道服务器 6 | 请求http和https网页均适用 7 | """ 8 | 9 | import requests 10 | 11 | # 隧道域名:端口号 12 | tunnel = "tpsXXX.kdlapi.com:15818" 13 | 14 | # 用户名密码方式 15 | username = "username" 16 | password = "password" 17 | proxies = { 18 | "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel}, 19 | "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel} 20 | } 21 | 22 | # 白名单方式(需提前设置白名单) 23 | # proxies = { 24 | # "http": "http://%(proxy)s/" % {"proxy": tunnel}, 25 | # "https": "http://%(proxy)s/" % {"proxy": tunnel} 26 | # } 27 | 28 | # 要访问的目标网页 29 | target_url = "https://dev.kdlapi.com/testproxy" 30 | 31 | # 使用隧道域名发送请求 32 | response = requests.get(target_url, proxies=proxies) 33 | 34 | # 获取页面内容 35 | if response.status_code == 200: 36 | print response.text -------------------------------------------------------------------------------- /examples/http_proxy_tunnel/py2_urllib2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | 使用urllib2请求隧道服务器 6 | 请求http和https网页均适用 7 | """ 8 | 9 | import urllib2 10 | import ssl 11 | 12 | # 全局取消证书验证,避免访问https网页报错 13 | ssl._create_default_https_context = ssl._create_unverified_context 14 | 15 | # 隧道域名:端口号 16 | tunnel = "tpsXXX.kdlapi.com:15818" 17 | 18 | # 用户名密码方式 19 | username = "username" 20 | password = "password" 21 | proxies = { 22 | "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel}, 23 | "https": "https://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel} 24 | } 25 | 26 | # 白名单方式(需提前设置白名单) 27 | # proxies = { 28 | # "http": "http://%(proxy)s/" % {"proxy": tunnel}, 29 | # "https": "https://%(proxy)s/" % {"proxy": 
tunnel}
30 | # }
31 |
32 | # 要访问的目标网页
33 | target_url = "https://dev.kdlapi.com/testproxy"
34 |
35 | # 使用隧道域名发送请求
36 | proxy_support = urllib2.ProxyHandler(proxies)
37 | opener = urllib2.build_opener(proxy_support)
38 | urllib2.install_opener(opener)
39 | response = urllib2.urlopen(target_url)
40 |
41 | # 获取页面内容
42 | if response.code == 200:
43 |     print response.read()
-------------------------------------------------------------------------------- /examples/http_proxy_tunnel/py3_aiohttp.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | 使用aiohttp请求代理服务器
6 | 请求http和https网页均适用
7 |
8 | """
9 |
10 | import aiohttp
11 | import asyncio
12 | # asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())  windows系统请求https网站报错时调用此方法
13 |
14 | page_url = "https://dev.kdlapi.com/testproxy"  # 要访问的目标网页
15 |
16 | # 隧道域名:端口号
17 | tunnel = "tpsXXX.kdlapi.com:15818"
18 |
19 | # 用户名和密码方式
20 | username = "username"
21 | password = "password"
22 |
23 | proxy_auth = aiohttp.BasicAuth(username, password)
24 |
25 | async def fetch(session, url):
26 |     async with session.get(url, proxy="http://"+tunnel, proxy_auth=proxy_auth) as response:
27 |         return await response.text()
28 |
29 | async def main():
30 |     # aiohttp默认使用严格的HTTPS协议检查。可以通过将ssl设置为False来放松认证检查
31 |     # async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=False)) as session:
32 |     async with aiohttp.ClientSession() as session:
33 |         html = await fetch(session, page_url)
34 |         print(html)
35 |
36 | if __name__ == '__main__':
37 |     loop = asyncio.get_event_loop()
38 |     loop.run_until_complete(main())
39 |
-------------------------------------------------------------------------------- /examples/http_proxy_tunnel/py3_feapder.py: --------------------------------------------------------------------------------
1 | import feapder
2 |
3 |
4 | class Py3Feapder(feapder.AirSpider):
5 |     def start_requests(self):
6 |         yield feapder.Request("https://dev.kdlapi.com/testproxy")
7 |
8 |     def download_midware(self, request):
9 |         # 隧道域名:端口号
10 |         tunnel = "XXX.kdlapi.com:15818"
11 |
12 |         # 用户名密码认证
13 |         username = "username"
14 |         password = "password"
15 |         proxies = {
16 |             "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel},
17 |             "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel}
18 |         }
19 |
20 |         # 白名单认证(需提前设置白名单)
21 |         # proxies = {
22 |         #     "http": "http://%(proxy)s/" % {"proxy": tunnel},
23 |         #     "https": "http://%(proxy)s/" % {"proxy": tunnel}
24 |         # }
25 |
26 |         request.proxies = proxies
27 |         return request
28 |
29 |     def parse(self, request, response):
30 |         print(response.text)
31 |
32 |
33 | if __name__ == "__main__":
34 |     Py3Feapder().start()
35 |
-------------------------------------------------------------------------------- /examples/http_proxy_tunnel/py3_httpx.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | 使用httpx请求隧道服务器
6 | 请求http和https网页均适用
7 | """
8 |
9 | import httpx
10 |
11 | # 隧道域名:端口号
12 | tunnel = "tpsXXX.kdlapi.com:15818"
13 |
14 | # 用户名和密码方式
15 | username = "username"
16 | password = "password"
17 |
18 | proxy_url = "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel}
19 |
20 | proxies = httpx.Proxy(
21 |     url=proxy_url,
22 |     mode="DEFAULT"
23 | )
24 |
25 | with httpx.Client(proxies=proxies) as
client:
26 |     r = client.get('http://dev.kdlapi.com/testproxy')
27 |     print(r.text)
-------------------------------------------------------------------------------- /examples/http_proxy_tunnel/py3_pyppeteer.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | 请求http和https网页均适用
6 | """
7 | import asyncio
8 |
9 | from pyppeteer import launch
10 | # 隧道服务器
11 | proxy_raw = "tpsXXX.kdlapi.com:15818"
12 |
13 |
14 | def accounts():
15 |     # 用户名密码, 若已添加白名单则不需要添加
16 |     username = "username"
17 |     password = "password"
18 |     account = {"username": username, "password": password}
19 |     return account
20 |
21 |
22 | async def main():
23 |     # 要访问的目标网页
24 |     target_url = "https://dev.kdlapi.com/testproxy"
25 |
26 |     browser = await launch({'headless': False, 'args': ['--disable-infobars', '--proxy-server=' + proxy_raw]})
27 |     page = await browser.newPage()
28 |
29 |     await page.authenticate(accounts())  # 白名单方式,注释本行(需提前设置白名单)
30 |     await page.setViewport({'width': 1920, 'height': 1080})
31 |     # 使用代理IP发送请求
32 |     await page.goto(target_url)
33 |     await asyncio.sleep(209)
34 |     await browser.close()
35 |
36 | asyncio.get_event_loop().run_until_complete(main())
-------------------------------------------------------------------------------- /examples/http_proxy_tunnel/py3_requests.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | 使用requests请求隧道服务器
6 | 请求http和https网页均适用
7 | """
8 |
9 | import requests
10 |
11 | # 隧道域名:端口号
12 | tunnel = "tpsXXX.kdlapi.com:15818"
13 |
14 | # 用户名密码方式
15 | username = "username"
16 | password = "password"
17 | proxies = {
18 |     "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel},
19 |     "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel}
20 | }
21 |
22 | # 白名单方式(需提前设置白名单)
23 | # proxies = {
24 | #     "http": "http://%(proxy)s/" % {"proxy": tunnel},
25 | #     "https": "http://%(proxy)s/" % {"proxy": tunnel}
26 | # }
27 |
28 | # 要访问的目标网页
29 | target_url = "https://dev.kdlapi.com/testproxy"
30 |
31 | # 使用隧道域名发送请求
32 | response = requests.get(target_url, proxies=proxies)
33 |
34 | # 获取页面内容
35 | if response.status_code == 200:
36 |     print(response.text)
-------------------------------------------------------------------------------- /examples/http_proxy_tunnel/py3_scrapy/scrapy.cfg: --------------------------------------------------------------------------------
1 | # Automatically created by: scrapy startproject
2 | #
3 | # For more information about the [deploy] section see:
4 | # https://scrapyd.readthedocs.io/en/latest/deploy.html
5 |
6 | [settings]
7 | default = tutorial.settings
8 |
9 | [deploy]
10 | #url = http://localhost:6800/
11 | project = tutorial
12 |
-------------------------------------------------------------------------------- /examples/http_proxy_tunnel/py3_scrapy/tutorial/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kuaidaili/python-sdk/87d895b68c3ec1aed905d524d02f842ae6426468/examples/http_proxy_tunnel/py3_scrapy/tutorial/__init__.py -------------------------------------------------------------------------------- /examples/http_proxy_tunnel/py3_scrapy/tutorial/items.py: --------------------------------------------------------------------------------
1 | # Define here the models for your scraped items
2 | #
3 |
# See documentation in: 4 | # https://docs.scrapy.org/en/latest/topics/items.html 5 | 6 | import scrapy 7 | 8 | 9 | class TutorialItem(scrapy.Item): 10 | # define the fields for your item here like: 11 | # name = scrapy.Field() 12 | pass 13 | -------------------------------------------------------------------------------- /examples/http_proxy_tunnel/py3_scrapy/tutorial/middlewares.py: -------------------------------------------------------------------------------- 1 | # Define here the models for your spider middleware 2 | # 3 | # See documentation in: 4 | # https://docs.scrapy.org/en/latest/topics/spider-middleware.html 5 | 6 | from scrapy import signals 7 | 8 | # useful for handling different item types with a single interface 9 | from itemadapter import is_item, ItemAdapter 10 | 11 | 12 | class TutorialSpiderMiddleware: 13 | # Not all methods need to be defined. If a method is not defined, 14 | # scrapy acts as if the spider middleware does not modify the 15 | # passed objects. 16 | 17 | @classmethod 18 | def from_crawler(cls, crawler): 19 | # This method is used by Scrapy to create your spiders. 20 | s = cls() 21 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) 22 | return s 23 | 24 | def process_spider_input(self, response, spider): 25 | # Called for each response that goes through the spider 26 | # middleware and into the spider. 27 | 28 | # Should return None or raise an exception. 29 | return None 30 | 31 | def process_spider_output(self, response, result, spider): 32 | # Called with the results returned from the Spider, after 33 | # it has processed the response. 34 | 35 | # Must return an iterable of Request, or item objects. 36 | for i in result: 37 | yield i 38 | 39 | def process_spider_exception(self, response, exception, spider): 40 | # Called when a spider or process_spider_input() method 41 | # (from other spider middleware) raises an exception. 42 | 43 | # Should return either None or an iterable of Request or item objects. 44 | pass 45 | 46 | def process_start_requests(self, start_requests, spider): 47 | # Called with the start requests of the spider, and works 48 | # similarly to the process_spider_output() method, except 49 | # that it doesn’t have a response associated. 50 | 51 | # Must return only requests (not items). 52 | for r in start_requests: 53 | yield r 54 | 55 | def spider_opened(self, spider): 56 | spider.logger.info('Spider opened: %s' % spider.name) 57 | 58 | 59 | class TutorialDownloaderMiddleware: 60 | # Not all methods need to be defined. If a method is not defined, 61 | # scrapy acts as if the downloader middleware does not modify the 62 | # passed objects. 63 | 64 | @classmethod 65 | def from_crawler(cls, crawler): 66 | # This method is used by Scrapy to create your spiders. 67 | s = cls() 68 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) 69 | return s 70 | 71 | def process_request(self, request, spider): 72 | # Called for each request that goes through the downloader 73 | # middleware. 74 | 75 | # Must either: 76 | # - return None: continue processing this request 77 | # - or return a Response object 78 | # - or return a Request object 79 | # - or raise IgnoreRequest: process_exception() methods of 80 | # installed downloader middleware will be called 81 | return None 82 | 83 | def process_response(self, request, response, spider): 84 | # Called with the response returned from the downloader. 
85 | 86 | # Must either; 87 | # - return a Response object 88 | # - return a Request object 89 | # - or raise IgnoreRequest 90 | return response 91 | 92 | def process_exception(self, request, exception, spider): 93 | # Called when a download handler or a process_request() 94 | # (from other downloader middleware) raises an exception. 95 | 96 | # Must either: 97 | # - return None: continue processing this exception 98 | # - return a Response object: stops process_exception() chain 99 | # - return a Request object: stops process_exception() chain 100 | pass 101 | 102 | def spider_opened(self, spider): 103 | spider.logger.info('Spider opened: %s' % spider.name) 104 | 105 | 106 | class ProxyDownloaderMiddleware: 107 | _proxy = ('XXX.kdlapi.com', '15818') 108 | 109 | def process_request(self, request, spider): 110 | 111 | # 用户名密码认证 112 | username = "username" 113 | password = "password" 114 | request.meta['proxy'] = "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": ':'.join(ProxyDownloaderMiddleware._proxy)} 115 | 116 | # 白名单认证 117 | # request.meta['proxy'] = "http://%(proxy)s/" % {"proxy": proxy} 118 | 119 | request.headers["Connection"] = "close" 120 | return None 121 | 122 | def process_exception(self, request, exception, spider): 123 | """捕获407异常""" 124 | if "'status': 407" in exception.__str__(): # 不同版本的exception的写法可能不一样,可以debug出当前版本的exception再修改条件 125 | from scrapy.resolver import dnscache 126 | dnscache.__delitem__(ProxyDownloaderMiddleware._proxy[0]) # 删除proxy host的dns缓存 127 | return exception 128 | -------------------------------------------------------------------------------- /examples/http_proxy_tunnel/py3_scrapy/tutorial/pipelines.py: -------------------------------------------------------------------------------- 1 | # Define your item pipelines here 2 | # 3 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting 4 | # See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html 5 | 6 | 7 | # useful for handling different item types with a single interface 8 | from itemadapter import ItemAdapter 9 | 10 | 11 | class TutorialPipeline: 12 | def process_item(self, item, spider): 13 | return item 14 | -------------------------------------------------------------------------------- /examples/http_proxy_tunnel/py3_scrapy/tutorial/settings.py: -------------------------------------------------------------------------------- 1 | # Scrapy settings for tutorial project 2 | # 3 | # For simplicity, this file contains only settings considered important or 4 | # commonly used. 
--------------------------------------------------------------------------------
/examples/http_proxy_tunnel/py3_scrapy/tutorial/pipelines.py:
--------------------------------------------------------------------------------
1 | # Define your item pipelines here
2 | #
3 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting
4 | # See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
5 | 
6 | 
7 | # useful for handling different item types with a single interface
8 | from itemadapter import ItemAdapter
9 | 
10 | 
11 | class TutorialPipeline:
12 |     def process_item(self, item, spider):
13 |         return item
--------------------------------------------------------------------------------
/examples/http_proxy_tunnel/py3_scrapy/tutorial/settings.py:
--------------------------------------------------------------------------------
1 | # Scrapy settings for tutorial project
2 | #
3 | # For simplicity, this file contains only settings considered important or
4 | # commonly used. You can find more settings consulting the documentation:
5 | #
6 | #     https://docs.scrapy.org/en/latest/topics/settings.html
7 | #     https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
8 | #     https://docs.scrapy.org/en/latest/topics/spider-middleware.html
9 | 
10 | BOT_NAME = 'tutorial'
11 | 
12 | SPIDER_MODULES = ['tutorial.spiders']
13 | NEWSPIDER_MODULE = 'tutorial.spiders'
14 | 
15 | 
16 | # Crawl responsibly by identifying yourself (and your website) on the user-agent
17 | #USER_AGENT = 'tutorial (+http://www.yourdomain.com)'
18 | 
19 | # Obey robots.txt rules
20 | ROBOTSTXT_OBEY = True
21 | 
22 | # Configure maximum concurrent requests performed by Scrapy (default: 16)
23 | #CONCURRENT_REQUESTS = 32
24 | 
25 | # Configure a delay for requests for the same website (default: 0)
26 | # See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
27 | # See also autothrottle settings and docs
28 | #DOWNLOAD_DELAY = 3
29 | # The download delay setting will honor only one of:
30 | #CONCURRENT_REQUESTS_PER_DOMAIN = 16
31 | #CONCURRENT_REQUESTS_PER_IP = 16
32 | 
33 | # Disable cookies (enabled by default)
34 | #COOKIES_ENABLED = False
35 | 
36 | # Disable Telnet Console (enabled by default)
37 | #TELNETCONSOLE_ENABLED = False
38 | 
39 | # Override the default request headers:
40 | #DEFAULT_REQUEST_HEADERS = {
41 | #   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
42 | #   'Accept-Language': 'en',
43 | #}
44 | 
45 | # Enable or disable spider middlewares
46 | # See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
47 | #SPIDER_MIDDLEWARES = {
48 | #    'tutorial.middlewares.TutorialSpiderMiddleware': 543,
49 | #}
50 | 
51 | # Enable or disable downloader middlewares
52 | # See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
53 | DOWNLOADER_MIDDLEWARES = {
54 |     'tutorial.middlewares.ProxyDownloaderMiddleware': 100,
55 | }
56 | 
57 | # Enable or disable extensions
58 | # See https://docs.scrapy.org/en/latest/topics/extensions.html
59 | #EXTENSIONS = {
60 | #    'scrapy.extensions.telnet.TelnetConsole': None,
61 | #}
62 | 
63 | # Configure item pipelines
64 | # See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
65 | #ITEM_PIPELINES = {
66 | #    'tutorial.pipelines.TutorialPipeline': 300,
67 | #}
68 | 
69 | # Enable and configure the AutoThrottle extension (disabled by default)
70 | # See https://docs.scrapy.org/en/latest/topics/autothrottle.html
71 | #AUTOTHROTTLE_ENABLED = True
72 | # The initial download delay
73 | #AUTOTHROTTLE_START_DELAY = 5
74 | # The maximum download delay to be set in case of high latencies
75 | #AUTOTHROTTLE_MAX_DELAY = 60
76 | # The average number of requests Scrapy should be sending in parallel to
77 | # each remote server
78 | #AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
79 | # Enable showing throttling stats for every response received:
80 | #AUTOTHROTTLE_DEBUG = False
81 | 
82 | # Enable and configure HTTP caching (disabled by default)
83 | # See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
84 | #HTTPCACHE_ENABLED = True
85 | #HTTPCACHE_EXPIRATION_SECS = 0
86 | #HTTPCACHE_DIR = 'httpcache'
87 | #HTTPCACHE_IGNORE_HTTP_CODES = []
88 | #HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
89 | 
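With the ProxyDownloaderMiddleware entry above enabled in DOWNLOADER_MIDDLEWARES, the spider in the next file can be started with scrapy crawl kdl from the directory containing scrapy.cfg, or programmatically; a minimal sketch (run.py is a hypothetical file name):

# run.py -- place next to scrapy.cfg
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

# get_project_settings() loads settings.py above, including the proxy middleware
process = CrawlerProcess(get_project_settings())
process.crawl('kdl')  # spider name defined in kdl_spider.py
process.start()       # blocks until the crawl finishes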
--------------------------------------------------------------------------------
/examples/http_proxy_tunnel/py3_scrapy/tutorial/spiders/__init__.py:
--------------------------------------------------------------------------------
1 | # This package will contain the spiders of your Scrapy project
2 | #
3 | # Please refer to the documentation for information on how to create and manage
4 | # your spiders.
5 | 
--------------------------------------------------------------------------------
/examples/http_proxy_tunnel/py3_scrapy/tutorial/spiders/kdl_spider.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | 
4 | import scrapy
5 | 
6 | class KdlSpider(scrapy.spiders.Spider):
7 |     name = "kdl"
8 | 
9 |     def start_requests(self):
10 |         url = "https://dev.kdlapi.com/testproxy"
11 |         yield scrapy.Request(url, callback=self.parse)
12 | 
13 |     def parse(self, response):
14 |         print(response.text)
15 | 
16 | 
17 | # If scrapy raises an SSL error such as "('SSL routines', 'ssl3_get_record', 'wrong version number')", you can try uncommenting the code below to work around it
18 | # from OpenSSL import SSL
19 | # from scrapy.core.downloader.contextfactory import ScrapyClientContextFactory
20 | #
21 | # init = ScrapyClientContextFactory.__init__
22 | # def init2(self, *args, **kwargs):
23 | #     init(self, *args, **kwargs)
24 | #     self.method = SSL.SSLv23_METHOD
25 | # ScrapyClientContextFactory.__init__ = init2
26 | 
--------------------------------------------------------------------------------
/examples/http_proxy_tunnel/py3_socket.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | 
4 | """
5 | Request the tunnel proxy server with a raw socket
6 | This plain-socket version speaks HTTP only; for https targets, wrap the socket in TLS (see the sketch after this file)
7 | """
8 | 
9 | import socket
10 | import socks  # pip install PySocks
11 | 
12 | socks.set_default_proxy(socks.HTTP, addr='tpsXXX.kdlapi.com', port=15818, username='username', password='password')  # set the proxy type to HTTP
13 | # socks.set_default_proxy(socks.SOCKS5, addr='tpsXXX.kdlapi.com', port=20818)  # set the proxy type to socks
14 | socket.socket = socks.socksocket  # patch the proxy into the socket module
15 | 
16 | 
17 | def main():
18 |     sock = socket.socket()
19 |     sock.connect(('dev.kdlapi.com', 80))  # connect
20 |     # Build a complete HTTP request: request line, headers, then a terminating blank line
21 |     request = 'GET /testproxy HTTP/1.1\r\nHost: dev.kdlapi.com\r\nUser-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36\r\nConnection: close\r\n\r\n'
22 | 
23 |     response = b''  # buffer for the received data
24 |     sock.send(request.encode())  # send the request
25 |     chunk = sock.recv(1024)  # receive up to 1024 bytes at a time
26 |     while chunk:  # keep reading until the server closes the connection
27 |         response += chunk
28 |         chunk = sock.recv(1024)
29 |     print(response.decode())
30 | 
31 | 
32 | if __name__ == '__main__':
33 |     main()
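The raw-socket example above sends plain HTTP on port 80. For an https target through the same tunnel, the socket has to be upgraded to TLS after connecting; a minimal sketch, assuming the same PySocks setup and placeholder credentials:

import socket
import ssl

import socks  # pip install PySocks

socks.set_default_proxy(socks.HTTP, addr='tpsXXX.kdlapi.com', port=15818,
                        username='username', password='password')
socket.socket = socks.socksocket

raw = socket.socket()
raw.connect(('dev.kdlapi.com', 443))  # the proxy tunnels to the target for us

# Upgrade the tunneled socket to TLS; server_hostname enables SNI and cert checks
context = ssl.create_default_context()
sock = context.wrap_socket(raw, server_hostname='dev.kdlapi.com')

sock.send(b'GET /testproxy HTTP/1.1\r\nHost: dev.kdlapi.com\r\nConnection: close\r\n\r\n')

response = b''
chunk = sock.recv(1024)
while chunk:  # keep reading until the server closes the connection
    response += chunk
    chunk = sock.recv(1024)
print(response.decode())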
--------------------------------------------------------------------------------
/examples/http_proxy_tunnel/py3_urllib.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | 
4 | """
5 | Request the tunnel proxy server with urllib
6 | Works for both http and https pages
7 | """
8 | 
9 | import urllib.request
10 | import ssl
11 | 
12 | # Disable certificate verification globally to avoid errors when requesting https pages
13 | ssl._create_default_https_context = ssl._create_unverified_context
14 | 
15 | # tunnel domain:port
16 | tunnel = "tpsXXX.kdlapi.com:15818"
17 | 
18 | # Username/password method
19 | username = "username"
20 | password = "password"
21 | proxies = {
22 |     "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel},
23 |     "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel}
24 | }
25 | 
26 | # Whitelist method (set your IP whitelist in advance)
27 | # proxies = {
28 | #     "http": "http://%(proxy)s/" % {"proxy": tunnel},
29 | #     "https": "http://%(proxy)s/" % {"proxy": tunnel}
30 | # }
31 | 
32 | # Target page to request
33 | target_url = "https://dev.kdlapi.com/testproxy"
34 | 
35 | # Send the request through the tunnel domain
36 | proxy_support = urllib.request.ProxyHandler(proxies)
37 | opener = urllib.request.build_opener(proxy_support)
38 | urllib.request.install_opener(opener)
39 | response = urllib.request.urlopen(target_url)
40 | 
41 | # Print the page content
42 | if response.code == 200:
43 |     print(response.read().decode('utf-8'))
--------------------------------------------------------------------------------
/examples/http_proxy_tunnel/selenium_chrome_username_password.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 | 
4 | from selenium import webdriver
5 | import string
6 | import zipfile
7 | import time
8 | 
9 | 
10 | def create_proxyauth_extension(tunnelhost, tunnelport, proxy_username, proxy_password, scheme='http', plugin_path=None):
11 |     """Build a proxy-authentication Chrome extension
12 | 
13 |     args:
14 |         tunnelhost (str): your proxy address or domain
15 |         tunnelport (int): proxy port number
16 |         proxy_username (str): username
17 |         proxy_password (str): password
18 |     kwargs:
19 |         scheme (str): proxy scheme, default http
20 |         plugin_path (str): absolute path of the extension
21 | 
22 |     return str -> plugin_path
23 |     """
24 | 
25 |     if plugin_path is None:
26 |         plugin_path = 'vimm_chrome_proxyauth_plugin.zip'
27 | 
28 |     manifest_json = """
29 |     {
30 |         "version": "1.0.0",
31 |         "manifest_version": 2,
32 |         "name": "Chrome Proxy",
33 |         "permissions": [
34 |             "proxy",
35 |             "tabs",
36 |             "unlimitedStorage",
37 |             "storage",
38 |             "<all_urls>",
39 |             "webRequest",
40 |             "webRequestBlocking"
41 |         ],
42 |         "background": {
43 |             "scripts": ["background.js"]
44 |         },
45 |         "minimum_chrome_version":"22.0.0"
46 |     }
47 |     """
48 | 
49 |     background_js = string.Template(
50 |         """
51 |         var config = {
52 |             mode: "fixed_servers",
53 |             rules: {
54 |                 singleProxy: {
55 |                     scheme: "${scheme}",
56 |                     host: "${host}",
57 |                     port: parseInt(${port})
58 |                 },
59 |                 bypassList: ["foobar.com"]
60 |             }
61 |         };
62 | 
63 |         chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});
64 | 
65 |         function callbackFn(details) {
66 |             return {
67 |                 authCredentials: {
68 |                     username: "${username}",
69 |                     password: "${password}"
70 |                 }
71 |             };
72 |         }
73 | 
74 |         chrome.webRequest.onAuthRequired.addListener(
75 |             callbackFn,
76 |             {urls: ["<all_urls>"]},
77 |             ['blocking']
78 |         );
79 |         """
80 |     ).substitute(
81 |         host=tunnelhost,
82 |         port=tunnelport,
83 |         username=proxy_username,
84 |         password=proxy_password,
85 |         scheme=scheme,
86 |     )
87 |     with zipfile.ZipFile(plugin_path, 'w') as zp:
88 |         zp.writestr("manifest.json", manifest_json)
89 |         zp.writestr("background.js", background_js)
90 |     return plugin_path
91 | 
92 | 
93 | proxyauth_plugin_path = create_proxyauth_extension(
94 |     tunnelhost="${tunnelhost}",  # tunnel domain
95 |     tunnelport="${tunnelport}",  # port number
96 |     proxy_username="${username}",  # username
97 |     proxy_password="${password}"  # password
98 | )
99 | 
100 | 
101 | chrome_options = webdriver.ChromeOptions()
102 | chrome_options.add_extension(proxyauth_plugin_path)
103 | # ${chromedriver_path}: path to the chromedriver executable
104 | driver = webdriver.Chrome(executable_path="${chromedriver_path}", chrome_options=chrome_options)
105 | driver.get("https://dev.kdlapi.com/testproxy")
106 | 
107 | # Print the page content
108 | print(driver.page_source)
109 | 
110 | # Wait 3 seconds, then close the current window; the browser quits if it was the last window
111 | time.sleep(3)
112 | driver.close()
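Note that executable_path and chrome_options are deprecated in Selenium 4, so the call above may fail on a recent install. An equivalent sketch using the Selenium 4 API, with the same placeholder driver path:

from selenium import webdriver
from selenium.webdriver.chrome.service import Service

chrome_options = webdriver.ChromeOptions()
chrome_options.add_extension('vimm_chrome_proxyauth_plugin.zip')  # the zip built by create_proxyauth_extension above

# Selenium 4 moves the driver path into a Service object and renames the
# keyword arguments: executable_path= becomes service=, chrome_options= becomes options=
driver = webdriver.Chrome(service=Service("${chromedriver_path}"), options=chrome_options)
driver.get("https://dev.kdlapi.com/testproxy")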
--------------------------------------------------------------------------------
/examples/http_proxy_tunnel/selenium_chrome_whitelist.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 | 
4 | from selenium import webdriver
5 | import time
6 | 
7 | chrome_options = webdriver.ChromeOptions()
8 | chrome_options.add_argument('--proxy-server=http://${tunnelhost:tunnelport}')  # tunnel domain:port
9 | # ${chromedriver_path}: path to the chromedriver executable
10 | driver = webdriver.Chrome(executable_path="${chromedriver_path}", chrome_options=chrome_options)
11 | driver.get("https://dev.kdlapi.com/testproxy")
12 | 
13 | # Print the page content
14 | print(driver.page_source)
15 | 
16 | # Wait 3 seconds, then close the current window; the browser quits if it was the last window
17 | time.sleep(3)
18 | driver.close()
--------------------------------------------------------------------------------
/examples/http_proxy_tunnel/selenium_firefox_username_password.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 | 
4 | import time
5 | from seleniumwire import webdriver  # pip install selenium-wire
6 | 
7 | options = {
8 |     'proxy': {
9 |         'http': 'http://username:password@tpsXXX.kdlapi.com:15818',
10 |         'https': 'http://username:password@tpsXXX.kdlapi.com:15818',
11 |     }
12 | }
13 | driver = webdriver.Firefox(seleniumwire_options=options, executable_path="${geckodriver_path}")
14 | 
15 | driver.get('https://dev.kdlapi.com/testproxy')
16 | 
17 | # Print the page content
18 | print(driver.page_source)
19 | 
20 | # Wait 3 seconds, then close the current window; the browser quits if it was the last window
21 | time.sleep(3)
22 | driver.close()
--------------------------------------------------------------------------------
/examples/http_proxy_tunnel/selenium_firefox_whitelist.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 | 
4 | import time
5 | from selenium import webdriver
6 | 
7 | 
8 | fp = webdriver.FirefoxProfile()
9 | proxy_ip = "tpsXXX.kdlapi.com"  # tunnel server domain
10 | proxy_port = 15818  # port number
11 | 
12 | fp.set_preference('network.proxy.type', 1)
13 | fp.set_preference('network.proxy.http', proxy_ip)
14 | fp.set_preference('network.proxy.http_port', proxy_port)
15 | fp.set_preference('network.proxy.ssl', proxy_ip)
16 | fp.set_preference('network.proxy.ssl_port', proxy_port)
17 | 
18 | driver = webdriver.Firefox(executable_path="${geckodriver_path}", firefox_profile=fp)
19 | driver.get('https://dev.kdlapi.com/testproxy')
20 | 
21 | # Print the page content
22 | print(driver.page_source)
23 | 
24 | # Wait 3 seconds, then close the current window; the browser quits if it was the last window
25 | time.sleep(3)
26 | driver.close()
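The same network.proxy.* preference mechanism also covers socks proxies, which the socks_proxy section below only demonstrates for Chrome. A minimal sketch, assuming the same placeholder geckodriver path and a hypothetical socks5 endpoint; the network.proxy.socks* keys are Firefox's standard preference names:

from selenium import webdriver

fp = webdriver.FirefoxProfile()
fp.set_preference('network.proxy.type', 1)
fp.set_preference('network.proxy.socks', 'host')            # hypothetical socks5 host
fp.set_preference('network.proxy.socks_port', 20818)        # hypothetical socks5 port
fp.set_preference('network.proxy.socks_version', 5)
fp.set_preference('network.proxy.socks_remote_dns', True)   # resolve DNS on the proxy side

driver = webdriver.Firefox(executable_path="${geckodriver_path}", firefox_profile=fp)
driver.get('https://dev.kdlapi.com/testproxy')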
--------------------------------------------------------------------------------
/examples/socks_proxy/phantomjs_demo.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 | 
4 | from selenium import webdriver
5 | import time
6 | 
7 | # Download the phantomjs package first, then fill in the path to phantomjs.exe (the path must not contain Chinese characters)
8 | executable_path = '${executable_path}'
9 | service_args = [
10 |     '--proxy=host:port',  # replace with your proxy ip, e.g. 59.38.241.25:23918
11 |     '--proxy-type=socks5',
12 |     '--proxy-auth=username:password'  # username and password
13 | ]
14 | driver = webdriver.PhantomJS(service_args=service_args, executable_path=executable_path)
15 | driver.get('https://dev.kdlapi.com/testproxy')
16 | 
17 | print(driver.page_source)
18 | time.sleep(3)
19 | driver.close()
--------------------------------------------------------------------------------
/examples/socks_proxy/py2_requests.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | 
4 | """
5 | Request the proxy server with requests
6 | Works for both http and https pages
7 | """
8 | 
9 | import requests
10 | 
11 | # Proxy extraction API endpoint: fetch 1 proxy IP
12 | api_url = "http://dps.kdlapi.com/api/getdps/?secret_id=o1fjh1re9o28876h7c08&signature=xxxxx&num=1&pt=2&sep=1"
13 | 
14 | # Get the proxy IP returned by the API
15 | proxy_ip = requests.get(api_url).text
16 | 
17 | # Username/password authentication (private proxy / dedicated proxy)
18 | username = "username"
19 | password = "password"
20 | proxies = {
21 |     "http": "socks5://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": proxy_ip},
22 |     "https": "socks5://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": proxy_ip}
23 | }
24 | 
25 | # Whitelist method (set your IP whitelist in advance)
26 | # proxies = {
27 | #     "http": "socks5://%(proxy)s/" % {"proxy": proxy_ip},
28 | #     "https": "socks5://%(proxy)s/" % {"proxy": proxy_ip}
29 | # }
30 | 
31 | # Target page to request
32 | target_url = "https://dev.kdlapi.com/testproxy"
33 | 
34 | # Send the request through the proxy IP
35 | response = requests.get(target_url, proxies=proxies)
36 | 
37 | # Print the page content
38 | if response.status_code == 200:
39 |     print response.text
--------------------------------------------------------------------------------
/examples/socks_proxy/py3_requests.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | 
4 | """
5 | Request the proxy server with requests
6 | Works for both http and https pages
7 | """
8 | 
9 | import requests
10 | 
11 | # Proxy extraction API endpoint: fetch 1 proxy IP
12 | api_url = "http://dps.kdlapi.com/api/getdps/?secret_id=o1fjh1re9o28876h7c08&signature=xxxxx&num=1&pt=2&sep=1"
13 | 
14 | # Get the proxy IP returned by the API
15 | proxy_ip = requests.get(api_url).text
16 | 
17 | # Username/password authentication (private proxy / dedicated proxy)
18 | username = "username"
19 | password = "password"
20 | proxies = {
21 |     "http": "socks5://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": proxy_ip},
22 |     "https": "socks5://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": proxy_ip}
23 | }
24 | 
25 | # Whitelist method (set your IP whitelist in advance)
26 | # proxies = {
27 | #     "http": "socks5://%(proxy)s/" % {"proxy": proxy_ip},
28 | #     "https": "socks5://%(proxy)s/" % {"proxy": proxy_ip}
29 | # }
30 | 
31 | # Target page to request
32 | target_url = "https://dev.kdlapi.com/testproxy"
33 | 
34 | # Send the request through the proxy IP
35 | response = requests.get(target_url, proxies=proxies)
36 | 
37 | # Print the page content
38 | if response.status_code == 200:
39 |     print(response.text)
--------------------------------------------------------------------------------
/examples/socks_proxy/selenium_chrome_whitelist.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | 
4 | from selenium import webdriver
5 | import time
6 | 
7 | chrome_options = webdriver.ChromeOptions()
8 | chrome_options.add_argument('--proxy-server=socks5://${ip:port}')  # proxy IP:port
9 | # ${chromedriver_path}: path to the chromedriver executable
10 | driver = webdriver.Chrome(executable_path="${chromedriver_path}", chrome_options=chrome_options)
11 | driver.get("https://dev.kdlapi.com/testproxy")
12 | 
13 | # Print the page content
14 | print(driver.page_source)
15 | 
16 | # Wait 3 seconds, then close the current window; the browser quits if it was the last window
17 | time.sleep(3)
18 | driver.close()
--------------------------------------------------------------------------------
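One note on the socks examples above: requests only understands socks5:// URLs when the PySocks extra is installed (pip install requests[socks]), and that scheme resolves DNS locally. A minimal sketch of the socks5h:// variant, which delegates DNS resolution to the proxy (placeholder credentials and endpoint):

import requests  # pip install requests[socks]

proxies = {
    # socks5h:// = socks5 with remote (proxy-side) DNS resolution
    'http': 'socks5h://username:password@host:port/',
    'https': 'socks5h://username:password@host:port/',
}
print(requests.get('https://dev.kdlapi.com/testproxy', proxies=proxies).text)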