├── .gitignore
├── LICENSE
├── README.md
├── api-sdk
│   ├── README.md
│   ├── README.rst
│   ├── examples
│   │   ├── __init__.py
│   │   ├── use_dps.py
│   │   ├── use_kps.py
│   │   ├── use_ops.py
│   │   ├── use_tool.py
│   │   └── use_tps.py
│   ├── kdl
│   │   ├── __init__.py
│   │   ├── auth.py
│   │   ├── client.py
│   │   ├── endpoint.py
│   │   ├── exceptions.py
│   │   └── utils.py
│   ├── setup.py
│   └── test.py
└── examples
    ├── README.md
    ├── api
    │   ├── py2_urllib2.py
    │   ├── py3_requests.py
    │   └── py3_urllib.py
    ├── http_proxy
    │   ├── phantomjs_demo.py
    │   ├── proxy_pool.py
    │   ├── py2_requests.py
    │   ├── py2_urllib2.py
    │   ├── py3_aiohttp.py
    │   ├── py3_feapder.py
    │   ├── py3_httpx.py
    │   ├── py3_playwright.py
    │   ├── py3_pyppeteer.py
    │   ├── py3_requests.py
    │   ├── py3_scrapy
    │   │   ├── scrapy.cfg
    │   │   └── tutorial
    │   │       ├── __init__.py
    │   │       ├── items.py
    │   │       ├── middlewares.py
    │   │       ├── myextend.py
    │   │       ├── pipelines.py
    │   │       ├── settings.py
    │   │       └── spiders
    │   │           ├── __init__.py
    │   │           └── kdl_spiders.py
    │   ├── py3_urllib.py
    │   ├── py3_websocket.py
    │   ├── py3_websocket_short.py
    │   ├── selenium_chrome_username_password.py
    │   ├── selenium_chrome_whitelist.py
    │   ├── selenium_firefox_username_password.py
    │   └── selenium_firefox_whitelist.py
    ├── http_proxy_tunnel
    │   ├── py2_requests.py
    │   ├── py2_urllib2.py
    │   ├── py3_aiohttp.py
    │   ├── py3_feapder.py
    │   ├── py3_httpx.py
    │   ├── py3_pyppeteer.py
    │   ├── py3_requests.py
    │   ├── py3_scrapy
    │   │   ├── scrapy.cfg
    │   │   └── tutorial
    │   │       ├── __init__.py
    │   │       ├── items.py
    │   │       ├── middlewares.py
    │   │       ├── pipelines.py
    │   │       ├── settings.py
    │   │       └── spiders
    │   │           ├── __init__.py
    │   │           └── kdl_spider.py
    │   ├── py3_socket.py
    │   ├── py3_urllib.py
    │   ├── selenium_chrome_username_password.py
    │   ├── selenium_chrome_whitelist.py
    │   ├── selenium_firefox_username_password.py
    │   └── selenium_firefox_whitelist.py
    └── socks_proxy
        ├── phantomjs_demo.py
        ├── py2_requests.py
        ├── py3_requests.py
        └── selenium_chrome_whitelist.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | /venv
3 | .idea
4 | *.bat
5 | *.log
6 | .secret
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 2-Clause License
2 |
3 | Copyright (c) 2019, Kuaidaili
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | 1. Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Kuaidaili API SDK - Python
2 | The SDK lets you call the API quickly. [See details](https://github.com/kuaidaili/python-sdk/tree/master/api-sdk)
3 | 
4 | # Kuaidaili official code samples - Python
5 | 
6 | ## Calling the API
7 | * Calling the API
8 |     * [urllib2](./examples/api/py2_urllib2.py)
9 |     * [urllib](./examples/api/py3_urllib.py)
10 |     * [requests](./examples/api/py3_requests.py)
11 | 
12 | ## HTTP proxy
13 | * Python2
14 |     * [urllib2](./examples/http_proxy/py2_urllib2.py)
15 |     * [requests](./examples/http_proxy/py2_requests.py)
16 | * Python3
17 |     * [urllib](./examples/http_proxy/py3_urllib.py)
18 |     * [requests](./examples/http_proxy/py3_requests.py)
19 |     * [aiohttp](./examples/http_proxy/py3_aiohttp.py)
20 |     * [httpx](./examples/http_proxy/py3_httpx.py)
21 |     * [websocket (persistent connection)](./examples/http_proxy/py3_websocket.py)
22 |     * [websocket (short-lived connection)](./examples/http_proxy/py3_websocket_short.py)
23 |     * [scrapy](./examples/http_proxy/py3_scrapy)
24 |     * [feapder](./examples/http_proxy/py3_feapder.py)
25 |     * [pyppeteer](./examples/http_proxy/py3_pyppeteer.py)
26 | * Selenium
27 |     * [selenium_chrome, whitelist auth](./examples/http_proxy/selenium_chrome_whitelist.py)
28 |     * [selenium_chrome, username/password auth](./examples/http_proxy/selenium_chrome_username_password.py)
29 |     * [selenium_firefox, whitelist auth](./examples/http_proxy/selenium_firefox_whitelist.py)
30 |     * [selenium_firefox, username/password auth](./examples/http_proxy/selenium_firefox_username_password.py)
31 |     * [selenium_phantomjs, username/password auth](./examples/http_proxy/phantomjs_demo.py)
32 | * ProxyPool
33 |     * [ProxyPool](./examples/http_proxy/proxy_pool.py)
34 | 
35 | ## HTTP tunnel
36 | 
37 | * Python2
38 |     * [urllib2](./examples/http_proxy_tunnel/py2_urllib2.py)
39 |     * [requests](./examples/http_proxy_tunnel/py2_requests.py)
40 | * Python3
41 |     * [urllib](./examples/http_proxy_tunnel/py3_urllib.py)
42 |     * [requests](./examples/http_proxy_tunnel/py3_requests.py)
43 |     * [aiohttp](./examples/http_proxy_tunnel/py3_aiohttp.py)
44 |     * [httpx](./examples/http_proxy_tunnel/py3_httpx.py)
45 |     * [socket](./examples/http_proxy_tunnel/py3_socket.py)
46 |     * [scrapy](./examples/http_proxy_tunnel/py3_scrapy)
47 |     * [feapder](./examples/http_proxy_tunnel/py3_feapder.py)
48 |     * [pyppeteer](./examples/http_proxy_tunnel/py3_pyppeteer.py)
49 | * Selenium
50 |     * [selenium_chrome, whitelist auth](./examples/http_proxy_tunnel/selenium_chrome_whitelist.py)
51 |     * [selenium_chrome, username/password auth](./examples/http_proxy_tunnel/selenium_chrome_username_password.py)
52 |     * [selenium_firefox, whitelist auth](./examples/http_proxy_tunnel/selenium_firefox_whitelist.py)
53 |     * [selenium_firefox, username/password auth](./examples/http_proxy_tunnel/selenium_firefox_username_password.py)
54 | 
55 | ## Socks
56 | * Python2
57 |     * [requests](./examples/socks_proxy/py2_requests.py)
58 | * Python3
59 |     * [requests](./examples/socks_proxy/py3_requests.py)
60 | * Selenium
61 |     * [selenium_chrome, whitelist auth](./examples/socks_proxy/selenium_chrome_whitelist.py)
62 |     * [selenium_phantomjs, username/password auth](./examples/socks_proxy/phantomjs_demo.py)
63 | 
64 | 
65 | # Technical support
66 | 
67 | If you find any problem with the code, please file an `Issue`.
68 | 
69 | `Pull request`s that improve these samples are welcome.
70 | 
71 | For more on calling the API and using the proxy servers, see the [Kuaidaili documentation center](https://www.kuaidaili.com/helpcenter/).
72 | 
73 | * Technical support WeChat: kuaidaili
74 | * Technical support QQ: 800849628
75 | 
--------------------------------------------------------------------------------
/api-sdk/README.md:
--------------------------------------------------------------------------------
1 | # Introduction
2 | Kuaidaili API SDK
3 | 
4 | # Requirements
5 | 1. Python 2.7 through Python 3.7
6 | 2. Purchase the corresponding product from [Kuaidaili](https://www.kuaidaili.com)
7 | 3. [Get the order's `secret_id` and `secret_key`](https://www.kuaidaili.com/usercenter/api/secret/)
8 | 
9 | # Installation
10 | Before installing the Python SDK, obtain the `secret_id` and `secret_key` of your order and keep them strictly confidential.
11 | 
12 | ## Install via pip (recommended)
13 | You can install the SDK into your project with `pip`:
14 | ```
15 | pip install kdl
16 | ```
17 | 
18 | If `pip` is not yet available in your environment:
19 | * Install pip on Ubuntu/Debian: `apt-get install python-setuptools`
20 | * Install pip on CentOS: `yum install python-setuptools`
21 | * Install pip on macOS: `curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py; python get-pip.py`
22 | 
23 | You can also follow the [pip website](https://pip.pypa.io/en/stable/installing/) to install it.
24 | 
25 | ## Install from source
26 | Download the latest code from the [GitHub repository](https://github.com/kuaidaili/python-sdk/tree/master/api-sdk), extract it, then:
27 | 
28 | ```
29 | $ cd api-sdk
30 | $ python setup.py install
31 | ```
32 | 
33 | ## Example
34 | Using a private proxy (dps) order as an example:
35 | ``` python
36 | # -*- coding: utf-8 -*-
37 | 
38 | """Private proxy usage example
39 | API authentication:
40 | Two auth methods are currently supported, "token" and "hmacsha1"; "token" is the default.
41 | Every method accepts a sign_type keyword argument to change the auth method.
42 | """
43 | 
44 | import kdl
45 | 
46 | auth = kdl.Auth("secret_id", "secret_key")
47 | client = kdl.Client(auth, timeout=(8, 12), max_retries=3)
48 | 
49 | # Get the order expiration time; returns a time string
50 | expire_time = client.get_order_expire_time()
51 | print("expire time", expire_time)
52 | 
53 | 
54 | 
55 | # Get the IP whitelist; returns a list of IPs
56 | ip_whitelist = client.get_ip_whitelist()
57 | print("ip whitelist", ip_whitelist)
58 | 
59 | # Set the IP whitelist; accepts a str, list or tuple
60 | # Returns True on success, raises an exception otherwise
61 | client.set_ip_whitelist([])
62 | client.set_ip_whitelist("171.113.244.40,171.113.244.41")
63 | print(client.get_ip_whitelist())
64 | 
65 | 
66 | 
67 | client.set_ip_whitelist(tuple())
68 | 
69 | # Fetch private proxy IPs. The first argument is the number of IPs; other arguments are keyword arguments (signature and timestamp are not needed)
70 | # See the help center for the full parameter list: "https://www.kuaidaili.com/doc/api/getdps/"
71 | # Returns a list of IPs
72 | # Note: on Python 2, when calling from a terminal, or from a file without "# -*- coding: utf-8 -*-",
73 | # pass the area parameter as unicode, e.g. area=u'北京,上海'
74 | ips = client.get_dps(2, sign_type='hmacsha1', format='json', pt=2, area='北京,上海,广东')
75 | print("dps proxy: ", ips)
76 | 
77 | 
78 | # Check private proxy validity: returns a dict of ip: true/false
79 | ips = client.get_dps(2, sign_type='token', format='json')
80 | valids = client.check_dps_valid(ips)
81 | print("valids: ", valids)
82 | 
83 | # Get the remaining lifetime of private proxies: returns a dict of ip: seconds (remaining)
84 | ips = client.get_dps(5, format='json')
85 | seconds = client.get_dps_valid_time(ips)
86 | print("seconds: ", seconds)
87 | 
88 | 
89 | # Get the remaining IP balance (pay-per-IP private proxy orders only)
90 | balance = client.get_ip_balance(sign_type='hmacsha1')
91 | print("balance: ", balance)
92 | 
93 | # Get the proxy authorization info
94 | # Retrieves the credentials for accessing proxy IPs under the given order.
95 | # The credentials contain a username and password, used for authentication with private/exclusive/tunnel proxies.
96 | # plain_text=1 returns the username and password in plain text
97 | # Details: https://www.kuaidaili.com/doc/api/getproxyauthorization/
98 | proxyauthorization = client.get_proxy_authorization(plain_text=1, sign_type='token')
99 | print("proxyauthorization: ", proxyauthorization)
100 | ```
101 | You can find more detailed examples in the examples directory.
102 | 
103 | ## References
104 | 
105 | * [API list](https://www.kuaidaili.com/doc/api/)
106 | * [API authentication](https://www.kuaidaili.com/doc/api/auth/)
107 | 
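108 | ## Using an extracted proxy
109 | 
110 | A minimal sketch (not part of the SDK itself) of plugging the proxies returned by `get_dps` into `requests`; the username, password and target URL are placeholders for your own values:
111 | 
112 | ``` python
113 | import requests
114 | 
115 | # 'ips' is the list returned by client.get_dps(..., format='json') above
116 | proxy = ips[0]
117 | proxies = {
118 |     "http": "http://username:password@" + proxy,
119 |     "https": "http://username:password@" + proxy,
120 | }
121 | r = requests.get("https://example.com", proxies=proxies, timeout=10)
122 | print(r.status_code)
123 | ```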
--------------------------------------------------------------------------------
/api-sdk/README.rst:
--------------------------------------------------------------------------------
1 | =================
2 | Kuaidaili API SDK
3 | =================
4 | 
5 | ============
6 | Requirements
7 | ============
8 | 
9 | 1. Python 2.7 through Python 3.7
10 | 2. Purchase the corresponding product from `Kuaidaili <https://www.kuaidaili.com>`_
11 | 3. `Get the order's secret_id and secret_key <https://www.kuaidaili.com/usercenter/api/secret/>`_
12 | 
13 | ============
14 | Installation
15 | ============
16 | Before installing the Python SDK, obtain the ``secret_id`` and ``secret_key`` of your order and keep them strictly confidential.
17 | 
18 | Install via pip (recommended)
19 | =============================
20 | You can install the SDK into your project with ``pip``:
21 | | ``pip install kdl``
22 | 
23 | If ``pip`` is not yet available in your environment:
24 | * Install pip on Ubuntu/Debian: ``apt-get install python-setuptools``
25 | * Install pip on CentOS: ``yum install python-setuptools``
26 | * Install pip on macOS: ``curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py; python get-pip.py``
27 | 
28 | You can also follow the `pip website <https://pip.pypa.io/en/stable/installing/>`_ to install it.
29 | 
30 | 
31 | Install from source
32 | ===================
33 | Download the latest code from the `GitHub repository <https://github.com/kuaidaili/python-sdk/tree/master/api-sdk>`_, extract it, then:
34 | 
35 | .. code-block:: console
36 | 
37 |     $ cd api-sdk
38 |     $ python setup.py install
39 | 
40 | 
41 | Example
42 | =======
43 | Using a private proxy (dps) order as an example:
44 | 
45 | .. code-block:: python
46 | 
47 |     # -*- coding: utf-8 -*-
48 | 
49 |     """
50 |     Private proxy usage example
51 |     API authentication:
52 |     Two auth methods are currently supported, "token" and "hmacsha1"; "token" is the default.
53 |     Every method accepts a sign_type keyword argument to change the auth method.
54 |     """
55 | 
56 |     import kdl
57 | 
58 |     auth = kdl.Auth("secret_id", "secret_key")
59 |     client = kdl.Client(auth)
60 | 
61 |     # Get the order expiration time; returns a time string
62 |     expire_time = client.get_order_expire_time()
63 |     print("expire time", expire_time)
64 | 
65 |     # Get the IP whitelist; returns a list of IPs
66 |     ip_whitelist = client.get_ip_whitelist()
67 |     print("ip whitelist", ip_whitelist)
68 | 
69 |     # Set the IP whitelist; accepts a str, list or tuple
70 |     # Returns True on success, raises an exception otherwise
71 |     client.set_ip_whitelist([])
72 |     client.set_ip_whitelist("127.0.0.1, 192.168.0.139")
73 |     print(client.get_ip_whitelist())
74 |     client.set_ip_whitelist(tuple())
75 | 
76 |     # Fetch private proxy IPs. The first argument is the number of IPs; other arguments are keyword arguments (signature and timestamp are not needed)
77 |     # See the help center for the full parameter list: "https://help.kuaidaili.com/api/getdps/"
78 |     # Returns a list of IPs
79 |     # Note: on Python 2, when calling from a terminal, or from a file without "# -*- coding: utf-8 -*-",
80 |     # pass the area parameter as unicode, e.g. area=u'北京,上海'
81 |     ips = client.get_dps(2, sign_type='hmacsha1', format='json', pt=2, area='北京,上海,广东')
82 |     print("dps proxy: ", ips)
83 | 
84 | 
85 |     # Check private proxy validity: returns a dict of ip: true/false
86 |     ips = client.get_dps(2, sign_type='token', format='json')
87 |     valids = client.check_dps_valid(ips)
88 |     print("valids: ", valids)
89 | 
90 | 
91 |     # Get the remaining IP balance (pay-per-IP private proxy orders only)
92 |     balance = client.get_ip_balance(sign_type='hmacsha1')
93 |     print("balance: ", balance)
94 | 
95 | You can find more detailed examples in the examples directory.
96 | 
97 | References
98 | ==========
99 | 
100 | * `API list <https://www.kuaidaili.com/doc/api/>`_
101 | * `API authentication <https://www.kuaidaili.com/doc/api/auth/>`_
--------------------------------------------------------------------------------
/api-sdk/examples/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
--------------------------------------------------------------------------------
/api-sdk/examples/use_dps.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | """Private proxy usage example
4 | API authentication:
5 | Two auth methods are currently supported, "token" and "hmacsha1"; "token" is the default.
6 | Every method accepts a sign_type keyword argument to change the auth method.
7 | """
8 | 
9 | import kdl
10 | 
11 | auth = kdl.Auth("secret_id", "secret_key")
12 | client = kdl.Client(auth)
13 | 
14 | # Get the order expiration time; returns a time string
15 | expire_time = client.get_order_expire_time()
16 | print("expire time", expire_time)
17 | 
18 | 
19 | 
20 | # Get the IP whitelist; returns a list of IPs
21 | ip_whitelist = client.get_ip_whitelist()
22 | print("ip whitelist", ip_whitelist)
23 | 
24 | # Set the IP whitelist; accepts a str, list or tuple
25 | # Returns True on success, raises an exception otherwise
26 | client.set_ip_whitelist([])
27 | client.set_ip_whitelist("171.113.244.40,171.113.244.41")
28 | print(client.get_ip_whitelist())
29 | 
30 | 
31 | 
32 | client.set_ip_whitelist(tuple())
33 | 
34 | # Fetch private proxy IPs. The first argument is the number of IPs; other arguments are keyword arguments (signature and timestamp are not needed)
35 | # See the help center for the full parameter list: "https://www.kuaidaili.com/doc/api/getdps/"
36 | # Returns a list of IPs
37 | # Note: on Python 2, when calling from a terminal, or from a file without "# -*- coding: utf-8 -*-",
38 | # pass the area parameter as unicode, e.g. area=u'北京,上海'
39 | ips = client.get_dps(2, sign_type='hmacsha1', format='json', pt=2, area='北京,上海,广东')
40 | print("dps proxy: ", ips)
41 | 
42 | 
43 | # Check private proxy validity: returns a dict of ip: true/false
44 | ips = client.get_dps(2, sign_type='token', format='json')
45 | valids = client.check_dps_valid(ips)
46 | print("valids: ", valids)
47 | 
48 | # Get the remaining lifetime of private proxies: returns a dict of ip: seconds (remaining)
49 | ips = client.get_dps(5, format='json')
50 | seconds = client.get_dps_valid_time(ips)
51 | print("seconds: ", seconds)
52 | 
53 | 
54 | # Get the remaining IP balance (pay-per-IP private proxy orders only)
55 | balance = client.get_ip_balance(sign_type='hmacsha1')
56 | print("balance: ", balance)
57 | 
58 | # Get the proxy authorization info
59 | # Retrieves the credentials for accessing proxy IPs under the given order.
60 | # The credentials contain a username and password, used for authentication with private/exclusive/tunnel proxies.
61 | # plain_text=1 returns the username and password in plain text
62 | # Details: https://www.kuaidaili.com/doc/api/getproxyauthorization/
63 | proxyauthorization = client.get_proxy_authorization(plain_text=1, sign_type='token')
64 | print("proxyauthorization: ", proxyauthorization)
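65 | 
66 | # A minimal error-handling sketch (not in the original sample): client methods
67 | # raise kdl.exceptions.KdlException (or a subclass) on failure, so calls can be
68 | # wrapped like this:
69 | from kdl.exceptions import KdlException
70 | 
71 | try:
72 |     ips = client.get_dps(1, format='json')
73 | except KdlException as e:
74 |     print("request failed, code: {} message: {}".format(e.code, e.message))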
--------------------------------------------------------------------------------
/api-sdk/examples/use_kps.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | """Exclusive proxy usage example
4 | API authentication:
5 | Two auth methods are currently supported, "token" and "hmacsha1"; "token" is the default.
6 | Every method accepts a sign_type keyword argument to change the auth method.
7 | """
8 | 
9 | import kdl
10 | 
11 | auth = kdl.Auth("secret_id", "secret_key")
12 | client = kdl.Client(auth)
13 | 
14 | # Get the order expiration time; returns a time string
15 | expire_time = client.get_order_expire_time()
16 | print("expire time", expire_time)
17 | 
18 | # Get the IP whitelist; returns a list of IPs
19 | ip_whitelist = client.get_ip_whitelist()
20 | print("ip whitelist", ip_whitelist)
21 | 
22 | # Set the IP whitelist; accepts a str, list or tuple
23 | # Returns True on success, raises an exception otherwise
24 | client.set_ip_whitelist([])
25 | client.set_ip_whitelist("127.0.0.1, 192.168.0.139")
26 | print(client.get_ip_whitelist())
27 | client.set_ip_whitelist(tuple())
28 | 
29 | # Fetch exclusive proxy IPs. The first argument is the number of IPs; other arguments are keyword arguments (signature and timestamp are not needed)
30 | # See the help center for the full parameter list: "https://www.kuaidaili.com/doc/api/getdps/"
31 | # Returns a list of IPs
32 | # Note: on Python 2, when calling from a terminal, or from a file without "# -*- coding: utf-8 -*-",
33 | # pass the area parameter as unicode, e.g. area=u'北京,上海'
34 | ips = client.get_kps(2, sign_type='hmacsha1', format='json', pt=2, area='北京,上海,广东')
35 | print("kps proxy: ", ips)
36 | 
37 | # Get the proxy authorization info
38 | # Retrieves the credentials for accessing proxy IPs under the given order.
39 | # The credentials contain a username and password, used for authentication with private/exclusive/tunnel proxies.
40 | # plain_text=1 returns the username and password in plain text
41 | # Details: https://www.kuaidaili.com/doc/api/getproxyauthorization/
42 | proxyauthorization = client.get_proxy_authorization(plain_text=1, sign_type='token')
43 | print("proxyauthorization: ", proxyauthorization)
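44 | 
45 | # A minimal sketch (not in the original sample) of combining the authorization
46 | # info with an extracted proxy for requests; the 'username'/'password' keys are
47 | # assumed from the plain-text getproxyauthorization response.
48 | import requests
49 | 
50 | username = proxyauthorization.get("username")
51 | password = proxyauthorization.get("password")
52 | proxies = {
53 |     "http": "http://{}:{}@{}".format(username, password, ips[0]),
54 |     "https": "http://{}:{}@{}".format(username, password, ips[0]),
55 | }
56 | print(requests.get("https://example.com", proxies=proxies, timeout=10).status_code)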
--------------------------------------------------------------------------------
/api-sdk/examples/use_ops.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | """Open proxy usage example
4 | API authentication:
5 | Two auth methods are currently supported, "token" and "hmacsha1"; "token" is the default.
6 | Every method accepts a sign_type keyword argument to change the auth method.
7 | """
8 | 
9 | import kdl
10 | 
11 | auth = kdl.Auth("secret_id", "secret_key")
12 | client = kdl.Client(auth)
13 | 
14 | # Get the order expiration time; returns a time string
15 | expire_time = client.get_order_expire_time()
16 | print("expire time", expire_time)
17 | 
18 | 
19 | # Fetch open proxy IPs. The first argument is the number of IPs; other arguments are keyword arguments (signature and timestamp are not needed)
20 | # See the help center for the full parameter list: "https://help.kuaidaili.com/api/getdps/"
21 | # Returns a list of IPs
22 | # Note: on Python 2, when calling from a terminal, or from a file without "# -*- coding: utf-8 -*-",
23 | # pass the area parameter as unicode, e.g. area=u'北京,上海'
24 | # For an open proxy SVIP order pass order_level='svip'; for a professional order pass order_level='ent'
25 | ips = client.get_proxy(4, sign_type='token', order_level='svip', format='json', pt=2, area='北京,上海,广东')
26 | print("ops proxy: ", ips)
27 | 
28 | 
29 | 
30 | # Check open proxy validity
31 | ips = client.get_proxy(4, sign_type='token', order_level='svip', format='json', pt=2, area='北京,上海,广东')
32 | valids = client.check_ops_valid(ips)
33 | print("valids: ", valids)
34 | 
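35 | # A small follow-up sketch (not in the original sample): check_ops_valid
36 | # returns a dict of proxy -> bool, so usable proxies can be filtered out:
37 | valid_ips = [ip for ip, ok in valids.items() if ok]
38 | print("valid ips: ", valid_ips)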
--------------------------------------------------------------------------------
/api-sdk/examples/use_tool.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | from kdl.client import Client
4 | from kdl.auth import Auth
5 |
6 | auth = Auth("secret_id", "secret_key")
7 | client = Client(auth)
8 |
9 | # Fetch User Agents. The first argument is the number to fetch; other arguments are keyword arguments (signature and timestamp are not needed)
10 | # See the help center for the full parameter list: "https://www.kuaidaili.com/doc/api/getua/"
11 | # Returns a list of user agent strings
12 | ua = client.get_ua(10, browser="weixin")
13 | print("ua:", ua)
--------------------------------------------------------------------------------
/api-sdk/examples/use_tps.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | """Tunnel proxy usage example
4 | API authentication:
5 | Two auth methods are currently supported, "token" and "hmacsha1"; "token" is the default.
6 | Every method accepts a sign_type keyword argument to change the auth method.
7 | """
8 | 
9 | import kdl
10 | auth = kdl.Auth("secret_id", "secret_key")
11 | client = kdl.Client(auth)
12 | 
13 | expire_time = client.get_order_expire_time()
14 | print("expire time:", expire_time)
15 | 
16 | # Get the IP whitelist; returns a list of IPs
17 | ip_whitelist = client.get_ip_whitelist()
18 | print("ip whitelist:", ip_whitelist)
19 | 
20 | # Set the IP whitelist; accepts a str, list or tuple
21 | # Returns True on success, raises an exception otherwise
22 | client.set_ip_whitelist([])
23 | client.set_ip_whitelist("171.113.244.40")
24 | print(client.get_ip_whitelist())
25 | 
26 | # Show the tunnel proxy's current IP
27 | ip = client.tps_current_ip()
28 | print("current_ip:", ip)
29 | 
30 | 
31 | # Change the current tunnel IP
32 | new_ip = client.change_tps_ip()
33 | print("new_ip:", new_ip)
34 | 
35 | # Get the proxy authorization info
36 | # Retrieves the credentials for accessing proxy IPs under the given order.
37 | # The credentials contain a username and password, used for authentication with private/exclusive/tunnel proxies.
38 | # plain_text=1 returns the username and password in plain text
39 | # Details: https://www.kuaidaili.com/doc/api/getproxyauthorization/
40 | proxyauthorization = client.get_proxy_authorization(plain_text=1, sign_type='token')
41 | print("proxyauthorization: ", proxyauthorization)
42 | 
43 | # Get tunnel proxy IPs
44 | # Gets the tunnel proxy endpoints for this order.
45 | # See https://www.kuaidaili.com/doc/api/gettps/ for the parameters
46 | tps_list = client.get_tps(2, sign_type='hmacsha1', format='json')
47 | print(tps_list)
48 | 
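49 | # A minimal sketch (not in the original sample) of sending a request through a
50 | # tunnel endpoint returned above with requests; the username/password come from
51 | # get_proxy_authorization and are placeholders here.
52 | import requests
53 | 
54 | tunnel = tps_list[0]  # a "host:port" entry returned by get_tps
55 | proxies = {
56 |     "http": "http://username:password@" + tunnel,
57 |     "https": "http://username:password@" + tunnel,
58 | }
59 | r = requests.get("https://example.com", proxies=proxies, timeout=10)
60 | print(r.status_code)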
--------------------------------------------------------------------------------
/api-sdk/kdl/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | __version__ = "0.2.21"
4 |
5 | from .client import Client
6 | from .auth import Auth
7 |
--------------------------------------------------------------------------------
/api-sdk/kdl/auth.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | """The Auth object
4 | Stores the user's secret_id and secret_key, and computes request signatures.
5 | """
6 | 
7 | import base64
8 | import hashlib
9 | import hmac
10 | 
11 | class Auth(object):
12 |     """Stores the user's secret_id and secret_key, and computes request signatures."""
13 | 
14 |     def __init__(self, secret_id, secret_key):
15 |         self.secret_id = secret_id
16 |         self.secret_key = secret_key
17 | 
18 |     @classmethod
19 |     def get_string_to_sign(cls, method, endpoint, params):
20 |         """ Build the string to sign """
21 |         cls.clear_req_params(params)
22 |         s = method + endpoint.split('.com')[1] + '?'
23 |         query_str = '&'.join("%s=%s" % (k, params[k]) for k in sorted(params))
24 |         return s + query_str
25 | 
26 |     @classmethod
27 |     def clear_req_params(cls, params):
28 |         if 'timeout' in params:
29 |             del params['timeout']
30 |         if 'max_retries' in params:
31 |             del params['max_retries']
32 | 
33 |     def sign_str(self, raw_str, method=hashlib.sha1):
34 |         """ Compute the signature (a base64-encoded HMAC digest) """
35 |         try:
36 |             hmac_str = hmac.new(self.secret_key.encode('utf8'), raw_str.encode('utf8'), method).digest()
37 |         except UnicodeDecodeError:
38 |             hmac_str = hmac.new(self.secret_key.encode('utf8'), raw_str, method).digest()
39 |         return base64.b64encode(hmac_str)
40 |
41 |
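42 | # Illustrative only (not part of the SDK API): a tiny sanity check showing the
43 | # string-to-sign format and the resulting signature for a hypothetical request.
44 | if __name__ == '__main__':
45 |     demo_auth = Auth('example_id', 'example_key')
46 |     raw = Auth.get_string_to_sign('GET', 'dps.kdlapi.com/api/getdps',
47 |                                   {'secret_id': 'example_id', 'num': 1, 'timestamp': 0})
48 |     print(raw)  # -> GET/api/getdps?num=1&secret_id=example_id&timestamp=0
49 |     print(demo_auth.sign_str(raw))  # base64-encoded HMAC-SHA1 of the string above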
--------------------------------------------------------------------------------
/api-sdk/kdl/client.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | """Wraps every Kuaidaili API endpoint in the Client class
4 | """
5 |
6 | import json
7 | import os
8 | import time
9 | import requests
10 | from requests.adapters import HTTPAdapter
11 |
12 | from kdl.endpoint import EndPoint
13 | from kdl.exceptions import KdlException, KdlNameError, KdlTypeError, KdlStatusError
14 | from kdl.utils import OpsOrderLevel
15 |
16 |
17 | SECRET_PATH = './.secret'
18 |
19 |
20 | class Client:
21 | def __init__(self, auth, timeout=None, max_retries=None):
22 | self.auth = auth
23 | self.session = requests.Session()
24 | self.timeout = timeout or (6, 8) # default (connect_timeout, read_timeout)
25 | self.max_retries = max_retries
26 |
27 | def get_order_expire_time(self, sign_type="token", timeout=None, max_retries=None):
28 |         """Get the order expiration time; signature verification is mandatory.
29 |         :return: expiration time string
30 |         """
31 |
32 | endpoint = EndPoint.GetOrderExpireTime.value
33 | params = self._get_params(endpoint, sign_type=sign_type, timeout=timeout, max_retries=max_retries)
34 | res = self._get_base_res("GET", endpoint, params)
35 |
36 | if isinstance(res, dict):
37 | return res['data']['expire_time']
38 | return res
39 |
40 | def get_proxy_authorization(self, plain_text=0, sign_type="token", timeout=None, max_retries=None):
41 |         """Get the credentials for accessing proxy IPs under the given order.
42 |         The credentials contain a username and password, used for authentication with private/exclusive/tunnel proxies.
43 |         :return: dict with the returned info
44 |         """
45 | endpoint = EndPoint.GetProxyAuthorization.value
46 | params = self._get_params(endpoint, plaintext=plain_text, sign_type=sign_type, timeout=timeout, max_retries=max_retries)
47 | res = self._get_base_res("GET", endpoint, params)
48 | if isinstance(res, dict):
49 | return res['data']
50 | return res
51 |
52 | def get_ip_whitelist(self, sign_type="token", timeout=None, max_retries=None):
53 |         """Get the order's IP whitelist; signature verification is mandatory.
54 |         :return: list of whitelisted IPs
55 |         """
56 | endpoint = EndPoint.GetIpWhitelist.value
57 | params = self._get_params(endpoint, sign_type=sign_type, timeout=timeout, max_retries=max_retries)
58 | res = self._get_base_res("GET", endpoint, params)
59 | if isinstance(res, dict):
60 | return res['data']['ipwhitelist']
61 | return res
62 |
63 | def set_ip_whitelist(self, iplist=None, sign_type="token", timeout=None, max_retries=None):
64 |         """Set the order's IP whitelist; signature verification is mandatory.
65 |         :param iplist: a str, list or tuple;
66 |             if a string, the IPs are separated by commas
67 |         :return: True on success, raises an exception otherwise
68 |         """
69 | 
70 |         if iplist is None:
71 |             raise KdlNameError("missing param: iplist")
72 |         if not (isinstance(iplist, list) or isinstance(iplist, tuple) or isinstance(iplist, str)):
73 |             raise KdlTypeError("iplist type error, should be an instance of list, tuple or str")
74 | if isinstance(iplist, list) or isinstance(iplist, tuple):
75 | iplist = ','.join(iplist)
76 | endpoint = EndPoint.SetIpWhitelist.value
77 | params = self._get_params(endpoint, iplist=iplist, sign_type=sign_type, timeout=timeout, max_retries=max_retries)
78 | self._get_base_res("POST", endpoint, params)
79 | return True
80 |
81 | def tps_current_ip(self, sign_type="token", timeout=None, max_retries=None):
82 |         """Only supported for tunnel proxy orders with an IP rotation period >= 1 minute.
83 |         Gets the tunnel's current IP; "token" auth by default.
84 |         :param sign_type: defaults to token
85 |         :return: the current IP address
86 |         """
87 | endpoint = EndPoint.TpsCurrentIp.value
88 | params = self._get_params(endpoint, sign_type=sign_type, timeout=timeout, max_retries=max_retries)
89 | res = self._get_base_res("GET", endpoint, params)
90 | return res['data']['current_ip']
91 |
92 | def change_tps_ip(self, sign_type="token", timeout=None, max_retries=None):
93 |         """Only supported for tunnel proxy orders with an IP rotation period >= 1 minute.
94 |         :param sign_type: defaults to token
95 |         :return: the new IP address
96 |         """
97 | endpoint = EndPoint.ChangeTpsIp.value
98 | params = self._get_params(endpoint, sign_type=sign_type, timeout=timeout, max_retries=max_retries)
99 | res = self._get_base_res("GET", endpoint, params)
100 | return res['data']['new_ip']
101 |
102 | def get_tps(self, num=None, sign_type="token", **kwargs):
103 |         """Get tunnel proxy IPs; "token" auth by default. https://www.kuaidaili.com/doc/api/gettps/
104 |         :param num: number of proxies to fetch, int
105 |         :param kwargs: other keyword arguments; see the API docs in the help center
106 |         :return: for json format, the proxy_list part of data (a list of proxies); otherwise the raw response
107 |         """
108 |         if num is None:
109 |             raise KdlNameError("missing param: num")
110 |         if not isinstance(num, int):
111 |             raise KdlTypeError("num should be an integer")
112 | endpoint = EndPoint.GetTps.value
113 | params = self._get_params(endpoint, num=num, sign_type=sign_type, **kwargs)
114 | res = self._get_base_res("GET", endpoint, params)
115 | if isinstance(res, dict):
116 | return res['data']['proxy_list']
117 | return res
118 |
119 | def get_dps_valid_time(self, proxy=None, sign_type="token", **kwargs):
120 |         """Get the remaining lifetime of private proxy IPs.
121 |         :param proxy: list of private proxies in IP:PORT format, e.g. 113.120.61.166:22989,122.4.44.132:21808
122 |         :param sign_type: auth method
123 |         :return: the data part, a dict of 'proxy: seconds (remaining)'
124 |         """
125 |         if not proxy:
126 |             raise KdlNameError("missing param: proxy")
127 |         if not (isinstance(proxy, list) or isinstance(proxy, tuple) or isinstance(proxy, str)):
128 |             raise KdlTypeError("proxy should be an instance of list, tuple or str")
129 |         if isinstance(proxy, list) or isinstance(proxy, tuple):
130 |             proxy = ','.join(proxy)
131 | endpoint = EndPoint.GetDpsValidTime.value
132 | params = self._get_params(endpoint, proxy=proxy, sign_type=sign_type)
133 | res = self._get_base_res("GET", endpoint, params)
134 | if isinstance(res, dict):
135 | return res['data']
136 | return res
137 |
138 | def get_dps(self, num=None, sign_type="token", **kwargs):
139 |         """Get private proxies; "token" auth by default.
140 |         :param num: number of proxies to fetch, int
141 |         :param kwargs: other keyword arguments; see the API docs in the help center
142 |         :return: for json format, the proxy_list part of data (a list of proxies); otherwise the raw response
143 |         """
144 |         if num is None:
145 |             raise KdlNameError("missing param: num")
146 |         if not isinstance(num, int):
147 |             raise KdlTypeError("num should be an integer")
148 | endpoint = EndPoint.GetDpsProxy.value
149 | params = self._get_params(endpoint, num=num, sign_type=sign_type, **kwargs)
150 | res = self._get_base_res("GET", endpoint, params)
151 | if isinstance(res, dict):
152 | return res['data']['proxy_list']
153 | return res
154 |
155 | def check_dps_valid(self, proxy=None, sign_type="token", **kwargs):
156 |         """Check private proxy validity; signature verification is mandatory.
157 |         :return: the data part, a dict of 'proxy: True/False'
158 |         """
159 |         if not proxy:
160 |             raise KdlNameError("missing param: proxy")
161 |         if not (isinstance(proxy, list) or isinstance(proxy, tuple) or isinstance(proxy, str)):
162 |             raise KdlTypeError("proxy should be an instance of list, tuple or str")
163 |         if isinstance(proxy, list) or isinstance(proxy, tuple):
164 |             proxy = ','.join(proxy)
165 | endpoint = EndPoint.CheckDpsValid.value
166 | params = self._get_params(endpoint, proxy=proxy, sign_type=sign_type)
167 | res = self._get_base_res("GET", endpoint, params)
168 | if isinstance(res, dict):
169 | return res['data']
170 | return res
171 |
172 | def get_ip_balance(self, sign_type="token", timeout=None, max_retries=None):
173 |         """Get the remaining IP balance of a pay-per-IP order; signature verification is mandatory.
174 |         Only valid for pay-as-you-go orders and monthly/yearly bulk-extraction orders.
175 |         :return: the balance field of data, int
176 | """
177 | endpoint = EndPoint.GetIpBalance.value
178 | params = self._get_params(endpoint, sign_type=sign_type, timeout=timeout, max_retries=max_retries)
179 | res = self._get_base_res("GET", endpoint, params)
180 | if isinstance(res, dict):
181 | return res['data']['balance']
182 | return res
183 |
184 | def get_kps(self, num=None, sign_type="token", **kwargs):
185 |         """Get exclusive proxies; "token" auth by default.
186 |         :param num: number of proxies to fetch; sign_type: auth method
187 |         :param kwargs: other keyword arguments; see the API docs in the help center
188 |         :return: for json format, the proxy_list part of data (a list of proxies); otherwise the raw response
189 |         """
190 |         if num is None:
191 |             raise KdlNameError("missing param: num")
192 |         if not isinstance(num, int):
193 |             raise KdlTypeError("num should be an integer")
194 | endpoint = EndPoint.GetKpsProxy.value
195 | params = self._get_params(endpoint, num=num, sign_type=sign_type, **kwargs)
196 | res = self._get_base_res("GET", endpoint, params)
197 | if isinstance(res, dict):
198 | return res['data']['proxy_list']
199 | return res
200 |
201 | def get_proxy(self, num=None, order_level=OpsOrderLevel.NORMAL, sign_type="token", **kwargs):
202 |         """Get open proxies; no auth needed by default.
203 |         :param num: number of proxies to fetch; sign_type: auth method; order_level: open proxy order level
204 |         :param kwargs: other keyword arguments; see the API docs in the help center
205 |         :return: for json format, the proxy_list part of data (a list of proxies); otherwise the raw response
206 |         """
207 |         if num is None:
208 |             raise KdlNameError("missing param: num")
209 |         if not isinstance(num, int):
210 |             raise KdlTypeError("num should be an integer")
211 | endpoint = EndPoint.GetOpsProxyNormalOrVip.value
212 | if order_level == OpsOrderLevel.SVIP:
213 | endpoint = EndPoint.GetOpsProxySvip.value
214 | if order_level == OpsOrderLevel.PRO:
215 | endpoint = EndPoint.GetOpsProxyEnt.value
216 |
217 | params = self._get_params(endpoint, num=num, sign_type=sign_type, **kwargs)
218 | res = self._get_base_res("GET", endpoint, params)
219 | if isinstance(res, dict):
220 | return res['data']['proxy_list']
221 | return res
222 |
223 | def check_ops_valid(self, proxy=None, sign_type="token", **kwargs):
224 |         """Check open proxy validity; signature verification is mandatory.
225 |         :return: the data part, a dict of 'proxy: True/False'
226 |         """
227 |         if not proxy:
228 |             raise KdlNameError("missing param: proxy")
229 |         if not (isinstance(proxy, list) or isinstance(proxy, tuple) or isinstance(proxy, str)):
230 |             raise KdlTypeError("proxy should be an instance of list, tuple or str")
231 |         if isinstance(proxy, list) or isinstance(proxy, tuple):
232 |             proxy = ','.join(proxy)
233 | endpoint = EndPoint.CheckOpsValid.value
234 | params = self._get_params(endpoint, proxy=proxy, sign_type=sign_type)
235 | res = self._get_base_res("GET", endpoint, params)
236 | if isinstance(res, dict):
237 | return res['data']
238 | return res
239 |
240 | def get_ua(self, num=1, **kwargs):
241 |         """Get User Agents.
242 |         :return: for json format, the ua_list part of data (a list of user agents); otherwise the raw response
243 | """
244 | endPoint = EndPoint.GetUA.value
245 | params = self._get_params(endPoint, num=num, sign_type="token", **kwargs)
246 | res = self._get_base_res("GET", endPoint, params)
247 | if isinstance(res, dict):
248 | return res['data']["ua_list"]
249 | return res
250 |
251 | def get_area_code(self, area, **kwargs):
252 |         """Get the code of the given area.
253 | :return:
254 | """
255 | endpoint = EndPoint.GetAreaCode.value
256 | params = self._get_params(endpoint, area=area, sign_type="token", **kwargs)
257 | res = self._get_base_res("GET", endpoint, params)
258 | if isinstance(res, dict):
259 | return res['data']
260 | return res
261 |
262 | def get_account_balance(self, **kwargs):
263 |         """Get the account balance.
264 | :return:
265 | """
266 | endpoint = EndPoint.GetAccountBalance.value
267 | params = self._get_params(endpoint, sign_type="token", **kwargs)
268 | res = self._get_base_res("GET", endpoint, params)
269 | if isinstance(res, dict):
270 | return res['data']
271 | return res
272 |
273 | def create_order(self, product, pay_type, **kwargs):
274 |         """Create an order; the fee is settled automatically from the account balance.
275 | :return:
276 | """
277 | if not (product and pay_type):
278 |             raise KdlNameError('missing param: product or pay_type')
279 | endpoint = EndPoint.CreateOrder.value
280 | params = self._get_params(endpoint,product=product, pay_type=pay_type, sign_type="hmacsha1", **kwargs)
281 | res = self._get_base_res("GET", endpoint, params)
282 | return res
283 |
284 | def get_order_info(self, **kwargs):
285 |         """Get detailed information about the order.
286 | :return:
287 | """
288 | endpoint = EndPoint.GetOrderInfo.value
289 | params = self._get_params(endpoint, sign_type="hmacsha1", **kwargs)
290 | res = self._get_base_res("GET", endpoint, params)
291 | return res
292 |
293 | def set_auto_renew(self, autorenew, **kwargs):
294 |         """Enable/disable auto-renewal.
295 |         :return:
296 |         """
297 |         if autorenew is None:
298 |             raise KdlNameError('missing param: autorenew')
299 | endpoint = EndPoint.SetAutoRenew.value
300 | params = self._get_params(endpoint, autorenew=autorenew, sign_type="hmacsha1", **kwargs)
301 | res = self._get_base_res("GET", endpoint, params)
302 | return res
303 |
304 | def close_order(self, **kwargs):
305 |         """Close the given order; only valid for pay-as-you-go (postpaid) orders.
306 | :return:
307 | """
308 | endpoint = EndPoint.CloseOrder.value
309 | params = self._get_params(endpoint, sign_type="hmacsha1", **kwargs)
310 | res = self._get_base_res("GET", endpoint, params)
311 | return res
312 |
313 | def query_kps_city(self, serie, **kwargs):
314 |         """Query which cities can be opened for exclusive proxies. For the shared-IP type, also returns how many IPs can be opened in each city.
315 | :return:
316 | """
317 | if not serie:
318 |             raise KdlNameError('missing param: serie')
319 | endpoint = EndPoint.QueryKpsCity.value
320 | params = self._get_params(endpoint, serie=serie, sign_type="hmacsha1", **kwargs)
321 | res = self._get_base_res("GET", endpoint, params)
322 | return res
323 |
324 | def _get_secret_token(self, timeout=None, max_retries=None):
325 | try:
326 | timeout = timeout or self.timeout
327 | max_retries = max_retries or self.max_retries
328 | self.session.mount('http://', HTTPAdapter(max_retries=max_retries))
329 | self.session.mount('https://', HTTPAdapter(max_retries=max_retries))
330 | r = self.session.post(url='https://' + EndPoint.GetSecretToken.value,
331 | data={'secret_id': self.auth.secret_id, 'secret_key': self.auth.secret_key},
332 | timeout=timeout)
333 | if r.status_code != 200:
334 | raise KdlStatusError(r.status_code, r.content.decode('utf8'))
335 |         except requests.exceptions.RequestException as e:
336 |             # TODO: handle the case where all retries have failed
337 |             raise e
338 |
339 | res = json.loads(r.content.decode('utf8'))
340 | code, msg = res['code'], res['msg']
341 | if code != 0:
342 | raise KdlException(code, msg)
343 | secret_token = res['data']['secret_token']
344 | expire = str(res['data']['expire'])
345 | _time = '%.6f' % time.time()
346 | return secret_token, expire, _time
347 |
348 | def _read_secret_token(self):
349 | with open(SECRET_PATH, 'r') as f:
350 | token_info = f.read()
351 | secret_token, expire, _time = token_info.split('|')
352 |         if float(_time) + float(expire) - 3 * 60 < time.time():  # refresh when less than 3 minutes from expiry
353 | secret_token, expire, _time = self._get_secret_token()
354 | with open(SECRET_PATH, 'w') as f:
355 | f.write(secret_token + '|' + expire + '|' + _time)
356 | return secret_token
357 |
358 | def get_secret_token(self):
359 | if os.path.exists(SECRET_PATH):
360 | secret_token = self._read_secret_token()
361 | else:
362 | secret_token, expire, _time = self._get_secret_token()
363 | with open(SECRET_PATH, 'w') as f:
364 | f.write(secret_token + '|' + expire + '|' + _time)
365 | return secret_token
366 |
367 | def _get_params(self, endpoint, **kwargs):
368 |         """Build the request parameters"""
369 | params = dict(secret_id=self.auth.secret_id)
370 | params.update(kwargs)
371 |
372 | sign_type = kwargs.get('sign_type', None)
373 | if not sign_type:
374 | return params
375 |
376 | if not self.auth.secret_key:
377 | raise KdlNameError("secret_key is required for signature")
378 |
379 | if sign_type == "hmacsha1":
380 | params['timestamp'] = int(time.time())
381 | if endpoint == EndPoint.SetIpWhitelist.value:
382 | raw_str = self.auth.get_string_to_sign("POST", endpoint, params.copy())
383 | else:
384 | raw_str = self.auth.get_string_to_sign("GET", endpoint, params.copy())
385 | params["signature"] = self.auth.sign_str(raw_str)
386 | elif sign_type == "token":
387 | secret_token = self.get_secret_token()
388 | params['signature'] = secret_token
389 | else:
390 | raise KdlNameError("unknown sign_type {}".format(sign_type))
391 |
392 | return params
393 |
394 | def _get_base_res(self, method, endpoint, params):
395 |         """Perform the underlying request.
396 |         Returns the result as a dict if the response is JSON,
397 |         otherwise returns the raw response.
398 | """
399 | try:
400 | r = None
401 | timeout = params.get('timeout', '') or self.timeout
402 | max_retries = params.get('max_retries', '') or self.max_retries
403 | self.session.mount('http://', HTTPAdapter(max_retries=max_retries))
404 | self.session.mount('https://', HTTPAdapter(max_retries=max_retries))
405 | self.auth.clear_req_params(params)
406 |
407 |             if method == "GET":
408 |                 r = self.session.get("https://" + endpoint, params=params, timeout=timeout)
409 |             elif method == "POST":
410 |                 r = self.session.post("https://" + endpoint, data=params, headers={"Content-Type": "application/x-www-form-urlencoded"}, timeout=timeout)
411 | if r.status_code != 200:
412 | raise KdlStatusError(r.status_code, r.content.decode('utf8'))
413 | try:
414 | res = json.loads(r.content.decode('utf8'))
415 | code, msg = res['code'], res['msg']
416 | if code != 0:
417 | raise KdlException(code, msg)
418 | return res
419 |
420 | except ValueError as e:
421 |                 # the response is not JSON; return it as-is
422 | if r.content.decode('utf8').strip().startswith("ERROR"):
423 | raise KdlException(-3, r.content)
424 | return r.content.decode('utf8')
425 | except Exception as e:
426 | raise e
427 |
--------------------------------------------------------------------------------
/api-sdk/kdl/endpoint.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | """Enumerates the host + path of each API
4 | """
5 |
6 | from enum import Enum, unique
7 |
8 |
9 | @unique
10 | class EndPoint(Enum):
11 |     """ Host + path of each API """
12 |     GetOrderExpireTime = "dev.kdlapi.com/api/getorderexpiretime"
13 |     GetIpWhitelist = "dev.kdlapi.com/api/getipwhitelist"  # get the IP whitelist
14 |     SetIpWhitelist = "dev.kdlapi.com/api/setipwhitelist"  # set the IP whitelist
15 |     GetKpsProxy = "kps.kdlapi.com/api/getkps"
16 |     GetDpsProxy = "dps.kdlapi.com/api/getdps"
17 |     GetOpsProxyNormalOrVip = "dev.kdlapi.com/api/getproxy"
18 |     GetOpsProxySvip = "svip.kdlapi.com/api/getproxy"
19 |     GetOpsProxyEnt = "ent.kdlapi.com/api/getproxy"
20 |     CheckDpsValid = "dps.kdlapi.com/api/checkdpsvalid"
21 |     CheckOpsValid = "dev.kdlapi.com/api/checkopsvalid"
22 |     GetIpBalance = "dps.kdlapi.com/api/getipbalance"
23 |     GetDpsValidTime = "dps.kdlapi.com/api/getdpsvalidtime"
24 |     TpsCurrentIp = "tps.kdlapi.com/api/tpscurrentip"  # get the tunnel proxy's current IP
25 |     ChangeTpsIp = "tps.kdlapi.com/api/changetpsip"  # change the tunnel proxy's current IP
26 |     GetTps = "tps.kdlapi.com/api/gettps"  # get tunnel proxy IPs
27 |     GetProxyAuthorization = "dev.kdlapi.com/api/getproxyauthorization"  # get proxy authorization info
28 | 
29 |     # tool APIs
30 |     GetUA = "www.kuaidaili.com/api/getua"  # get User Agents
31 |     GetAreaCode = "dev.kdlapi.com/api/getareacode"  # get the code of a given area
32 |     GetAccountBalance = "dev.kdlapi.com/api/getaccountbalance"  # get the account balance
33 | 
34 |     # order APIs
35 |     CreateOrder = "dev.kdlapi.com/api/createorder"  # create an order
36 |     GetOrderInfo = "dev.kdlapi.com/api/getorderinfo"  # get order info
37 |     SetAutoRenew = "dev.kdlapi.com/api/setautorenew"  # enable/disable auto-renewal
38 |     CloseOrder = "dev.kdlapi.com/api/closeorder"  # close an order
39 |     QueryKpsCity = "dev.kdlapi.com/api/querykpscity"  # query exclusive proxy city info
40 | 
41 |     GetSecretToken = "auth.kdlapi.com/api/get_secret_token"  # get a token
42 |
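43 | # Illustrative note (not part of the SDK): Client prefixes these values with a
44 | # scheme when building the request URL, e.g.
45 | #     "https://" + EndPoint.GetDpsProxy.value  ->  "https://dps.kdlapi.com/api/getdps"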
--------------------------------------------------------------------------------
/api-sdk/kdl/exceptions.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | """Kuaidaili custom exceptions"""
4 |
5 | import sys
6 |
7 |
8 | class KdlException(Exception):
9 |     """Base exception class"""
10 |
11 | def __init__(self, code=None, message=None):
12 | self.code = code
13 | if sys.version_info[0] < 3 and isinstance(message, unicode):
14 | message = message.encode("utf8")
15 | self.message = message
16 | self._hint_message = "[KdlException] code: {} message: {}".format(self.code, self.message)
17 |
18 | @property
19 | def hint_message(self):
20 | return self._hint_message
21 |
22 | @hint_message.setter
23 | def hint_message(self, value):
24 | self._hint_message = value
25 |
26 | def __str__(self):
27 | if sys.version_info[0] < 3 and isinstance(self.hint_message, unicode):
28 | self.hint_message = self.hint_message.encode("utf8")
29 | return self.hint_message
30 |
31 |
32 | class KdlStatusError(KdlException):
33 |     """Status-code exception"""
34 | def __init__(self, code, message):
35 | super(KdlStatusError, self).__init__(code, message)
36 | self.hint_message = "[KdlStatusError] status_code: {}, message: {}".format(self.code, self.message)
37 |
38 |
39 | class KdlNameError(KdlException):
40 |     """Parameter exception"""
41 | def __init__(self, message, code=-2):
42 | super(KdlNameError, self).__init__(code, message)
43 | self.hint_message = "[KdlNameError] message: {}".format(self.message)
44 |
45 |
46 | class KdlTypeError(KdlException):
47 |     """Type exception"""
48 | def __init__(self, message, code=-1):
49 | super(KdlTypeError, self).__init__(code, message)
50 | self.hint_message = "[KdlTypeError] message: {}".format(self.message)
51 |
--------------------------------------------------------------------------------
/api-sdk/kdl/utils.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | """Enumerates open proxy order levels
4 | """
5 |
6 | class OpsOrderLevel(object):
7 |     """Open proxy order levels"""
8 |     NORMAL = "dev"  # normal
9 |     VIP = "dev"  # vip
10 |     SVIP = "svip"  # svip
11 |     PRO = "ent"  # professional
12 |
--------------------------------------------------------------------------------
/api-sdk/setup.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import sys
4 | from setuptools import setup
5 |
6 | if sys.version_info < (3, 0):
7 |     long_description_file = open('README.rst').read()
8 | else:
9 |     long_description_file = open('README.rst', encoding='UTF-8').read()
10 |
11 | setup(
12 | name='kdl',
13 | version='0.2.21',
14 | description=(
15 | 'kuaidaili api sdk python, site: https://www.kuaidaili.com'
16 | ),
17 |
18 | long_description=long_description_file,
19 | author='kuaidaili-dev',
20 | author_email='service@kuaidaili.com',
21 | license='BSD License',
22 | packages= [
23 | 'kdl',
24 | ],
25 | platforms='any',
26 | install_requires=[
27 | 'requests'
28 | ],
29 | url='https://github.com/kuaidaili/python-sdk/api-sdk',
30 | classifiers=[
31 | 'Development Status :: 4 - Beta',
32 | 'Operating System :: OS Independent',
33 | 'Intended Audience :: Developers',
34 | 'License :: OSI Approved :: BSD License',
35 | 'Programming Language :: Python',
36 | 'Programming Language :: Python :: Implementation',
37 | 'Programming Language :: Python :: 2',
38 | 'Programming Language :: Python :: 2.7',
39 | 'Programming Language :: Python :: 3',
40 | 'Programming Language :: Python :: 3.4',
41 | 'Programming Language :: Python :: 3.5',
42 | 'Programming Language :: Python :: 3.6',
43 | 'Programming Language :: Python :: 3.7',
44 | 'Topic :: Software Development :: Libraries'
45 | ],
46 | )
47 |
--------------------------------------------------------------------------------
/api-sdk/test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | """Unit tests for the Python API SDK
4 | """
5 |
6 | from kdl import Auth, Client
7 |
8 | import unittest
9 |
10 | from kdl.exceptions import KdlException
11 | import re
12 |
13 |
14 | secret_id = ""
15 | secret_key = ""
16 |
17 |
18 |
19 | ip_pattern = "^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$"
20 | ip_port_pattern = "^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5]):\d{2,5}$"
21 | time_pattern = "(((01[0-9]{2}|0[2-9][0-9]{2}|[1-9][0-9]{3})-(0?[13578]|1[02])-(0?[1-9]|[12]\\d|3[01]))|((01[0-9]{2}|0[2-9][0-9]{2}|[1-9][0-9]{3})-(0?[13456789]|1[012])-(0?[1-9]|[12]\\d|30))|((01[0-9]{2}|0[2-9][0-9]{2}|[1-9][0-9]{3})-0?2-(0?[1-9]|1\\d|2[0-8]))|(((1[6-9]|[2-9]\\d)(0[48]|[2468][048]|[13579][26])|((04|08|12|16|[2468][048]|[3579][26])00))-0?2-29)) (20|21|22|23|[0-1]?\\d):[0-5]?\\d:[0-5]?\\d"
22 |
23 |
24 | def is_valid_str(str, pattern):
25 |     """Check whether a string matches the given pattern"""
26 | if str and re.match(pattern, str):
27 | return True
28 | return False
29 |
30 |
31 | def is_valid_ip_list(lis, pattern):
32 |     """Check that the returned list of IPs (or IP:port pairs) is well-formed"""
33 | if not lis:
34 | return True
35 |
36 | for i in lis:
37 | flag = is_valid_str(i, pattern)
38 |         if not flag:
39 | return False
40 | return True
41 |
42 |
43 | class TestBase(unittest.TestCase):
44 |     """Base class for the unit tests; all test classes inherit from it"""
45 | 
46 |     name = "unit test"
47 |
48 | @classmethod
49 | def setUpClass(cls):
50 |         print('%s tests started' % cls.name)
51 |
52 | @classmethod
53 | def tearDownClass(cls):
54 |         print('%s tests finished' % cls.name)
55 |
56 | def setUp(self):
57 | self.auth = Auth(secret_id, secret_key)
58 | self.client = Client(self.auth, timeout=(5, 6), max_retries=3)
59 |
60 | def test_get_expire_time(self):
61 |         """ Get the order expiration time """
62 | expire_time = self.client.get_order_expire_time(sign_type='hmacsha1')
63 | # assert isinstance(expire_time, unicode) or isinstance(expire_time, str)
64 | print(expire_time)
65 | assert isinstance(expire_time, str) and is_valid_str(expire_time,time_pattern)
66 |
67 |
68 | class TestBase2(TestBase):
69 |     """Base class for orders that have the get/set IP whitelist APIs and the
70 |     proxy authorization API; currently private, exclusive and tunnel proxies"""
71 |
72 | def test_get_ip_whitelist(self):
73 |         """ Get the IP whitelist """
74 | ip_whitelist = self.client.get_ip_whitelist()
75 | print(ip_whitelist)
76 | assert isinstance(ip_whitelist, list) and is_valid_ip_list(ip_whitelist,ip_pattern)
77 |
78 | def test_set_ip_whitelist(self):
79 |         """ Set the IP whitelist """
80 | self.client.set_ip_whitelist([])
81 | ip_whitelist = self.client.get_ip_whitelist()
82 | assert len(ip_whitelist) == 0
83 | set_ip_list = ["171.113.144.44", "171.113.244.41"]
84 | self.client.set_ip_whitelist(set_ip_list)
85 | ip_whitelist = self.client.get_ip_whitelist()
86 | set_ip_list.reverse()
87 | assert len(ip_whitelist) == 2 and isinstance(ip_whitelist, list) and is_valid_ip_list(ip_whitelist,ip_pattern) and ip_whitelist == set_ip_list
88 | self.client.set_ip_whitelist([])
89 |
90 | def test_get_proxy_authorization(self):
91 | data = self.client.get_proxy_authorization(plain_text=1, sign_type='token')
92 | assert isinstance(data, dict)
93 | print(data)
94 |
95 |
96 | class TestDpsOrder(TestBase2):
97 |     """ Private proxy """
98 | 
99 |     name = "private proxy"
100 |
101 | def test_get_proxy(self):
102 |         """ Get private proxies """
103 | ips = self.client.get_dps(2, sign_type='hmacsha1', format='text', area='云南,广东', pt=2, f_citycode=1, )
104 | # ips = self.client.get_dps(2, format='text')
105 | print(ips)
106 | assert isinstance(ips, list) or isinstance(ips, str) or isinstance(ips.encode('utf8'), str) or isinstance(ips.encode('utf8'), bytes) and is_valid_ip_list(ips,ip_port_pattern)
107 |
108 | def test_check_dps_valid(self):
109 |         """Check proxy validity"""
110 | ips = self.client.get_dps(2, format='json', area='北京,上海')
111 | print(ips)
112 | is_valid = self.client.check_dps_valid(ips)
113 | assert isinstance(is_valid, dict)
114 |
115 | def test_get_ip_balance(self):
116 |         """Check how many IPs can still be extracted"""
117 | balance = self.client.get_ip_balance()
118 | assert isinstance(balance, int)
119 |
120 | def test_get_dps_valid_time(self):
121 | ips = self.client.get_dps(5, format='json', sign_type="hmacsha1")
122 | print("ips: ", ips)
123 | seconds = self.client.get_dps_valid_time(ips, sign_type="hmacsha1")
124 | print("seconds: ", seconds)
125 | assert isinstance(seconds, dict)
126 |
127 | def test_get_secret_token(self):
128 | secret_token = self.client.get_secret_token()
129 | print(secret_token)
130 |
131 |
132 | class TestKpsOrder(TestBase):
133 |     """ Exclusive proxy """
134 |     name = 'exclusive proxy'
135 |
136 | def test_get_proxy(self):
137 |         """ Get exclusive proxies """
138 | ips = self.client.get_kps(2, sign_type='token', format='json', area='云南,广东', pt=2, f_citycode=1)
139 | assert isinstance(ips, list) or isinstance(ips, str) or isinstance(ips.encode('utf8'), str) or isinstance(ips.encode('utf8'), bytes) and is_valid_ip_list(ips,ip_port_pattern)
140 |
141 | def test_check_dps_valid(self):
142 |         """Check proxy validity"""
143 | ips = self.client.get_kps(2, format='json', area='北京', pt=2)
144 | with self.assertRaises(KdlException):
145 | self.client.check_dps_valid(ips)
146 |
147 | def test_get_ip_balance(self):
148 |         """Check how many IPs can still be extracted"""
149 | with self.assertRaises(KdlException):
150 | self.client.get_ip_balance()
151 |
152 | class TestOpsOrder(TestBase):
153 |     """ Open proxy """
154 | 
155 |     name = 'open proxy'
156 |
157 | def test_get_ip_whitelist(self):
158 |         """ Get the IP whitelist """
159 | with self.assertRaises(KdlException):
160 | self.client.get_ip_whitelist()
161 |
162 | def test_get_proxy(self):
163 |         """ Get open proxies """
164 | ips = self.client.get_proxy(2, order_level='vip', sign_type='hmacsha1', format='json', area='云南,广东', pt=2,
165 | f_citycode=1)
166 | assert isinstance(ips, list) or isinstance(ips, str) or isinstance(ips.encode('utf8'), str) or isinstance(ips.encode('utf8'), bytes) and is_valid_ip_list(ips,ip_port_pattern)
167 |
168 | def test_check_ops_valid(self):
169 |         """Check proxy validity"""
170 | ips = self.client.get_proxy(2, format='json', area='北京', pt=2)
171 | is_valid = self.client.check_ops_valid(ips)
172 | assert isinstance(is_valid, dict)
173 |
174 | def test_get_ip_balance(self):
175 |         """Check how many IPs can still be extracted"""
176 | with self.assertRaises(KdlException):
177 | self.client.get_ip_balance()
178 |
179 | class TestTpsOrder(TestBase2):
180 |     name = "tunnel proxy"
181 |
182 | def test_get_tps_ip(self):
183 |         """Get the tunnel's current IP"""
184 | current_ip = self.client.tps_current_ip(sign_type='hmacsha1')
185 | assert len(current_ip) == 0 or (len(current_ip.split('.')) == 4 and is_valid_str(current_ip,ip_pattern))
186 |
187 | def test_change_tcp_ip(self):
188 |         """Change the tunnel IP immediately"""
189 | new_ip = self.client.change_tps_ip()
190 | assert len(new_ip.split('.')) == 4 and is_valid_str(new_ip,ip_pattern)
191 |
192 | def test_get_tps(self):
193 | tps_list = self.client.get_tps(2,sign_type='hmacsha1', format='json')
194 | assert isinstance(tps_list, list)
195 |
196 |
197 |
198 | class TestExpiredKpsOrder(unittest.TestCase):
199 |     """ Expired order """
200 | 
201 |     name = "expired order"
202 | 
203 |     def setUp(self):
204 |         # assumes the secret_id/secret_key above belong to an expired order
205 |         self.client = Client(Auth(secret_id, secret_key))
206 | 
207 |     def test_get_expire_time(self):
208 |         with self.assertRaises(KdlException):
209 |             self.client.get_order_expire_time()
210 | 
211 |     def test_get_ip_whitelist(self):
212 |         with self.assertRaises(KdlException):
213 |             self.client.get_ip_whitelist()
214 | 
215 |     def test_set_ip_whitelist(self):
216 |         with self.assertRaises(KdlException):
217 |             self.client.set_ip_whitelist("127.0.0.1")
218 | 
219 |     def test_get_proxy(self):
220 |         with self.assertRaises(KdlException):
221 |             self.client.get_kps(1)
222 | 
223 | 
224 | class TestNoApiKeyOrder(unittest.TestCase):
225 |     """ Without the secret_key, only APIs that need no signature can succeed """
226 |     name = "without the secret_key, only APIs that need no signature can succeed"
227 | 
228 |     def setUp(self):
229 |         # no secret_key is provided; only unsigned calls can succeed
230 |         self.client = Client(Auth(secret_id, ""))
231 | 
232 |     def test_get_ip_whitelist(self):
233 |         """ Get the IP whitelist """
234 |         with self.assertRaises(KdlException):
235 |             self.client.get_ip_whitelist()
236 | 
237 |     def test_get_proxy(self):
238 |         """ Get open proxies """
239 |         with self.assertRaises(KdlException):
240 |             self.client.get_proxy(2, order_level='vip', sign_type='hmacsha1', format='json', area='云南,广东', pt=2,
241 |                                   f_citycode=1)
242 |         # without a signature (sign_type=None) the call should succeed
243 |         ips = self.client.get_proxy(2, order_level='vip', sign_type=None, format='json', area='云南,广东', pt=2,
244 |                                     f_citycode=1)
245 |         assert isinstance(ips, list) and is_valid_ip_list(ips, ip_port_pattern)
246 | 
247 |     def test_check_ops_valid(self):
248 |         ips = self.client.get_proxy(2, sign_type=None, format='json', area='北京', pt=2)
249 |         with self.assertRaises(KdlException):
250 |             self.client.check_ops_valid(ips)
251 | 
252 |     def test_get_ip_balance(self):
253 |         with self.assertRaises(KdlException):
254 |             self.client.get_ip_balance()
255 | 
256 | 
257 | if __name__ == '__main__':
258 |     suite = unittest.TestLoader().loadTestsFromTestCase(TestTpsOrder)
259 |     unittest.TextTestRunner(verbosity=2).run(suite)
--------------------------------------------------------------------------------
/examples/README.md:
--------------------------------------------------------------------------------
1 | # 快代理SDK - Python
2 |
3 | * [调用API](#调用api)
4 | * [python/api_urllib2.py](#pythonapi_urllib2py)
5 | * [python/api_urllib.py](#pythonapi_urllibpy)
6 | * [python/api_requests.py](#pythonapi_requestspy)
7 | * [Http代理-Python2部分:](#http代理-python2部分)
8 | * [python/proxy_urllib2.py](#pythonproxy_urllib2py)
9 | * [python/proxy_requests.py](#pythonproxy_requestspy)
10 | * [python/selenium_chrome_http.py](#pythonselenium_chrome_httppy)
11 | * [python/selenium_phantomjs_http.py](#pythonselenium_phantomjs_httppy)
12 | * [python/selenium_phantomjs_http_auth.py](#pythonselenium_phantomjs_http_authpy)
13 | * [Http代理-Python3部分:](#http代理-python3部分)
14 | * [python3/http_urllib.py](#python3http_urllibpy)
15 | * [python3/http_requests.py](#python3http_requestspy)
16 | * [Http代理-Scrapy部分:](#http代理-scrapy部分)
17 | * [scrapy/scrapy_proxy/scrapy_proxy/middlewares.py](#scrapyscrapy_proxyscrapy_proxymiddlewarespy)
18 | * [scrapy/scrapy_proxy/scrapy_proxy/settings.py](#scrapyscrapy_proxyscrapy_proxysettingspy)
19 | * [scrapy/scrapy_proxy/scrapy_proxy/spiders/main.py](#scrapyscrapy_proxyscrapy_proxyspidersmainpy)
20 | * [Socks代理-Python2部分:](#socks代理-python2部分)
21 | * [python/socks_requests.py](#pythonsocks_requestspy)
22 | * [python/socks_urllib2.py](#pythonsocks_urllib2py)
23 | * [python/selenium_chrome_sock5.py](#pythonselenium_chrome_sock5py)
24 | * [python/selenium_phantomjs_sock5.py](#pythonselenium_phantomjs_sock5py)
25 | * [python/selenium_phantomjs_sock5_auth.py](#pythonselenium_phantomjs_sock5_authpy)
26 | * [Socks代理-Python3部分:](#socks代理-python3部分)
27 | * [python3/proxy_requests_socks.py](#python3proxy_requests_sockspy)
28 | * [python3/proxy_urllib.py](#python3proxy_urllibpy)
29 | * [隧道代理-Python2部分:](#隧道代理-Python2部分)
30 | * [python/tps_proxy_urllib2.py](#pythontps_proxy_urllib2py)
31 | * [python/tps_proxy_request.py](#pythontps_proxy_requestpy)
32 | * [隧道代理-python3部分](#隧道代理-python3部分)
33 | * [python3/tps_proxy_request.py](#python3tps_proxy_requestpy)
34 | * [python3/tps_proxy_urllib.py](#python3tps_proxy_urllibpy)
35 | * [隧道代理-Scrapy部分](#隧道代理-Scrapy部分)
36 | * [scrapy_proxy/scrapy_proxy/middlewares.py](#scrapy_proxyscrapy_proxymiddlewarespy)
37 | * [scrapy_proxy/scrapy_proxy/settings.py](#scrapy_proxyscrapy_proxysettingspy)
38 | * [scrapy_proxy/scrapy_proxy/spiders/main.py](#scrapy_proxyscrapy_proxyspidersmainpy)
39 |
40 | * [技术支持](#技术支持)
41 |
42 |
43 | ## 调用API
44 |
45 | ### python/api_urllib2.py
46 | 使用urllib2调用api示例
47 | ```
48 | 使用提示: 运行环境要求 python2.6/2.7
49 | ```
50 |
51 | ### python/api_urllib.py
52 | 使用urllib调用api示例
53 | ```
54 | 使用提示: 运行环境要求 python3.x
55 | ```
56 |
57 | ### python/api_requests.py
58 | 使用requests库调用api示例
59 | ```
60 | 使用提示:
61 | * 此样例支持 python 2.6—2.7以及3.3—3.7
62 | * requests不是python原生库,需要安装才能使用: pip install requests
63 | ```
64 |
65 | ## HTTP proxy (Python 2)
66 |
67 | ### python/proxy_urllib2.py
68 | Requests the HTTP proxy server with urllib2; supports both http and https pages. Recommended.
69 | ```
70 | Usage notes: requires Python 2.6/2.7
71 | ```
72 |
73 | ### python/proxy_requests.py
74 | Requests the HTTP proxy server with requests; whitelist authentication works for both http and https pages, while username/password authentication does not support https pages
75 | ```
76 | Usage notes: requests is not part of the Python standard library; install it first: pip install requests
77 | ```
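
A minimal sketch of the requests pattern these samples use (the username, password and extracted IP are placeholders):

```
import requests

proxy_ip = "59.38.241.25:23916"  # an IP extracted from the API (placeholder)
proxies = {
    "http": "http://username:password@%s/" % proxy_ip,
    "https": "http://username:password@%s/" % proxy_ip,
}
print(requests.get("https://dev.kdlapi.com/testproxy", proxies=proxies).text)
```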
78 |
79 | ### python/selenium_chrome_http.py
80 | Requests the HTTP proxy server with selenium and the Chrome driver, using `whitelist` authentication
81 | ```
82 | Usage notes:
83 | * Whitelist-based http/https proxying via Chrome
84 | * Requires `python2.x + selenium + Chrome + Chromedriver + xvfb`
85 | * Install xvfb: `pip install xvfbwrapper`
86 | * Environment setup guide for Ubuntu: https://christopher.su/2015/selenium-chromedriver-ubuntu/
87 | ```
88 |
89 | ### python/selenium_phantomjs_http.py
90 | Requests the HTTP proxy server with selenium and the PhantomJS driver, using `whitelist` authentication
91 | ```
92 | Usage notes:
93 | * Whitelist-based http/https proxying via PhantomJS
94 | * Requires `python2.x + selenium + PhantomJS`
95 | * `selenium + PhantomJS` can be installed directly with pip
96 | ```
97 |
98 | ### python/selenium_phantomjs_http_auth.py
99 | Requests the HTTP proxy server with selenium and the PhantomJS driver, using `username/password` authentication
100 | ```
101 | Usage notes:
102 | * Password-authenticated http/https proxying via PhantomJS
103 | * Requires `python2.x + selenium + PhantomJS`
104 | * `selenium + PhantomJS` can be installed directly with pip
105 | ```
106 |
107 | ## HTTP proxy (Python 3)
108 |
109 | ### python3/http_urllib.py
110 | Requests the HTTP proxy server with the `urllib` library; supports both http and https pages (see the sketch below)
111 | ```
112 | Usage notes:
113 | * The urllib-based sample works for both http and https pages; recommended
114 | * Requires Python 3.x
115 | ```
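
A minimal sketch of the urllib pattern, matching the py3_urllib examples in this repo (proxy address and credentials are placeholders):

```
import urllib.request

proxy_ip = "59.38.241.25:23916"  # an IP extracted from the API (placeholder)
proxies = {
    "http": "http://username:password@%s/" % proxy_ip,
    "https": "http://username:password@%s/" % proxy_ip,
}
opener = urllib.request.build_opener(urllib.request.ProxyHandler(proxies))
print(opener.open("http://dev.kdlapi.com/testproxy").read().decode("utf-8"))
```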
116 |
117 | ### python3/http_requests.py
118 | Requests the HTTP proxy server with the `requests` library; whitelist authentication works for http and https pages, username/password authentication does not support https pages
119 | ```
120 | Usage notes:
121 | * The requests-based sample works for http and https pages with whitelist authentication; username/password authentication does not support https pages
122 | * requests is not part of the Python standard library; install it first: pip install requests
123 | ```
124 |
125 | ## HTTP proxy (Scrapy)
126 | The standard directory layout of a scrapy project (a middleware sketch follows this section's notes):
127 |
128 | ![image](https://github.com/kuaidaili/python-sdk/blob/master/examples/img/jg.png)
129 |
130 | ### scrapy/scrapy_proxy/scrapy_proxy/middlewares.py
131 | Sets the proxy
132 |
133 | ### scrapy/scrapy_proxy/scrapy_proxy/settings.py
134 | Enables the proxy middleware
135 |
136 | ### scrapy/scrapy_proxy/scrapy_proxy/spiders/main.py
137 | Uses the proxy
138 | ```
139 | Usage notes:
140 | * Works for both http and https pages
141 | * scrapy is not part of the Python standard library; install it first: pip install scrapy
142 | * Run the following command in the top-level scrapy_proxy directory to see the result: scrapy crawl main
143 | ```
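
The middleware boils down to setting `request.meta['proxy']` on every request; a minimal sketch using the same placeholders (username, password, ip:port) as the rest of these examples:

```
class ProxyDownloaderMiddleware(object):
    """Downloader middleware that routes every request through the proxy"""

    def process_request(self, request, spider):
        # username/password authentication (private proxy / exclusive proxy)
        request.meta['proxy'] = "http://username:password@ip:port/"
        return None
```

Remember to register the middleware under DOWNLOADER_MIDDLEWARES in settings.py, or it will not take effect.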
144 |
145 | ## SOCKS proxy (Python 2)
146 |
147 | ### python/socks_requests.py
148 | Requests the SOCKS proxy server with the `requests` library (see the sketch below)
149 | ```
150 | Usage notes:
151 | * Works for both http and https pages
152 | * Requires requests >= 2.10.0
153 | * SOCKS support is an extra feature of `requests`; install it first: pip install requests[socks]
154 | ```
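
A minimal sketch of the SOCKS5 pattern with requests (host, port and credentials are placeholders; requires pip install requests[socks]):

```
import requests

proxies = {
    "http": "socks5://username:password@ip:port",
    "https": "socks5://username:password@ip:port",
}
print(requests.get("https://dev.kdlapi.com/testproxy", proxies=proxies).text)
```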
155 |
156 | ### python/socks_urllib2.py
157 | Requests the SOCKS proxy server with the `urllib2` library
158 | ```
159 | Usage notes:
160 | * Requires Python 2.6/2.7
161 | * Works for both http and https pages
162 | * This sample requires PySocks: pip install PySocks
163 | ```
164 |
165 | ### python/selenium_chrome_sock5.py
166 | Requests the SOCKS proxy server with selenium and the Chrome driver, using `whitelist` authentication
167 | ```
168 | Usage notes:
169 | * Requires python2.x + selenium + chrome + chrome driver + xvfb
170 | * Works for any page through a SOCKS5 proxy
171 | * Install xvfb: pip install xvfbwrapper
172 | * Environment setup guide: https://christopher.su/2015/selenium-chromedriver-ubuntu/
173 | ```
174 |
175 | ### python/selenium_phantomjs_sock5.py
176 | Requests the SOCKS proxy server with selenium and the PhantomJS driver, using `whitelist` authentication
177 | ```
178 | Usage notes:
179 | * Requires python2.x
180 | * Works for any page through a SOCKS5 proxy
181 | * This sample requires selenium and PhantomJS
182 | * PhantomJS can be installed directly with pip
183 | ```
184 |
185 | ### python/selenium_phantomjs_sock5_auth.py
186 | Requests the SOCKS proxy server with selenium and the PhantomJS driver, using `username/password` authentication
187 | ```
188 | Usage notes:
189 | * Requires python2.x
190 | * Works for http/https pages through a SOCKS5 proxy
191 | * This sample requires selenium and PhantomJS
192 | * PhantomJS can be installed directly with pip
193 | ```
194 |
195 | ## SOCKS proxy (Python 3)
196 |
197 | ### python3/proxy_requests_socks.py
198 | Requests the SOCKS proxy server with the `requests` library; works for both http and https pages
199 | ```
200 | Usage notes:
201 | * Works for both http and https pages
202 | * Requires requests >= 2.10.0
203 | * SOCKS support is an extra feature of requests; install it first: pip install requests[socks]
204 | ```
205 |
206 | ### python3/proxy_urllib.py
207 | Requests the SOCKS proxy server with the `urllib` library; works for both http and https pages (see the sketch below)
208 | ```
209 | Usage notes:
210 | * Works for both http and https pages
211 | * Install SOCKS support first: pip install pysocks
212 | ```
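
One common way to combine urllib with a SOCKS5 proxy is to route all sockets through PySocks, a sketch assuming PySocks is installed and using placeholder host, port and credentials:

```
import socket

import socks  # pip install pysocks
import urllib.request

# route every socket through the SOCKS5 proxy (placeholders)
socks.set_default_proxy(socks.SOCKS5, "proxy_host", 1080, username="username", password="password")
socket.socket = socks.socksocket

print(urllib.request.urlopen("https://dev.kdlapi.com/testproxy").read().decode("utf-8"))
```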
213 |
214 | ## Tunnel proxy (Python 2)
215 | ### python/tps_proxy_urllib2.py
216 | Requests the tunnel proxy server with urllib2; supports both http and https pages. Recommended.
217 | ```
218 | Usage notes: requires Python 2.6/2.7
219 | ```
220 |
221 | ### python/tps_proxy_request.py
222 | Requests the tunnel proxy server with requests; supports both http and https pages. Recommended (see the sketch below).
223 | ```
224 | Usage notes:
225 | * The requests-based sample supports both http and https pages; recommended
226 | * requests is not part of the Python standard library; install it first: pip install requests
227 | ```
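
A minimal sketch of the tunnel pattern (the tunnel domain and credentials are placeholders from your order):

```
import requests

tunnel = "tpsXXX.kdlapi.com:15818"  # tunnel domain:port (placeholder)
proxies = {
    "http": "http://username:password@%s/" % tunnel,
    "https": "http://username:password@%s/" % tunnel,
}
print(requests.get("https://dev.kdlapi.com/testproxy", proxies=proxies).text)
```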
228 |
229 | ## Tunnel proxy (Python 3)
230 | ### python3/tps_proxy_request.py
231 | Requests the tunnel proxy server with requests; supports both http and https pages. Recommended.
232 | ```
233 | Usage notes:
234 | * The requests-based sample supports both http and https pages; recommended
235 | * requests is not part of the Python standard library; install it first: pip install requests
236 | ```
237 |
238 | ### python3/tps_proxy_urllib.py
239 | Requests the tunnel proxy server with urllib; supports both http and https pages. Recommended.
240 | ```
241 | Usage notes: requires Python 3
242 | ```
243 |
244 | ## Tunnel proxy (Scrapy)
245 | The standard directory layout of a scrapy project:
246 |
247 | ![image](https://github.com/kuaidaili/python-sdk/blob/master/examples/img/jg.png)
248 |
249 | ### scrapy_proxy/scrapy_proxy/middlewares.py
250 | Sets the proxy
251 |
252 | ### scrapy_proxy/scrapy_proxy/settings.py
253 | Enables the proxy middleware
254 |
255 | ### scrapy_proxy/scrapy_proxy/spiders/main.py
256 | Uses the proxy
257 |
258 | ```
259 | Usage notes:
260 | * Works for both http and https pages
261 | * scrapy is not part of the Python standard library; install it first: pip install scrapy
262 | * Run the following command in the top-level scrapy_proxy directory to see the result: scrapy crawl main
263 | ```
264 |
265 |
266 |
267 |
268 | ## Technical support
269 |
270 | If you find any problem with the code, please open an `Issue`.
271 |
272 | `Pull requests` to improve these samples are welcome.
273 |
274 | For more details on calling the API and using the proxy servers, see the [developer guide](https://help.kuaidaili.com/dev/api/).
275 |
276 | * Technical support WeChat: kuaidaili
277 | * Technical support QQ: 800849628
278 |
--------------------------------------------------------------------------------
/examples/api/py2_urllib2.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #-*- coding: utf-8 -*-
3 |
4 | """使用urllib2调用API接口
5 | """
6 |
7 | import urllib2
8 | import zlib
9 |
10 | #api链接
11 | api_url = "http://dev.kdlapi.com/api/getproxy/?secret_id=o1fjh1re9o28876h7c08&signature=xxxxx&num=100&protocol=1&method=2&an_ha=1&sep=1"
12 |
13 | req = urllib2.Request(api_url)
14 | req.add_header("Accept-Encoding", "Gzip") #使用gzip压缩传输数据让访问更快
15 | r = urllib2.urlopen(req)
16 |
17 | print r.code #获取Reponse的返回码
18 | content_encoding = r.headers.getheader("Content-Encoding")
19 | if content_encoding and "gzip" in content_encoding:
20 | print zlib.decompress(r.read(), 16+zlib.MAX_WBITS) #获取页面内容
21 | else:
22 | print r.read() #获取页面内容
--------------------------------------------------------------------------------
/examples/api/py3_requests.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """使用requests请求代理服务器
5 | 请求http和https网页均适用
6 | """
7 |
8 | import requests
9 | import random
10 |
11 | page_url = "http://dev.kdlapi.com/testproxy" # 要访问的目标网页
12 | # API接口,返回格式为json
13 | api_url = "http://dps.kdlapi.com/api/getdps?secret_id=o1fjh1re9o28876h7c08&signature=xxxxx&num=10&format=json&sep=1"
14 |
15 | # API接口返回的ip
16 | proxy_ip = requests.get(api_url).json()['data']['proxy_list']
17 |
18 | # 用户名密码认证(私密代理/独享代理)
19 | username = "username"
20 | password = "password"
21 |
22 | proxies = {
23 | "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {'user': username, 'pwd': password, 'proxy': random.choice(proxy_ip)},
24 | "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {'user': username, 'pwd': password, 'proxy': random.choice(proxy_ip)}
25 | }
26 | headers = {
27 | "Accept-Encoding": "Gzip", # 使用gzip压缩传输数据让访问更快
28 | }
29 | r = requests.get(page_url, proxies=proxies, headers=headers)
30 | print(r.status_code) # 获取Response的返回码
31 |
32 | if r.status_code == 200:
33 | r.enconding = "utf-8" # 设置返回内容的编码
34 | print(r.content) # 获取页面内容
35 |
--------------------------------------------------------------------------------
/examples/api/py3_urllib.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """使用urllib.request调用API接口(在python3中urllib2被改为urllib.request)
5 | """
6 |
7 | import urllib.request
8 | import zlib
9 |
10 | #api链接
11 | api_url = "http://dev.kdlapi.com/api/getproxy/?secret_id=o1fjh1re9o28876h7c08&signature=xxxxx&num=100&protocol=1&method=2&an_ha=1&sep=1"
12 |
13 | headers = {"Accept-Encoding": "Gzip"} #使用gzip压缩传输数据让访问更快
14 |
15 | req = urllib.request.Request(url=api_url, headers=headers)
16 |
17 | # 请求api链接
18 | res = urllib.request.urlopen(req)
19 |
20 | print(res.code) # 获取Reponse的返回码
21 | content_encoding = res.headers.get('Content-Encoding')
22 | if content_encoding and "gzip" in content_encoding:
23 | print(zlib.decompress(res.read(), 16 + zlib.MAX_WBITS).decode('utf-8')) #获取页面内容
24 | else:
25 | print(res.read().decode('utf-8')) #获取页面内容
--------------------------------------------------------------------------------
/examples/http_proxy/phantomjs_demo.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 |
4 | from selenium import webdriver
5 | import time
6 |
7 | # Download the phantomjs package first, then fill in the path to phantomjs.exe (the path must not contain Chinese characters; download: https://mirrors.huaweicloud.com/phantomjs/)
8 | executable_path = '${executable_path}'
9 | service_args = [
10 |     '--proxy=host:port',  # replace with your proxy IP, e.g. 59.38.241.25:23918
11 |     '--proxy-type=http',
12 |     '--proxy-auth=username:password'  # username and password
13 | ]
14 | driver = webdriver.PhantomJS(service_args=service_args, executable_path=executable_path)
15 | driver.get('https://dev.kdlapi.com/testproxy')
16 |
17 | print(driver.page_source)
18 | time.sleep(3)
19 | driver.close()
--------------------------------------------------------------------------------
/examples/http_proxy/proxy_pool.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding: utf-8 -*-
3 |
4 |
5 | import time
6 | import random
7 | import threading
8 |
9 | import requests
10 |
11 |
12 | class ProxyPool():
13 |
14 | def __init__(self, secret_id, secret_token, proxy_count):
15 | self.secret_id = secret_id
16 | self.signature = secret_token
17 |         self.proxy_count = proxy_count if proxy_count < 50 else 50  # total number of IPs the pool maintains; keeping it at 50 or fewer is recommended
18 |         self.alive_proxy_list = []  # list of live IPs
19 |
20 |     def _fetch_proxy_list(self, count):
21 |         """Call the Kuaidaili API to fetch a list of proxy IPs"""
22 |         try:
23 |             res = requests.get("http://dps.kdlapi.com/api/getdps/?secret_id=%s&signature=%s&num=%s&pt=1&sep=1&f_et=1&format=json" % (self.secret_id, self.signature, count))
24 |             return [proxy.split(',') for proxy in res.json().get('data').get('proxy_list')]
25 |         except Exception:
26 |             print("Failed to fetch IPs from the API, please check your order")
27 |             return []
28 |
29 |     def _init_proxy(self):
30 |         """Initialize the IP pool"""
31 |         self.alive_proxy_list = self._fetch_proxy_list(self.proxy_count)
32 |
33 |     def add_alive_proxy(self, add_count):
34 |         """Fetch new IPs into the pool; the argument is the number of IPs to add"""
35 |         self.alive_proxy_list.extend(self._fetch_proxy_list(add_count))
36 |
37 |     def get_proxy(self):
38 |         """Pick an IP from the pool"""
39 |         return random.choice(self.alive_proxy_list)[0] if self.alive_proxy_list else ""
40 |
41 |     def run(self):
42 |         sleep_seconds = 1
43 |         self._init_proxy()
44 |         while True:
45 |             for proxy in self.alive_proxy_list[:]:  # iterate over a copy so removals do not skip items
46 |                 proxy[1] = float(proxy[1]) - sleep_seconds  # proxy[1] is the remaining lifetime of this IP in seconds
47 |                 if proxy[1] <= 3:
48 |                     self.alive_proxy_list.remove(proxy)  # discard the IP once it has only 3s left
49 |             if len(self.alive_proxy_list) < self.proxy_count:
50 |                 self.add_alive_proxy(self.proxy_count - len(self.alive_proxy_list))
51 |             time.sleep(sleep_seconds)
52 |
53 |     def start(self):
54 |         """Start a child thread that keeps the IP pool fresh"""
55 |         t = threading.Thread(target=self.run)
56 |         t.daemon = True  # daemon thread: the main thread does not wait for it, and it dies when the main thread exits
57 |         t.start()
58 |
59 |
60 | def parse_url(proxy):
61 |     # username/password authentication (private proxy / exclusive proxy)
62 |     username = "username"
63 |     password = "password"
64 |     proxies = {
65 |         "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": proxy},
66 |         "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": proxy}
67 |     }
68 |
69 |     # whitelist authentication (add your IP to the whitelist first)
70 |     # proxies = {
71 |     #     "http": "http://%(proxy)s/" % {"proxy": proxy},
72 |     #     "https": "http://%(proxy)s/" % {"proxy": proxy}
73 |     # }
74 |
75 |     # target page to fetch
76 |     target_url = "https://dev.kdlapi.com/testproxy"
77 |     # send the request through the proxy IP
78 |     response = requests.get(target_url, proxies=proxies)
79 |     # print the page content
80 |     if response.status_code == 200:
81 |         print(response.text)
82 |
83 |
84 | if __name__ == '__main__':
85 |     proxy_pool = ProxyPool('o1fjh1re9o28876h7c08', 'xxxxxx', 30)  # order SecretId, signature (secret_token), number of IPs to maintain
86 |     proxy_pool.start()
87 |     time.sleep(1)  # wait for the pool to initialize
88 |
89 |     proxy = proxy_pool.get_proxy()  # take an IP from the pool
90 |     if proxy:
91 |         parse_url(proxy)
--------------------------------------------------------------------------------
/examples/http_proxy/py2_requests.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | 使用requests请求代理服务器
6 | 请求http和https网页均适用
7 | """
8 |
9 | import requests
10 |
11 | # proxy extraction API; fetch 1 proxy IP
12 | api_url = "http://dps.kdlapi.com/api/getdps/?secret_id=o1fjh1re9o28876h7c08&signature=xxxxx&num=1&pt=1&sep=1"
13 |
14 | # proxy IP returned by the API
15 | proxy_ip = requests.get(api_url).text
16 |
17 | # username/password authentication (private proxy / exclusive proxy)
18 | username = "username"
19 | password = "password"
20 | proxies = {
21 |     "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": proxy_ip},
22 |     "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": proxy_ip}
23 | }
24 |
25 | # whitelist authentication (add your IP to the whitelist first)
26 | # proxies = {
27 | #     "http": "http://%(proxy)s/" % {"proxy": proxy_ip},
28 | #     "https": "http://%(proxy)s/" % {"proxy": proxy_ip}
29 | # }
30 |
31 | # target page to fetch
32 | target_url = "https://dev.kdlapi.com/testproxy"
33 |
34 | # send the request through the proxy IP
35 | response = requests.get(target_url, proxies=proxies)
36 |
37 | # print the page content
38 | if response.status_code == 200:
39 |     print response.text
--------------------------------------------------------------------------------
/examples/http_proxy/py2_urllib2.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | 使用urllib2请求代理服务器
6 | 请求http和https网页均适用
7 | """
8 |
9 | import urllib2
10 | import ssl
11 |
12 | # disable certificate verification globally to avoid errors on https pages
13 | ssl._create_default_https_context = ssl._create_unverified_context
14 |
15 | # proxy extraction API; fetch 1 proxy IP
16 | api_url = "http://dps.kdlapi.com/api/getdps/?secret_id=o1fjh1re9o28876h7c08&signature=xxxxx&num=1&pt=1&sep=1"
17 |
18 | # proxy IP returned by the API
19 | proxy_ip = urllib2.urlopen(api_url).read()
20 |
21 | # username/password authentication (private proxy / exclusive proxy)
22 | username = "username"
23 | password = "password"
24 | proxies = {
25 |     "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": proxy_ip},
26 |     "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": proxy_ip}
27 | }
28 |
29 | # whitelist authentication (add your IP to the whitelist first)
30 | # proxies = {
31 | #     "http": "http://%(proxy)s/" % {"proxy": proxy_ip},
32 | #     "https": "http://%(proxy)s/" % {"proxy": proxy_ip}
33 | # }
34 |
35 | # target page to fetch
36 | target_url = "https://dev.kdlapi.com/testproxy"
37 |
38 | # send the request through the proxy IP
39 | proxy_support = urllib2.ProxyHandler(proxies)
40 | opener = urllib2.build_opener(proxy_support)
41 | urllib2.install_opener(opener)
42 | response = urllib2.urlopen(target_url)
43 |
44 | # print the page content
45 | if response.code == 200:
46 |     print response.read()
--------------------------------------------------------------------------------
/examples/http_proxy/py3_aiohttp.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | 使用aiohttp请求代理服务器
6 | 请求http和https网页均适用
7 |
8 | """
9 | import random
10 | import asyncio
11 |
12 |
13 | import aiohttp
14 | import requests
15 |
16 | page_url = "http://icanhazip.com/"  # target page to fetch
17 |
18 | # API endpoint; the response format is json
19 | api_url = "http://dps.kdlapi.com/api/getdps/?secret_id=o1fjh1re9o28876h7c08&signature=xxxxx&num=5&pt=1&format=json&sep=1"
20 |
21 | # proxy_list returned by the API
22 | proxy_list = requests.get(api_url).json().get('data').get('proxy_list')
23 |
24 | # username/password authentication (private proxy / exclusive proxy)
25 | username = "username"
26 | password = "password"
27 |
28 | proxy_auth = aiohttp.BasicAuth(username, password)
29 |
30 |
31 | async def fetch(url):
32 | async with aiohttp.ClientSession() as session:
33 | async with session.get(url, proxy="http://" + random.choice(proxy_list), proxy_auth=proxy_auth) as resp:
34 | content = await resp.read()
35 | print(f"status_code: {resp.status}, content: {content}")
36 |
37 |
38 | def run():
39 | loop = asyncio.get_event_loop()
40 |     # issue 5 requests concurrently
41 | tasks = [fetch(page_url) for _ in range(5)]
42 | loop.run_until_complete(asyncio.wait(tasks))
43 |
44 |
45 | if __name__ == '__main__':
46 | run()
--------------------------------------------------------------------------------
/examples/http_proxy/py3_feapder.py:
--------------------------------------------------------------------------------
1 | import feapder
2 |
3 |
4 | class Py3Feapder(feapder.AirSpider):
5 | def start_requests(self):
6 | yield feapder.Request("https://dev.kdlapi.com/testproxy")
7 |
8 | def download_midware(self, request):
9 |         # proxy extraction API; fetch 1 proxy IP
10 |         api_url = "http://dps.kdlapi.com/api/getdps/?secret_id=o1fjh1re9o28876h7c08&signature=xxxxx&num=1&pt=1&sep=1"
11 |
12 |         # proxy IP returned by the API
13 |         proxy_ip = feapder.Request(api_url).get_response().text
14 |
15 |         # username/password authentication (private proxy / exclusive proxy)
16 |         username = "username"
17 |         password = "password"
18 |         proxies = {
19 |             "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": proxy_ip},
20 |             "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": proxy_ip}
21 |         }
22 |
23 |         # whitelist authentication (add your IP to the whitelist first)
24 |         # proxies = {
25 |         #     "http": "http://%(proxy)s/" % {"proxy": proxy_ip},
26 |         #     "https": "http://%(proxy)s/" % {"proxy": proxy_ip}
27 |         # }
28 |
29 | request.proxies = proxies
30 | return request
31 |
32 | def parse(self, request, response):
33 | print(response.text)
34 |
35 |
36 | if __name__ == "__main__":
37 | Py3Feapder().start()
38 |
--------------------------------------------------------------------------------
/examples/http_proxy/py3_httpx.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | 使用requests请求代理服务器
6 | 请求http和https网页均适用
7 | """
8 |
9 | import random
10 | import asyncio
11 |
12 | import httpx
13 | import requests
14 |
15 | page_url = "http://icanhazip.com/"  # target page to fetch
16 |
17 | # API endpoint; the response format is json
18 | api_url = "http://dps.kdlapi.com/api/getdps/?secret_id=o1fjh1re9o28876h7c08&signature=xxxxx&num=10&pt=1&format=json&sep=1"
19 |
20 | # proxy_list returned by the API
21 | proxy_list = requests.get(api_url).json().get('data').get('proxy_list')
22 |
23 | # username/password authentication (private proxy / exclusive proxy)
24 | username = "username"
25 | password = "password"
26 |
27 |
28 | async def fetch(url):
29 | proxies = {
30 | "http": f"http://{username}:{password}@{random.choice(proxy_list)}",
31 | "https": f"http://{username}:{password}@{random.choice(proxy_list)}",
32 | }
33 | async with httpx.AsyncClient(proxies=proxies, timeout=10) as client:
34 | resp = await client.get(url)
35 | print(f"status_code: {resp.status_code}, content: {resp.content}")
36 |
37 |
38 | def run():
39 | loop = asyncio.get_event_loop()
40 |     # issue 5 requests concurrently
41 | tasks = [fetch(page_url) for _ in range(5)]
42 | loop.run_until_complete(asyncio.wait(tasks))
43 |
44 |
45 | if __name__ == '__main__':
46 | run()
--------------------------------------------------------------------------------
/examples/http_proxy/py3_playwright.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding: utf-8 -*-
3 | """
4 | 使用requests请求代理服务器
5 | 使用Playwright添加IP代理
6 | """
7 | import requests
8 | from playwright.sync_api import sync_playwright
9 |
10 |
11 | # fetch a proxy from the API
12 | def get_proxy(api, params):
13 | r = requests.get(api, params=params)
14 | if r.status_code == 200:
15 | return r.text
16 | else:
17 | return None
18 |
19 |
20 | # route Playwright through a private proxy
21 | def playwright_use_proxy(proxy_server):
22 |     if not proxy_server:
23 |         print('Failed to get a proxy')
24 | return
25 | with sync_playwright() as p:
26 | browser = p.chromium.launch(proxy={"server": f'http://{proxy_server}'})
27 | page = browser.new_page()
28 | page.goto("https://dev.kdlapi.com/testproxy")
29 | content = page.content()
30 | browser.close()
31 | return content
32 |
33 |
34 | def main():
35 |     # API request parameters
36 |     params = {
37 |         'num': 1,
38 |         'pt': 1,
39 |         'sep': 1,
40 |         'secret_id': 'your secret_id',
41 |         'signature': 'your signature',
42 | }
43 | api = 'https://dps.kdlapi.com/api/getdps/'
44 | proxy = get_proxy(api, params)
45 | content = playwright_use_proxy(proxy)
46 | print(content)
47 |
48 |
49 | if __name__ == '__main__':
50 | main()
51 |
--------------------------------------------------------------------------------
/examples/http_proxy/py3_pyppeteer.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding: utf-8 -*-
3 |
4 | """
5 | 请求http和https网页均适用
6 | """
7 |
8 | import asyncio
9 |
10 | import requests
11 | from pyppeteer import launch
12 |
13 | # proxy extraction API; fetch 1 proxy IP
14 | api_url = "http://dps.kdlapi.com/api/getdps/?secret_id=o1fjh1re9o28876h7c08&signature=xxxxx&num=1&pt=1&sep=1"
15 | # proxy IP returned by the API
16 | proxy_ip = requests.get(api_url).text
17 | proxy = "http://" + proxy_ip
18 |
19 |
20 | def accounts():
21 |     # username/password authentication (private proxy / exclusive proxy)
22 |     username = "username"
23 |     password = "password"
24 |     account = {"username": username, "password": password}
25 |     return account
26 |
27 |
28 | async def main():
29 |     # target page to fetch
30 |     target_url = "https://dev.kdlapi.com/testproxy"
31 |
32 |     browser = await launch({'headless': False, 'args': ['--disable-infobars', '--proxy-server=' + proxy]})
33 |     page = await browser.newPage()
34 |     await page.authenticate(accounts())  # for whitelist authentication, comment out this line (set the whitelist first)
35 |     await page.setViewport({'width': 1920, 'height': 1080})
36 |     # send the request through the proxy IP
37 |     await page.goto(target_url)
38 | await asyncio.sleep(209)
39 | await browser.close()
40 |
41 | asyncio.get_event_loop().run_until_complete(main())
42 |
--------------------------------------------------------------------------------
/examples/http_proxy/py3_requests.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | 使用requests请求代理服务器
6 | 请求http和https网页均适用
7 | """
8 |
9 | import requests
10 |
11 | # proxy extraction API; fetch 1 proxy IP
12 | api_url = "http://dps.kdlapi.com/api/getdps/?secret_id=o1fjh1re9o28876h7c08&signature=xxxxx&num=1&pt=1&sep=1"
13 |
14 | # proxy IP returned by the API
15 | proxy_ip = requests.get(api_url).text
16 |
17 | # username/password authentication (private proxy / exclusive proxy)
18 | username = "username"
19 | password = "password"
20 | proxies = {
21 |     "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": proxy_ip},
22 |     "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": proxy_ip}
23 | }
24 |
25 | # whitelist authentication (add your IP to the whitelist first)
26 | # proxies = {
27 | #     "http": "http://%(proxy)s/" % {"proxy": proxy_ip},
28 | #     "https": "http://%(proxy)s/" % {"proxy": proxy_ip}
29 | # }
30 |
31 | # target page to fetch
32 | target_url = "https://dev.kdlapi.com/testproxy"
33 |
34 | # send the request through the proxy IP
35 | response = requests.get(target_url, proxies=proxies)
36 |
37 | # print the page content
38 | if response.status_code == 200:
39 |     print(response.text)
--------------------------------------------------------------------------------
/examples/http_proxy/py3_scrapy/scrapy.cfg:
--------------------------------------------------------------------------------
1 | # Automatically created by: scrapy startproject
2 | #
3 | # For more information about the [deploy] section see:
4 | # https://scrapyd.readthedocs.io/en/latest/deploy.html
5 |
6 | [settings]
7 | default = tutorial.settings
8 |
9 | [deploy]
10 | #url = http://localhost:6800/
11 | project = tutorial
12 |
--------------------------------------------------------------------------------
/examples/http_proxy/py3_scrapy/tutorial/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuaidaili/python-sdk/87d895b68c3ec1aed905d524d02f842ae6426468/examples/http_proxy/py3_scrapy/tutorial/__init__.py
--------------------------------------------------------------------------------
/examples/http_proxy/py3_scrapy/tutorial/items.py:
--------------------------------------------------------------------------------
1 | # Define here the models for your scraped items
2 | #
3 | # See documentation in:
4 | # https://docs.scrapy.org/en/latest/topics/items.html
5 |
6 | import scrapy
7 |
8 |
9 | class TutorialItem(scrapy.Item):
10 | # define the fields for your item here like:
11 | # name = scrapy.Field()
12 | pass
13 |
--------------------------------------------------------------------------------
/examples/http_proxy/py3_scrapy/tutorial/middlewares.py:
--------------------------------------------------------------------------------
1 | # Define here the models for your spider middleware
2 | #
3 | # See documentation in:
4 | # https://docs.scrapy.org/en/latest/topics/spider-middleware.html
5 |
6 | from scrapy import signals
7 | from .myextend import pro
8 | import random
9 | # useful for handling different item types with a single interface
10 | from itemadapter import is_item, ItemAdapter
11 |
12 |
13 | class TutorialSpiderMiddleware:
14 | # Not all methods need to be defined. If a method is not defined,
15 | # scrapy acts as if the spider middleware does not modify the
16 | # passed objects.
17 |
18 | @classmethod
19 | def from_crawler(cls, crawler):
20 | # This method is used by Scrapy to create your spiders.
21 | s = cls()
22 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
23 | return s
24 |
25 | def process_spider_input(self, response, spider):
26 | # Called for each response that goes through the spider
27 | # middleware and into the spider.
28 |
29 | # Should return None or raise an exception.
30 | return None
31 |
32 | def process_spider_output(self, response, result, spider):
33 | # Called with the results returned from the Spider, after
34 | # it has processed the response.
35 |
36 | # Must return an iterable of Request, or item objects.
37 | for i in result:
38 | yield i
39 |
40 | def process_spider_exception(self, response, exception, spider):
41 | # Called when a spider or process_spider_input() method
42 | # (from other spider middleware) raises an exception.
43 |
44 | # Should return either None or an iterable of Request or item objects.
45 | pass
46 |
47 | def process_start_requests(self, start_requests, spider):
48 | # Called with the start requests of the spider, and works
49 | # similarly to the process_spider_output() method, except
50 | # that it doesn’t have a response associated.
51 |
52 | # Must return only requests (not items).
53 | for r in start_requests:
54 | yield r
55 |
56 | def spider_opened(self, spider):
57 | spider.logger.info('Spider opened: %s' % spider.name)
58 |
59 |
60 | class TutorialDownloaderMiddleware:
61 | # Not all methods need to be defined. If a method is not defined,
62 | # scrapy acts as if the downloader middleware does not modify the
63 | # passed objects.
64 |
65 | @classmethod
66 | def from_crawler(cls, crawler):
67 | # This method is used by Scrapy to create your spiders.
68 | s = cls()
69 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
70 | return s
71 |
72 | def process_request(self, request, spider):
73 | # Called for each request that goes through the downloader
74 | # middleware.
75 |
76 | # Must either:
77 | # - return None: continue processing this request
78 | # - or return a Response object
79 | # - or return a Request object
80 | # - or raise IgnoreRequest: process_exception() methods of
81 | # installed downloader middleware will be called
82 | return None
83 |
84 | def process_response(self, request, response, spider):
85 | # Called with the response returned from the downloader.
86 |
87 | # Must either;
88 | # - return a Response object
89 | # - return a Request object
90 | # - or raise IgnoreRequest
91 | return response
92 |
93 | def process_exception(self, request, exception, spider):
94 | # Called when a download handler or a process_request()
95 | # (from other downloader middleware) raises an exception.
96 |
97 | # Must either:
98 | # - return None: continue processing this exception
99 | # - return a Response object: stops process_exception() chain
100 | # - return a Request object: stops process_exception() chain
101 | pass
102 |
103 | def spider_opened(self, spider):
104 | spider.logger.info('Spider opened: %s' % spider.name)
105 |
106 |
107 | class ProxyDownloaderMiddleware:
108 |
109 | def process_request(self, request, spider):
110 | proxy = random.choice(pro.proxy_list)
111 |
112 |         # username/password authentication (private proxy / exclusive proxy)
113 |         username = "username"
114 |         password = "password"
115 |         request.meta['proxy'] = "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": proxy}
116 |
117 |         # whitelist authentication (private proxy / exclusive proxy)
118 |         # request.meta['proxy'] = "http://%(proxy)s/" % {"proxy": proxy}
119 | return None
120 |
121 |
--------------------------------------------------------------------------------
/examples/http_proxy/py3_scrapy/tutorial/myextend.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | import time
4 | import threading
5 |
6 | import requests
7 | from scrapy import signals
8 |
9 | # API for extracting proxy IPs
10 | api_url = 'http://dps.kdlapi.com/api/getdps/?secret_id=o1fjh1re9o28876h7c08&signature=xxxxx&num=10&pt=1&format=json&sep=1'
11 | foo = True
12 |
13 | class Proxy:
14 |
15 | def __init__(self, ):
16 | self._proxy_list = requests.get(api_url).json().get('data').get('proxy_list')
17 |
18 | @property
19 | def proxy_list(self):
20 | return self._proxy_list
21 |
22 | @proxy_list.setter
23 | def proxy_list(self, list):
24 | self._proxy_list = list
25 |
26 |
27 | pro = Proxy()
28 | print(pro.proxy_list)
29 |
30 |
31 | class MyExtend:
32 |
33 | def __init__(self, crawler):
34 | self.crawler = crawler
35 |         # bind custom methods to scrapy signals so they start and stop with the spider engine
36 |         # scrapy signals docs: https://www.osgeo.cn/scrapy/topics/signals.html
37 |         # scrapy custom extension docs: https://www.osgeo.cn/scrapy/topics/extensions.html
38 | crawler.signals.connect(self.start, signals.engine_started)
39 | crawler.signals.connect(self.close, signals.spider_closed)
40 |
41 | @classmethod
42 | def from_crawler(cls, crawler):
43 | return cls(crawler)
44 |
45 | def start(self):
46 | t = threading.Thread(target=self.extract_proxy)
47 | t.start()
48 |
49 | def extract_proxy(self):
50 | while foo:
51 | pro.proxy_list = requests.get(api_url).json().get('data').get('proxy_list')
52 |             # fetch a fresh batch of IPs every 15 seconds
53 | time.sleep(15)
54 |
55 | def close(self):
56 | global foo
57 | foo = False
--------------------------------------------------------------------------------
/examples/http_proxy/py3_scrapy/tutorial/pipelines.py:
--------------------------------------------------------------------------------
1 | # Define your item pipelines here
2 | #
3 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting
4 | # See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
5 |
6 |
7 | # useful for handling different item types with a single interface
8 | from itemadapter import ItemAdapter
9 |
10 |
11 | class TutorialPipeline:
12 | def process_item(self, item, spider):
13 | return item
14 |
--------------------------------------------------------------------------------
/examples/http_proxy/py3_scrapy/tutorial/settings.py:
--------------------------------------------------------------------------------
1 | # Scrapy settings for tutorial project
2 | #
3 | # For simplicity, this file contains only settings considered important or
4 | # commonly used. You can find more settings consulting the documentation:
5 | #
6 | # https://docs.scrapy.org/en/latest/topics/settings.html
7 | # https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
8 | # https://docs.scrapy.org/en/latest/topics/spider-middleware.html
9 |
10 | BOT_NAME = 'tutorial'
11 |
12 | SPIDER_MODULES = ['tutorial.spiders']
13 | NEWSPIDER_MODULE = 'tutorial.spiders'
14 |
15 |
16 | # Crawl responsibly by identifying yourself (and your website) on the user-agent
17 | #USER_AGENT = 'tutorial (+http://www.yourdomain.com)'
18 |
19 | # Obey robots.txt rules
20 | ROBOTSTXT_OBEY = False
21 |
22 | # Configure maximum concurrent requests performed by Scrapy (default: 16)
23 | #CONCURRENT_REQUESTS = 32
24 |
25 | # Configure a delay for requests for the same website (default: 0)
26 | # See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
27 | # See also autothrottle settings and docs
28 | #DOWNLOAD_DELAY = 3
29 | # The download delay setting will honor only one of:
30 | #CONCURRENT_REQUESTS_PER_DOMAIN = 16
31 | #CONCURRENT_REQUESTS_PER_IP = 16
32 |
33 | # Disable cookies (enabled by default)
34 | #COOKIES_ENABLED = False
35 |
36 | # Disable Telnet Console (enabled by default)
37 | #TELNETCONSOLE_ENABLED = False
38 |
39 | # Override the default request headers:
40 | #DEFAULT_REQUEST_HEADERS = {
41 | # 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
42 | # 'Accept-Language': 'en',
43 | #}
44 |
45 | # Enable or disable spider middlewares
46 | # See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
47 | #SPIDER_MIDDLEWARES = {
48 | # 'tutorial.middlewares.TutorialSpiderMiddleware': 543,
49 | #}
50 |
51 | # Enable or disable downloader middlewares
52 | # See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
53 | DOWNLOADER_MIDDLEWARES = {
54 | 'tutorial.middlewares.ProxyDownloaderMiddleware': 100,
55 | }
56 | LOG_LEVEL = 'WARNING'
57 | # Enable or disable extensions
58 | # See https://docs.scrapy.org/en/latest/topics/extensions.html
59 | EXTENSIONS = {
60 | 'tutorial.myextend.MyExtend': 300,
61 | }
62 |
63 | # Configure item pipelines
64 | # See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
65 | #ITEM_PIPELINES = {
66 | # 'tutorial.pipelines.TutorialPipeline': 300,
67 | #}
68 |
69 | # Enable and configure the AutoThrottle extension (disabled by default)
70 | # See https://docs.scrapy.org/en/latest/topics/autothrottle.html
71 | #AUTOTHROTTLE_ENABLED = True
72 | # The initial download delay
73 | #AUTOTHROTTLE_START_DELAY = 5
74 | # The maximum download delay to be set in case of high latencies
75 | #AUTOTHROTTLE_MAX_DELAY = 60
76 | # The average number of requests Scrapy should be sending in parallel to
77 | # each remote server
78 | #AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
79 | # Enable showing throttling stats for every response received:
80 | #AUTOTHROTTLE_DEBUG = False
81 |
82 | # Enable and configure HTTP caching (disabled by default)
83 | # See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
84 | #HTTPCACHE_ENABLED = True
85 | #HTTPCACHE_EXPIRATION_SECS = 0
86 | #HTTPCACHE_DIR = 'httpcache'
87 | #HTTPCACHE_IGNORE_HTTP_CODES = []
88 | #HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
89 |
--------------------------------------------------------------------------------
/examples/http_proxy/py3_scrapy/tutorial/spiders/__init__.py:
--------------------------------------------------------------------------------
1 | # This package will contain the spiders of your Scrapy project
2 | #
3 | # Please refer to the documentation for information on how to create and manage
4 | # your spiders.
5 |
--------------------------------------------------------------------------------
/examples/http_proxy/py3_scrapy/tutorial/spiders/kdl_spiders.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | import scrapy
4 |
5 | class KdlSpider(scrapy.spiders.Spider):
6 | name = "kdl"
7 |
8 | def start_requests(self):
9 | url = "https://dev.kdlapi.com/testproxy"
10 | yield scrapy.Request(url, callback=self.parse)
11 |
12 | def parse(self, response):
13 | print(response.status)
--------------------------------------------------------------------------------
/examples/http_proxy/py3_urllib.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | 使用urllib请求代理服务器
6 | 请求http和https网页均适用
7 | """
8 |
9 | import urllib.request
10 | import ssl
11 |
12 | # disable certificate verification globally to avoid errors on https pages
13 | ssl._create_default_https_context = ssl._create_unverified_context
14 |
15 | # proxy extraction API; fetch 1 proxy IP
16 | api_url = "http://dps.kdlapi.com/api/getdps/?secret_id=o1fjh1re9o28876h7c08&signature=xxxxx&num=1&pt=1&sep=1"
17 |
18 | # proxy IP returned by the API
19 | proxy_ip = urllib.request.urlopen(api_url).read().decode('utf-8')
20 |
21 | # username/password authentication (private proxy / exclusive proxy)
22 | username = "username"
23 | password = "password"
24 | proxies = {
25 |     "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": proxy_ip},
26 |     "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": proxy_ip}
27 | }
28 |
29 | # whitelist authentication (add your IP to the whitelist first)
30 | # proxies = {
31 | #     "http": "http://%(proxy)s/" % {"proxy": proxy_ip},
32 | #     "https": "http://%(proxy)s/" % {"proxy": proxy_ip}
33 | # }
34 |
35 | # target page to fetch
36 | target_url = "https://dev.kdlapi.com/testproxy"
37 |
38 | # send the request through the proxy IP
39 | proxy_support = urllib.request.ProxyHandler(proxies)
40 | opener = urllib.request.build_opener(proxy_support)
41 | urllib.request.install_opener(opener)
42 | response = urllib.request.urlopen(target_url)
43 |
44 | # print the page content
45 | if response.code == 200:
46 |     print(response.read().decode('utf-8'))
--------------------------------------------------------------------------------
/examples/http_proxy/py3_websocket.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | 使用HTTP代理发送websocket请求
6 | """
7 | import gzip
8 | import zlib
9 |
10 | import websocket
11 |
12 | OPCODE_DATA = (websocket.ABNF.OPCODE_TEXT, websocket.ABNF.OPCODE_BINARY)
13 |
14 | url = "ws://echo.websocket.org/"
15 |
16 | proxies = {
17 | "http_proxy_host": "59.38.241.25",
18 | "http_proxy_port": 23916,
19 | "http_proxy_auth": ("username", "password"),
20 | }
21 |
22 | ws = websocket.create_connection(url, **proxies)
23 |
24 |
25 | def recv():
26 | try:
27 | frame = ws.recv_frame()
28 | except websocket.WebSocketException:
29 | return websocket.ABNF.OPCODE_CLOSE, None
30 | if not frame:
31 | raise websocket.WebSocketException("Not a valid frame %s" % frame)
32 | elif frame.opcode in OPCODE_DATA:
33 | return frame.opcode, frame.data
34 | elif frame.opcode == websocket.ABNF.OPCODE_CLOSE:
35 | ws.send_close()
36 | return frame.opcode, None
37 | elif frame.opcode == websocket.ABNF.OPCODE_PING:
38 | ws.pong(frame.data)
39 | return frame.opcode, frame.data
40 |
41 | return frame.opcode, frame.data
42 |
43 |
44 | def recv_ws():
45 | opcode, data = recv()
46 | if opcode == websocket.ABNF.OPCODE_CLOSE:
47 | return
48 | if opcode == websocket.ABNF.OPCODE_TEXT and isinstance(data, bytes):
49 | data = str(data, "utf-8")
50 | if isinstance(data, bytes) and len(data) > 2 and data[:2] == b'\037\213': # gzip magick
51 | try:
52 | data = "[gzip] " + str(gzip.decompress(data), "utf-8")
53 | except Exception:
54 | pass
55 | elif isinstance(data, bytes):
56 | try:
57 | data = "[zlib] " + str(zlib.decompress(data, -zlib.MAX_WBITS), "utf-8")
58 | except Exception:
59 | pass
60 | if isinstance(data, bytes):
61 | data = repr(data)
62 |
63 | print("< " + data)
64 |
65 |
66 | def main():
67 | print("Press Ctrl+C to quit")
68 | while True:
69 | message = input("> ")
70 | ws.send(message)
71 | recv_ws()
72 |
73 |
74 | if __name__ == "__main__":
75 | try:
76 | main()
77 | except KeyboardInterrupt:
78 | print('\nbye')
79 | except Exception as e:
80 | print(e)
--------------------------------------------------------------------------------
/examples/http_proxy/py3_websocket_short.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- encoding: utf-8 -*-
3 |
4 | """
5 | Send websocket requests through an HTTP proxy (WebSocketApp version)
6 | """
7 | import ssl
8 | import websocket
9 |
10 |
11 | def on_message(ws, message):
12 | print(message)
13 |
14 |
15 | def on_error(ws, error):
16 | print(error)
17 |
18 |
19 | def on_open(ws):
20 |     data = '{}'  # json parameters to send to the target site, e.g. {"type":"web","data":{"_id":"xxxx"}}
21 | ws.send(data)
22 |
23 |
24 | def on_close(*args):
25 | print("### closed ###")
26 |
27 |
28 | proxies = {
29 | "http_proxy_host": "59.38.241.25",
30 | "http_proxy_port": 23916,
31 | "http_proxy_auth": ("username", "password"),
32 | }
33 |
34 |
35 | def start():
36 | websocket.enableTrace(True)
37 |     target_url = 'ws://127.0.0.1:5000/socket.io/?EIO=4&transport=websocket'  # replace with your target site
38 | ws = websocket.WebSocketApp(
39 | url = target_url,
40 | header = [
41 | "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36"
42 | ],
43 | on_message=on_message,
44 | on_error=on_error,
45 | on_close=on_close,
46 | )
47 | ws.on_open = on_open
48 | ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE}, **proxies)
49 |
50 |
51 | if __name__ == "__main__":
52 | start()
53 |
--------------------------------------------------------------------------------
/examples/http_proxy/selenium_chrome_username_password.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 |
4 | from selenium import webdriver
5 | import string
6 | import zipfile
7 | import time
8 |
9 |
10 | def create_proxyauth_extension(proxy_host, proxy_port, proxy_username, proxy_password, scheme='http', plugin_path=None):
11 |     """Proxy authentication plugin
12 |
13 |     args:
14 |         proxy_host (str): your proxy address or domain
15 |         proxy_port (int): proxy port number
16 |         # username/password authentication (private proxy / exclusive proxy)
17 |         proxy_username (str): username
18 |         proxy_password (str): password
19 |     kwargs:
20 |         scheme (str): proxy scheme, http by default
21 |         plugin_path (str): absolute path of the extension
22 |
23 |     return str -> plugin_path
24 |     """
25 |
26 | if plugin_path is None:
27 | plugin_path = 'vimm_chrome_proxyauth_plugin.zip'
28 |
29 | manifest_json = """
30 | {
31 | "version": "1.0.0",
32 | "manifest_version": 2,
33 | "name": "Chrome Proxy",
34 | "permissions": [
35 | "proxy",
36 | "tabs",
37 | "unlimitedStorage",
38 | "storage",
39 |             "<all_urls>",
40 | "webRequest",
41 | "webRequestBlocking"
42 | ],
43 | "background": {
44 | "scripts": ["background.js"]
45 | },
46 | "minimum_chrome_version":"22.0.0"
47 | }
48 | """
49 |
50 | background_js = string.Template(
51 | """
52 | var config = {
53 | mode: "fixed_servers",
54 | rules: {
55 | singleProxy: {
56 | scheme: "${scheme}",
57 | host: "${host}",
58 | port: parseInt(${port})
59 | },
60 | bypassList: ["foobar.com"]
61 | }
62 | };
63 |
64 | chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});
65 |
66 | function callbackFn(details) {
67 | return {
68 | authCredentials: {
69 | username: "${username}",
70 | password: "${password}"
71 | }
72 | };
73 | }
74 |
75 | chrome.webRequest.onAuthRequired.addListener(
76 | callbackFn,
77 |         {urls: ["<all_urls>"]},
78 | ['blocking']
79 | );
80 | """
81 | ).substitute(
82 | host=proxy_host,
83 | port=proxy_port,
84 | username=proxy_username,
85 | password=proxy_password,
86 | scheme=scheme,
87 | )
88 | with zipfile.ZipFile(plugin_path, 'w') as zp:
89 | zp.writestr("manifest.json", manifest_json)
90 | zp.writestr("background.js", background_js)
91 | return plugin_path
92 |
93 |
94 | proxyauth_plugin_path = create_proxyauth_extension(
95 |     proxy_host="${proxy_ip}",  # proxy IP
96 |     proxy_port="${proxy_port}",  # port number
97 |     # username/password (private proxy / exclusive proxy)
98 |     proxy_username="${username}",
99 |     proxy_password="${password}"
100 | )
101 |
102 |
103 | options = webdriver.ChromeOptions()
104 | options.add_extension(proxyauth_plugin_path)
105 | # ${chromedriver_path}: path to the chromedriver executable
106 | driver = webdriver.Chrome(executable_path="${chromedriver_path}", options=options)
107 | driver.get("https://dev.kdlapi.com/testproxy")
108 |
109 | # print the page content
110 | print(driver.page_source)
111 |
112 | # wait 3 seconds, then close the current window (quits if it is the last one)
113 | time.sleep(3)
114 | driver.close()
--------------------------------------------------------------------------------
/examples/http_proxy/selenium_chrome_whitelist.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 |
4 | from selenium import webdriver
5 | import time
6 |
7 | options = webdriver.ChromeOptions()
8 | options.add_argument('--proxy-server=http://${ip:port}')  # proxy IP:port
9 | # ${chromedriver_path}: path to the chromedriver executable
10 | driver = webdriver.Chrome(executable_path="${chromedriver_path}", options=options)
11 | driver.get("https://dev.kdlapi.com/testproxy")
12 |
13 | # print the page content
14 | print(driver.page_source)
15 |
16 | # wait 3 seconds, then close the current window (quits if it is the last one)
17 | time.sleep(3)
18 | driver.close()
--------------------------------------------------------------------------------
/examples/http_proxy/selenium_firefox_username_password.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 |
4 | import time
5 |
6 | from seleniumwire import webdriver # pip install selenium-wire
7 |
8 | username = 'username'  # replace with your username and password
9 | password = 'password'
10 | proxy_ip = '59.38.241.25:23916'  # replace with the proxy IP you extracted
11 | options = {
12 | 'proxy': {
13 | 'http': "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": proxy_ip},
14 | 'https': "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": proxy_ip}
15 | }
16 | }
17 |
18 | driver = webdriver.Firefox(seleniumwire_options=options, executable_path="${geckodriver_path}")
19 |
20 | driver.get('https://dev.kdlapi.com/testproxy')
21 |
22 | # print the page content
23 | print(driver.page_source)
24 |
25 | # wait 3 seconds, then close the current window (quits if it is the last one)
26 | time.sleep(3)
27 | driver.close()
--------------------------------------------------------------------------------
/examples/http_proxy/selenium_firefox_whitelist.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 |
4 | from selenium import webdriver
5 | import time
6 |
7 | fp = webdriver.FirefoxProfile()
8 | proxy = '${ip:port}'
9 | ip, port = proxy.split(":")
10 | port = int(port)
11 |
12 | # proxy settings
13 | fp.set_preference('network.proxy.type', 1)
14 | fp.set_preference('network.proxy.http', ip)
15 | fp.set_preference('network.proxy.http_port', port)
16 | fp.set_preference('network.proxy.ssl', ip)
17 | fp.set_preference('network.proxy.ssl_port', port)
18 |
19 | driver = webdriver.Firefox(executable_path="${geckodriver_path}", firefox_profile=fp)
20 | driver.get('https://dev.kdlapi.com/testproxy')
21 |
22 | # print the page content
23 | print(driver.page_source)
24 |
25 | # wait 3 seconds, then close the current window (quits if it is the last one)
26 | time.sleep(3)
27 | driver.close()
--------------------------------------------------------------------------------
/examples/http_proxy_tunnel/py2_requests.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | 使用requests请求隧道服务器
6 | 请求http和https网页均适用
7 | """
8 |
9 | import requests
10 |
11 | # tunnel domain:port
12 | tunnel = "tpsXXX.kdlapi.com:15818"
13 |
14 | # username/password authentication
15 | username = "username"
16 | password = "password"
17 | proxies = {
18 |     "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel},
19 |     "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel}
20 | }
21 |
22 | # whitelist authentication (add your IP to the whitelist first)
23 | # proxies = {
24 | #     "http": "http://%(proxy)s/" % {"proxy": tunnel},
25 | #     "https": "http://%(proxy)s/" % {"proxy": tunnel}
26 | # }
27 |
28 | # target page to fetch
29 | target_url = "https://dev.kdlapi.com/testproxy"
30 |
31 | # send the request through the tunnel domain
32 | response = requests.get(target_url, proxies=proxies)
33 |
34 | # print the page content
35 | if response.status_code == 200:
36 |     print response.text
--------------------------------------------------------------------------------
/examples/http_proxy_tunnel/py2_urllib2.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | 使用urllib2请求隧道服务器
6 | 请求http和https网页均适用
7 | """
8 |
9 | import urllib2
10 | import ssl
11 |
12 | # disable certificate verification globally to avoid errors on https pages
13 | ssl._create_default_https_context = ssl._create_unverified_context
14 |
15 | # tunnel domain:port
16 | tunnel = "tpsXXX.kdlapi.com:15818"
17 |
18 | # username/password authentication
19 | username = "username"
20 | password = "password"
21 | proxies = {
22 |     "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel},
23 |     "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel}
24 | }
25 |
26 | # whitelist authentication (add your IP to the whitelist first)
27 | # proxies = {
28 | #     "http": "http://%(proxy)s/" % {"proxy": tunnel},
29 | #     "https": "http://%(proxy)s/" % {"proxy": tunnel}
30 | # }
31 |
32 | # target page to fetch
33 | target_url = "https://dev.kdlapi.com/testproxy"
34 |
35 | # send the request through the tunnel domain
36 | proxy_support = urllib2.ProxyHandler(proxies)
37 | opener = urllib2.build_opener(proxy_support)
38 | urllib2.install_opener(opener)
39 | response = urllib2.urlopen(target_url)
40 |
41 | # print the page content
42 | if response.code == 200:
43 |     print response.read()
--------------------------------------------------------------------------------
/examples/http_proxy_tunnel/py3_aiohttp.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | 使用aiohttp请求代理服务器
6 | 请求http和https网页均适用
7 |
8 | """
9 |
10 | import aiohttp
11 | import asyncio
12 | # asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())  # on Windows, call this if https requests raise an error
13 |
14 | page_url = "https://dev.kdlapi.com/testproxy"  # target page to fetch
15 |
16 | # tunnel domain:port
17 | tunnel = "tpsXXX.kdlapi.com:15818"
18 |
19 | # username/password authentication
20 | username = "username"
21 | password = "password"
22 |
23 | proxy_auth = aiohttp.BasicAuth(username, password)
24 |
25 | async def fetch(session, url):
26 | async with session.get(url, proxy="http://"+tunnel, proxy_auth=proxy_auth) as response:
27 | return await response.text()
28 |
29 | async def main():
30 |     # aiohttp enforces strict HTTPS certificate checks by default; pass ssl=False to relax them
31 |     # async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl=False)) as session:
32 | async with aiohttp.ClientSession() as session:
33 | html = await fetch(session, page_url)
34 | print(html)
35 |
36 | if __name__ == '__main__':
37 | loop = asyncio.get_event_loop()
38 | loop.run_until_complete(main())
39 |
--------------------------------------------------------------------------------
/examples/http_proxy_tunnel/py3_feapder.py:
--------------------------------------------------------------------------------
1 | import feapder
2 |
3 |
4 | class Py3Feapder(feapder.AirSpider):
5 | def start_requests(self):
6 | yield feapder.Request("https://dev.kdlapi.com/testproxy")
7 |
8 | def download_midware(self, request):
9 |         # tunnel domain:port
10 |         tunnel = "XXX.kdlapi.com:15818"
11 |
12 |         # username/password authentication
13 |         username = "username"
14 |         password = "password"
15 |         proxies = {
16 |             "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel},
17 |             "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel}
18 |         }
19 |
20 |         # whitelist authentication (add your IP to the whitelist first)
21 |         # proxies = {
22 |         #     "http": "http://%(proxy)s/" % {"proxy": tunnel},
23 |         #     "https": "http://%(proxy)s/" % {"proxy": tunnel}
24 |         # }
25 |
26 | request.proxies = proxies
27 | return request
28 |
29 | def parse(self, request, response):
30 | print(response.text)
31 |
32 |
33 | if __name__ == "__main__":
34 | Py3Feapder().start()
35 |
--------------------------------------------------------------------------------
/examples/http_proxy_tunnel/py3_httpx.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | 使用requests请求代理服务器
6 | 请求http和https网页均适用
7 | """
8 |
9 | import httpx
10 |
11 | # 隧道域名:端口号
12 | tunnel = "tpsXXX.kdlapi.com:15818"
13 |
14 | # 用户名和密码方式
15 | username = "username"
16 | password = "password"
17 |
18 | proxy_url = "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel}
19 |
20 | proxies = httpx.Proxy(
21 | url=proxy_url,
22 | mode="DEFAULT"
23 | )
24 |
25 | with httpx.Client(proxies=proxies) as client:
26 | r = client.get('http://dev.kdlapi.com/testproxy')
27 | print(r.text)
--------------------------------------------------------------------------------
/examples/http_proxy_tunnel/py3_pyppeteer.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | 请求http和https网页均适用
6 | """
7 | import asyncio
8 |
9 | from pyppeteer import launch
10 | # tunnel server
11 | proxy_raw = "tpsXXX.kdlapi.com:15818"
12 |
13 |
14 | def accounts():
15 |     # username and password; not needed if your IP is whitelisted
16 |     username = "username"
17 |     password = "password"
18 |     account = {"username": username, "password": password}
19 |     return account
20 |
21 |
22 | async def main():
23 |     # target page to fetch
24 |     target_url = "https://dev.kdlapi.com/testproxy"
25 |
26 |     browser = await launch({'headless': False, 'args': ['--disable-infobars', '--proxy-server=' + proxy_raw]})
27 |     page = await browser.newPage()
28 |
29 |     await page.authenticate(accounts())  # for whitelist authentication, comment out this line (set the whitelist first)
30 |     await page.setViewport({'width': 1920, 'height': 1080})
31 |     # send the request through the proxy
32 |     await page.goto(target_url)
33 | await asyncio.sleep(209)
34 | await browser.close()
35 |
36 | asyncio.get_event_loop().run_until_complete(main())
--------------------------------------------------------------------------------
/examples/http_proxy_tunnel/py3_requests.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | 使用requests请求隧道服务器
6 | 请求http和https网页均适用
7 | """
8 |
9 | import requests
10 |
11 | # tunnel domain:port
12 | tunnel = "tpsXXX.kdlapi.com:15818"
13 |
14 | # username/password authentication
15 | username = "username"
16 | password = "password"
17 | proxies = {
18 |     "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel},
19 |     "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel}
20 | }
21 |
22 | # whitelist authentication (add your IP to the whitelist first)
23 | # proxies = {
24 | #     "http": "http://%(proxy)s/" % {"proxy": tunnel},
25 | #     "https": "http://%(proxy)s/" % {"proxy": tunnel}
26 | # }
27 |
28 | # target page to fetch
29 | target_url = "https://dev.kdlapi.com/testproxy"
30 |
31 | # send the request through the tunnel domain
32 | response = requests.get(target_url, proxies=proxies)
33 |
34 | # print the page content
35 | if response.status_code == 200:
36 |     print(response.text)
--------------------------------------------------------------------------------
/examples/http_proxy_tunnel/py3_scrapy/scrapy.cfg:
--------------------------------------------------------------------------------
1 | # Automatically created by: scrapy startproject
2 | #
3 | # For more information about the [deploy] section see:
4 | # https://scrapyd.readthedocs.io/en/latest/deploy.html
5 |
6 | [settings]
7 | default = tutorial.settings
8 |
9 | [deploy]
10 | #url = http://localhost:6800/
11 | project = tutorial
12 |
--------------------------------------------------------------------------------
/examples/http_proxy_tunnel/py3_scrapy/tutorial/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kuaidaili/python-sdk/87d895b68c3ec1aed905d524d02f842ae6426468/examples/http_proxy_tunnel/py3_scrapy/tutorial/__init__.py
--------------------------------------------------------------------------------
/examples/http_proxy_tunnel/py3_scrapy/tutorial/items.py:
--------------------------------------------------------------------------------
1 | # Define here the models for your scraped items
2 | #
3 | # See documentation in:
4 | # https://docs.scrapy.org/en/latest/topics/items.html
5 |
6 | import scrapy
7 |
8 |
9 | class TutorialItem(scrapy.Item):
10 | # define the fields for your item here like:
11 | # name = scrapy.Field()
12 | pass
13 |
--------------------------------------------------------------------------------
/examples/http_proxy_tunnel/py3_scrapy/tutorial/middlewares.py:
--------------------------------------------------------------------------------
1 | # Define here the models for your spider middleware
2 | #
3 | # See documentation in:
4 | # https://docs.scrapy.org/en/latest/topics/spider-middleware.html
5 |
6 | from scrapy import signals
7 |
8 | # useful for handling different item types with a single interface
9 | from itemadapter import is_item, ItemAdapter
10 |
11 |
12 | class TutorialSpiderMiddleware:
13 | # Not all methods need to be defined. If a method is not defined,
14 | # scrapy acts as if the spider middleware does not modify the
15 | # passed objects.
16 |
17 | @classmethod
18 | def from_crawler(cls, crawler):
19 | # This method is used by Scrapy to create your spiders.
20 | s = cls()
21 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
22 | return s
23 |
24 | def process_spider_input(self, response, spider):
25 | # Called for each response that goes through the spider
26 | # middleware and into the spider.
27 |
28 | # Should return None or raise an exception.
29 | return None
30 |
31 | def process_spider_output(self, response, result, spider):
32 | # Called with the results returned from the Spider, after
33 | # it has processed the response.
34 |
35 | # Must return an iterable of Request, or item objects.
36 | for i in result:
37 | yield i
38 |
39 | def process_spider_exception(self, response, exception, spider):
40 | # Called when a spider or process_spider_input() method
41 | # (from other spider middleware) raises an exception.
42 |
43 | # Should return either None or an iterable of Request or item objects.
44 | pass
45 |
46 | def process_start_requests(self, start_requests, spider):
47 | # Called with the start requests of the spider, and works
48 | # similarly to the process_spider_output() method, except
49 | # that it doesn’t have a response associated.
50 |
51 | # Must return only requests (not items).
52 | for r in start_requests:
53 | yield r
54 |
55 | def spider_opened(self, spider):
56 | spider.logger.info('Spider opened: %s' % spider.name)
57 |
58 |
59 | class TutorialDownloaderMiddleware:
60 | # Not all methods need to be defined. If a method is not defined,
61 | # scrapy acts as if the downloader middleware does not modify the
62 | # passed objects.
63 |
64 | @classmethod
65 | def from_crawler(cls, crawler):
66 | # This method is used by Scrapy to create your spiders.
67 | s = cls()
68 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened)
69 | return s
70 |
71 | def process_request(self, request, spider):
72 | # Called for each request that goes through the downloader
73 | # middleware.
74 |
75 | # Must either:
76 | # - return None: continue processing this request
77 | # - or return a Response object
78 | # - or return a Request object
79 | # - or raise IgnoreRequest: process_exception() methods of
80 | # installed downloader middleware will be called
81 | return None
82 |
83 | def process_response(self, request, response, spider):
84 | # Called with the response returned from the downloader.
85 |
86 |         # Must either:
87 | # - return a Response object
88 | # - return a Request object
89 | # - or raise IgnoreRequest
90 | return response
91 |
92 | def process_exception(self, request, exception, spider):
93 | # Called when a download handler or a process_request()
94 | # (from other downloader middleware) raises an exception.
95 |
96 | # Must either:
97 | # - return None: continue processing this exception
98 | # - return a Response object: stops process_exception() chain
99 | # - return a Request object: stops process_exception() chain
100 | pass
101 |
102 | def spider_opened(self, spider):
103 | spider.logger.info('Spider opened: %s' % spider.name)
104 |
105 |
106 | class ProxyDownloaderMiddleware:
107 | _proxy = ('XXX.kdlapi.com', '15818')
108 |
109 | def process_request(self, request, spider):
110 |
111 |         # Username/password auth
112 |         username = "username"
113 |         password = "password"
114 |         request.meta['proxy'] = "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": ':'.join(ProxyDownloaderMiddleware._proxy)}
115 |
116 |         # Whitelist auth
117 |         # request.meta['proxy'] = "http://%(proxy)s/" % {"proxy": ':'.join(ProxyDownloaderMiddleware._proxy)}
118 |
119 | request.headers["Connection"] = "close"
120 | return None
121 |
122 | def process_exception(self, request, exception, spider):
123 | """捕获407异常"""
124 | if "'status': 407" in exception.__str__(): # 不同版本的exception的写法可能不一样,可以debug出当前版本的exception再修改条件
125 | from scrapy.resolver import dnscache
126 | dnscache.__delitem__(ProxyDownloaderMiddleware._proxy[0]) # 删除proxy host的dns缓存
127 | return exception
128 |
--------------------------------------------------------------------------------
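The ProxyDownloaderMiddleware above routes every request through the tunnel. If only selected requests should be proxied, Scrapy's built-in HttpProxyMiddleware also honors a proxy set per request in request.meta (credentials embedded in the proxy URL are turned into a Proxy-Authorization header). A minimal sketch, assuming the custom middleware is left disabled and the same placeholder credentials:

    import scrapy

    class PartialProxySpider(scrapy.Spider):
        name = "partial_proxy"

        def start_requests(self):
            # Only this request uses the tunnel; other requests would connect directly
            yield scrapy.Request(
                "https://dev.kdlapi.com/testproxy",
                meta={"proxy": "http://username:password@tpsXXX.kdlapi.com:15818"},
                callback=self.parse,
            )

        def parse(self, response):
            print(response.text)
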
/examples/http_proxy_tunnel/py3_scrapy/tutorial/pipelines.py:
--------------------------------------------------------------------------------
1 | # Define your item pipelines here
2 | #
3 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting
4 | # See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
5 |
6 |
7 | # useful for handling different item types with a single interface
8 | from itemadapter import ItemAdapter
9 |
10 |
11 | class TutorialPipeline:
12 | def process_item(self, item, spider):
13 | return item
14 |
--------------------------------------------------------------------------------
/examples/http_proxy_tunnel/py3_scrapy/tutorial/settings.py:
--------------------------------------------------------------------------------
1 | # Scrapy settings for tutorial project
2 | #
3 | # For simplicity, this file contains only settings considered important or
4 | # commonly used. You can find more settings consulting the documentation:
5 | #
6 | # https://docs.scrapy.org/en/latest/topics/settings.html
7 | # https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
8 | # https://docs.scrapy.org/en/latest/topics/spider-middleware.html
9 |
10 | BOT_NAME = 'tutorial'
11 |
12 | SPIDER_MODULES = ['tutorial.spiders']
13 | NEWSPIDER_MODULE = 'tutorial.spiders'
14 |
15 |
16 | # Crawl responsibly by identifying yourself (and your website) on the user-agent
17 | #USER_AGENT = 'tutorial (+http://www.yourdomain.com)'
18 |
19 | # Obey robots.txt rules
20 | ROBOTSTXT_OBEY = True
21 |
22 | # Configure maximum concurrent requests performed by Scrapy (default: 16)
23 | #CONCURRENT_REQUESTS = 32
24 |
25 | # Configure a delay for requests for the same website (default: 0)
26 | # See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
27 | # See also autothrottle settings and docs
28 | #DOWNLOAD_DELAY = 3
29 | # The download delay setting will honor only one of:
30 | #CONCURRENT_REQUESTS_PER_DOMAIN = 16
31 | #CONCURRENT_REQUESTS_PER_IP = 16
32 |
33 | # Disable cookies (enabled by default)
34 | #COOKIES_ENABLED = False
35 |
36 | # Disable Telnet Console (enabled by default)
37 | #TELNETCONSOLE_ENABLED = False
38 |
39 | # Override the default request headers:
40 | #DEFAULT_REQUEST_HEADERS = {
41 | # 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
42 | # 'Accept-Language': 'en',
43 | #}
44 |
45 | # Enable or disable spider middlewares
46 | # See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
47 | #SPIDER_MIDDLEWARES = {
48 | # 'tutorial.middlewares.TutorialSpiderMiddleware': 543,
49 | #}
50 |
51 | # Enable or disable downloader middlewares
52 | # See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
53 | DOWNLOADER_MIDDLEWARES = {
54 | 'tutorial.middlewares.ProxyDownloaderMiddleware': 100,
55 | }
56 |
57 | # Enable or disable extensions
58 | # See https://docs.scrapy.org/en/latest/topics/extensions.html
59 | #EXTENSIONS = {
60 | # 'scrapy.extensions.telnet.TelnetConsole': None,
61 | #}
62 |
63 | # Configure item pipelines
64 | # See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
65 | #ITEM_PIPELINES = {
66 | # 'tutorial.pipelines.TutorialPipeline': 300,
67 | #}
68 |
69 | # Enable and configure the AutoThrottle extension (disabled by default)
70 | # See https://docs.scrapy.org/en/latest/topics/autothrottle.html
71 | #AUTOTHROTTLE_ENABLED = True
72 | # The initial download delay
73 | #AUTOTHROTTLE_START_DELAY = 5
74 | # The maximum download delay to be set in case of high latencies
75 | #AUTOTHROTTLE_MAX_DELAY = 60
76 | # The average number of requests Scrapy should be sending in parallel to
77 | # each remote server
78 | #AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
79 | # Enable showing throttling stats for every response received:
80 | #AUTOTHROTTLE_DEBUG = False
81 |
82 | # Enable and configure HTTP caching (disabled by default)
83 | # See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
84 | #HTTPCACHE_ENABLED = True
85 | #HTTPCACHE_EXPIRATION_SECS = 0
86 | #HTTPCACHE_DIR = 'httpcache'
87 | #HTTPCACHE_IGNORE_HTTP_CODES = []
88 | #HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
89 |
--------------------------------------------------------------------------------
/examples/http_proxy_tunnel/py3_scrapy/tutorial/spiders/__init__.py:
--------------------------------------------------------------------------------
1 | # This package will contain the spiders of your Scrapy project
2 | #
3 | # Please refer to the documentation for information on how to create and manage
4 | # your spiders.
5 |
--------------------------------------------------------------------------------
/examples/http_proxy_tunnel/py3_scrapy/tutorial/spiders/kdl_spider.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | import scrapy
5 |
6 | class KdlSpider(scrapy.spiders.Spider):
7 | name = "kdl"
8 |
9 | def start_requests(self):
10 | url = "https://dev.kdlapi.com/testproxy"
11 | yield scrapy.Request(url, callback=self.parse)
12 |
13 | def parse(self, response):
14 | print(response.text)
15 |
16 |
17 | # If scrapy raises an SSL error like "('SSL routines', 'ssl3_get_record', 'wrong version number')", you can try uncommenting the code below to work around it
18 | # from OpenSSL import SSL
19 | # from scrapy.core.downloader.contextfactory import ScrapyClientContextFactory
20 | #
21 | # init = ScrapyClientContextFactory.__init__
22 | # def init2(self, *args, **kwargs):
23 | # init(self, *args, **kwargs)
24 | # self.method = SSL.SSLv23_METHOD
25 | # ScrapyClientContextFactory.__init__ = init2
26 |
--------------------------------------------------------------------------------
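With scrapy.cfg pointing at tutorial.settings and ProxyDownloaderMiddleware registered in DOWNLOADER_MIDDLEWARES, the spider is run by its name from the project root:

    scrapy crawl kdl
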
/examples/http_proxy_tunnel/py3_socket.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | Request the tunnel proxy server with a raw socket
6 | Works for both http and https pages
7 | """
8 |
9 | import socket
10 | import socks # pip install PySocks
11 |
12 | socks.set_default_proxy(socks.HTTP, addr='tpsXXX.kdlapi.com', port=15818, username='username', password='password')  # set proxy type to HTTP
13 | # socks.set_default_proxy(socks.SOCKS5, addr='tpsXXX.kdlapi.com', port=20818)  # set proxy type to SOCKS5
14 | socket.socket = socks.socksocket  # route socket connections through the proxy
15 |
16 |
17 | def main():
18 | sock = socket.socket()
19 |     sock.connect(('dev.kdlapi.com', 80))  # connect (the connection is tunnelled through the proxy)
20 |     # Construct a complete HTTP request per the protocol format: request line (method, path, version), headers, terminating blank line
21 |     request = 'GET /testproxy HTTP/1.1\r\nHost: dev.kdlapi.com\r\nUser-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36\r\nConnection: close\r\n\r\n'
22 |
23 |     response = b''  # buffer for the response
24 |     sock.send(request.encode())  # send the request
25 |     chunk = sock.recv(1024)  # receive up to 1024 bytes at a time
26 |     while chunk:  # keep receiving until the server closes the connection
27 | response += chunk
28 | chunk = sock.recv(1024)
29 | print(response.decode())
30 |
31 |
32 | if __name__ == '__main__':
33 | main()
--------------------------------------------------------------------------------
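Hand-building the request string is easy to get wrong; once socket.socket has been replaced by socks.socksocket as above, the standard library's http.client is tunnelled through the proxy as well. A minimal sketch under the same monkey-patch:

    import http.client

    conn = http.client.HTTPConnection("dev.kdlapi.com", 80, timeout=10)  # uses the patched socket
    conn.request("GET", "/testproxy", headers={"User-Agent": "Mozilla/5.0"})
    resp = conn.getresponse()
    print(resp.status)
    print(resp.read().decode())
    conn.close()
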
/examples/http_proxy_tunnel/py3_urllib.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | Request the tunnel proxy server with urllib
6 | Works for both http and https pages
7 | """
8 |
9 | import urllib.request
10 | import ssl
11 |
12 | # Disable certificate verification globally to avoid errors when requesting https pages
13 | ssl._create_default_https_context = ssl._create_unverified_context
14 |
15 | # Tunnel domain:port
16 | tunnel = "tpsXXX.kdlapi.com:15818"
17 |
18 | # Username/password auth
19 | username = "username"
20 | password = "password"
21 | proxies = {
22 | "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel},
23 | "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel}
24 | }
25 |
26 | # Whitelist auth (the whitelist must be configured in advance)
27 | # proxies = {
28 | # "http": "http://%(proxy)s/" % {"proxy": tunnel},
29 | # "https": "http://%(proxy)s/" % {"proxy": tunnel}
30 | # }
31 |
32 | # Target page to request
33 | target_url = "https://dev.kdlapi.com/testproxy"
34 |
35 | # Send the request through the tunnel domain
36 | proxy_support = urllib.request.ProxyHandler(proxies)
37 | opener = urllib.request.build_opener(proxy_support)
38 | urllib.request.install_opener(opener)
39 | response = urllib.request.urlopen(target_url)
40 |
41 | # Get the page content
42 | if response.code == 200:
43 | print(response.read().decode('utf-8'))
--------------------------------------------------------------------------------
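urllib.request.install_opener changes the process-wide default opener, which can surprise unrelated code in the same process. A minimal alternative sketch that keeps the proxy local to one opener, reusing the proxies dict and target_url defined above:

    proxy_support = urllib.request.ProxyHandler(proxies)
    opener = urllib.request.build_opener(proxy_support)
    response = opener.open(target_url, timeout=10)  # no global install_opener needed
    if response.code == 200:
        print(response.read().decode('utf-8'))
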
/examples/http_proxy_tunnel/selenium_chrome_username_password.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 |
4 | from selenium import webdriver
5 | import string
6 | import zipfile
7 | import time
8 |
9 |
10 | def create_proxyauth_extension(tunnelhost, tunnelport, proxy_username, proxy_password, scheme='http', plugin_path=None):
11 | """代理认证插件
12 |
13 | args:
14 | tunnelhost (str): 你的代理地址或者域名(str类型)
15 | tunnelport (int): 代理端口号(int类型)
16 | proxy_username (str):用户名(字符串)
17 | proxy_password (str): 密码 (字符串)
18 | kwargs:
19 | scheme (str): 代理方式 默认http
20 | plugin_path (str): 扩展的绝对路径
21 |
22 | return str -> plugin_path
23 | """
24 |
25 | if plugin_path is None:
26 | plugin_path = 'vimm_chrome_proxyauth_plugin.zip'
27 |
28 | manifest_json = """
29 | {
30 | "version": "1.0.0",
31 | "manifest_version": 2,
32 | "name": "Chrome Proxy",
33 | "permissions": [
34 | "proxy",
35 | "tabs",
36 | "unlimitedStorage",
37 | "storage",
38 | "",
39 | "webRequest",
40 | "webRequestBlocking"
41 | ],
42 | "background": {
43 | "scripts": ["background.js"]
44 | },
45 | "minimum_chrome_version":"22.0.0"
46 | }
47 | """
48 |
49 | background_js = string.Template(
50 | """
51 | var config = {
52 | mode: "fixed_servers",
53 | rules: {
54 | singleProxy: {
55 | scheme: "${scheme}",
56 | host: "${host}",
57 | port: parseInt(${port})
58 | },
59 | bypassList: ["foobar.com"]
60 | }
61 | };
62 |
63 | chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});
64 |
65 | function callbackFn(details) {
66 | return {
67 | authCredentials: {
68 | username: "${username}",
69 | password: "${password}"
70 | }
71 | };
72 | }
73 |
74 | chrome.webRequest.onAuthRequired.addListener(
75 | callbackFn,
76 | {urls: [""]},
77 | ['blocking']
78 | );
79 | """
80 | ).substitute(
81 | host=tunnelhost,
82 | port=tunnelport,
83 | username=proxy_username,
84 | password=proxy_password,
85 | scheme=scheme,
86 | )
87 | with zipfile.ZipFile(plugin_path, 'w') as zp:
88 | zp.writestr("manifest.json", manifest_json)
89 | zp.writestr("background.js", background_js)
90 | return plugin_path
91 |
92 |
93 | proxyauth_plugin_path = create_proxyauth_extension(
94 |     tunnelhost="${tunnelhost}",  # tunnel domain
95 |     tunnelport="${tunnelport}",  # port
96 |     proxy_username="${username}",  # username
97 |     proxy_password="${password}"  # password
98 | )
99 |
100 |
101 | chrome_options = webdriver.ChromeOptions()
102 | chrome_options.add_extension(proxyauth_plugin_path)
103 | # ${chromedriver_path}: path where the chromedriver executable is stored
104 | driver = webdriver.Chrome(executable_path="${chromedriver_path}", chrome_options=chrome_options)
105 | driver.get("https://dev.kdlapi.com/testproxy")
106 |
107 | # Get the page content
108 | print(driver.page_source)
109 |
110 | # Close the current window after a 3-second delay; quits the browser if it is the last window
111 | time.sleep(3)
112 | driver.close()
--------------------------------------------------------------------------------
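In Selenium 4 the executable_path and chrome_options keyword arguments are deprecated; the same extension-based auth is wired through a Service object and the options keyword instead. A minimal sketch, assuming the plugin built by create_proxyauth_extension above:

    from selenium import webdriver
    from selenium.webdriver.chrome.service import Service

    options = webdriver.ChromeOptions()
    options.add_extension(proxyauth_plugin_path)
    driver = webdriver.Chrome(service=Service("${chromedriver_path}"), options=options)
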
/examples/http_proxy_tunnel/selenium_chrome_whitelist.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 |
4 | from selenium import webdriver
5 | import time
6 |
7 | chrome_options = webdriver.ChromeOptions()
8 | chrome_options.add_argument('--proxy-server=http://${tunnelhost:tunnelport}')  # tunnel domain:port
9 | # ${chromedriver_path}: path where the chromedriver executable is stored
10 | driver = webdriver.Chrome(executable_path="${chromedriver_path}", chrome_options=chrome_options)
11 | driver.get("https://dev.kdlapi.com/testproxy")
12 |
13 | # Get the page content
14 | print(driver.page_source)
15 |
16 | # Close the current window after a 3-second delay; quits the browser if it is the last window
17 | time.sleep(3)
18 | driver.close()
--------------------------------------------------------------------------------
/examples/http_proxy_tunnel/selenium_firefox_username_password.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 |
4 | import time
5 | from seleniumwire import webdriver # pip install selenium-wire
6 |
7 | options = {
8 | 'proxy': {
9 | 'http': 'http://username:password@tpsXXX.kdlapi.com:15818',
10 | 'https': 'http://username:password@tpsXXX.kdlapi.com:15818',
11 | }
12 | }
13 | driver = webdriver.Firefox(seleniumwire_options=options, executable_path="${geckodriver_path}")
14 |
15 | driver.get('https://dev.kdlapi.com/testproxy')
16 |
17 | # Get the page content
18 | print(driver.page_source)
19 |
20 | # Close the current window after a 3-second delay; quits the browser if it is the last window
21 | time.sleep(3)
22 | driver.close()
--------------------------------------------------------------------------------
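selenium-wire's proxy options also accept a no_proxy key for hosts that should bypass the tunnel. A sketch of the same options dict with a bypass list, assuming the placeholder credentials above:

    options = {
        'proxy': {
            'http': 'http://username:password@tpsXXX.kdlapi.com:15818',
            'https': 'http://username:password@tpsXXX.kdlapi.com:15818',
            'no_proxy': 'localhost,127.0.0.1',  # these hosts connect directly
        }
    }
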
/examples/http_proxy_tunnel/selenium_firefox_whitelist.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 |
4 | import time
5 | from selenium import webdriver
6 |
7 |
8 | fp = webdriver.FirefoxProfile()
9 | proxy_ip = "tpsXXX.kdlapi.com" # 隧道服务器域名
10 | proxy_port = 15818 # 端口号
11 |
12 | fp.set_preference('network.proxy.type', 1)
13 | fp.set_preference('network.proxy.http', proxy_ip)
14 | fp.set_preference('network.proxy.http_port', proxy_port)
15 | fp.set_preference('network.proxy.ssl', proxy_ip)
16 | fp.set_preference('network.proxy.ssl_port', proxy_port)
17 |
18 | driver = webdriver.Firefox(executable_path="${geckodriver_path}", firefox_profile=fp)
19 | driver.get('https://dev.kdlapi.com/testproxy')
20 |
21 | # Get the page content
22 | print(driver.page_source)
23 |
24 | # Close the current window after a 3-second delay; quits the browser if it is the last window
25 | time.sleep(3)
26 | driver.close()
--------------------------------------------------------------------------------
/examples/socks_proxy/phantomjs_demo.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 |
4 | from selenium import webdriver
5 | import time
6 |
7 | # Download the phantomjs package first, then fill in the path to phantomjs.exe (the path must not contain Chinese characters)
8 | executable_path = '${executable_path}'
9 | service_args = [
10 |     '--proxy=host:port',  # replace with your proxy IP, e.g. 59.38.241.25:23918
11 |     '--proxy-type=socks5',
12 |     '--proxy-auth=username:password'  # username:password
13 | ]
14 | driver = webdriver.PhantomJS(service_args=service_args, executable_path=executable_path)
15 | driver.get('https://dev.kdlapi.com/testproxy')
16 |
17 | print(driver.page_source)
18 | time.sleep(3)
19 | driver.close()
--------------------------------------------------------------------------------
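PhantomJS development has been suspended since 2018; headless Chrome covers the same use case. A minimal sketch of the whitelist variant (Chrome's --proxy-server flag does not carry SOCKS credentials, so the whitelist must be configured in advance):

    from selenium import webdriver

    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--proxy-server=socks5://${ip:port}')  # proxy IP:port
    driver = webdriver.Chrome(options=chrome_options)
    driver.get('https://dev.kdlapi.com/testproxy')
    print(driver.page_source)
    driver.quit()
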
/examples/socks_proxy/py2_requests.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | Request the proxy server with requests
6 | Works for both http and https pages
7 | """
8 |
9 | import requests
10 |
11 | # Proxy extraction API: fetch 1 proxy IP
12 | api_url = "http://dps.kdlapi.com/api/getdps/?secret_id=o1fjh1re9o28876h7c08&signature=xxxxx&num=1&pt=2&sep=1"
13 |
14 | # Get the proxy IP returned by the API
15 | proxy_ip = requests.get(api_url).text
16 |
17 | # Username/password auth (private proxy / exclusive proxy)
18 | username = "username"
19 | password = "password"
20 | proxies = {
21 | "http": "socks5://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": proxy_ip},
22 | "https": "socks5://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": proxy_ip}
23 | }
24 |
25 | # Whitelist auth (the whitelist must be configured in advance)
26 | # proxies = {
27 | # "http": "socks5://%(proxy)s/" % {"proxy": proxy_ip},
28 | # "https": "socks5://%(proxy)s/" % {"proxy": proxy_ip}
29 | # }
30 |
31 | # Target page to request
32 | target_url = "https://dev.kdlapi.com/testproxy"
33 |
34 | # Send the request through the proxy IP
35 | response = requests.get(target_url, proxies=proxies)
36 |
37 | # Get the page content
38 | if response.status_code == 200:
39 | print response.text
--------------------------------------------------------------------------------
/examples/socks_proxy/py3_requests.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """
5 | Request the proxy server with requests
6 | Works for both http and https pages
7 | """
8 |
9 | import requests
10 |
11 | # Proxy extraction API: fetch 1 proxy IP
12 | api_url = "http://dps.kdlapi.com/api/getdps/?secret_id=o1fjh1re9o28876h7c08&signature=xxxxx&num=1&pt=2&sep=1"
13 |
14 | # Get the proxy IP returned by the API
15 | proxy_ip = requests.get(api_url).text
16 |
17 | # Username/password auth (private proxy / exclusive proxy)
18 | username = "username"
19 | password = "password"
20 | proxies = {
21 | "http": "socks5://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": proxy_ip},
22 | "https": "socks5://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": proxy_ip}
23 | }
24 |
25 | # Whitelist auth (the whitelist must be configured in advance)
26 | # proxies = {
27 | # "http": "socks5://%(proxy)s/" % {"proxy": proxy_ip},
28 | # "https": "socks5://%(proxy)s/" % {"proxy": proxy_ip}
29 | # }
30 |
31 | # Target page to request
32 | target_url = "https://dev.kdlapi.com/testproxy"
33 |
34 | # Send the request through the proxy IP
35 | response = requests.get(target_url, proxies=proxies)
36 |
37 | # Get the page content
38 | if response.status_code == 200:
39 | print(response.text)
--------------------------------------------------------------------------------
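Two details worth noting for SOCKS with requests: the PySocks extra must be installed (pip install requests[socks]), and the socks5:// scheme resolves DNS locally. Switching the scheme to socks5h:// resolves hostnames through the proxy instead; a sketch of the variant, reusing the variables above:

    proxies = {
        "http": "socks5h://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": proxy_ip},
        "https": "socks5h://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": proxy_ip}
    }
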
/examples/socks_proxy/selenium_chrome_whitelist.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | from selenium import webdriver
5 | import time
6 |
7 | chrome_options = webdriver.ChromeOptions()
8 | chrome_options.add_argument('--proxy-server=socks5://${ip:port}')  # proxy IP:port
9 | # ${chromedriver_path}: path where the chromedriver executable is stored
10 | driver = webdriver.Chrome(executable_path="${chromedriver_path}", chrome_options=chrome_options)
11 | driver.get("https://dev.kdlapi.com/testproxy")
12 |
13 | # Get the page content
14 | print(driver.page_source)
15 |
16 | # Close the current window after a 3-second delay; quits the browser if it is the last window
17 | time.sleep(3)
18 | driver.close()
--------------------------------------------------------------------------------