├── .gitignore
├── LICENSE
├── README.md
├── async_proxy_pool
│   ├── __init__.py
│   ├── config.py
│   ├── crawler.py
│   ├── database.py
│   ├── logger.py
│   ├── scheduler.py
│   ├── utils.py
│   ├── validator.py
│   ├── webapi_flask.py
│   └── webapi_sanic.py
├── client.py
├── requirements.txt
├── server_flask.py
├── server_sanic.py
└── test
    └── test_proxy.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | .idea
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .coverage
43 | .coverage.*
44 | .cache
45 | nosetests.xml
46 | coverage.xml
47 | *.cover
48 | .hypothesis/
49 |
50 | # Translations
51 | *.mo
52 | *.pot
53 |
54 | # Django stuff:
55 | *.log
56 | .static_storage/
57 | .media/
58 | local_settings.py
59 |
60 | # Flask stuff:
61 | instance/
62 | .webassets-cache
63 |
64 | # Scrapy stuff:
65 | .scrapy
66 |
67 | # Sphinx documentation
68 | docs/_build/
69 |
70 | # PyBuilder
71 | target/
72 |
73 | # Jupyter Notebook
74 | .ipynb_checkpoints
75 |
76 | # pyenv
77 | .python-version
78 |
79 | # celery beat schedule file
80 | celerybeat-schedule
81 |
82 | # SageMath parsed files
83 | *.sage.py
84 |
85 | # Environments
86 | .env
87 | .venv
88 | env/
89 | venv/
90 | ENV/
91 | env.bak/
92 | venv.bak/
93 |
94 | # Spyder project settings
95 | .spyderproject
96 | .spyproject
97 |
98 | # Rope project settings
99 | .ropeproject
100 |
101 | # mkdocs documentation
102 | /site
103 |
104 | # mypy
105 | .mypy_cache/
106 |
107 | # for vscode
108 | .vscode/
109 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018~now chenjiandongx
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Async Proxy Pool
2 |
3 | An async proxy pool for crawlers, built on Python asyncio and designed to take full advantage of Python's asynchronous performance.
4 |
5 |
6 | ### Requirements
7 |
8 | The project uses [sanic](https://github.com/channelcat/sanic), an asynchronous web framework (a Flask version is also provided), so Python 3.5+ is recommended. Note that sanic does not support Windows; Windows users (like me 😄) can consider Ubuntu on Windows.
9 |
10 |
11 | ### How to Use
12 |
13 | #### Install Redis
14 | The project uses [Redis](https://redis.io/) as its database. Redis is an open source (BSD licensed), in-memory data structure store that can serve as a database, cache, and message broker. Make sure Redis is properly installed in your environment; see the official guide for installation instructions.
15 |
16 | #### Download the source code
17 | ```bash
18 | $ git clone https://github.com/chenjiandongx/async-proxy-pool.git
19 | ```
20 |
21 | #### Install dependencies
22 | Using requirements.txt
23 | ```bash
24 | $ pip install -r requirements.txt
25 | ```
26 |
27 | #### Configuration
28 | The configuration file [config.py](https://github.com/chenjiandongx/async-proxy-pool/blob/master/async_proxy_pool/config.py) holds every configuration option the project uses, as shown below. Adjust them to your needs, or simply keep the defaults.
29 | ```python
30 | #!/usr/bin/env python
31 | # coding=utf-8
32 |
33 | # Request timeout (seconds)
34 | REQUEST_TIMEOUT = 15
35 | # Request delay (seconds)
36 | REQUEST_DELAY = 0
37 |
38 | # Redis host
39 | REDIS_HOST = "localhost"
40 | # Redis port
41 | REDIS_PORT = 6379
42 | # Redis password
43 | REDIS_PASSWORD = None
44 | # Redis key for the proxy sorted set
45 | REDIS_KEY = "proxies:ranking"
46 | # Maximum number of connections in the Redis connection pool
47 | REDIS_MAX_CONNECTION = 20
48 |
49 | # Maximum proxy score
50 | MAX_SCORE = 10
51 | # Minimum proxy score
52 | MIN_SCORE = 0
53 | # Initial proxy score
54 | INIT_SCORE = 9
55 |
56 | # server web host
57 | SERVER_HOST = "localhost"
58 | # server web port
59 | SERVER_PORT = 3289
60 | # Whether to enable access logging
61 | SERVER_ACCESS_LOG = True
62 |
63 | # Number of proxies validated per batch
64 | VALIDATOR_BATCH_COUNT = 256
65 | # Website used by the validator; change it to the site you actually want to crawl, e.g. Sina or Zhihu
66 | VALIDATOR_BASE_URL = "https://httpbin.org/"
67 | # Validator run cycle (minutes)
68 | VALIDATOR_RUN_CYCLE = 15
69 |
70 |
71 | # Crawler run cycle (minutes)
72 | CRAWLER_RUN_CYCLE = 30
73 | # Request headers
74 | HEADERS = {
75 | "X-Requested-With": "XMLHttpRequest",
76 | "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
77 | "(KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36",
78 | }
79 | ```
80 |
81 | ### Running the Project
82 |
83 | **Run the client to start the crawler and the validator**
84 | ```bash
85 | # The validation website can be set via an environment variable: set/export VALIDATOR_BASE_URL="https://example.com"
86 | $ python client.py
87 | 2018-05-16 23:41:39,234 - Crawler working...
88 | 2018-05-16 23:41:40,509 - Crawler √ http://202.83.123.33:3128
89 | 2018-05-16 23:41:40,509 - Crawler √ http://123.53.118.122:61234
90 | 2018-05-16 23:41:40,510 - Crawler √ http://212.237.63.84:8888
91 | 2018-05-16 23:41:40,510 - Crawler √ http://36.73.102.245:8080
92 | 2018-05-16 23:41:40,511 - Crawler √ http://78.137.90.253:8080
93 | 2018-05-16 23:41:40,512 - Crawler √ http://5.45.70.39:1490
94 | 2018-05-16 23:41:40,512 - Crawler √ http://117.102.97.162:8080
95 | 2018-05-16 23:41:40,513 - Crawler √ http://109.185.149.65:8080
96 | 2018-05-16 23:41:40,513 - Crawler √ http://189.39.143.172:20183
97 | 2018-05-16 23:41:40,514 - Crawler √ http://186.225.112.62:20183
98 | 2018-05-16 23:41:40,514 - Crawler √ http://189.126.66.154:20183
99 | ...
100 | 2018-05-16 23:41:55,866 - Validator working...
101 | 2018-05-16 23:41:56,951 - Validator × https://114.113.126.82:80
102 | 2018-05-16 23:41:56,953 - Validator × https://114.199.125.242:80
103 | 2018-05-16 23:41:56,955 - Validator × https://114.228.75.17:6666
104 | 2018-05-16 23:41:56,957 - Validator × https://115.227.3.86:9000
105 | 2018-05-16 23:41:56,960 - Validator × https://115.229.88.191:9000
106 | 2018-05-16 23:41:56,964 - Validator × https://115.229.89.100:9000
107 | 2018-05-16 23:41:56,966 - Validator × https://103.18.180.194:8080
108 | 2018-05-16 23:41:56,967 - Validator × https://115.229.90.207:9000
109 | 2018-05-16 23:41:56,968 - Validator × https://103.216.144.17:8080
110 | 2018-05-16 23:41:56,969 - Validator × https://117.65.43.29:31588
111 | 2018-05-16 23:41:56,971 - Validator × https://103.248.232.135:8080
112 | 2018-05-16 23:41:56,972 - Validator × https://117.94.69.166:61234
113 | 2018-05-16 23:41:56,975 - Validator × https://103.26.56.109:8080
114 | ...
115 | ```
116 |
117 | **Run the server to start the web service**
118 |
119 | #### Sanic
120 | ```bash
121 | $ python server_sanic.py
122 | [2018-05-16 23:36:22 +0800] [108] [INFO] Goin' Fast @ http://localhost:3289
123 | [2018-05-16 23:36:22 +0800] [108] [INFO] Starting worker [108]
124 | ```
125 |
126 | #### Flask
127 | ```bash
128 | $ python server_flask.py
129 | * Serving Flask app "async_proxy_pool.webapi_flask" (lazy loading)
130 | * Environment: production
131 | WARNING: Do not use the development server in a production environment.
132 | Use a production WSGI server instead.
133 | * Debug mode: on
134 | * Restarting with stat
135 | * Debugger is active!
136 | * Debugger PIN: 322-954-449
137 | * Running on http://localhost:3289/ (Press CTRL+C to quit)
138 | ```
139 |
140 | ### Architecture
141 |
142 | The project's main modules are the crawler, storage, validator, scheduler, and web API modules.
143 |
144 | [Crawler module](https://github.com/chenjiandongx/async-proxy-pool/blob/master/async_proxy_pool/crawler.py): crawls proxy websites and stores the proxies it finds in the database; each proxy starts with a score of INIT_SCORE.
145 |
146 | [Storage module](https://github.com/chenjiandongx/async-proxy-pool/blob/master/async_proxy_pool/database.py): wraps the Redis operations the project needs and provides a Redis connection pool.
147 |
148 | [Validator module](https://github.com/chenjiandongx/async-proxy-pool/blob/master/async_proxy_pool/validator.py): checks whether a proxy is usable. A working proxy gains 1 point up to MAX_SCORE; a failing proxy loses 1 point, and once its score drops to 0 it is removed from the database.
149 |
150 | [Scheduler module](https://github.com/chenjiandongx/async-proxy-pool/blob/master/async_proxy_pool/scheduler.py): schedules the crawler and validator runs.
151 |
152 | [Web API module](https://github.com/chenjiandongx/async-proxy-pool/blob/master/async_proxy_pool/webapi_sanic.py): provides the **web API** using sanic (a Flask version lives in webapi_flask.py).
153 |
154 |
155 | `/`
156 |
157 | Welcome page
158 | ```bash
159 | $ http http://localhost:3289/
160 | HTTP/1.1 200 OK
161 | Connection: keep-alive
162 | Content-Length: 42
163 | Content-Type: application/json
164 | Keep-Alive: 5
165 |
166 | {
167 | "Welcome": "This is a proxy pool system."
168 | }
169 | ```
170 |
171 |
172 | **`/pop`**
173 |
174 | Returns a random proxy, trying up to three times:
175 | 1. Try to return a proxy with score MAX_SCORE, i.e. one that most recently passed validation.
176 | 2. Try to return a random proxy with a score between (MAX_SCORE - 3) and MAX_SCORE.
177 | 3. Try to return a proxy with a score between 0 and MAX_SCORE.
178 | ```bash
179 | $ http http://localhost:3289/pop
180 | HTTP/1.1 200 OK
181 | Connection: keep-alive
182 | Content-Length: 38
183 | Content-Type: application/json
184 | Keep-Alive: 5
185 |
186 | {
187 | "http": "http://46.48.105.235:8080"
188 | }
189 | ```
190 |
191 |
192 | **`/get/<count:int>`**
193 |
194 | Returns the specified number of proxies, sorted by score from high to low.
195 | ```bash
196 | $ http http://localhost:3289/get/10
197 | HTTP/1.1 200 OK
198 | Connection: keep-alive
199 | Content-Length: 393
200 | Content-Type: application/json
201 | Keep-Alive: 5
202 |
203 | [
204 | {
205 | "http": "http://94.177.214.215:3128"
206 | },
207 | {
208 | "http": "http://94.139.242.70:53281"
209 | },
210 | {
211 | "http": "http://94.130.92.40:3128"
212 | },
213 | {
214 | "http": "http://82.78.28.139:8080"
215 | },
216 | {
217 | "http": "http://82.222.153.227:9090"
218 | },
219 | {
220 | "http": "http://80.211.228.238:8888"
221 | },
222 | {
223 | "http": "http://80.211.180.224:3128"
224 | },
225 | {
226 | "http": "http://79.101.98.2:53281"
227 | },
228 | {
229 | "http": "http://66.96.233.182:8080"
230 | },
231 | {
232 | "http": "http://61.228.45.165:8080"
233 | }
234 | ]
235 | ```
236 |
237 |
238 | **`/count`**
239 |
240 | Returns the total number of proxies in the pool.
241 | ```bash
242 | $ http http://localhost:3289/count
243 | HTTP/1.1 200 OK
244 | Connection: keep-alive
245 | Content-Length: 15
246 | Content-Type: application/json
247 | Keep-Alive: 5
248 |
249 | {
250 | "count": "698"
251 | }
252 | ```
253 |
254 |
255 | **`/count/<score:int>`**
256 |
257 | Returns the number of proxies with the specified score.
258 | ```bash
259 | $ http http://localhost:3289/count/10
260 | HTTP/1.1 200 OK
261 | Connection: keep-alive
262 | Content-Length: 15
263 | Content-Type: application/json
264 | Keep-Alive: 5
265 |
266 | {
267 | "count": "143"
268 | }
269 |
270 | ```
271 |
272 |
273 | **`/clear/<score:int>`**
274 |
275 | Deletes all proxies with a score less than or equal to the given score.
276 | ```bash
277 | $ http http://localhost:3289/clear/0
278 | HTTP/1.1 200 OK
279 | Connection: keep-alive
280 | Content-Length: 22
281 | Content-Type: application/json
282 | Keep-Alive: 5
283 |
284 | {
285 | "Clear": "Successful"
286 | }
287 | ```
288 |
289 |
290 | ### Adding More Proxy Sources
291 |
292 | Add your own crawl method to crawler.py; a fuller example follows the skeleton below.
293 | ```python
294 | class Crawler:
295 |
296 |     @staticmethod
297 |     def run():
298 |         ...
299 |
300 |     # Add your own crawl method
301 |     @staticmethod
302 |     @collect_funcs  # decorator that registers the function so the crawler will run it
303 |     def crawl_xxx():
304 |         ...  # crawl logic goes here
305 | ```
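
For reference, here is a fuller sketch of such a method, modeled on the existing crawlers in crawler.py. It would live inside the `Crawler` class; the site URL and CSS selectors are hypothetical placeholders that you would adapt to the target site.

```python
# Inside class Crawler in async_proxy_pool/crawler.py.
# Relies on the module's existing imports: pyquery and `requests` from .utils.
@staticmethod
@collect_funcs
def crawl_example():
    """
    Hypothetical proxy site: http://proxy.example.com (placeholder URL)
    """
    html = requests("http://proxy.example.com/free")
    if html:
        doc = pyquery.PyQuery(html)
        # Adjust the selectors to the target site's table layout
        for row in doc("table tr").items():
            ip = row("td:nth-child(1)").text()
            port = row("td:nth-child(2)").text()
            if ip and port:
                yield "http://{}:{}".format(ip, port)
```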
306 |
307 | ### Choosing Another Web Framework
308 |
309 | This project uses Sanic, but you can switch to any other web framework you prefer. The web module is completely independent, so replacing the framework does not affect the rest of the project. The steps are listed below, followed by a minimal sketch.
310 |
311 | 1. Swap the framework in [webapi_sanic.py](https://github.com/chenjiandongx/async-proxy-pool/blob/master/async_proxy_pool/webapi_sanic.py) (or webapi_flask.py).
312 | 2. Adjust how the app is started in [server_sanic.py](https://github.com/chenjiandongx/async-proxy-pool/blob/master/server_sanic.py) (or server_flask.py).
313 |
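Because the web layer only talks to `RedisClient`, a replacement can stay very small. Below is a minimal sketch using aiohttp.web (aiohttp is already a project dependency); the module name `webapi_aiohttp.py` and the subset of endpoints shown are illustrative, not part of the project.

```python
# webapi_aiohttp.py — a hypothetical, minimal alternative to webapi_sanic.py
from aiohttp import web

from async_proxy_pool.database import RedisClient
from async_proxy_pool.config import SERVER_HOST, SERVER_PORT

redis_conn = RedisClient()


async def index(request):
    return web.json_response({"Welcome": "This is a proxy pool system."})


async def count_all_proxies(request):
    return web.json_response({"count": str(redis_conn.count_all_proxies())})


app = web.Application()
app.add_routes([web.get("/", index), web.get("/count", count_all_proxies)])

if __name__ == "__main__":
    # The equivalent of server_sanic.py / server_flask.py for this sketch
    web.run_app(app, host=SERVER_HOST, port=SERVER_PORT)
```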
314 |
315 | ### Sanic Benchmarks
316 |
317 | The server was load-tested with [wrk](https://github.com/wg/wrk): 30-second benchmarks using 12 threads and 400 concurrent HTTP connections.
318 |
319 | Testing http://127.0.0.1:3289/pop
320 | ```bash
321 | $ wrk -t12 -c400 -d30s http://127.0.0.1:3289/pop
322 | Running 30s test @ http://127.0.0.1:3289/pop
323 | 12 threads and 400 connections
324 | Thread Stats Avg Stdev Max +/- Stdev
325 | Latency 350.37ms 118.99ms 660.41ms 60.94%
326 | Req/Sec 98.18 35.94 277.00 79.43%
327 | 33694 requests in 30.10s, 4.77MB read
328 | Socket errors: connect 0, read 340, write 0, timeout 0
329 | Requests/sec: 1119.44
330 | Transfer/sec: 162.23KB
331 | ```
332 |
333 | Testing http://127.0.0.1:3289/get/10
334 | ```bash
335 | Running 30s test @ http://127.0.0.1:3289/get/10
336 | 12 threads and 400 connections
337 | Thread Stats Avg Stdev Max +/- Stdev
338 | Latency 254.90ms 95.43ms 615.14ms 63.51%
339 | Req/Sec 144.84 61.52 320.00 66.58%
340 | 46538 requests in 30.10s, 22.37MB read
341 | Socket errors: connect 0, read 28, write 0, timeout 0
342 | Requests/sec: 1546.20
343 | Transfer/sec: 761.02KB
344 | ```
345 |
346 | Performance is quite decent. Next, testing http://127.0.0.1:3289/, which involves no Redis operations:
347 | ```bash
348 | $ wrk -t12 -c400 -d30s http://127.0.0.1:3289/
349 | Running 30s test @ http://127.0.0.1:3289/
350 | 12 threads and 400 connections
351 | Thread Stats Avg Stdev Max +/- Stdev
352 | Latency 127.86ms 41.71ms 260.69ms 55.22%
353 | Req/Sec 258.56 92.25 520.00 68.90%
354 | 92766 requests in 30.10s, 13.45MB read
355 | Requests/sec: 3081.87
356 | Transfer/sec: 457.47KB
357 | ```
358 | ⭐️ **Requests/sec: 3081.87**
359 |
360 | With sanic access logging disabled, testing http://127.0.0.1:3289/:
361 | ```bash
362 | $ wrk -t12 -c400 -d30s http://127.0.0.1:3289/
363 | Running 30s test @ http://127.0.0.1:3289/
364 | 12 threads and 400 connections
365 | Thread Stats Avg Stdev Max +/- Stdev
366 | Latency 34.63ms 12.66ms 96.28ms 58.07%
367 | Req/Sec 0.96k 137.29 2.21k 73.29%
368 | 342764 requests in 30.10s, 49.69MB read
369 | Requests/sec: 11387.89
370 | Transfer/sec: 1.65MB
371 | ```
372 | ⭐️ **Requests/sec: 11387.89**
373 |
374 |
375 | ### Real-World Proxy Tests
376 |
377 | [test_proxy.py](https://github.com/chenjiandongx/async-proxy-pool/blob/master/test/test_proxy.py) measures how the proxies perform in practice.
378 |
379 | #### Running the test
380 |
381 | ```bash
382 | $ cd test
383 | $ python test_proxy.py
384 |
385 | # Environment variables that can be set
386 | TEST_COUNT = os.environ.get("TEST_COUNT") or 1000
387 | TEST_WEBSITE = os.environ.get("TEST_WEBSITE") or "https://httpbin.org/"
388 | TEST_PROXIES = os.environ.get("TEST_PROXIES") or "http://localhost:3289/get/20"
389 | ```
390 |
391 | #### Results
392 |
393 | **https://httpbin.org/**
394 | ```
395 | Proxy source: http://localhost:3289/get/20
396 | Test website: https://httpbin.org/
397 | Test count: 1000
398 | Successes: 1000
399 | Failures: 0
400 | Success rate: 1.0
401 | ```
402 |
403 | **https://taobao.com**
404 | ```
405 | Proxy source: http://localhost:3289/get/20
406 | Test website: https://taobao.com/
407 | Test count: 1000
408 | Successes: 984
409 | Failures: 16
410 | Success rate: 0.984
411 | ```
412 |
413 | **https://baidu.com**
414 | ```
415 | Proxy source: http://localhost:3289/get/20
416 | Test website: https://baidu.com
417 | Test count: 1000
418 | Successes: 975
419 | Failures: 25
420 | Success rate: 0.975
421 | ```
422 |
423 | **https://zhihu.com**
424 | ```
425 | Proxy source: http://localhost:3289/get/20
426 | Test website: https://zhihu.com
427 | Test count: 1000
428 | Successes: 1000
429 | Failures: 0
430 | Success rate: 1.0
431 | ```
432 |
433 | As you can see, the proxies perform really well, with a very high success rate. 😉
434 |
435 |
436 | ### Usage Example
437 |
438 | ```python
439 | import random
440 |
441 | import requests
442 |
443 | # Make sure the sanic server is running
444 | # Fetch several proxies and pick one at random
445 |
446 | try:
447 | proxies = requests.get("http://localhost:3289/get/20").json()
448 | req = requests.get("https://example.com", proxies=random.choice(proxies))
449 | except:
450 | raise
451 |
452 | # Or pop a single proxy
453 |
454 | try:
455 | proxy = requests.get("http://localhost:3289/pop").json()
456 | req = requests.get("https://example.com", proxies=proxy)
457 | except:
458 | raise
459 | ```
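
One caveat, since the pool serves `http` proxies: requests only applies a proxy when the URL scheme matches a key in the `proxies` dict, so a dict like `{"http": ...}` is not used for an `https://` URL. test/test_proxy.py handles this by mirroring the proxy onto the `https` key, and you can do the same:

```python
# Route https:// requests through the pool's http proxies as well,
# mirroring what test/test_proxy.py does.
proxy = requests.get("http://localhost:3289/pop").json()
if "http" in proxy:
    proxy["https"] = proxy["http"]
req = requests.get("https://example.com", proxies=proxy)
```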
460 |
461 |
462 | ### An aiohttp Gotcha
463 |
464 | The whole project is built on the async networking library aiohttp, whose documentation describes proxy support like this:
465 |
466 | 
467 |
468 | **Key point: aiohttp supports HTTP/HTTPS proxies**
469 |
470 | Except it does not actually support https proxies at all; its source code says:
471 |
472 | 
473 |
474 | **Key point: Only http proxies are supported**
475 |
476 | My feelings about this are complicated. 😲 Still, having only http proxies works well enough and has little practical impact; see the test results above.
477 |
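For completeness, this is roughly how the project hands a proxy to aiohttp (see validator.py and utils.py): the proxy is passed as a plain `proxy=` URL, and because of the limitation above it must be an `http://` address. The target URL and proxy address below are placeholders.

```python
import asyncio

import aiohttp


async def fetch_via_proxy(url, proxy):
    # aiohttp accepts the proxy as a plain URL string; only http:// proxies work,
    # which is why the pool stores and serves proxies in that form.
    async with aiohttp.ClientSession() as session:
        async with session.get(url, proxy=proxy, timeout=15) as resp:
            return resp.status


status = asyncio.get_event_loop().run_until_complete(
    fetch_via_proxy("http://httpbin.org/ip", "http://127.0.0.1:8080")
)
print(status)
```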
478 |
479 | ### Related Projects
480 |
481 | ✨🍰✨
482 |
483 | * [ProxyPool](https://github.com/WiseDoge/ProxyPool)
484 | * [proxy_pool](https://github.com/jhao104/proxy_pool)
485 |
486 | ### License
487 |
488 | MIT [©chenjiandongx](https://github.com/chenjiandongx)
489 |
--------------------------------------------------------------------------------
/async_proxy_pool/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chenjiandongx/async-proxy-pool/b6869e39ab949700b90b84df58489c41f8d6e3e2/async_proxy_pool/__init__.py
--------------------------------------------------------------------------------
/async_proxy_pool/config.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding=utf-8
3 |
4 | # Request timeout (seconds)
5 | REQUEST_TIMEOUT = 15
6 | # Request delay (seconds)
7 | REQUEST_DELAY = 0
8 |
9 | # Redis host
10 | REDIS_HOST = "localhost"
11 | # Redis port
12 | REDIS_PORT = 6379
13 | # Redis password
14 | REDIS_PASSWORD = None
15 | # Redis key for the proxy sorted set
16 | REDIS_KEY = "proxies:ranking"
17 | # Maximum number of connections in the Redis connection pool
18 | REDIS_MAX_CONNECTION = 20
19 |
20 | # Maximum proxy score
21 | MAX_SCORE = 10
22 | # Minimum proxy score
23 | MIN_SCORE = 0
24 | # Initial proxy score
25 | INIT_SCORE = 9
26 |
27 | # server web host
28 | SERVER_HOST = "localhost"
29 | # server web port
30 | SERVER_PORT = 3289
31 | # Whether to enable access logging
32 | SERVER_ACCESS_LOG = True
33 |
34 | # Number of proxies validated per batch
35 | VALIDATOR_BATCH_COUNT = 256
36 | # Website used by the validator; change it to the site you actually want to crawl, e.g. Sina or Zhihu
37 | VALIDATOR_BASE_URL = "http://baidu.com"
38 | # Validator run cycle (minutes)
39 | VALIDATOR_RUN_CYCLE = 15
40 |
41 |
42 | # Crawler run cycle (minutes)
43 | CRAWLER_RUN_CYCLE = 30
44 | # Request headers
45 | HEADERS = {
46 | "X-Requested-With": "XMLHttpRequest",
47 | "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
48 | "(KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36",
49 | }
50 |
--------------------------------------------------------------------------------
/async_proxy_pool/crawler.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding=utf-8
3 |
4 | import re
5 |
6 | import pyquery
7 |
8 | from .utils import requests
9 | from .database import RedisClient
10 | from .logger import logger
11 |
12 |
13 | redis_conn = RedisClient()
14 | all_funcs = []
15 |
16 |
17 | def collect_funcs(func):
18 | """
19 |     Decorator that registers a crawl function.
20 | """
21 | all_funcs.append(func)
22 | return func
23 |
24 |
25 | class Crawler:
26 | @staticmethod
27 | def run():
28 | """
29 |         Start the crawler.
30 | """
31 | logger.info("Crawler working...")
32 | for func in all_funcs:
33 | for proxy in func():
34 | redis_conn.add_proxy(proxy)
35 | logger.info("Crawler √ {}".format(proxy))
36 | logger.info("Crawler resting...")
37 |
38 | @staticmethod
39 | @collect_funcs
40 | def crawl_66ip():
41 | """
42 |         66ip proxies: http://www.66ip.cn
43 | """
44 | url = (
45 | "http://www.66ip.cn/nmtq.php?getnum=100&isp=0"
46 | "&anonymoustype=0&area=0&proxytype={}&api=66ip"
47 | )
48 |         pattern = r"\d+\.\d+\.\d+\.\d+:\d+"
49 |
50 | items = [(0, "http://{}"), (1, "https://{}")]
51 | for item in items:
52 | proxy_type, host = item
53 | html = requests(url.format(proxy_type))
54 | if html:
55 | for proxy in re.findall(pattern, html):
56 | yield host.format(proxy)
57 |
58 | @staticmethod
59 | @collect_funcs
60 | def crawl_xici():
61 | """
62 |         Xici proxies: http://www.xicidaili.com
63 | """
64 | url = "http://www.xicidaili.com/{}"
65 |
66 | items = []
67 | for page in range(1, 21):
68 | items.append(("wt/{}".format(page), "http://{}:{}"))
69 | items.append(("wn/{}".format(page), "https://{}:{}"))
70 |
71 | for item in items:
72 | proxy_type, host = item
73 | html = requests(url.format(proxy_type))
74 | if html:
75 | doc = pyquery.PyQuery(html)
76 | for proxy in doc("table tr").items():
77 | ip = proxy("td:nth-child(2)").text()
78 | port = proxy("td:nth-child(3)").text()
79 | if ip and port:
80 | yield host.format(ip, port)
81 |
82 | @staticmethod
83 | @collect_funcs
84 | def crawl_kuaidaili():
85 | """
86 |         Kuaidaili proxies: https://www.kuaidaili.com
87 | """
88 | url = "https://www.kuaidaili.com/free/{}"
89 |
90 | items = ["inha/1/"]
91 | for proxy_type in items:
92 | html = requests(url.format(proxy_type))
93 | if html:
94 | doc = pyquery.PyQuery(html)
95 | for proxy in doc(".table-bordered tr").items():
96 | ip = proxy("[data-title=IP]").text()
97 | port = proxy("[data-title=PORT]").text()
98 | if ip and port:
99 | yield "http://{}:{}".format(ip, port)
100 |
101 | @staticmethod
102 | @collect_funcs
103 | def crawl_ip3366():
104 | """
105 |         Yun (ip3366) proxies: http://www.ip3366.net
106 | """
107 | url = "http://www.ip3366.net/?stype=1&page={}"
108 |
109 | items = [p for p in range(1, 8)]
110 | for page in items:
111 | html = requests(url.format(page))
112 | if html:
113 | doc = pyquery.PyQuery(html)
114 | for proxy in doc(".table-bordered tr").items():
115 | ip = proxy("td:nth-child(1)").text()
116 | port = proxy("td:nth-child(2)").text()
117 | schema = proxy("td:nth-child(4)").text()
118 | if ip and port and schema:
119 | yield "{}://{}:{}".format(schema.lower(), ip, port)
120 |
121 | @staticmethod
122 | @collect_funcs
123 | def crawl_data5u():
124 | """
125 |         Data5u proxies: http://www.data5u.com/
126 | """
127 | url = "http://www.data5u.com/"
128 |
129 | html = requests(url)
130 | if html:
131 | doc = pyquery.PyQuery(html)
132 | for index, item in enumerate(doc("li ul").items()):
133 | if index > 0:
134 | ip = item("span:nth-child(1)").text()
135 | port = item("span:nth-child(2)").text()
136 | schema = item("span:nth-child(4)").text()
137 | if ip and port and schema:
138 | yield "{}://{}:{}".format(schema, ip, port)
139 |
140 | @staticmethod
141 | @collect_funcs
142 | def crawl_iphai():
143 | """
144 |         IPhai proxies: http://www.iphai.com
145 | """
146 | url = "http://www.iphai.com/free/{}"
147 |
148 | items = ["ng", "np", "wg", "wp"]
149 | for proxy_type in items:
150 | html = requests(url.format(proxy_type))
151 | if html:
152 | doc = pyquery.PyQuery(html)
153 | for item in doc(".table-bordered tr").items():
154 | ip = item("td:nth-child(1)").text()
155 | port = item("td:nth-child(2)").text()
156 | schema = item("td:nth-child(4)").text().split(",")[0]
157 | if ip and port and schema:
158 | yield "{}://{}:{}".format(schema.lower(), ip, port)
159 |
160 | @staticmethod
161 | @collect_funcs
162 | def crawl_swei360():
163 | """
164 |         Swei360 proxies: http://www.swei360.com
165 | """
166 | url = "http://www.swei360.com/free/?stype={}"
167 |
168 | items = [p for p in range(1, 5)]
169 | for proxy_type in items:
170 | html = requests(url.format(proxy_type))
171 | if html:
172 | doc = pyquery.PyQuery(html)
173 | for item in doc(".table-bordered tr").items():
174 | ip = item("td:nth-child(1)").text()
175 | port = item("td:nth-child(2)").text()
176 | schema = item("td:nth-child(4)").text()
177 | if ip and port and schema:
178 | yield "{}://{}:{}".format(schema.lower(), ip, port)
179 |
180 |
181 | crawler = Crawler()
182 |
--------------------------------------------------------------------------------
/async_proxy_pool/database.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding=utf-8
3 |
4 | import random
5 |
6 | import redis
7 |
8 | from .config import (
9 | REDIS_KEY,
10 | REDIS_PORT,
11 | REDIS_PASSWORD,
12 | REDIS_HOST,
13 | REDIS_MAX_CONNECTION,
14 | MAX_SCORE,
15 | MIN_SCORE,
16 | INIT_SCORE,
17 | )
18 |
19 |
20 | class RedisClient:
21 | """
22 |     The proxy pool relies on Redis and uses its sorted-set data structure
23 |     (members can be ranked by score and must be unique).
24 | """
25 |
26 | def __init__(self, host=REDIS_HOST, port=REDIS_PORT, password=REDIS_PASSWORD):
27 | conn_pool = redis.ConnectionPool(
28 | host=host,
29 | port=port,
30 | password=password,
31 | max_connections=REDIS_MAX_CONNECTION,
32 | )
33 | self.redis = redis.Redis(connection_pool=conn_pool)
34 |
35 | def add_proxy(self, proxy, score=INIT_SCORE):
36 | """
37 |         Add a proxy with an initial score of INIT_SCORE < MAX_SCORE. This ensures
38 |         that a proxy fetched right after the crawler runs, before the validator has run,
39 |         is not reported with MAX_SCORE while actually being unverified and possibly unusable.
40 |
41 |         :param proxy: proxy to add
42 |         :param score: initial score
43 | """
44 | if not self.redis.zscore(REDIS_KEY, proxy):
45 | self.redis.zadd(REDIS_KEY, proxy, score)
46 |
47 | def reduce_proxy_score(self, proxy):
48 | """
49 |         Validation failed: decrease the score by one.
50 |
51 |         :param proxy: the proxy that was validated
52 | """
53 | score = self.redis.zscore(REDIS_KEY, proxy)
54 | if score and score > MIN_SCORE:
55 | self.redis.zincrby(REDIS_KEY, proxy, -1)
56 | else:
57 | self.redis.zrem(REDIS_KEY, proxy)
58 |
59 | def increase_proxy_score(self, proxy):
60 | """
61 |         Validation passed: increase the score by one.
62 |
63 |         :param proxy: the proxy that was validated
64 | """
65 | score = self.redis.zscore(REDIS_KEY, proxy)
66 | if score and score < MAX_SCORE:
67 | self.redis.zincrby(REDIS_KEY, proxy, 1)
68 |
69 | def pop_proxy(self):
70 | """
71 |         Return a single proxy.
72 | """
73 |         # First try: proxies with the top score, i.e. the most recently validated ones
74 | first_chance = self.redis.zrangebyscore(REDIS_KEY, MAX_SCORE, MAX_SCORE)
75 | if first_chance:
76 | return random.choice(first_chance)
77 |
78 | else:
79 |             # Second try: any proxy scored between MAX_SCORE - 3 and MAX_SCORE
80 | second_chance = self.redis.zrangebyscore(
81 | REDIS_KEY, MAX_SCORE - 3, MAX_SCORE
82 | )
83 | if second_chance:
84 | return random.choice(second_chance)
85 |             # Last resort: any proxy at all
86 | else:
87 | last_chance = self.redis.zrangebyscore(REDIS_KEY, MIN_SCORE, MAX_SCORE)
88 | if last_chance:
89 | return random.choice(last_chance)
90 |
91 | def get_proxies(self, count=1):
92 | """
93 |         Return the specified number of proxies, sorted by score from high to low.
94 |
95 |         :param count: number of proxies
96 | """
97 | proxies = self.redis.zrevrange(REDIS_KEY, 0, count - 1)
98 | for proxy in proxies:
99 | yield proxy.decode("utf-8")
100 |
101 | def count_all_proxies(self):
102 | """
103 |         Return the total number of proxies.
104 | """
105 | return self.redis.zcard(REDIS_KEY)
106 |
107 | def count_score_proxies(self, score):
108 | """
109 |         Return the number of proxies with the specified score.
110 |
111 |         :param score: proxy score
112 | """
113 | if 0 <= score <= 10:
114 | proxies = self.redis.zrangebyscore(REDIS_KEY, score, score)
115 | return len(proxies)
116 | return -1
117 |
118 | def clear_proxies(self, score):
119 | """
120 |         Delete all proxies with a score less than or equal to the given score.
121 | """
122 | if 0 <= score <= 10:
123 | proxies = self.redis.zrangebyscore(REDIS_KEY, 0, score)
124 | for proxy in proxies:
125 | self.redis.zrem(REDIS_KEY, proxy)
126 | return True
127 | return False
128 |
129 | def all_proxies(self):
130 | """
131 |         Return all proxies.
132 | """
133 | return self.redis.zrangebyscore(REDIS_KEY, MIN_SCORE, MAX_SCORE)
134 |
--------------------------------------------------------------------------------
/async_proxy_pool/logger.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding=utf-8
3 |
4 | import logging
5 |
6 |
7 | def get_logger():
8 | """
9 |     Create the logger instance.
10 | """
11 | formatter = logging.Formatter("%(asctime)s - %(message)s")
12 | logger = logging.getLogger("monitor")
13 | logger.setLevel(logging.INFO)
14 |
15 | ch = logging.StreamHandler()
16 | ch.setFormatter(formatter)
17 | logger.addHandler(ch)
18 | return logger
19 |
20 |
21 | logger = get_logger()
22 |
--------------------------------------------------------------------------------
/async_proxy_pool/scheduler.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding=utf-8
3 |
4 | import time
5 |
6 | import schedule
7 |
8 | from .config import CRAWLER_RUN_CYCLE, VALIDATOR_RUN_CYCLE
9 |
10 | from .crawler import crawler
11 | from .validator import validator
12 | from .logger import logger
13 |
14 |
15 | def run_schedule():
16 | """
17 |     Start the client (crawler and validator scheduling loop).
18 | """
19 |     # Start the crawler
20 | schedule.every(CRAWLER_RUN_CYCLE).minutes.do(crawler.run).run()
21 |     # Start the validator
22 | schedule.every(VALIDATOR_RUN_CYCLE).minutes.do(validator.run).run()
23 |
24 | while True:
25 | try:
26 | schedule.run_pending()
27 | time.sleep(1)
28 | except KeyboardInterrupt:
29 | logger.info("You have canceled all jobs")
30 | return
31 |
--------------------------------------------------------------------------------
/async_proxy_pool/utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding=utf-8
3 |
4 | import asyncio
5 |
6 | import aiohttp
7 |
8 | from .config import HEADERS, REQUEST_TIMEOUT, REQUEST_DELAY
9 |
10 |
11 | LOOP = asyncio.get_event_loop()
12 |
13 |
14 | async def _get_page(url, sleep):
15 | """
16 |     Fetch and return the page content.
17 | """
18 | async with aiohttp.ClientSession() as session:
19 | try:
20 | await asyncio.sleep(sleep)
21 | async with session.get(
22 | url, headers=HEADERS, timeout=REQUEST_TIMEOUT
23 | ) as resp:
24 | return await resp.text()
25 | except:
26 | return ""
27 |
28 |
29 | def requests(url, sleep=REQUEST_DELAY):
30 | """
31 |     Request helper used to fetch page content.
32 |
33 |     :param url: request URL
34 |     :param sleep: delay time (seconds)
35 | """
36 | html = LOOP.run_until_complete(asyncio.gather(_get_page(url, sleep)))
37 | if html:
38 | return "".join(html)
39 |
--------------------------------------------------------------------------------
/async_proxy_pool/validator.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding=utf-8
3 |
4 | import os
5 | import asyncio
6 |
7 | import aiohttp
8 |
9 | from .config import VALIDATOR_BASE_URL, VALIDATOR_BATCH_COUNT, REQUEST_TIMEOUT
10 | from .logger import logger
11 | from .database import RedisClient
12 |
13 |
14 | VALIDATOR_BASE_URL = os.environ.get("VALIDATOR_BASE_URL") or VALIDATOR_BASE_URL
15 |
16 |
17 | class Validator:
18 | def __init__(self):
19 | self.redis = RedisClient()
20 |
21 | async def test_proxy(self, proxy):
22 | """
23 |         Test a proxy.
24 |
25 |         :param proxy: proxy to test
26 | """
27 | async with aiohttp.ClientSession() as session:
28 | try:
29 | if isinstance(proxy, bytes):
30 | proxy = proxy.decode("utf8")
31 | async with session.get(
32 | VALIDATOR_BASE_URL, proxy=proxy, timeout=REQUEST_TIMEOUT
33 | ) as resp:
34 | if resp.status == 200:
35 | self.redis.increase_proxy_score(proxy)
36 | logger.info("Validator √ {}".format(proxy))
37 | else:
38 | self.redis.reduce_proxy_score(proxy)
39 | logger.info("Validator × {}".format(proxy))
40 | except:
41 | self.redis.reduce_proxy_score(proxy)
42 | logger.info("Validator × {}".format(proxy))
43 |
44 | def run(self):
45 | """
46 |         Start the validator.
47 | """
48 | logger.info("Validator working...")
49 | logger.info("Validator website is {}".format(VALIDATOR_BASE_URL))
50 | proxies = self.redis.all_proxies()
51 | loop = asyncio.get_event_loop()
52 | for i in range(0, len(proxies), VALIDATOR_BATCH_COUNT):
53 | _proxies = proxies[i : i + VALIDATOR_BATCH_COUNT]
54 | tasks = [self.test_proxy(proxy) for proxy in _proxies]
55 | if tasks:
56 | loop.run_until_complete(asyncio.wait(tasks))
57 | logger.info("Validator resting...")
58 |
59 |
60 | validator = Validator()
61 |
--------------------------------------------------------------------------------
/async_proxy_pool/webapi_flask.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding=utf-8
3 |
4 | from flask import Flask, jsonify
5 | from async_proxy_pool.database import RedisClient
6 |
7 | app = Flask(__name__)
8 | redis_conn = RedisClient()
9 |
10 |
11 | @app.route("/")
12 | def index():
13 | return jsonify({"Welcome": "This is a proxy pool system."})
14 |
15 |
16 | @app.route("/pop")
17 | def pop_proxy():
18 | proxy = redis_conn.pop_proxy().decode("utf8")
19 | if proxy[:5] == "https":
20 | return jsonify({"https": proxy})
21 | else:
22 | return jsonify({"http": proxy})
23 |
24 |
25 | @app.route("/get/<int:count>")
26 | def get_proxy(count):
27 | res = []
28 | for proxy in redis_conn.get_proxies(count):
29 | if proxy[:5] == "https":
30 | res.append({"https": proxy})
31 | else:
32 | res.append({"http": proxy})
33 | return jsonify(res)
34 |
35 |
36 | @app.route("/count")
37 | def count_all_proxies():
38 | count = redis_conn.count_all_proxies()
39 | return jsonify({"count": str(count)})
40 |
41 |
42 | @app.route("/count/<int:score>")
43 | def count_score_proxies(score):
44 | count = redis_conn.count_score_proxies(score)
45 | return jsonify({"count": str(count)})
46 |
47 |
48 | @app.route("/clear/<int:score>")
49 | def clear_proxies(score):
50 | if redis_conn.clear_proxies(score):
51 | return jsonify({"Clear": "Successful"})
52 | return jsonify({"Clear": "Score should >= 0 and <= 10"})
53 |
--------------------------------------------------------------------------------
/async_proxy_pool/webapi_sanic.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding=utf-8
3 |
4 | from sanic import Sanic
5 | from sanic.response import json
6 |
7 | from async_proxy_pool.database import RedisClient
8 |
9 | app = Sanic()
10 | redis_conn = RedisClient()
11 |
12 |
13 | @app.route("/")
14 | async def index(request):
15 | return json({"Welcome": "This is a proxy pool system."})
16 |
17 |
18 | @app.route("/pop")
19 | async def pop_proxy(request):
20 | proxy = redis_conn.pop_proxy().decode("utf8")
21 | if proxy[:5] == "https":
22 | return json({"https": proxy})
23 | else:
24 | return json({"http": proxy})
25 |
26 |
27 | @app.route("/get/<count:int>")
28 | async def get_proxy(request, count):
29 | res = []
30 | for proxy in redis_conn.get_proxies(count):
31 | if proxy[:5] == "https":
32 | res.append({"https": proxy})
33 | else:
34 | res.append({"http": proxy})
35 | return json(res)
36 |
37 |
38 | @app.route("/count")
39 | async def count_all_proxies(request):
40 | count = redis_conn.count_all_proxies()
41 | return json({"count": str(count)})
42 |
43 |
44 | @app.route("/count/<score:int>")
45 | async def count_score_proxies(request, score):
46 | count = redis_conn.count_score_proxies(score)
47 | return json({"count": str(count)})
48 |
49 |
50 | @app.route("/clear/<score:int>")
51 | async def clear_proxies(request, score):
52 | if redis_conn.clear_proxies(score):
53 | return json({"Clear": "Successful"})
54 | return json({"Clear": "Score should >= 0 and <= 10"})
55 |
--------------------------------------------------------------------------------
/client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding=utf-8
3 |
4 | from async_proxy_pool.scheduler import run_schedule
5 |
6 |
7 | run_schedule()
8 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | redis<=2.10.6
2 | aiohttp
3 | schedule
4 | pyquery
5 | requests
6 | flask
7 | sanic;sys_platform!='win32'
8 |
--------------------------------------------------------------------------------
/server_flask.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding=utf-8
3 |
4 | from async_proxy_pool.webapi_flask import app
5 | from async_proxy_pool.config import SERVER_HOST, SERVER_PORT, SERVER_ACCESS_LOG
6 |
7 | # Start the Flask server app
8 | app.run(host=SERVER_HOST, port=SERVER_PORT, debug=SERVER_ACCESS_LOG)
9 |
--------------------------------------------------------------------------------
/server_sanic.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding=utf-8
3 |
4 | from async_proxy_pool.webapi_sanic import app
5 | from async_proxy_pool.config import SERVER_HOST, SERVER_PORT, SERVER_ACCESS_LOG
6 |
7 | # Start the Sanic server app
8 | app.run(host=SERVER_HOST, port=SERVER_PORT, access_log=SERVER_ACCESS_LOG)
9 |
--------------------------------------------------------------------------------
/test/test_proxy.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding=utf-8
3 |
4 | import os
5 | import random
6 | from concurrent.futures import ThreadPoolExecutor
7 |
8 | import requests
9 |
10 |
11 | HEADERS = {
12 | "X-Requested-With": "XMLHttpRequest",
13 | "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
14 | "(KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36",
15 | }
16 |
17 | SUCCESS = 0
18 | FAIL = 0
19 | TIMEOUT = 15
20 |
21 | TEST_COUNT = os.environ.get("TEST_COUNT") or 1000
22 | TEST_WEBSITE = os.environ.get("TEST_WEBSITE") or "https://zhihu.com"
23 | TEST_PROXIES = os.environ.get("TEST_PROXIES") or "http://localhost:3289/get/20"
24 |
25 |
26 | def get_proxies():
27 | _proxies = requests.get(TEST_PROXIES, timeout=TIMEOUT).json()
28 | for proxy in _proxies:
29 | if "http" in proxy.keys():
30 | proxy["https"] = proxy["http"]
31 | return _proxies
32 |
33 |
34 | def test_one_proxy(proxy):
35 | global SUCCESS, FAIL
36 | try:
37 | req = requests.get(
38 | TEST_WEBSITE, proxies=proxy, timeout=TIMEOUT, headers=HEADERS
39 | )
40 | if req.status_code == 200:
41 | SUCCESS += 1
42 | else:
43 | FAIL += 1
44 | except:
45 | FAIL += 1
46 |
47 |
48 | if __name__ == "__main__":
49 | proxies = get_proxies()
50 | tasks = [random.choice(proxies) for _ in range(int(TEST_COUNT))]
51 | with ThreadPoolExecutor(max_workers=64) as executor:
52 | executor.map(test_one_proxy, tasks)
53 |     print("Proxy source:", TEST_PROXIES)
54 |     print("Test website:", TEST_WEBSITE)
55 |     print("Test count:", TEST_COUNT)
56 |     print("Successes:", SUCCESS)
57 |     print("Failures:", FAIL)
58 |     print("Success rate:", SUCCESS / int(TEST_COUNT))
59 |
--------------------------------------------------------------------------------