├── 1024 ├── requestments.txt └── new1024spider.py ├── 91user ├── config │ ├── __init__.py │ └── uids.py ├── run.py └── user.py ├── kuaishou ├── config │ ├── __init__.py │ ├── user.py │ └── ua_mobile.txt ├── lib │ ├── __init__.py │ └── crawler.py ├── run.py ├── ks_down.py └── ks_video.py ├── proxy_pool ├── _config.yml ├── cli │ ├── start.sh │ └── proxyPool.py ├── .travis.yml ├── requirements.txt ├── doc │ ├── block_ips.md │ ├── release_notes.md │ └── introduce.md ├── docker-compose.yml ├── Config │ ├── __init__.py │ ├── ConfigGetter.py │ └── setting.py ├── DB │ ├── __init__.py │ ├── MongodbClient.py │ ├── DbClient.py │ ├── RedisClient.py │ └── SsdbClient.py ├── Dockerfile ├── ProxyGetter │ ├── __init__.py │ ├── CheckProxy.py │ └── getFreeProxy.py ├── Test │ ├── __init__.py │ ├── testProxyClass.py │ ├── testWebRequest.py │ ├── testConfig.py │ ├── testLogHandler.py │ └── testGetFreeProxy.py ├── __init__.py ├── Api │ ├── __init__.py │ └── ProxyApi.py ├── Manager │ ├── __init__.py │ └── ProxyManager.py ├── test.py ├── ProxyHelper │ ├── __init__.py │ ├── ProxyUtil.py │ └── Proxy.py ├── Util │ ├── __init__.py │ ├── utilClass.py │ ├── utilFunction.py │ ├── LogHandler.py │ └── WebRequest.py ├── Schedule │ ├── __init__.py │ ├── ProxyScheduler.py │ ├── UsefulProxyCheck.py │ └── RawProxyCheck.py ├── LICENSE └── README.md ├── requirement.txt ├── README.md ├── LICENSE ├── qicai ├── qicai_top50.py └── QicaiCategoriesSpider.py ├── umei └── app.py ├── cmanuf └── download.py ├── cableav.py ├── baiduMap └── baiduMap.py ├── yasee1 └── run.py └── tuao8 └── crawler.py /91user/config/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /kuaishou/config/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /kuaishou/lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /proxy_pool/_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-time-machine -------------------------------------------------------------------------------- /proxy_pool/cli/start.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python proxyPool.py webserver & 3 | python proxyPool.py schedule -------------------------------------------------------------------------------- /1024/requestments.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4==4.6.3 2 | certifi==2022.12.7 3 | chardet==3.0.4 4 | idna==2.7 5 | lxml==4.9.1 6 | requests==2.20.1 7 | urllib3==1.26.5 8 | -------------------------------------------------------------------------------- /91user/config/uids.py: -------------------------------------------------------------------------------- 1 | USERS_UID = [ 2 | '3637DMj5U2Y7YRyzO9oivHdmcoRn6Cz38oR7yh9jrTonY4AM', 3 | '787cUGTgFxeUcKp9wAODVVRi35IDVLjNjygNSkyXcSZfdfmZ' 4 | ] -------------------------------------------------------------------------------- /requirement.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4==4.6.3 2 | certifi==2022.12.7 3 | chardet==3.0.4 4 | idna==2.7 5 | lxml==4.9.1 6 | pymongo==3.7.2 7 | requests==2.20.0 8 | 
urllib3==1.26.5 9 | -------------------------------------------------------------------------------- /proxy_pool/.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - 2.7 4 | # - nightly 5 | os: 6 | - linux 7 | install: 8 | - pip install -r requirements.txt 9 | 10 | script: python test.py -------------------------------------------------------------------------------- /proxy_pool/requirements.txt: -------------------------------------------------------------------------------- 1 | APScheduler==3.2.0 2 | werkzeug==2.2.3 3 | Flask==1.0 4 | requests==2.20.0 5 | lxml==4.9.1 6 | PyExecJS==1.5.1 7 | click==7.0 8 | gunicorn==19.9.0 9 | pymongo 10 | redis 11 | -------------------------------------------------------------------------------- /proxy_pool/doc/block_ips.md: -------------------------------------------------------------------------------- 1 | | block IP | block 日期 | msg | 2 | | ----- | ---- | -------- | 3 | | 144.52.45.149 | 20190815 | 恶意访问 | 4 | | 39.100.153.226 | 20190816 | 恶意访问 | 5 | | 47.102.47.42 | 20190819 | 恶意访问 | 6 | | 125.71.211.125 | 20190820 | 恶意访问 | 7 | 8 | 如需正常访问请提issues说明 9 | -------------------------------------------------------------------------------- /proxy_pool/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '2' 2 | services: 3 | proxy_pool: 4 | build: . 5 | ports: 6 | - "5010:5010" 7 | links: 8 | - proxy_redis 9 | environment: 10 | db_type: SSDB 11 | ssdb_host: proxy_redis 12 | ssdb_port: 6379 13 | proxy_redis: 14 | image: "redis" 15 | -------------------------------------------------------------------------------- /proxy_pool/Config/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ------------------------------------------------- 4 | File Name: __init__ 5 | Description : 6 | Author : JHao 7 | date: 2019/2/15 8 | ------------------------------------------------- 9 | Change Activity: 10 | 2019/2/15: 11 | ------------------------------------------------- 12 | """ 13 | -------------------------------------------------------------------------------- /proxy_pool/DB/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ------------------------------------------------- 4 | File Name: __init__.py.py 5 | Description : 6 | Author : JHao 7 | date: 2016/12/2 8 | ------------------------------------------------- 9 | Change Activity: 10 | 2016/12/2: 11 | ------------------------------------------------- 12 | """ -------------------------------------------------------------------------------- /proxy_pool/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.6 2 | 3 | MAINTAINER jhao104 4 | 5 | ENV TZ Asia/Shanghai 6 | 7 | WORKDIR /usr/src/app 8 | 9 | COPY ./requirements.txt . 10 | 11 | RUN pip install --no-cache-dir -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple/ 12 | 13 | COPY . . 
14 | 15 | EXPOSE 5010 16 | 17 | WORKDIR /usr/src/app/cli 18 | 19 | ENTRYPOINT [ "sh", "start.sh" ] 20 | -------------------------------------------------------------------------------- /proxy_pool/ProxyGetter/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ------------------------------------------------- 4 | File Name: __init__.py.py 5 | Description : 6 | Author : JHao 7 | date: 2016/11/25 8 | ------------------------------------------------- 9 | Change Activity: 10 | 2016/11/25: 11 | ------------------------------------------------- 12 | """ -------------------------------------------------------------------------------- /proxy_pool/Test/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ------------------------------------------------- 4 | File Name: __init__ 5 | Description : 6 | Author : JHao 7 | date: 2019/2/15 8 | ------------------------------------------------- 9 | Change Activity: 10 | 2019/2/15: 11 | ------------------------------------------------- 12 | """ 13 | __author__ = 'JHao' -------------------------------------------------------------------------------- /proxy_pool/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ------------------------------------------------- 4 | File Name: __init__.py 5 | Description : 6 | Author : JHao 7 | date: 2016/12/3 8 | ------------------------------------------------- 9 | Change Activity: 10 | 2016/12/3: 11 | ------------------------------------------------- 12 | """ 13 | __author__ = 'JHao' -------------------------------------------------------------------------------- /proxy_pool/Api/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ------------------------------------------------- 4 | File Name: __init__.py 5 | Description : 6 | Author : JHao 7 | date: 2016/12/3 8 | ------------------------------------------------- 9 | Change Activity: 10 | 2016/12/3: 11 | ------------------------------------------------- 12 | """ 13 | __author__ = 'JHao' 14 | 15 | -------------------------------------------------------------------------------- /proxy_pool/Manager/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ------------------------------------------------- 4 | File Name: __init__.py.py 5 | Description : 6 | Author : JHao 7 | date: 2016/12/3 8 | ------------------------------------------------- 9 | Change Activity: 10 | 2016/12/3: 11 | ------------------------------------------------- 12 | """ 13 | __author__ = 'JHao' 14 | 15 | from Manager.ProxyManager import ProxyManager 16 | -------------------------------------------------------------------------------- /proxy_pool/test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ------------------------------------------------- 4 | File Name: test.py 5 | Description : 6 | Author : JHao 7 | date: 2017/3/7 8 | ------------------------------------------------- 9 | Change Activity: 10 | 2017/3/7: 11 | ------------------------------------------------- 12 | """ 13 | __author__ = 'JHao' 14 | 15 | from Test import testConfig 16 | 17 | if __name__ == '__main__': 18 | testConfig.testConfig() 19 | 
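# Editor's note: an illustrative sketch only, not part of the original script. The
# Test package shown elsewhere in this repo also provides testWebRequest,
# testLogHandler and testProxyClass, so a fuller smoke test could reuse them:
def _run_all_tests():  # hypothetical helper; the script above only calls testConfig
    from Test import testWebRequest, testLogHandler
    testConfig.testConfig()
    testWebRequest.testWebRequest()
    testLogHandler.testLogHandler()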
-------------------------------------------------------------------------------- /proxy_pool/ProxyHelper/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ------------------------------------------------- 4 | File Name: __init__.py 5 | Description : 6 | Author : JHao 7 | date: 2019/7/11 8 | ------------------------------------------------- 9 | Change Activity: 10 | 2019/7/11: 11 | ------------------------------------------------- 12 | """ 13 | __author__ = 'JHao' 14 | 15 | from ProxyHelper.Proxy import Proxy 16 | from ProxyHelper.ProxyUtil import checkProxyUseful 17 | -------------------------------------------------------------------------------- /proxy_pool/Util/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ------------------------------------------------- 4 | File Name: __init__.py.py 5 | Description : 6 | Author : JHao 7 | date: 2016/11/25 8 | ------------------------------------------------- 9 | Change Activity: 10 | 2016/11/25: 11 | ------------------------------------------------- 12 | """ 13 | 14 | from Util.utilFunction import validUsefulProxy 15 | from Util.LogHandler import LogHandler 16 | from Util.utilClass import Singleton 17 | -------------------------------------------------------------------------------- /proxy_pool/Schedule/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ------------------------------------------------- 4 | File Name: __init__.py.py 5 | Description : 6 | Author : JHao 7 | date: 2016/12/3 8 | ------------------------------------------------- 9 | Change Activity: 10 | 2016/12/3: 11 | ------------------------------------------------- 12 | """ 13 | __author__ = 'JHao' 14 | 15 | from Schedule.RawProxyCheck import doRawProxyCheck 16 | from Schedule.UsefulProxyCheck import doUsefulProxyCheck 17 | -------------------------------------------------------------------------------- /kuaishou/run.py: -------------------------------------------------------------------------------- 1 | from lib.crawler import Kuaishou 2 | from time import sleep 3 | from config.user import users 4 | 5 | file_list = [] #创建一个空列表 6 | def out_file(input_file,out_file): 7 | with open(input_file, "r") as f: 8 | file_2 = f.readlines() 9 | for file in file_2: 10 | file_list.append(file) 11 | out_file1 = set(file_list) #set()函数可以自动过滤掉重复元素 12 | last_out_file = list(out_file1) 13 | for out in last_out_file: 14 | with open(out_file,"a+",encoding="utf-8") as f: #去重后文件写入文件里 15 | f.write(out) 16 | print(out) 17 | 18 | def run(): 19 | app = Kuaishou() 20 | for i in users: 21 | app.setUid(i) 22 | sleep(10) 23 | 24 | out_file('data/data.txt', 'data.txt') 25 | 26 | if __name__ == '__main__': 27 | run() 28 | -------------------------------------------------------------------------------- /proxy_pool/Test/testProxyClass.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ------------------------------------------------- 4 | File Name: testProxyClass 5 | Description : 6 | Author : JHao 7 | date: 2019/8/8 8 | ------------------------------------------------- 9 | Change Activity: 10 | 2019/8/8: 11 | ------------------------------------------------- 12 | """ 13 | __author__ = 'JHao' 14 | 15 | import json 16 | from ProxyHelper import Proxy 17 | 18 | 19 | def testProxyClass(): 20 | proxy = 
Proxy("127.0.0.1:8080") 21 | 22 | print(proxy.info_dict) 23 | 24 | proxy.source = "test" 25 | 26 | proxy_str = json.dumps(proxy.info_dict, ensure_ascii=False) 27 | 28 | print(proxy_str) 29 | 30 | print(Proxy.newProxyFromJson(proxy_str).info_dict) 31 | 32 | 33 | testProxyClass() 34 | -------------------------------------------------------------------------------- /kuaishou/config/user.py: -------------------------------------------------------------------------------- 1 | temp = [ 2 | 'qiuqiuya0708', 3 | 'y0485201314', 4 | 'aiwo33442528', 5 | 'hellowuzi', 6 | 'lg25802468', 7 | 'flxiaohuxian520', 8 | 'MB667890', 9 | '3xbyb7qjchwgeza', 10 | 'xiaoyun2121', 11 | 'mdxiangbei', 12 | 'dagouxingzuo', 13 | 'dear521_', 14 | 'sunyongfei', 15 | 'jin970608', 16 | 'Zr520976', 17 | '3xmknin32j59p9w', 18 | '3xynx4v3d3yjnxc', 19 | 'xy15705818104', 20 | 'xue888881', 21 | '3x39f99nqet3m9e', 22 | 'HTMB20201212', 23 | 'kuailexiaoni', 24 | 'TTai569-', 25 | 'Sd543318617', 26 | '3xtt8swc7idnnb9', 27 | '3xiepavgtpfasxa', 28 | 'jzwnh666', 29 | '3xi7ts3hndvw83g', 30 | 'xy5201314', 31 | 'C130748359', 32 | ] 33 | 34 | users = [ 35 | 'flxiaohuxian520', 36 | 'xiaoyun2121', 37 | 'dagouxingzuo', 38 | 'hellowuzi', 39 | ] -------------------------------------------------------------------------------- /proxy_pool/Test/testWebRequest.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ------------------------------------------------- 4 | File Name: testWebRequest 5 | Description : test class WebRequest 6 | Author : J_hao 7 | date: 2017/7/31 8 | ------------------------------------------------- 9 | Change Activity: 10 | 2017/7/31: function testWebRequest 11 | ------------------------------------------------- 12 | """ 13 | __author__ = 'J_hao' 14 | 15 | from Util.WebRequest import WebRequest 16 | 17 | 18 | # noinspection PyPep8Naming 19 | def testWebRequest(): 20 | """ 21 | test class WebRequest in Util/WebRequest.py 22 | :return: 23 | """ 24 | wr = WebRequest() 25 | request_object = wr.get('https://www.baidu.com/') 26 | assert request_object.status_code == 200 27 | 28 | 29 | if __name__ == '__main__': 30 | testWebRequest() 31 | -------------------------------------------------------------------------------- /proxy_pool/Test/testConfig.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ------------------------------------------------- 4 | File Name: testGetConfig 5 | Description : testGetConfig 6 | Author : J_hao 7 | date: 2017/7/31 8 | ------------------------------------------------- 9 | Change Activity: 10 | 2017/7/31: 11 | ------------------------------------------------- 12 | """ 13 | __author__ = 'J_hao' 14 | 15 | from Config.ConfigGetter import config 16 | 17 | 18 | # noinspection PyPep8Naming 19 | def testConfig(): 20 | """ 21 | :return: 22 | """ 23 | print(config.db_type) 24 | print(config.db_name) 25 | print(config.db_host) 26 | print(config.db_port) 27 | print(config.db_password) 28 | assert isinstance(config.proxy_getter_functions, list) 29 | print(config.proxy_getter_functions) 30 | 31 | 32 | if __name__ == '__main__': 33 | testConfig() 34 | -------------------------------------------------------------------------------- /proxy_pool/Test/testLogHandler.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ------------------------------------------------- 4 | File Name: testLogHandler 5 | Description : 
6 | Author : J_hao 7 | date: 2017/8/2 8 | ------------------------------------------------- 9 | Change Activity: 10 | 2017/8/2: 11 | ------------------------------------------------- 12 | """ 13 | __author__ = 'J_hao' 14 | 15 | from Util.LogHandler import LogHandler 16 | 17 | 18 | # noinspection PyPep8Naming 19 | def testLogHandler(): 20 | """ 21 | test function LogHandler in Util/LogHandler 22 | :return: 23 | """ 24 | log = LogHandler('test') 25 | log.info('this is a log from test') 26 | 27 | log.resetName(name='test1') 28 | log.info('this is a log from test1') 29 | 30 | log.resetName(name='test2') 31 | log.info('this is a log from test2') 32 | 33 | 34 | if __name__ == '__main__': 35 | testLogHandler() 36 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## 废话连篇 2 | 3 | 4 | 爬虫自给自足 5 | 6 | 使用Python3完成 7 | 8 | **注:爬虫大多具有时效性,所以早期上传的不一定能用** 9 | 10 | 11 | 12 | 这个readme我也是写了又删,删了又写。曾经一度不想更新(害,主要是懒) 13 | 14 | 现在更新这篇也是单纯的因为太闲了。 15 | 16 | 17 | 18 | 19 | 20 | 21 | ## 依赖包 22 | 23 | 有的可能需要以下包,加粗表示必须滴(技术太菜,只能依赖这些包来搞一搞) 24 | 25 | 26 | - **requests** 27 | 28 | - **Beautifulsoup4** 29 | 30 | - pymongo 31 | 32 | - fake_UserAgent 33 | 34 | - pymysql 35 | 36 | 37 | 38 | ## 目录 39 | 40 | - **1024**: 数字社区的图片 41 | 42 | - **baiduMap**: 简单调用百度地图的api完成区域类的信息检索,需要用到开发账号 43 | 44 | - **cmanuf**:机械工业出版社的pdf下载?**烂尾,bug太多,不修了** 45 | 46 | - ~~**novel**:盗版小说的爬虫...存储到数据库中~~ 47 | 48 | - **qicai**:七彩英语(英文原著)的PDF下载 49 | 50 | - **umei**: 批量下载图片 51 | 52 | - **kuaishou**: 关键词:快手、无水印、解析、下载 53 | 54 | - ~~**yasee1**:网站倒闭了~~ 55 | 56 | - **proxy_pool**:代理池源自[jhao104/proxy_pool](https://github.com/jhao104/proxy_pool/) 57 | 58 | - **tuao8**: 一个小姐姐的图片下载爬虫 59 | 60 | - **91user:** 传入UID解析视频m3u8播放链接 61 | 62 | - **cableav.py** 一个神奇的网站。传入列表页,解析M3U8地址并存储在本地 63 | 64 | -------------------------------------------------------------------------------- /proxy_pool/doc/release_notes.md: -------------------------------------------------------------------------------- 1 | ## Release Notes 2 | 3 | * master 4 | 5 | 1. 新增免费代理源 `西拉代理` (2020-03-30) 6 | 7 | * 2.0.1 8 | 9 | 1. 新增免费代理源 `89免费代理`; 10 | 2. 新增免费代理源 `齐云代理` 11 | 12 | * 2.0.0 (201908) 13 | 14 | 1. WebApi集成Gunicorn方式启动, Windows平台暂不支持; 15 | 2. 优化Proxy调度程序; 16 | 3. 扩展Proxy属性; 17 | 4. 提供cli工具, 更加方便启动proxyPool 18 | 19 | * 1.14 (2019.07) 20 | 21 | 1. 修复`ProxyValidSchedule`假死bug,原因是Queue阻塞; 22 | 2. 修改代理源 `云代理` 抓取; 23 | 3. 修改代理源 `码农代理` 抓取; 24 | 4. 修改代理源 `代理66` 抓取, 引入 `PyExecJS` 模块破解加速乐动态Cookies加密; 25 | 26 | * 1.13 (2019.02) 27 | 28 | 1.使用.py文件替换.ini作为配置文件; 29 | 30 | 2.更新代理采集部分; 31 | 32 | * 1.12 (2018.4) 33 | 34 | 1.优化代理格式检查; 35 | 36 | 2.增加代理源; 37 | 38 | 3.fix bug [#122](https://github.com/jhao104/proxy_pool/issues/122) [#126](https://github.com/jhao104/proxy_pool/issues/126) 39 | 40 | * 1.11 (2017.8) 41 | 42 |   1.使用多线程验证useful_pool; 43 | 44 | * 1.10 (2016.11) 45 | 46 |   1. 第一版; 47 | 48 |   2. 支持PY2/PY3; 49 | 50 |   3. 
代理池基本功能; -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Thompson.Lin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /proxy_pool/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 J_hao104 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /proxy_pool/Util/utilClass.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # !/usr/bin/env python 3 | """ 4 | ------------------------------------------------- 5 | File Name: utilClass.py 6 | Description : tool class 7 | Author : JHao 8 | date: 2016/12/3 9 | ------------------------------------------------- 10 | Change Activity: 11 | 2016/12/3: Class LazyProperty 12 | ------------------------------------------------- 13 | """ 14 | __author__ = 'JHao' 15 | 16 | 17 | class LazyProperty(object): 18 | """ 19 | LazyProperty 20 | explain: http://www.spiderpy.cn/blog/5/ 21 | """ 22 | 23 | def __init__(self, func): 24 | self.func = func 25 | 26 | def __get__(self, instance, owner): 27 | if instance is None: 28 | return self 29 | else: 30 | value = self.func(instance) 31 | setattr(instance, self.func.__name__, value) 32 | return value 33 | 34 | 35 | class Singleton(type): 36 | """ 37 | Singleton Metaclass 38 | """ 39 | 40 | _inst = {} 41 | 42 | def __call__(cls, *args, **kwargs): 43 | if cls not in cls._inst: 44 | cls._inst[cls] = super(Singleton, cls).__call__(*args) 45 | return cls._inst[cls] 46 | -------------------------------------------------------------------------------- /proxy_pool/ProxyHelper/ProxyUtil.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ------------------------------------------------- 4 | File Name: ProxyHelper 5 | Description : 6 | Author : JHao 7 | date: 2019/8/8 8 | ------------------------------------------------- 9 | Change Activity: 10 | 2019/8/8: 11 | ------------------------------------------------- 12 | """ 13 | __author__ = 'JHao' 14 | 15 | from Util import validUsefulProxy 16 | 17 | from datetime import datetime 18 | 19 | 20 | def checkProxyUseful(proxy_obj): 21 | """ 22 | 检测代理是否可用 23 | :param proxy_obj: Proxy object 24 | :return: Proxy object, status 25 | """ 26 | 27 | if validUsefulProxy(proxy_obj.proxy): 28 | # 检测通过 更新proxy属性 29 | proxy_obj.check_count += 1 30 | proxy_obj.last_status = 1 31 | proxy_obj.last_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") 32 | if proxy_obj.fail_count > 0: 33 | proxy_obj.fail_count -= 1 34 | return proxy_obj, True 35 | else: 36 | proxy_obj.check_count += 1 37 | proxy_obj.last_status = 0 38 | proxy_obj.last_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") 39 | proxy_obj.fail_count += 1 40 | return proxy_obj, False 41 | -------------------------------------------------------------------------------- /proxy_pool/cli/proxyPool.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ------------------------------------------------- 4 | File Name: proxy_pool 5 | Description : 6 | Author : JHao 7 | date: 2019/8/2 8 | ------------------------------------------------- 9 | Change Activity: 10 | 2019/8/2: 11 | ------------------------------------------------- 12 | """ 13 | __author__ = 'JHao' 14 | 15 | import sys 16 | import click 17 | import platform 18 | 19 | sys.path.append('../') 20 | 21 | from Config.setting import HEADER 22 | from Schedule.ProxyScheduler import runScheduler 23 | from Api.ProxyApi import runFlask,runFlaskWithGunicorn 24 | 25 | CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help']) 26 | 27 | 28 | @click.group(context_settings=CONTEXT_SETTINGS) 29 | @click.version_option(version='2.0.0') 30 | def cli(): 31 | 
"""ProxyPool cli工具""" 32 | 33 | 34 | @cli.command(name="schedule") 35 | def schedule(): 36 | """ 启动调度程序 """ 37 | click.echo(HEADER) 38 | runScheduler() 39 | 40 | 41 | @cli.command(name="webserver") 42 | def schedule(): 43 | """ 启动web服务 """ 44 | click.echo(HEADER) 45 | if platform.system() == "Windows": 46 | runFlask() 47 | else: 48 | runFlaskWithGunicorn() 49 | 50 | 51 | if __name__ == '__main__': 52 | cli() 53 | -------------------------------------------------------------------------------- /kuaishou/ks_down.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from multiprocessing import Pool 3 | from fake_useragent import UserAgent 4 | import time 5 | import os 6 | 7 | 8 | video_path ='./video/' 9 | 10 | UA = UserAgent() 11 | 12 | headers = { 13 | 'Connection': 'close', 14 | 'User-Agent':UA.random 15 | } 16 | 17 | def download(url): 18 | video_name = url[-24:] 19 | if os.path.exists(video_path+video_name) == True: 20 | print(video_name + ' 视频已存在,跳过') 21 | pass 22 | else: 23 | try: 24 | req = requests.get(url,headers=headers) 25 | req.raise_for_status() 26 | req.close() 27 | with open(video_path + video_name,'wb') as f: 28 | f.write(req.content) 29 | f.close() 30 | print(str(video_name) + ' ~下载完成!') 31 | except Exception as code: 32 | print(code) 33 | return None 34 | 35 | if __name__ == '__main__': 36 | start_time = time.time() 37 | pool = Pool(8) 38 | with open('./20200320.txt', 'r') as f: 39 | for line in f: 40 | line = line.split('?')[0] 41 | line = line.strip('/\n') 42 | pool.apply_async(download(line)) 43 | pool.close() 44 | pool.join() 45 | 46 | end_time = time.time() 47 | print('下载完成,总耗时:%s' % (end_time - start_time)) -------------------------------------------------------------------------------- /proxy_pool/Test/testGetFreeProxy.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ------------------------------------------------- 4 | File Name: testGetFreeProxy 5 | Description : test model ProxyGetter/getFreeProxy 6 | Author : J_hao 7 | date: 2017/7/31 8 | ------------------------------------------------- 9 | Change Activity: 10 | 2017/7/31:function testGetFreeProxy 11 | ------------------------------------------------- 12 | """ 13 | __author__ = 'J_hao' 14 | 15 | 16 | from ProxyGetter.getFreeProxy import GetFreeProxy 17 | from Config.ConfigGetter import config 18 | 19 | 20 | def testGetFreeProxy(): 21 | """ 22 | test class GetFreeProxy in ProxyGetter/GetFreeProxy 23 | :return: 24 | """ 25 | proxy_getter_functions = config.proxy_getter_functions 26 | for proxyGetter in proxy_getter_functions: 27 | proxy_count = 0 28 | for proxy in getattr(GetFreeProxy, proxyGetter.strip())(): 29 | if proxy: 30 | print('{func}: fetch proxy {proxy},proxy_count:{proxy_count}'.format(func=proxyGetter, proxy=proxy, 31 | proxy_count=proxy_count)) 32 | proxy_count += 1 33 | # assert proxy_count >= 20, '{} fetch proxy fail'.format(proxyGetter) 34 | 35 | 36 | if __name__ == '__main__': 37 | testGetFreeProxy() 38 | -------------------------------------------------------------------------------- /proxy_pool/Schedule/ProxyScheduler.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ------------------------------------------------- 4 | File Name: ProxyScheduler 5 | Description : 6 | Author : JHao 7 | date: 2019/8/5 8 | ------------------------------------------------- 9 | Change Activity: 10 | 2019/8/5: 
ProxyScheduler 11 | ------------------------------------------------- 12 | """ 13 | __author__ = 'JHao' 14 | 15 | import sys 16 | from apscheduler.schedulers.blocking import BlockingScheduler 17 | 18 | sys.path.append('../') 19 | 20 | from Schedule import doRawProxyCheck, doUsefulProxyCheck 21 | from Manager import ProxyManager 22 | from Util import LogHandler 23 | 24 | 25 | class DoFetchProxy(ProxyManager): 26 | """ fetch proxy""" 27 | 28 | def __init__(self): 29 | ProxyManager.__init__(self) 30 | self.log = LogHandler('fetch_proxy') 31 | 32 | def main(self): 33 | self.log.info("start fetch proxy") 34 | self.fetch() 35 | self.log.info("finish fetch proxy") 36 | 37 | 38 | def rawProxyScheduler(): 39 | DoFetchProxy().main() 40 | doRawProxyCheck() 41 | 42 | 43 | def usefulProxyScheduler(): 44 | doUsefulProxyCheck() 45 | 46 | 47 | def runScheduler(): 48 | rawProxyScheduler() 49 | usefulProxyScheduler() 50 | 51 | scheduler_log = LogHandler("scheduler_log") 52 | scheduler = BlockingScheduler(logger=scheduler_log) 53 | 54 | scheduler.add_job(rawProxyScheduler, 'interval', minutes=5, id="raw_proxy_check", name="raw_proxy定时采集") 55 | scheduler.add_job(usefulProxyScheduler, 'interval', minutes=1, id="useful_proxy_check", name="useful_proxy定时检查") 56 | 57 | scheduler.start() 58 | 59 | 60 | if __name__ == '__main__': 61 | runScheduler() 62 | -------------------------------------------------------------------------------- /91user/run.py: -------------------------------------------------------------------------------- 1 | from user import User, ClientSqlite 2 | from config.uids import USERS_UID 3 | import json 4 | 5 | db = ClientSqlite() 6 | 7 | 8 | def fetchall_table(uid): 9 | sql = '''SELECT data FROM users WHERE uid = ('{}')'''.format(uid) 10 | result = db.fetchall_table(sql) 11 | if result != None: 12 | result = result[0] 13 | data = json.loads(result[0]) 14 | data_num = len(data) 15 | return {'data': data, 'data_num': int(data_num)} 16 | else: 17 | return None 18 | 19 | def insert_table(data): 20 | uid = data['uid'] 21 | name = data['name'] 22 | videos = json.dumps(data['data']) 23 | sql = '''INSERT INTO users(uid, name,data) VALUES('{0}','{1}','{2}')'''.format(uid, name, videos) 24 | db.insert_update_table(sql) 25 | 26 | def update_table(data): 27 | uid = data['uid'] 28 | name = data['name'] 29 | videos = json.dumps(data['data']) 30 | sql = '''UPDATE users SET data = ('{0}') WHERE uid = "{1}"'''.format(videos, uid) 31 | db.insert_update_table(sql) 32 | 33 | 34 | 35 | if __name__ == "__main__": 36 | for i in USERS_UID: 37 | user = User(i) 38 | public_data = user.public_data() 39 | public_num = public_data['public_video'] 40 | local_data = fetchall_table(i) 41 | if local_data != None and public_num > local_data['data_num']: 42 | # if public_num > local_data['data_num'] 43 | user_data = user.parse_video() 44 | update_table(user_data) 45 | print('数据更新.....\n') 46 | elif local_data != None and public_num == local_data['data_num']: 47 | print('公开视频与本地数据相符') 48 | pass 49 | else: 50 | user_data = user.parse_video() 51 | insert_table(user_data) 52 | print('数据新增.....\n') 53 | db.close_conn() -------------------------------------------------------------------------------- /proxy_pool/Config/ConfigGetter.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ------------------------------------------------- 4 | File Name: ConfigGetter 5 | Description : 读取配置 6 | Author : JHao 7 | date: 2019/2/15 8 | 
------------------------------------------------- 9 | Change Activity: 10 | 2019/2/15: 11 | ------------------------------------------------- 12 | """ 13 | __author__ = 'JHao' 14 | 15 | 16 | from Util.utilClass import LazyProperty 17 | from Config.setting import * 18 | 19 | 20 | class ConfigGetter(object): 21 | """ 22 | get config 23 | """ 24 | 25 | def __init__(self): 26 | pass 27 | 28 | @LazyProperty 29 | def db_type(self): 30 | return DATABASES.get("default", {}).get("TYPE", "SSDB") 31 | 32 | @LazyProperty 33 | def db_name(self): 34 | return DATABASES.get("default", {}).get("NAME", "proxy") 35 | 36 | @LazyProperty 37 | def db_host(self): 38 | return DATABASES.get("default", {}).get("HOST", "127.0.0.1") 39 | 40 | @LazyProperty 41 | def db_port(self): 42 | return DATABASES.get("default", {}).get("PORT", 8888) 43 | 44 | @LazyProperty 45 | def db_password(self): 46 | return DATABASES.get("default", {}).get("PASSWORD", "") 47 | 48 | @LazyProperty 49 | def proxy_getter_functions(self): 50 | return PROXY_GETTER 51 | 52 | @LazyProperty 53 | def host_ip(self): 54 | return SERVER_API.get("HOST", "127.0.0.1") 55 | 56 | @LazyProperty 57 | def host_port(self): 58 | return SERVER_API.get("PORT", 5010) 59 | 60 | 61 | config = ConfigGetter() 62 | 63 | if __name__ == '__main__': 64 | print(config.db_type) 65 | print(config.db_name) 66 | print(config.db_host) 67 | print(config.db_port) 68 | print(config.proxy_getter_functions) 69 | print(config.host_ip) 70 | print(config.host_port) 71 | print(config.db_password) 72 | -------------------------------------------------------------------------------- /qicai/qicai_top50.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # date: 2018年10月15日 3 | 4 | import requests 5 | from bs4 import BeautifulSoup 6 | import urllib.request 7 | import re 8 | from multiprocessing import Pool 9 | import random, time 10 | 11 | headers = { 12 | 'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36', 13 | 'Host': 'www.qcenglish.com', 14 | 'Referer': 'http://www.qcenglish.com/' 15 | } 16 | 17 | url = 'http://www.qcenglish.com' 18 | host = 'http://www.qcenglish.com' 19 | 20 | download_path = './' 21 | 22 | def get_article(url): 23 | req = requests.get(url,headers=headers) 24 | req.encoding = req.apparent_encoding 25 | soup = BeautifulSoup(req.text,'lxml') 26 | try: 27 | pdf_title = soup.select('#details > dl > dd')[0].get_text() 28 | download_link = soup.select('#download > li > a')[-1].get('href') 29 | print('书名:' + pdf_title) 30 | print('下载链接:' + host + download_link) 31 | download_url = host + download_link 32 | download(download_url,pdf_title) 33 | except IndexError as e: 34 | print(e) 35 | pass 36 | 37 | def download(url, title): 38 | file_path = download_path + title + '.zip' 39 | urllib.request.urlretrieve(url, file_path) 40 | print('下载完成.......\n') 41 | print('延迟等待....Hold on!') 42 | time.sleep(random(3,10)) 43 | 44 | 45 | def get_list(url): 46 | top_list = [] 47 | req = requests.get(url,headers=headers) 48 | soup = BeautifulSoup(req.text,'lxml') 49 | pdf_list = soup.select('#rectop2 > ul > li > a') 50 | for p_list in pdf_list: 51 | p_list = p_list.get('href') 52 | top_list.append(p_list) 53 | # print(top_list) 54 | p = re.compile('_') 55 | clear_list = [x for x in top_list if not p.findall(x)] 56 | return clear_list 57 | 58 | 59 | if __name__ == '__main__': 60 | p = Pool() 61 | top_list = get_list(url) 62 | for article_url in 
top_list: 63 | start_url = host + article_url 64 | # get_article(start_url) 65 | start = p.apply_async(get_article(start_url)) 66 | p.close() 67 | p.join() 68 | if start.successful(): 69 | print('Top50 下载完成!\n') -------------------------------------------------------------------------------- /umei/app.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import sys, os 3 | from bs4 import BeautifulSoup 4 | from time import sleep 5 | 6 | urls = ['https://www.umei.fun/categories/16?page={}'.format(str(i)) for i in range(1,63)] 7 | cookie = 'your cookies' 8 | 9 | headers = { 10 | 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3', 11 | 'accept-encoding': 'gzip, deflate, br', 12 | 'accept-language': 'zh-CN,zh;q=0.9', 13 | 'cache-control': 'max-age=0', 14 | 'cookie': cookie, 15 | 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36' 16 | } 17 | 18 | def respon(url): 19 | response = requests.get(url,headers=headers) 20 | status = response.status_code 21 | if status == 200: 22 | return response.text 23 | else: 24 | return None 25 | 26 | def gerUrls(page): 27 | if page == None: 28 | print('None!') 29 | else: 30 | html = BeautifulSoup(page,'lxml') 31 | urls = html.select('div.section-white > div > div > div > div > div > div > div > a') 32 | for url in urls: 33 | url = 'https://www.umei.fun' + url.get('href') 34 | imgpage = respon(url) 35 | getImg(imgpage) 36 | 37 | def getImg(page): 38 | html = BeautifulSoup(page,'lxml') 39 | imgs = html.select('div.container > div > div > img') 40 | title = html.select('h2')[0].get_text() 41 | if imgs == []: 42 | print('No img!') 43 | pass 44 | else: 45 | for img in imgs: 46 | img = img.get('src') 47 | download(img,title) 48 | print(str(title) + ' download succesful!') 49 | 50 | 51 | def download(url,title): 52 | picPath = os.getcwd() + '\pic' + '\\' + str(title) 53 | if not os.path.exists(picPath): 54 | os.mkdir(picPath) 55 | con = requests.get(url) 56 | name = url[-8:] 57 | with open(picPath + '\\' + str(name) + '.jpg','wb') as f: 58 | f.write(con.content) 59 | f.flush() 60 | 61 | if __name__ == '__main__': 62 | for url in urls: 63 | print(url) 64 | try: 65 | res = respon(url) 66 | imgUrls = gerUrls(res) 67 | sleep(1) 68 | except: 69 | print('Error \n') 70 | continue 71 | -------------------------------------------------------------------------------- /proxy_pool/DB/MongodbClient.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | ------------------------------------------------- 4 | File Name: MongodbClient.py 5 | Description : 封装mongodb操作 6 | Author : JHao netAir 7 | date: 2017/3/3 8 | ------------------------------------------------- 9 | Change Activity: 10 | 2017/3/3: 11 | 2017/9/26:完成对mongodb的支持 12 | ------------------------------------------------- 13 | """ 14 | __author__ = 'Maps netAir' 15 | 16 | from pymongo import MongoClient 17 | 18 | 19 | class MongodbClient(object): 20 | def __init__(self, name, host, port, **kwargs): 21 | self.name = name 22 | self.client = MongoClient(host, port, **kwargs) 23 | self.db = self.client.proxy 24 | 25 | def changeTable(self, name): 26 | self.name = name 27 | 28 | def get(self, proxy): 29 | data = self.db[self.name].find_one({'proxy': proxy}) 30 | return data['num'] if data != None else None 31 | 32 | def put(self, proxy, num=1): 33 | if 
self.db[self.name].find_one({'proxy': proxy}): 34 | return None 35 | else: 36 | self.db[self.name].insert({'proxy': proxy, 'num': num}) 37 | 38 | def pop(self): 39 | data = list(self.db[self.name].aggregate([{'$sample': {'size': 1}}])) 40 | if data: 41 | data = data[0] 42 | value = data['proxy'] 43 | self.delete(value) 44 | return {'proxy': value, 'value': data['num']} 45 | return None 46 | 47 | def delete(self, value): 48 | self.db[self.name].remove({'proxy': value}) 49 | 50 | def getAll(self): 51 | return {p['proxy']: p['num'] for p in self.db[self.name].find()} 52 | 53 | def clean(self): 54 | self.client.drop_database('proxy') 55 | 56 | def delete_all(self): 57 | self.db[self.name].remove() 58 | 59 | def update(self, key, value): 60 | self.db[self.name].update({'proxy': key}, {'$inc': {'num': value}}) 61 | 62 | def exists(self, key): 63 | return True if self.db[self.name].find_one({'proxy': key}) != None else False 64 | 65 | def getNumber(self): 66 | return self.db[self.name].count() 67 | 68 | 69 | if __name__ == "__main__": 70 | db = MongodbClient('first', 'localhost', 27017) 71 | # db.put('127.0.0.1:1') 72 | # db2 = MongodbClient('second', 'localhost', 27017) 73 | # db2.put('127.0.0.1:2') 74 | print(db.pop()) 75 | -------------------------------------------------------------------------------- /proxy_pool/ProxyGetter/CheckProxy.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ------------------------------------------------- 4 | File Name: CheckProxy 5 | Description : used for check getFreeProxy.py 6 | Author : JHao 7 | date: 2018/7/10 8 | ------------------------------------------------- 9 | Change Activity: 10 | 2018/7/10: CheckProxy 11 | ------------------------------------------------- 12 | """ 13 | __author__ = 'JHao' 14 | 15 | from getFreeProxy import GetFreeProxy 16 | from Util.utilFunction import verifyProxyFormat 17 | 18 | 19 | from Util.LogHandler import LogHandler 20 | 21 | log = LogHandler('check_proxy', file=False) 22 | 23 | 24 | class CheckProxy(object): 25 | 26 | @staticmethod 27 | def checkAllGetProxyFunc(): 28 | """ 29 | 检查getFreeProxy所有代理获取函数运行情况 30 | Returns: 31 | None 32 | """ 33 | import inspect 34 | member_list = inspect.getmembers(GetFreeProxy, predicate=inspect.isfunction) 35 | proxy_count_dict = dict() 36 | for func_name, func in member_list: 37 | log.info(u"开始运行 {}".format(func_name)) 38 | try: 39 | proxy_list = [_ for _ in func() if verifyProxyFormat(_)] 40 | proxy_count_dict[func_name] = len(proxy_list) 41 | except Exception as e: 42 | log.info(u"代理获取函数 {} 运行出错!".format(func_name)) 43 | log.error(str(e)) 44 | log.info(u"所有函数运行完毕 " + "***" * 5) 45 | for func_name, func in member_list: 46 | log.info(u"函数 {n}, 获取到代理数: {c}".format(n=func_name, c=proxy_count_dict.get(func_name, 0))) 47 | 48 | @staticmethod 49 | def checkGetProxyFunc(func): 50 | """ 51 | 检查指定的getFreeProxy某个function运行情况 52 | Args: 53 | func: getFreeProxy中某个可调用方法 54 | 55 | Returns: 56 | None 57 | """ 58 | func_name = getattr(func, '__name__', "None") 59 | log.info("start running func: {}".format(func_name)) 60 | count = 0 61 | for proxy in func(): 62 | if verifyProxyFormat(proxy): 63 | log.info("{} fetch proxy: {}".format(func_name, proxy)) 64 | count += 1 65 | log.info("{n} completed, fetch proxy number: {c}".format(n=func_name, c=count)) 66 | 67 | 68 | if __name__ == '__main__': 69 | CheckProxy.checkAllGetProxyFunc() 70 | CheckProxy.checkGetProxyFunc(GetFreeProxy.freeProxy01) 
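# Editor's note: an illustrative sketch only, not part of the original module. A
# getter can also be resolved from its configured name string (the same strings
# listed in Config/setting.py's PROXY_GETTER), mirroring Test/testGetFreeProxy.py:
def _check_getter_by_name(name="freeProxy01"):  # hypothetical helper, not called here
    CheckProxy.checkGetProxyFunc(getattr(GetFreeProxy, name))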
-------------------------------------------------------------------------------- /kuaishou/ks_video.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from random import randint 3 | from bs4 import BeautifulSoup 4 | from fake_useragent import UserAgent 5 | from time import sleep 6 | import os 7 | UA = UserAgent() 8 | 9 | headers = { 10 | 'Connection': 'close', 11 | 'User-Agent':UA.random 12 | } 13 | 14 | video_path = './video/' 15 | 16 | 17 | def get_page(url): 18 | ''' 19 | :return: response 20 | ''' 21 | try: 22 | req = requests.get(url,headers=headers) 23 | req.raise_for_status() 24 | req.close() 25 | req.encoding = 'utf-8' 26 | return req 27 | except Exception as code: 28 | print(code) 29 | sleep(3) 30 | 31 | def download(url): 32 | video_name = url[-24:] 33 | if os.path.exists(video_path+video_name) == True: 34 | print(video_name + ' 视频已存在,跳过') 35 | pass 36 | else: 37 | try: 38 | req = requests.get(url,headers=headers) 39 | req.raise_for_status() 40 | req.close() 41 | with open(video_path + video_name,'wb') as f: 42 | f.write(req.content) 43 | f.close() 44 | print(str(video_name) + ' ~下载完成!') 45 | except Exception as code: 46 | print(code) 47 | return None 48 | 49 | 50 | def parse_xiacoo(html): 51 | # http://v.xiacoo.com 52 | soup = BeautifulSoup(html.text, 'lxml') 53 | video_src = soup.select('source')[0] 54 | video_url = video_src.get('src').split('?', 1) 55 | url = video_url[0] 56 | return url 57 | 58 | def parse_xjj(html): 59 | # https://xjj.show/ks.php 60 | soup = BeautifulSoup(html.text, 'lxml') 61 | video_src = soup.find_all('video')[0] 62 | video_url = video_src.get('src').split('?', 1) 63 | url = video_url[0] 64 | return url 65 | 66 | 67 | if __name__ == '__main__': 68 | print('1: v.xiacoo.com; 2: xjj.show;') 69 | select = int(input('Please input your select:')) 70 | if select == 1: 71 | start_url = 'http://v.xiacoo.com' 72 | print('start url: ' + str(start_url)) 73 | elif select == 2: 74 | start_url = 'https://xjj.show/ks.php' 75 | print('start url: ' + str(start_url)) 76 | else: 77 | print('ERROR: check your input!') 78 | exit() 79 | while True: 80 | try: 81 | video_page = get_page(start_url) 82 | if video_page == None: 83 | print('url is None!') 84 | video_page = get_page(start_url) 85 | else: 86 | video_url = parse_xjj(video_page) 87 | download(video_url) 88 | sleep(randint(1,3)) 89 | except TimeoutError as code: 90 | print(code) -------------------------------------------------------------------------------- /cmanuf/download.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import pdfkit 3 | from time import sleep 4 | from bs4 import BeautifulSoup 5 | 6 | ''' 7 | No.1 手痒撸的,单次只能下载一本,只能下载H5中内容自行合并PDF,不下载PDF。 8 | No.2 token每小时都需要更新,获取方法自行网站中debug。 9 | No.3 book ID not detail ID 10 | No.4 感谢机械工业出版社...... 
11 | 12 | 13 | ''' 14 | 15 | headers = { 16 | 'Accept': 'application/json, text/javascript, */*; q=0.01', 17 | 'Accept-Encoding': 'gzip, deflate', 18 | 'Accept-Language': 'zh-CN,zh;q=0.9', 19 | 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', 20 | 'Cookie': 'JSESSIONID=A6DF07780010F3F5D221497A3A345A8D', 21 | 'DNT': '1', 22 | 'Host': 'www.hzcourse.com', 23 | 'Origin': 'http://www.hzcourse.com', 24 | 'Proxy-Connection': 'keep-alive', 25 | 'Referer': 'http://www.hzcourse.com', 26 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36', 27 | 'X-Requested-With': 'XMLHttpRequest' 28 | } 29 | 30 | url = 'http://www.hzcourse.com/web/refbook/queryAllChapterList' 31 | 32 | path_wk = r'C:\Program Files\wkhtmltopdf\bin\wkhtmltopdf.exe' 33 | config = pdfkit.configuration(wkhtmltopdf=path_wk) 34 | options = { 35 | 'page-size': 'Letter', 36 | 'margin-top': '0.75in', 37 | 'margin-right': '0.75in', 38 | 'margin-bottom': '0.75in', 39 | 'margin-left': '0.75in', 40 | 'encoding': "UTF-8", 41 | 'no-outline': None 42 | } 43 | 44 | def getUrls(url,data): 45 | res = requests.post(url,data=data) 46 | jsdata = res.json() 47 | urls = [] 48 | data = jsdata['data'] 49 | for i in data['data']: 50 | link = i['ref'] 51 | urls.append(link) 52 | return urls 53 | 54 | def download(links): 55 | num = 1 56 | for i in links: 57 | xtm = requests.get(url = 'http://www.hzcourse.com/resource/readBook?path=' + str(i),headers=headers) 58 | soup = BeautifulSoup(xtm.text,'lxml') 59 | for img in soup.find_all('img'): 60 | img['src'] = 'http://www.hzcourse.com/resource/readBook?path=/openresources/teach_ebook/uncompressed/18563/OEBPS/Text/' + img['src'] 61 | article = str(soup).encode('utf-8') 62 | with open(str(num) + '.html','wb') as f: 63 | f.write(article) 64 | f.close() 65 | try: 66 | pdfkit.from_file(str(num) + '.html',str(num) + '.pdf',configuration=config,options=options) 67 | except Exception as e: 68 | print('Error for ' + str(e) + ',Page :' + str(num)) 69 | num += 1 70 | sleep(1) 71 | 72 | 73 | if __name__ == '__main__': 74 | bookid = input("Please input bookid:") 75 | postData = { 76 | 'ebookId': bookid, 77 | 'token': '5a1536002e3441d0af4c3d640d0b37e9' 78 | } 79 | links = getUrls(url,postData) 80 | download(links) 81 | -------------------------------------------------------------------------------- /proxy_pool/Schedule/UsefulProxyCheck.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ------------------------------------------------- 4 | File Name: UsefulProxyCheck 5 | Description : check useful proxy 6 | Author : JHao 7 | date: 2019/8/7 8 | ------------------------------------------------- 9 | Change Activity: 10 | 2019/8/7: check useful proxy 11 | ------------------------------------------------- 12 | """ 13 | __author__ = 'JHao' 14 | 15 | from threading import Thread 16 | 17 | try: 18 | from Queue import Queue, Empty # py2 19 | except: 20 | from queue import Queue, Empty # py3 21 | 22 | from Util import LogHandler 23 | from Manager import ProxyManager 24 | from ProxyHelper import checkProxyUseful, Proxy 25 | 26 | FAIL_COUNT = 0 27 | 28 | 29 | class UsefulProxyCheck(ProxyManager, Thread): 30 | def __init__(self, queue, thread_name): 31 | ProxyManager.__init__(self) 32 | Thread.__init__(self, name=thread_name) 33 | 34 | self.queue = queue 35 | self.log = LogHandler('useful_proxy_check') 36 | 37 | def run(self): 38 | self.log.info("UsefulProxyCheck - {} : 
start".format(self.name)) 39 | self.db.changeTable(self.useful_proxy_queue) 40 | while True: 41 | try: 42 | proxy_str = self.queue.get(block=False) 43 | except Empty: 44 | self.log.info("UsefulProxyCheck - {} : exit".format(self.name)) 45 | break 46 | 47 | proxy_obj = Proxy.newProxyFromJson(proxy_str) 48 | proxy_obj, status = checkProxyUseful(proxy_obj) 49 | if status or proxy_obj.fail_count < FAIL_COUNT: 50 | self.db.put(proxy_obj) 51 | self.log.info('UsefulProxyCheck - {} : {} validation pass'.format(self.name, 52 | proxy_obj.proxy.ljust(20))) 53 | else: 54 | self.log.info('UsefulProxyCheck - {} : {} validation fail'.format(self.name, 55 | proxy_obj.proxy.ljust(20))) 56 | self.db.delete(proxy_obj.proxy) 57 | self.queue.task_done() 58 | 59 | 60 | def doUsefulProxyCheck(): 61 | proxy_queue = Queue() 62 | 63 | pm = ProxyManager() 64 | pm.db.changeTable(pm.useful_proxy_queue) 65 | for _proxy in pm.db.getAll(): 66 | proxy_queue.put(_proxy) 67 | 68 | thread_list = list() 69 | for index in range(10): 70 | thread_list.append(UsefulProxyCheck(proxy_queue, "thread_%s" % index)) 71 | 72 | for thread in thread_list: 73 | thread.start() 74 | 75 | for thread in thread_list: 76 | thread.join() 77 | 78 | 79 | if __name__ == '__main__': 80 | doUsefulProxyCheck() 81 | -------------------------------------------------------------------------------- /proxy_pool/Schedule/RawProxyCheck.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ------------------------------------------------- 4 | File Name: RawProxyCheck 5 | Description : check raw_proxy to useful 6 | Author : JHao 7 | date: 2019/8/6 8 | ------------------------------------------------- 9 | Change Activity: 10 | 2019/8/6: check raw_proxy to useful 11 | ------------------------------------------------- 12 | """ 13 | __author__ = 'JHao' 14 | 15 | from threading import Thread 16 | 17 | try: 18 | from Queue import Empty, Queue # py2 19 | except: 20 | from queue import Empty, Queue # py3 21 | 22 | from Util import LogHandler 23 | from Manager import ProxyManager 24 | from ProxyHelper import Proxy, checkProxyUseful 25 | 26 | 27 | class RawProxyCheck(ProxyManager, Thread): 28 | def __init__(self, queue, thread_name): 29 | ProxyManager.__init__(self) 30 | Thread.__init__(self, name=thread_name) 31 | self.log = LogHandler('raw_proxy_check') 32 | self.queue = queue 33 | 34 | def run(self): 35 | self.log.info("RawProxyCheck - {} : start".format(self.name)) 36 | self.db.changeTable(self.useful_proxy_queue) 37 | while True: 38 | try: 39 | proxy_json = self.queue.get(block=False) 40 | except Empty: 41 | self.log.info("RawProxyCheck - {} : exit".format(self.name)) 42 | break 43 | 44 | proxy_obj = Proxy.newProxyFromJson(proxy_json) 45 | 46 | proxy_obj, status = checkProxyUseful(proxy_obj) 47 | if status: 48 | if self.db.exists(proxy_obj.proxy): 49 | self.log.info('RawProxyCheck - {} : {} validation exists'.format(self.name, 50 | proxy_obj.proxy.ljust(20))) 51 | else: 52 | self.db.put(proxy_obj) 53 | self.log.info( 54 | 'RawProxyCheck - {} : {} validation pass'.format(self.name, proxy_obj.proxy.ljust(20))) 55 | else: 56 | self.log.info('RawProxyCheck - {} : {} validation fail'.format(self.name, proxy_obj.proxy.ljust(20))) 57 | self.queue.task_done() 58 | 59 | 60 | def doRawProxyCheck(): 61 | proxy_queue = Queue() 62 | 63 | pm = ProxyManager() 64 | pm.db.changeTable(pm.raw_proxy_queue) 65 | for _proxy in pm.db.getAll(): 66 | proxy_queue.put(_proxy) 67 | pm.db.clear() 68 | 69 | thread_list = 
list() 70 | for index in range(20): 71 | thread_list.append(RawProxyCheck(proxy_queue, "thread_%s" % index)) 72 | 73 | for thread in thread_list: 74 | thread.start() 75 | 76 | for thread in thread_list: 77 | thread.join() 78 | 79 | 80 | if __name__ == '__main__': 81 | doRawProxyCheck() 82 | -------------------------------------------------------------------------------- /proxy_pool/Util/utilFunction.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # !/usr/bin/env python 3 | """ 4 | ------------------------------------------------- 5 | File Name: utilFunction.py 6 | Description : tool function 7 | Author : JHao 8 | date: 2016/11/25 9 | ------------------------------------------------- 10 | Change Activity: 11 | 2016/11/25: 添加robustCrawl、verifyProxy、getHtmlTree 12 | ------------------------------------------------- 13 | """ 14 | import requests 15 | from lxml import etree 16 | 17 | from Util.WebRequest import WebRequest 18 | 19 | 20 | def robustCrawl(func): 21 | def decorate(*args, **kwargs): 22 | try: 23 | return func(*args, **kwargs) 24 | except Exception as e: 25 | pass 26 | # logger.info(u"sorry, 抓取出错。错误原因:") 27 | # logger.info(e) 28 | 29 | return decorate 30 | 31 | 32 | def verifyProxyFormat(proxy): 33 | """ 34 | 检查代理格式 35 | :param proxy: 36 | :return: 37 | """ 38 | import re 39 | verify_regex = r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,5}" 40 | _proxy = re.findall(verify_regex, proxy) 41 | return True if len(_proxy) == 1 and _proxy[0] == proxy else False 42 | 43 | 44 | def getHtmlTree(url, **kwargs): 45 | """ 46 | 获取html树 47 | :param url: 48 | :param kwargs: 49 | :return: 50 | """ 51 | 52 | header = {'Connection': 'keep-alive', 53 | 'Cache-Control': 'max-age=0', 54 | 'Upgrade-Insecure-Requests': '1', 55 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko)', 56 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 57 | 'Accept-Encoding': 'gzip, deflate, sdch', 58 | 'Accept-Language': 'zh-CN,zh;q=0.8', 59 | } 60 | # TODO 取代理服务器用代理服务器访问 61 | wr = WebRequest() 62 | html = wr.get(url=url, header=header).content 63 | return etree.HTML(html) 64 | 65 | 66 | def tcpConnect(proxy): 67 | """ 68 | TCP 三次握手 69 | :param proxy: 70 | :return: 71 | """ 72 | from socket import socket, AF_INET, SOCK_STREAM 73 | s = socket(AF_INET, SOCK_STREAM) 74 | ip, port = proxy.split(':') 75 | result = s.connect_ex((ip, int(port))) 76 | return True if result == 0 else False 77 | 78 | 79 | def validUsefulProxy(proxy): 80 | """ 81 | 检验代理是否可用 82 | :param proxy: 83 | :return: 84 | """ 85 | if isinstance(proxy, bytes): 86 | proxy = proxy.decode("utf8") 87 | proxies = {"http": "http://{proxy}".format(proxy=proxy)} 88 | try: 89 | r = requests.get('http://www.baidu.com', proxies=proxies, timeout=10, verify=False) 90 | if r.status_code == 200: 91 | return True 92 | except Exception as e: 93 | pass 94 | return False 95 | 96 | -------------------------------------------------------------------------------- /proxy_pool/Config/setting.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ------------------------------------------------- 4 | File Name: setting.py 5 | Description : 配置文件 6 | Author : JHao 7 | date: 2019/2/15 8 | ------------------------------------------------- 9 | Change Activity: 10 | 2019/2/15: 11 | ------------------------------------------------- 12 | """ 13 | 14 | import sys 15 | from os 
import getenv 16 | from logging import getLogger 17 | 18 | log = getLogger(__name__) 19 | 20 | HEADER = """ 21 | **************************************************************** 22 | *** ______ ********************* ______ *********** _ ******** 23 | *** | ___ \_ ******************** | ___ \ ********* | | ******** 24 | *** | |_/ / \__ __ __ _ __ _ | |_/ /___ * ___ | | ******** 25 | *** | __/| _// _ \ \ \/ /| | | || __// _ \ / _ \ | | ******** 26 | *** | | | | | (_) | > < \ |_| || | | (_) | (_) || |___ **** 27 | *** \_| |_| \___/ /_/\_\ \__ |\_| \___/ \___/ \_____/ **** 28 | **** __ / / ***** 29 | ************************* /___ / ******************************* 30 | ************************* ******************************** 31 | **************************************************************** 32 | """ 33 | 34 | PY3 = sys.version_info >= (3,) 35 | 36 | DB_TYPE = getenv('db_type', 'SSDB').upper() 37 | DB_HOST = getenv('db_host', '127.0.0.1') 38 | DB_PORT = getenv('db_port', 8888) 39 | DB_PASSWORD = getenv('db_password', '') 40 | 41 | 42 | """ 数据库配置 """ 43 | DATABASES = { 44 | "default": { 45 | "TYPE": DB_TYPE, 46 | "HOST": DB_HOST, 47 | "PORT": DB_PORT, 48 | "NAME": "proxy", 49 | "PASSWORD": DB_PASSWORD 50 | } 51 | } 52 | 53 | # register the proxy getter function 54 | 55 | PROXY_GETTER = [ 56 | "freeProxy01", 57 | # "freeProxy02", 58 | "freeProxy03", 59 | "freeProxy04", 60 | "freeProxy05", 61 | # "freeProxy06", 62 | "freeProxy07", 63 | # "freeProxy08", 64 | "freeProxy09", 65 | "freeProxy13", 66 | "freeProxy14", 67 | "freeProxy14", 68 | ] 69 | 70 | """ API config http://127.0.0.1:5010 """ 71 | SERVER_API = { 72 | "HOST": "0.0.0.0", # The ip specified which starting the web API 73 | "PORT": 5010 # port number to which the server listens to 74 | } 75 | 76 | 77 | class ConfigError(BaseException): 78 | pass 79 | 80 | 81 | def checkConfig(): 82 | if DB_TYPE not in ["SSDB", "REDIS"]: 83 | raise ConfigError('db_type Do not support: %s, must SSDB/REDIS .' 
% DB_TYPE) 84 | 85 | if type(DB_PORT) == str and not DB_PORT.isdigit(): 86 | raise ConfigError('if db_port is string, it must be digit, not %s' % DB_PORT) 87 | 88 | from ProxyGetter import getFreeProxy 89 | illegal_getter = list(filter(lambda key: not hasattr(getFreeProxy.GetFreeProxy, key), PROXY_GETTER)) 90 | if len(illegal_getter) > 0: 91 | raise ConfigError("ProxyGetter: %s does not exists" % "/".join(illegal_getter)) 92 | 93 | 94 | checkConfig() 95 | -------------------------------------------------------------------------------- /proxy_pool/Util/LogHandler.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ------------------------------------------------- 4 | File Name: LogHandler.py 5 | Description : 日志操作模块 6 | Author : JHao 7 | date: 2017/3/6 8 | ------------------------------------------------- 9 | Change Activity: 10 | 2017/3/6: log handler 11 | 2017/9/21: 屏幕输出/文件输出 可选(默认屏幕和文件均输出) 12 | ------------------------------------------------- 13 | """ 14 | __author__ = 'JHao' 15 | 16 | import os 17 | 18 | import logging 19 | 20 | from logging.handlers import TimedRotatingFileHandler 21 | 22 | # 日志级别 23 | CRITICAL = 50 24 | FATAL = CRITICAL 25 | ERROR = 40 26 | WARNING = 30 27 | WARN = WARNING 28 | INFO = 20 29 | DEBUG = 10 30 | NOTSET = 0 31 | 32 | CURRENT_PATH = os.path.dirname(os.path.abspath(__file__)) 33 | ROOT_PATH = os.path.join(CURRENT_PATH, os.pardir) 34 | LOG_PATH = os.path.join(ROOT_PATH, 'log') 35 | 36 | if not os.path.exists(LOG_PATH): 37 | os.mkdir(LOG_PATH) 38 | 39 | 40 | class LogHandler(logging.Logger): 41 | """ 42 | LogHandler 43 | """ 44 | 45 | def __init__(self, name, level=DEBUG, stream=True, file=True): 46 | self.name = name 47 | self.level = level 48 | logging.Logger.__init__(self, self.name, level=level) 49 | if stream: 50 | self.__setStreamHandler__() 51 | if file: 52 | self.__setFileHandler__() 53 | 54 | def __setFileHandler__(self, level=None): 55 | """ 56 | set file handler 57 | :param level: 58 | :return: 59 | """ 60 | file_name = os.path.join(LOG_PATH, '{name}.log'.format(name=self.name)) 61 | # 设置日志回滚, 保存在log目录, 一天保存一个文件, 保留15天 62 | file_handler = TimedRotatingFileHandler(filename=file_name, when='D', interval=1, backupCount=15) 63 | file_handler.suffix = '%Y%m%d.log' 64 | if not level: 65 | file_handler.setLevel(self.level) 66 | else: 67 | file_handler.setLevel(level) 68 | formatter = logging.Formatter('%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s') 69 | 70 | file_handler.setFormatter(formatter) 71 | self.file_handler = file_handler 72 | self.addHandler(file_handler) 73 | 74 | def __setStreamHandler__(self, level=None): 75 | """ 76 | set stream handler 77 | :param level: 78 | :return: 79 | """ 80 | stream_handler = logging.StreamHandler() 81 | formatter = logging.Formatter('%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s') 82 | stream_handler.setFormatter(formatter) 83 | if not level: 84 | stream_handler.setLevel(self.level) 85 | else: 86 | stream_handler.setLevel(level) 87 | self.addHandler(stream_handler) 88 | 89 | def resetName(self, name): 90 | """ 91 | reset name 92 | :param name: 93 | :return: 94 | """ 95 | self.name = name 96 | self.removeHandler(self.file_handler) 97 | self.__setFileHandler__() 98 | 99 | 100 | if __name__ == '__main__': 101 | log = LogHandler('test') 102 | log.info('this is a test msg') 103 | -------------------------------------------------------------------------------- /proxy_pool/Util/WebRequest.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ------------------------------------------------- 4 | File Name: WebRequest 5 | Description : Network Requests Class 6 | Author : J_hao 7 | date: 2017/7/31 8 | ------------------------------------------------- 9 | Change Activity: 10 | 2017/7/31: 11 | ------------------------------------------------- 12 | """ 13 | __author__ = 'J_hao' 14 | 15 | from requests.models import Response 16 | import requests 17 | import random 18 | import time 19 | 20 | 21 | class WebRequest(object): 22 | def __init__(self, *args, **kwargs): 23 | pass 24 | 25 | @property 26 | def user_agent(self): 27 | """ 28 | return an User-Agent at random 29 | :return: 30 | """ 31 | ua_list = [ 32 | 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101', 33 | 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.122', 34 | 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71', 35 | 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95', 36 | 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71', 37 | 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 732; .NET4.0C; .NET4.0E)', 38 | 'Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50', 39 | 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0', 40 | ] 41 | return random.choice(ua_list) 42 | 43 | @property 44 | def header(self): 45 | """ 46 | basic header 47 | :return: 48 | """ 49 | return {'User-Agent': self.user_agent, 50 | 'Accept': '*/*', 51 | 'Connection': 'keep-alive', 52 | 'Accept-Language': 'zh-CN,zh;q=0.8'} 53 | 54 | def get(self, url, header=None, retry_time=5, timeout=30, 55 | retry_flag=list(), retry_interval=5, *args, **kwargs): 56 | """ 57 | get method 58 | :param url: target url 59 | :param header: headers 60 | :param retry_time: retry time when network error 61 | :param timeout: network timeout 62 | :param retry_flag: if retry_flag in content. 
do retry 63 | :param retry_interval: retry interval(second) 64 | :param args: 65 | :param kwargs: 66 | :return: 67 | """ 68 | headers = self.header 69 | if header and isinstance(header, dict): 70 | headers.update(header) 71 | while True: 72 | try: 73 | html = requests.get(url, headers=headers, timeout=timeout, **kwargs) 74 | if any(f in html.content for f in retry_flag): 75 | raise Exception 76 | return html 77 | except Exception as e: 78 | print(e) 79 | retry_time -= 1 80 | if retry_time <= 0: 81 | # 多次请求失败 82 | resp = Response() 83 | resp.status_code = 200 84 | return resp 85 | time.sleep(retry_interval) 86 | -------------------------------------------------------------------------------- /cableav.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from bs4 import BeautifulSoup 3 | from fake_useragent import UserAgent 4 | import re 5 | from datetime import datetime 6 | from time import sleep 7 | from random import randint 8 | 9 | FILE_PATH = './' 10 | 11 | host = 'https://www.cableav.tv/' 12 | 13 | proxies = { 14 | 'http': 'http://127.0.0.1:7890', 15 | 'https': 'http://127.0.0.1:7890' 16 | } 17 | ua = UserAgent() 18 | headers = { 19 | "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9", 20 | "accept-encoding": "gzip, deflate, br", 21 | "accept-language": "zh-CN,zh;q=0.9", 22 | "cache-control": "max-age=0", 23 | "dnt":"1", 24 | "referer":"https://cableav.tv/playlist/", 25 | "user-agent": ua.random 26 | } 27 | 28 | def open_page(url): 29 | 30 | sleep(randint(1,3)) 31 | print('\n{} - [INFO]: requests at {}'.format( 32 | datetime.now().strftime("%Y-%m-%d %H:%M:%S"),url)) 33 | 34 | req = requests.get(url,headers=headers,proxies=proxies) 35 | try: 36 | if req.status_code == 200 or req.status_code == 304: 37 | req.encoding = 'utf-8' 38 | return req 39 | except TimeoutError: 40 | print("Timeout:") 41 | cnt = 0 42 | while cnt < 3: 43 | open_page(url) 44 | cnt += 1 45 | 46 | def parse_playlist(html): 47 | 48 | if html != None: 49 | page = BeautifulSoup(html.text,'lxml') 50 | video_urls = page.select('div.listing-content > h3 > a') 51 | for i in video_urls: 52 | data = i.get('href') 53 | yield data 54 | else: 55 | print("Result is None! 
\n") 56 | pass 57 | 58 | def parse_video(html): 59 | PATTERN_URL = r'.*\"single_media_sources\":(\[\{.*\}\])' 60 | if html != None: 61 | page = BeautifulSoup(html.text,'lxml') 62 | m3u8 = page.find("meta", {"property": "og:video:url"})["content"] 63 | video_tags = page.find_all("meta", {"property": "video:tag"}) 64 | best_quality = max([int(tag["content"][: -1]) for tag in video_tags]) 65 | title = page.find("title").text.replace(' - CableAV','') 66 | 67 | for line in html.text.split('\n'): 68 | match = re.match(PATTERN_URL, line) 69 | if match: 70 | quality_lists = eval(match.group(1)) 71 | for quality in quality_lists: 72 | if str(best_quality) in quality['source_label']: 73 | m3u8 = quality['source_file'].replace('\/', '/') 74 | break 75 | # return [title,m3u8] 76 | save_file(title,m3u8) 77 | 78 | 79 | def save_file(title,m3u8): 80 | try: 81 | with open(FILE_PATH + 'test.txt','ab+') as f: 82 | result = '{},{}\r\n'.format(title,m3u8) 83 | f.write(result.encode('utf-8')) 84 | f.close() 85 | except IOError as e: 86 | print(e) 87 | pass 88 | 89 | def run(url): 90 | page = open_page(url) 91 | play_list = parse_playlist(page) 92 | for i in play_list: 93 | video_page = open_page(i) 94 | parse_video(video_page) 95 | 96 | if __name__ == '__main__': 97 | while True: 98 | start_url = input("Input page URL: \n") 99 | page_num = int(input('Input page list num:\n')) 100 | if page_num <= 1: 101 | run(start_url) 102 | else: 103 | urls = [start_url + "page/" + "{}/".format(x) for x in range(2,page_num+1)] 104 | run(start_url) 105 | for url in urls: 106 | run(url) 107 | -------------------------------------------------------------------------------- /baiduMap/baiduMap.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import requests 3 | import pandas as pd 4 | from time import sleep 5 | 6 | ak='' 7 | # KeyWord=u'早教' 8 | # City=u'北京市' 9 | # Tag=u'教育培训' 10 | # Page=0 11 | 12 | def getJson(url): 13 | response = requests.get(url) 14 | status = response.status_code 15 | data = response.json() 16 | if status == 200: 17 | return data 18 | else: 19 | num = 1 20 | while num < 4: 21 | print('连接错误!尝试重新获取! 
当前获取次数:' + str(num)) 22 | num += 1 23 | sleep(3) 24 | print(url) 25 | getJson(url) 26 | print('Error!') 27 | return None 28 | 29 | def getNum(data): 30 | # num = 0 31 | if data == None: 32 | return None 33 | else: 34 | total = data['total'] 35 | # results = data['results'] 36 | if total > 20: 37 | # pageNum = total // 20 38 | pageNum = int((total + 20 - 1) / 20) 39 | print('共检索到' + str(total) + '数据,共计:' + str(pageNum) + '页!') 40 | return pageNum 41 | else: 42 | pageNum = 1 43 | return pageNum 44 | 45 | def parseData(data): 46 | if data == None: 47 | print('data is None!') 48 | else: 49 | datalist = [] 50 | results = data['results'] 51 | for i in results: 52 | name = i['name'] 53 | add = i['address'] 54 | detail = i['detail_info'] 55 | mapUrl = detail['detail_url'] 56 | if i.__contains__('telephone') == True: 57 | tel = i['telephone'] 58 | else: 59 | tel = None 60 | 61 | tempData = { 62 | 'name': name, 63 | 'address': add, 64 | 'tel': str(tel), 65 | 'mapUrl': str(mapUrl) 66 | } 67 | datalist.append(tempData) 68 | return datalist 69 | 70 | if __name__ == '__main__': 71 | headers = ['name','address','tel','map'] 72 | KeyWord = input('输入检索关键词: \n') 73 | Tag = input('输入分类标签: \n') 74 | City = input('检索城市(市): \n') 75 | startUrl = 'http://api.map.baidu.com/place/v2/search?query=' + KeyWord + \ 76 | '&tag=' + Tag + \ 77 | '&region=' + City + \ 78 | '&output=json' + \ 79 | '&ak=' + ak + \ 80 | '&scope=2&page_size=20' + \ 81 | '&page_num=0' 82 | json = getJson(startUrl) 83 | pageNum = getNum(json) 84 | if pageNum == None: 85 | print('No page number!') 86 | else: 87 | for num in range(0,int(pageNum)): 88 | url = 'http://api.map.baidu.com/place/v2/search?query=' + KeyWord + \ 89 | '&tag=' + Tag + \ 90 | '&region=' + City + \ 91 | '&output=json' + \ 92 | '&ak=' + ak + \ 93 | '&scope=2&page_size=20' + \ 94 | '&page_num=' + str(num) 95 | print('page is :' + str(num)) 96 | data = getJson(url) 97 | sleep(5) 98 | datalist = parseData(data) 99 | save = pd.DataFrame(datalist) 100 | try: 101 | save.to_csv('./result.csv',header=headers,index=False,mode='a+',encoding='utf_8_sig') 102 | except UnicodeEncodeError: 103 | print('编码错误!') 104 | -------------------------------------------------------------------------------- /qicai/QicaiCategoriesSpider.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from bs4 import BeautifulSoup 3 | import urllib.request 4 | import pymongo 5 | from multiprocessing import Pool 6 | 7 | mongo_client = pymongo.MongoClient('localhost',27017) 8 | db = mongo_client['spider_db'] 9 | qcdb = db.client['qcdb'] 10 | 11 | 12 | host = 'http://www.qcenglish.com' 13 | 14 | headers = { 15 | 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36', 16 | 'Host': 'www.qcenglish.com', 17 | 'Referer': host 18 | } 19 | 20 | download_path = './tmp//' 21 | 22 | def get_article(url): 23 | req = requests.get(url,headers=headers) 24 | req.encoding = req.apparent_encoding 25 | soup = BeautifulSoup(req.text,'lxml') 26 | try: 27 | pdf_title = soup.select('#details > dl > dd')[0].get_text() 28 | download_link = soup.select('#download > li > a')[0].get('href') 29 | print('书名:' + pdf_title) 30 | print('下载链接:' + host + download_link) 31 | download_url = host + download_link 32 | download(download_url,pdf_title) 33 | except IndexError as e: 34 | print(e) 35 | pass 36 | 37 | def download(url, title): 38 | file_path = download_path + title + '.zip' 39 | conunter = 1 40 | try: 41 |
urllib.request.urlretrieve(url, file_path) 42 | except urllib.error.URLError as e: 43 | while conunter <= 3: 44 | print("尝试重连,当前次数:" + str(conunter)) 45 | download(url,title) 46 | conunter += 1 47 | pass 48 | print('下载完成.......') 49 | 50 | def get_item_url(url): 51 | print('当前URL: ' + url) 52 | wb_date = requests.get(url,headers=headers) 53 | wb_date.encoding = wb_date.apparent_encoding 54 | soup = BeautifulSoup(wb_date.text,'lxml') 55 | items = soup.select('#container > div.content > dl.listitem > a') 56 | for item in items: 57 | # item = item.get('href') 58 | data = { 59 | 'item_url': host + item.get('href'), 60 | 'status': 0 61 | } 62 | print(data) 63 | qcdb.url.insert(data) 64 | print('当前列表页爬取完成!\n') 65 | 66 | def url_generator(page_id,page_sum): 67 | page_sum = page_sum + 1 68 | for y in range(1,page_sum): 69 | url = 'http://www.qcenglish.com/ebook/list_' + str(page_id) + '_{}.html'.format(str(y)) 70 | get_item_url(url) 71 | print('文章页获取ing....') 72 | 73 | 74 | # url_generator(54,12) 75 | 76 | 77 | if __name__ == '__main__': 78 | p = Pool() 79 | for item in qcdb.url.find(): 80 | item_status = item.get('status') 81 | item_url = item.get('item_url') 82 | if item_status == 0: 83 | print('当前内容页:' + item_url) 84 | try: 85 | p.apply_async(get_article(item_url)) 86 | qcdb.url.update({'item_url':item_url},{"$set":{"item_url":item_url,"status":1}},multi=False) 87 | except: 88 | print('发现一个玄学问题!') 89 | bad_url = { 90 | 'badURL': item_url, 91 | 'status': 0 92 | } 93 | qcdb.badurl.insert(bad_url) 94 | print('已加入BadURL中,请注意查看!') 95 | pass 96 | else: 97 | print('已经爬取过了····') 98 | print("等待新的线程加入!") 99 | p.close() 100 | p.join() 101 | print('完成!\n') -------------------------------------------------------------------------------- /proxy_pool/DB/DbClient.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # !/usr/bin/env python 3 | """ 4 | ------------------------------------------------- 5 | File Name: DbClient.py 6 | Description : DB工厂类 7 | Author : JHao 8 | date: 2016/12/2 9 | ------------------------------------------------- 10 | Change Activity: 11 | 2016/12/2: 12 | ------------------------------------------------- 13 | """ 14 | __author__ = 'JHao' 15 | 16 | import os 17 | import sys 18 | 19 | from Config.ConfigGetter import config 20 | from Util import Singleton 21 | 22 | sys.path.append(os.path.dirname(os.path.abspath(__file__))) 23 | 24 | 25 | class DbClient(object): 26 | """ 27 | DbClient DB工厂类 提供get/put/update/pop/delete/exists/getAll/clean/getNumber/changeTable方法 28 | 29 | 目前存放代理的有两种, 使用changeTable方法切换操作对象: 30 | raw_proxy: 存放原始的代理; 31 | useful_proxy: 存放检验后的代理; 32 | 33 | 34 | 抽象方法定义: 35 | get(proxy): 返回指定proxy的信息; 36 | put(proxy): 存入一个proxy信息; 37 | pop(): 返回并删除一个proxy信息; 38 | update(proxy): 更新指定proxy信息; 39 | delete(proxy): 删除指定proxy; 40 | exists(proxy): 判断指定proxy是否存在; 41 | getAll(): 列表形式返回所有代理; 42 | clean(): 清除所有proxy信息; 43 | getNumber(): 返回proxy数据量; 44 | changeTable(name): 切换操作对象 raw_proxy/useful_proxy 45 | 46 | 47 | 所有方法需要相应类去具体实现: 48 | ssdb: SsdbClient.py 49 | redis: RedisClient.py 50 | mongodb: MongodbClient.py 51 | 52 | """ 53 | 54 | __metaclass__ = Singleton 55 | 56 | def __init__(self): 57 | """ 58 | init 59 | :return: 60 | """ 61 | self.__initDbClient() 62 | 63 | def __initDbClient(self): 64 | """ 65 | init DB Client 66 | :return: 67 | """ 68 | __type = None 69 | if "SSDB" == config.db_type: 70 | __type = "SsdbClient" 71 | elif "REDIS" == config.db_type: 72 | __type = "RedisClient" 73 | elif "MONGODB" == config.db_type: 
74 | __type = "MongodbClient" 75 | else: 76 | pass 77 | assert __type, 'type error, Not support DB type: {}'.format(config.db_type) 78 | self.client = getattr(__import__(__type), __type)(name=config.db_name, 79 | host=config.db_host, 80 | port=config.db_port, 81 | password=config.db_password) 82 | 83 | def get(self, key, **kwargs): 84 | return self.client.get(key, **kwargs) 85 | 86 | def put(self, key, **kwargs): 87 | return self.client.put(key, **kwargs) 88 | 89 | def update(self, key, value, **kwargs): 90 | return self.client.update(key, value, **kwargs) 91 | 92 | def delete(self, key, **kwargs): 93 | return self.client.delete(key, **kwargs) 94 | 95 | def exists(self, key, **kwargs): 96 | return self.client.exists(key, **kwargs) 97 | 98 | def pop(self, **kwargs): 99 | return self.client.pop(**kwargs) 100 | 101 | def getAll(self): 102 | return self.client.getAll() 103 | 104 | def clear(self): 105 | return self.client.clear() 106 | 107 | def changeTable(self, name): 108 | self.client.changeTable(name) 109 | 110 | def getNumber(self): 111 | return self.client.getNumber() 112 | -------------------------------------------------------------------------------- /proxy_pool/Api/ProxyApi.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # !/usr/bin/env python 3 | """ 4 | ------------------------------------------------- 5 | File Name: ProxyApi.py 6 | Description : WebApi 7 | Author : JHao 8 | date: 2016/12/4 9 | ------------------------------------------------- 10 | Change Activity: 11 | 2016/12/04: WebApi 12 | 2019/08/14: 集成Gunicorn启动方式 13 | ------------------------------------------------- 14 | """ 15 | __author__ = 'JHao' 16 | 17 | import sys 18 | import platform 19 | from werkzeug.wrappers import Response 20 | from flask import Flask, jsonify, request 21 | 22 | sys.path.append('../') 23 | 24 | from Config.ConfigGetter import config 25 | from Manager.ProxyManager import ProxyManager 26 | 27 | app = Flask(__name__) 28 | 29 | 30 | class JsonResponse(Response): 31 | @classmethod 32 | def force_type(cls, response, environ=None): 33 | if isinstance(response, (dict, list)): 34 | response = jsonify(response) 35 | 36 | return super(JsonResponse, cls).force_type(response, environ) 37 | 38 | 39 | app.response_class = JsonResponse 40 | 41 | api_list = { 42 | 'get': u'get an useful proxy', 43 | # 'refresh': u'refresh proxy pool', 44 | 'get_all': u'get all proxy from proxy pool', 45 | 'delete?proxy=127.0.0.1:8080': u'delete an unable proxy', 46 | 'get_status': u'proxy number' 47 | } 48 | 49 | 50 | @app.route('/') 51 | def index(): 52 | return api_list 53 | 54 | 55 | @app.route('/get/') 56 | def get(): 57 | proxy = ProxyManager().get() 58 | return proxy.info_json if proxy else {"code": 0, "src": "no proxy"} 59 | 60 | 61 | @app.route('/refresh/') 62 | def refresh(): 63 | # TODO refresh会有守护程序定时执行,由api直接调用性能较差,暂不使用 64 | # ProxyManager().refresh() 65 | pass 66 | return 'success' 67 | 68 | 69 | @app.route('/get_all/') 70 | def getAll(): 71 | proxies = ProxyManager().getAll() 72 | return jsonify([_.info_dict for _ in proxies]) 73 | 74 | 75 | @app.route('/delete/', methods=['GET']) 76 | def delete(): 77 | proxy = request.args.get('proxy') 78 | ProxyManager().delete(proxy) 79 | return {"code": 0, "src": "success"} 80 | 81 | 82 | @app.route('/get_status/') 83 | def getStatus(): 84 | status = ProxyManager().getNumber() 85 | return status 86 | 87 | 88 | if platform.system() != "Windows": 89 | import gunicorn.app.base 90 | from six import iteritems 91 | 
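# NOTE: StandaloneApplication below follows Gunicorn's "custom application" pattern
# (subclassing gunicorn.app.base.BaseApplication): load_config() copies the recognised
# options passed in by runFlaskWithGunicorn() (bind, workers, accesslog, ...) into
# Gunicorn's own config, and load() returns the Flask WSGI app for the worker
# processes to serve, so the API starts without a separate gunicorn command line.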
92 | 93 | class StandaloneApplication(gunicorn.app.base.BaseApplication): 94 | 95 | def __init__(self, app, options=None): 96 | self.options = options or {} 97 | self.application = app 98 | super(StandaloneApplication, self).__init__() 99 | 100 | def load_config(self): 101 | _config = dict([(key, value) for key, value in iteritems(self.options) 102 | if key in self.cfg.settings and value is not None]) 103 | for key, value in iteritems(_config): 104 | self.cfg.set(key.lower(), value) 105 | 106 | def load(self): 107 | return self.application 108 | 109 | 110 | def runFlask(): 111 | app.run(host=config.host_ip, port=config.host_port) 112 | 113 | 114 | def runFlaskWithGunicorn(): 115 | _options = { 116 | 'bind': '%s:%s' % (config.host_ip, config.host_port), 117 | 'workers': 4, 118 | 'accesslog': '-', # log to stdout 119 | 'access_log_format': '%(h)s %(l)s %(t)s "%(r)s" %(s)s "%(a)s"' 120 | } 121 | StandaloneApplication(app, _options).run() 122 | 123 | 124 | if __name__ == '__main__': 125 | if platform.system() == "Windows": 126 | runFlask() 127 | else: 128 | runFlaskWithGunicorn() 129 | -------------------------------------------------------------------------------- /yasee1/run.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import requests 4 | 5 | host = 'https://1.yasee1.com/' 6 | 7 | def getVideoId(): 8 | videoId = int(input("Input Video ID: ")) 9 | # videoUrl = host + str("video-") + str(videoId) 10 | return str(videoId) 11 | 12 | def getXHR(): 13 | videoId = getVideoId() 14 | headers = { 15 | "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36", 16 | "X-Requested-With": "XMLHttpRequest", 17 | "Referer": str(host) + "video-" + str(videoId) 18 | } 19 | videoUrl = str(host) + "index/req/getPlayerDomain?id=" + videoId 20 | response = requests.get(videoUrl,headers=headers) 21 | res_status = response.status_code 22 | if res_status == 200: 23 | response = response.json() 24 | # print(response) 25 | return response 26 | else: 27 | return None 28 | 29 | def parseXHR(): 30 | XHR = getXHR() 31 | code = XHR.get("code") 32 | if code == -2: 33 | data = XHR.get("info") 34 | down_url = data.get("down_url") 35 | video_hls = data.get("video_hls") 36 | data = { 37 | "down_url" : down_url, 38 | "video_hls" : video_hls 39 | } 40 | return data 41 | else: 42 | print('Error! 
\n') 43 | return None 44 | 45 | 46 | def m3u8(data): 47 | down_url = data.get("down_url") 48 | video_hls = data.get("video_hls") 49 | hlsUrl = video_hls.split('/',3) 50 | 51 | if hlsUrl[2] == '[domain_dan]': 52 | video_hls = video_hls.replace("[domain_dan]","hone.yyhdyl.com") 53 | elif hlsUrl[2] == '[domain_fourth]': 54 | video_hls = video_hls.replace("[domain_fourth]","head2.yyhdyl.com") 55 | elif hlsUrl[2] == '[domain_shuang]': 56 | video_hls = video_hls.replace("[domain_shuang]","htwo.yyhdyl.com") 57 | elif hlsUrl[2] == '[domain_three]': 58 | video_hls = video_hls.replace("[domain_three]","head.yyhdyl.com") 59 | else: 60 | video_hls = None 61 | 62 | if down_url == None: 63 | return video_hls 64 | else: 65 | quality = down_url[-9:-5] 66 | if quality == str("720p"): 67 | video_hls = video_hls.replace("hls.m3u8","hls-720p.m3u8") 68 | elif quality == str("480p"): 69 | video_hls = video_hls.replace("hls.m3u8","hls-480p.m3u8") 70 | elif quality == str("360p"): 71 | video_hls = video_hls.replace("hls.m3u8","hls-360p.m3u8") 72 | elif quality == str("240p"): 73 | video_hls = video_hls.replace("hls.m3u8","hls-240p.m3u8") 74 | else: 75 | video_hls = None 76 | return video_hls 77 | 78 | def download(url,filename): 79 | header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36"} 80 | downloadPath = os.getcwd() + '\Temp' 81 | if not os.path.exists(downloadPath): 82 | os.mkdir(downloadPath) 83 | content = requests.get(url,headers=header).text 84 | num = 0 85 | tempVideo = os.path.join(downloadPath,f'{filename}.ts') 86 | fileLine = content.split('\n') 87 | for line in fileLine: 88 | if line[-4:] == ".jpg": 89 | tsUrl = url.rsplit('/',1)[0] + "/" + line 90 | # res = requests.get(tsUrl) 91 | # with open(downloadPath + "\\" + str(num) + ".ts",'wb') as f: 92 | # f.write(res.content) 93 | # f.flush() 94 | print(tsUrl) 95 | num += 1 96 | print('Download Successful!') 97 | 98 | 99 | 100 | if __name__ == '__main__': 101 | while True: 102 | XHR = parseXHR() 103 | if XHR == None: 104 | print('Error!\n') 105 | else: 106 | m3u8_url = m3u8(XHR) 107 | print(m3u8_url) 108 | -------------------------------------------------------------------------------- /tuao8/crawler.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import requests 3 | import os 4 | import time 5 | import threading 6 | from bs4 import BeautifulSoup 7 | 8 | 9 | class myThred(threading.Thread): 10 | def __init__(self,url,dir,filename): 11 | threading.Thread.__init__(self) 12 | self.ThreadID = filename 13 | self.url = url 14 | self.dir = dir 15 | self.filename = filename 16 | 17 | def run(self): 18 | downloadPic(self.url,self.dir,self.filename) 19 | def getList(url): 20 | try: 21 | html = requests.get(url) 22 | soup = BeautifulSoup(html.text,'lxml') 23 | articlelist = soup.select('#container > main > article > div > a') 24 | articleurls = [articleurl.get('href') for articleurl in articlelist] 25 | return articleurls 26 | except Exception as e: 27 | print(e) 28 | return None 29 | 30 | def getTitle(url): 31 | try: 32 | html = requests.get(url) 33 | soup = BeautifulSoup(html.text,'lxml') 34 | title = soup.select('h1.title')[0].get_text() 35 | return title 36 | except Exception as e: 37 | print(e) 38 | return None 39 | 40 | def getImgurl(url): 41 | try: 42 | html = requests.get(url) 43 | soup = BeautifulSoup(html.text,'lxml') 44 | imgurl = soup.select('div.entry')[0].p.img['src'] 45 | return imgurl 46 | except 
Exception as e: 47 | print(e) 48 | return None 49 | 50 | def downloadPic(url,dir,filename): 51 | req = requests.get(url) 52 | if req.status_code == 200: 53 | with open(str(dir) + '/' + str(filename) + '.jpg', 'wb+') as f: 54 | f.write(req.content) 55 | else: 56 | print('链接错误: ' + str(req.status_code)) 57 | 58 | def getLastpage(url): 59 | html = requests.get(url) 60 | soup = BeautifulSoup(html.text,'lxml') 61 | lastnum = soup.select('#dm-fy > li > a')[-2].get_text() 62 | return int(lastnum) 63 | 64 | def getArticles(url): 65 | imgurls = [] 66 | lastpage = getLastpage(url) 67 | pageurls = [str(url) + '?page={}'.format(number) for number in range(1,lastpage)] 68 | for imgurl in pageurls: 69 | imgurls.append(imgurl) 70 | return imgurls 71 | 72 | def startUrl(url): 73 | category = int(input('请输入分类ID: ')) 74 | categoryLast = int(input('请输入分类对应的最后页码: ')) 75 | categoryUrl = [str(url) + 'category-' + str(category) + '_{}.html'.format(num) for num in range(1,int(categoryLast) + 1)] 76 | return categoryUrl 77 | 78 | def main(url): 79 | imglinks = [] 80 | title = getTitle(url) 81 | articles = getArticles(url) 82 | filename = 1 83 | for imgurl in articles: 84 | imglink = getImgurl(imgurl) 85 | imglinks.append(imglink) 86 | print('获取下载链接ing.......' + str(imglink)) 87 | print('共计取得: ' +str(len(imglinks)) + '张图片链接') 88 | if os.path.exists(title) == False: 89 | os.mkdir(title) 90 | threads = [] 91 | for img in imglinks: 92 | thread = myThred(img, title, filename) 93 | thread.start() 94 | threads.append(thread) 95 | # downloadPic(imglink, title, filename) 96 | print('下载完成....' + str(filename)) 97 | filename += 1 98 | for t in threads: 99 | t.join() 100 | else: 101 | print('文件已存在,跳过下载.....' + str(filename)) 102 | 103 | url = 'https://www.tuao8.com/' 104 | 105 | if __name__ == '__main__': 106 | try: 107 | starturls = startUrl(url) 108 | for starturl in starturls: 109 | articleurls = getList(starturl) 110 | for articleurl in articleurls: 111 | print(articleurl) 112 | main(articleurl) 113 | time.sleep(3) 114 | print('图集下载完成,休眠 3S......') 115 | print('当前分类爬取完成.....') 116 | except Exception as e: 117 | print(e) -------------------------------------------------------------------------------- /proxy_pool/Manager/ProxyManager.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # !/usr/bin/env python 3 | """ 4 | ------------------------------------------------- 5 | File Name: ProxyManager.py 6 | Description : 7 | Author : JHao 8 | date: 2016/12/3 9 | ------------------------------------------------- 10 | Change Activity: 11 | 2016/12/3: 12 | ------------------------------------------------- 13 | """ 14 | __author__ = 'JHao' 15 | 16 | import random 17 | 18 | from ProxyHelper import Proxy 19 | from DB.DbClient import DbClient 20 | from Config.ConfigGetter import config 21 | from Util.LogHandler import LogHandler 22 | from Util.utilFunction import verifyProxyFormat 23 | from ProxyGetter.getFreeProxy import GetFreeProxy 24 | 25 | 26 | class ProxyManager(object): 27 | """ 28 | ProxyManager 29 | """ 30 | 31 | def __init__(self): 32 | self.db = DbClient() 33 | self.raw_proxy_queue = 'raw_proxy' 34 | self.log = LogHandler('proxy_manager') 35 | self.useful_proxy_queue = 'useful_proxy' 36 | 37 | def fetch(self): 38 | """ 39 | fetch proxy into db by ProxyGetter 40 | :return: 41 | """ 42 | self.db.changeTable(self.raw_proxy_queue) 43 | proxy_set = set() 44 | self.log.info("ProxyFetch : start") 45 | for proxyGetter in config.proxy_getter_functions: 46 | 
self.log.info("ProxyFetch - {func}: start".format(func=proxyGetter)) 47 | try: 48 | for proxy in getattr(GetFreeProxy, proxyGetter.strip())(): 49 | proxy = proxy.strip() 50 | 51 | if not proxy or not verifyProxyFormat(proxy): 52 | self.log.error('ProxyFetch - {func}: ' 53 | '{proxy} illegal'.format(func=proxyGetter, proxy=proxy.ljust(20))) 54 | continue 55 | elif proxy in proxy_set: 56 | self.log.info('ProxyFetch - {func}: ' 57 | '{proxy} exist'.format(func=proxyGetter, proxy=proxy.ljust(20))) 58 | continue 59 | else: 60 | self.log.info('ProxyFetch - {func}: ' 61 | '{proxy} success'.format(func=proxyGetter, proxy=proxy.ljust(20))) 62 | self.db.put(Proxy(proxy, source=proxyGetter)) 63 | proxy_set.add(proxy) 64 | except Exception as e: 65 | self.log.error("ProxyFetch - {func}: error".format(func=proxyGetter)) 66 | self.log.error(str(e)) 67 | 68 | def get(self): 69 | """ 70 | return a useful proxy 71 | :return: 72 | """ 73 | self.db.changeTable(self.useful_proxy_queue) 74 | item_list = self.db.getAll() 75 | if item_list: 76 | random_choice = random.choice(item_list) 77 | return Proxy.newProxyFromJson(random_choice) 78 | return None 79 | 80 | def delete(self, proxy_str): 81 | """ 82 | delete proxy from pool 83 | :param proxy_str: 84 | :return: 85 | """ 86 | self.db.changeTable(self.useful_proxy_queue) 87 | self.db.delete(proxy_str) 88 | 89 | def getAll(self): 90 | """ 91 | get all proxy from pool as list 92 | :return: 93 | """ 94 | self.db.changeTable(self.useful_proxy_queue) 95 | item_list = self.db.getAll() 96 | return [Proxy.newProxyFromJson(_) for _ in item_list] 97 | 98 | def getNumber(self): 99 | self.db.changeTable(self.raw_proxy_queue) 100 | total_raw_proxy = self.db.getNumber() 101 | self.db.changeTable(self.useful_proxy_queue) 102 | total_useful_queue = self.db.getNumber() 103 | return {'raw_proxy': total_raw_proxy, 'useful_proxy': total_useful_queue} 104 | 105 | 106 | if __name__ == '__main__': 107 | pp = ProxyManager() 108 | pp.fetch() 109 | -------------------------------------------------------------------------------- /proxy_pool/ProxyHelper/Proxy.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ------------------------------------------------- 4 | File Name: Proxy 5 | Description : 代理对象类型封装 6 | Author : JHao 7 | date: 2019/7/11 8 | ------------------------------------------------- 9 | Change Activity: 10 | 2019/7/11: 代理对象类型封装 11 | ------------------------------------------------- 12 | """ 13 | __author__ = 'JHao' 14 | 15 | import json 16 | 17 | 18 | class Proxy(object): 19 | 20 | def __init__(self, proxy, fail_count=0, region="", proxy_type="", 21 | source="", check_count=0, last_status="", last_time=""): 22 | self._proxy = proxy 23 | self._fail_count = fail_count 24 | self._region = region 25 | self._type = proxy_type 26 | self._source = source 27 | self._check_count = check_count 28 | self._last_status = last_status 29 | self._last_time = last_time 30 | 31 | @classmethod 32 | def newProxyFromJson(cls, proxy_json): 33 | """ 34 | 根据proxy属性json创建Proxy实例 35 | :param proxy_json: 36 | :return: 37 | """ 38 | proxy_dict = json.loads(proxy_json) 39 | return cls(proxy=proxy_dict.get("proxy", ""), 40 | fail_count=proxy_dict.get("fail_count", 0), 41 | region=proxy_dict.get("region", ""), 42 | proxy_type=proxy_dict.get("type", ""), 43 | source=proxy_dict.get("source", ""), 44 | check_count=proxy_dict.get("check_count", 0), 45 | last_status=proxy_dict.get("last_status", ""), 46 | 
last_time=proxy_dict.get("last_time", "") 47 | ) 48 | 49 | @property 50 | def proxy(self): 51 | """ 代理 ip:port """ 52 | return self._proxy 53 | 54 | @property 55 | def fail_count(self): 56 | """ 检测失败次数 """ 57 | return self._fail_count 58 | 59 | @property 60 | def region(self): 61 | """ 地理位置(国家/城市) """ 62 | return self._region 63 | 64 | @property 65 | def type(self): 66 | """ 透明/匿名/高匿 """ 67 | return self._type 68 | 69 | @property 70 | def source(self): 71 | """ 代理来源 """ 72 | return self._source 73 | 74 | @property 75 | def check_count(self): 76 | """ 代理检测次数 """ 77 | return self._check_count 78 | 79 | @property 80 | def last_status(self): 81 | """ 最后一次检测结果 1 -> 可用; 0 -> 不可用""" 82 | return self._last_status 83 | 84 | @property 85 | def last_time(self): 86 | """ 最后一次检测时间 """ 87 | return self._last_time 88 | 89 | @property 90 | def info_dict(self): 91 | """ 属性字典 """ 92 | return {"proxy": self._proxy, 93 | "fail_count": self._fail_count, 94 | "region": self._region, 95 | "type": self._type, 96 | "source": self._source, 97 | "check_count": self.check_count, 98 | "last_status": self.last_status, 99 | "last_time": self.last_time} 100 | 101 | @property 102 | def info_json(self): 103 | """ 属性json格式 """ 104 | return json.dumps(self.info_dict, ensure_ascii=False) 105 | 106 | # --- proxy method --- 107 | @fail_count.setter 108 | def fail_count(self, value): 109 | self._fail_count = value 110 | 111 | @region.setter 112 | def region(self, value): 113 | self._region = value 114 | 115 | @type.setter 116 | def type(self, value): 117 | self._type = value 118 | 119 | @source.setter 120 | def source(self, value): 121 | self._source = value 122 | 123 | @check_count.setter 124 | def check_count(self, value): 125 | self._check_count = value 126 | 127 | @last_status.setter 128 | def last_status(self, value): 129 | self._last_status = value 130 | 131 | @last_time.setter 132 | def last_time(self, value): 133 | self._last_time = value 134 | -------------------------------------------------------------------------------- /proxy_pool/DB/RedisClient.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | ------------------------------------------------- 4 | File Name: RedisClient 5 | Description : 封装Redis相关操作 6 | Author : JHao 7 | date: 2019/8/9 8 | ------------------------------------------------- 9 | Change Activity: 10 | 2019/8/9: 封装Redis相关操作 11 | ------------------------------------------------- 12 | """ 13 | __author__ = 'JHao' 14 | 15 | from Config.setting import PY3 16 | 17 | from redis.connection import BlockingConnectionPool 18 | from redis import Redis 19 | 20 | 21 | class RedisClient(object): 22 | """ 23 | Redis client 和SSDB协议一致 数据结构一致, 但部分方法不通用 24 | 25 | Redis中代理存放的结构为hash: 26 | 原始代理存放在name为raw_proxy的hash中, key为代理的ip:por, value为代理属性的字典; 27 | 验证后的代理存放在name为useful_proxy的hash中, key为代理的ip:port, value为代理属性的字典; 28 | 29 | """ 30 | 31 | def __init__(self, name, **kwargs): 32 | """ 33 | init 34 | :param name: hash name 35 | :param host: host 36 | :param port: port 37 | :param password: password 38 | :return: 39 | """ 40 | self.name = name 41 | self.__conn = Redis(connection_pool=BlockingConnectionPool(**kwargs)) 42 | 43 | def get(self, proxy_str): 44 | """ 45 | 从hash中获取对应的proxy, 使用前需要调用changeTable() 46 | :param proxy_str: proxy str 47 | :return: 48 | """ 49 | data = self.__conn.hget(name=self.name, key=proxy_str) 50 | if data: 51 | return data.decode('utf-8') if PY3 else data 52 | else: 53 | return None 54 | 55 | def put(self, proxy_obj): 56 | """ 
57 | 将代理放入hash, 使用changeTable指定hash name 58 | :param proxy_obj: Proxy obj 59 | :return: 60 | """ 61 | data = self.__conn.hset(self.name, proxy_obj.proxy, proxy_obj.info_json) 62 | return data 63 | 64 | def delete(self, proxy_str): 65 | """ 66 | 移除指定代理, 使用changeTable指定hash name 67 | :param proxy_str: proxy str 68 | :return: 69 | """ 70 | self.__conn.hdel(self.name, proxy_str) 71 | 72 | def exists(self, proxy_str): 73 | """ 74 | 判断指定代理是否存在, 使用changeTable指定hash name 75 | :param proxy_str: proxy str 76 | :return: 77 | """ 78 | return self.__conn.hexists(self.name, proxy_str) 79 | 80 | def update(self, proxy_obj): 81 | """ 82 | 更新 proxy 属性 83 | :param proxy_obj: 84 | :return: 85 | """ 86 | self.__conn.hset(self.name, proxy_obj.proxy, proxy_obj.info_json) 87 | 88 | def pop(self): 89 | """ 90 | 弹出一个代理 91 | :return: dict {proxy: value} 92 | """ 93 | # proxies = self.__conn.hkeys(self.name) 94 | # if proxies: 95 | # proxy = random.choice(proxies) 96 | # value = self.__conn.hget(self.name, proxy) 97 | # self.delete(proxy) 98 | # return {'proxy': proxy.decode('utf-8') if PY3 else proxy, 99 | # 'value': value.decode('utf-8') if PY3 and value else value} 100 | return None 101 | 102 | def getAll(self): 103 | """ 104 | 列表形式返回所有代理, 使用changeTable指定hash name 105 | :return: 106 | """ 107 | item_dict = self.__conn.hgetall(self.name) 108 | if PY3: 109 | return [value.decode('utf8') for key, value in item_dict.items()] 110 | else: 111 | return item_dict.values() 112 | 113 | def clear(self): 114 | """ 115 | 清空所有代理, 使用changeTable指定hash name 116 | :return: 117 | """ 118 | return self.__conn.delete(self.name) 119 | 120 | def getNumber(self): 121 | """ 122 | 返回代理数量 123 | :return: 124 | """ 125 | return self.__conn.hlen(self.name) 126 | 127 | def changeTable(self, name): 128 | """ 129 | 切换操作对象 130 | :param name: raw_proxy/useful_proxy 131 | :return: 132 | """ 133 | self.name = name 134 | -------------------------------------------------------------------------------- /proxy_pool/DB/SsdbClient.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # !/usr/bin/env python 3 | """ 4 | ------------------------------------------------- 5 | File Name: SsdbClient.py 6 | Description : 封装SSDB操作 7 | Author : JHao 8 | date: 2016/12/2 9 | ------------------------------------------------- 10 | Change Activity: 11 | 2016/12/2: 12 | 2017/09/22: PY3中 redis-py返回的数据是bytes型 13 | 2017/09/27: 修改pop()方法 返回{proxy:value}字典 14 | ------------------------------------------------- 15 | """ 16 | __author__ = 'JHao' 17 | 18 | from Config.setting import PY3 19 | 20 | from redis.connection import BlockingConnectionPool 21 | from redis import Redis 22 | 23 | 24 | class SsdbClient(object): 25 | """ 26 | SSDB client 27 | 28 | SSDB中代理存放的结构为hash: 29 | 原始代理存放在name为raw_proxy的hash中, key为代理的ip:por, value为代理属性的字典; 30 | 验证后的代理存放在name为useful_proxy的hash中, key为代理的ip:port, value为代理属性的字典; 31 | 32 | """ 33 | def __init__(self, name, **kwargs): 34 | """ 35 | init 36 | :param name: hash name 37 | :param host: host 38 | :param port: port 39 | :param password: password 40 | :return: 41 | """ 42 | self.name = name 43 | self.__conn = Redis(connection_pool=BlockingConnectionPool(**kwargs)) 44 | 45 | def get(self, proxy_str): 46 | """ 47 | 从hash中获取对应的proxy, 使用前需要调用changeTable() 48 | :param proxy_str: proxy str 49 | :return: 50 | """ 51 | data = self.__conn.hget(name=self.name, key=proxy_str) 52 | if data: 53 | return data.decode('utf-8') if PY3 else data 54 | else: 55 | return None 56 | 57 | def put(self, 
proxy_obj): 58 | """ 59 | 将代理放入hash, 使用changeTable指定hash name 60 | :param proxy_obj: Proxy obj 61 | :return: 62 | """ 63 | data = self.__conn.hset(self.name, proxy_obj.proxy, proxy_obj.info_json) 64 | return data 65 | 66 | def delete(self, proxy_str): 67 | """ 68 | 移除指定代理, 使用changeTable指定hash name 69 | :param proxy_str: proxy str 70 | :return: 71 | """ 72 | self.__conn.hdel(self.name, proxy_str) 73 | 74 | def exists(self, proxy_str): 75 | """ 76 | 判断指定代理是否存在, 使用changeTable指定hash name 77 | :param proxy_str: proxy str 78 | :return: 79 | """ 80 | return self.__conn.hexists(self.name, proxy_str) 81 | 82 | def update(self, proxy_obj): 83 | """ 84 | 更新 proxy 属性 85 | :param proxy_obj: 86 | :return: 87 | """ 88 | self.__conn.hset(self.name, proxy_obj.proxy, proxy_obj.info_json) 89 | 90 | def pop(self): 91 | """ 92 | 弹出一个代理 93 | :return: dict {proxy: value} 94 | """ 95 | # proxies = self.__conn.hkeys(self.name) 96 | # if proxies: 97 | # proxy = random.choice(proxies) 98 | # value = self.__conn.hget(self.name, proxy) 99 | # self.delete(proxy) 100 | # return {'proxy': proxy.decode('utf-8') if PY3 else proxy, 101 | # 'value': value.decode('utf-8') if PY3 and value else value} 102 | return None 103 | 104 | def getAll(self): 105 | """ 106 | 列表形式返回所有代理, 使用changeTable指定hash name 107 | :return: 108 | """ 109 | item_dict = self.__conn.hgetall(self.name) 110 | if PY3: 111 | return [value.decode('utf8') for key, value in item_dict.items()] 112 | else: 113 | return item_dict.values() 114 | 115 | def clear(self): 116 | """ 117 | 清空所有代理, 使用changeTable指定hash name 118 | :return: 119 | """ 120 | return self.__conn.execute_command("hclear", self.name) 121 | 122 | def getNumber(self): 123 | """ 124 | 返回代理数量 125 | :return: 126 | """ 127 | return self.__conn.hlen(self.name) 128 | 129 | def changeTable(self, name): 130 | """ 131 | 切换操作对象 132 | :param name: raw_proxy/useful_proxy 133 | :return: 134 | """ 135 | self.name = name 136 | -------------------------------------------------------------------------------- /1024/new1024spider.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import os 3 | import threading 4 | import random, time 5 | from bs4 import BeautifulSoup 6 | 7 | host = 'https://hh.flexui.win/' 8 | 9 | headers={ 10 | 'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36', 11 | 'Referer':host 12 | } 13 | 14 | class myThred(threading.Thread): 15 | def __init__(self,url,dir,filename): 16 | threading.Thread.__init__(self) 17 | self.ThreadID = filename 18 | self.url = url 19 | self.dir = dir 20 | self.filename = filename 21 | 22 | def run(self): 23 | download_pic(self.url,self.dir,self.filename) 24 | 25 | def download_pic(url,dir,filename): 26 | try: 27 | req = requests.get(url, headers=headers) 28 | if req.status_code == 200: 29 | with open('pic' + '/' + str(dir) + '/' + str(filename), 'wb+') as f: 30 | f.write(req.content) 31 | # print('下载完成.......' + str(filename)) 32 | else: 33 | print("发生错误,跳过下载....." 
+ str(req.status_code)) 34 | except TimeoutError as e: 35 | print("链接超时: " + str(e)) 36 | 37 | def open_url(url): 38 | try: 39 | req = requests.get(url,headers=headers) 40 | req.encoding = req.apparent_encoding 41 | return req 42 | except (TimeoutError,ConnectionError,requests.exceptions.ConnectionError) as e: 43 | print('链接超时' + str(e)) 44 | 45 | def get_page(url): 46 | url_list = [] 47 | html = open_url(url) 48 | soup = BeautifulSoup(html.text,'lxml') 49 | article_url = soup.select('tbody > tr > td.tal > h3 > a') 50 | for url in article_url: 51 | url = str(host) + url.get('href') 52 | url_list.append(url) 53 | return url_list 54 | 55 | def get_article(url): 56 | img_all =[] 57 | html = open_url(url) 58 | soup = BeautifulSoup(html.text,'lxml') 59 | title = soup.select('td > h4')[0] 60 | title = title.get_text() 61 | img_urls = soup.select("input[type='image']") 62 | for img_url in img_urls: 63 | img_url = img_url.get('data-src') 64 | img_all.append(img_url) 65 | img_sum = len(img_all) 66 | print('当前帖子:\n' + str(title) + '\n共计取到 ' + str(img_sum) + ' 张图片连接......') 67 | if os.path.exists(title) == False: 68 | os.makedirs('pic' + '/' + str(title)) 69 | threads = [] 70 | for imgurl in img_all: 71 | imgname = imgurl.split('/')[-1] 72 | thread = myThred(imgurl,title,imgname) 73 | thread.start() 74 | threads.append(thread) 75 | for t in threads: 76 | t.join() 77 | timer = random.randint(2,5) 78 | print('下载完成............\n' + '休眠 ' + str(timer) + ' 秒......') 79 | time.sleep(timer) 80 | else: 81 | print("文件夹已存在,跳过下载。") 82 | 83 | if __name__ == '__main__': 84 | offset = 1 85 | while offset <= 2: 86 | page_url = 'https://hh.flexui.win/thread0806.php?fid=16&search=&page=' + str(offset) 87 | try: 88 | pagelist = get_page(page_url) 89 | for url in pagelist: 90 | if url == 'https://hh.flexui.win/read.php?tid=5877': 91 | print("pass") 92 | elif url == 'https://hh.flexui.win/htm_data/16/1106/524942.html': 93 | print('pass') 94 | elif url == 'https://hh.flexui.win/htm_data/16/1808/344501.html': 95 | print('pass') 96 | elif url == 'https://hh.flexui.win/htm_data/16/1110/622028.html': 97 | print('pass') 98 | elif url == 'https://hh.flexui.win/htm_data/16/1706/2424348.html': 99 | print('pass') 100 | elif url == 'https://hh.flexui.win/htm_data/16/1707/2519480.html': 101 | print('pass') 102 | elif url == 'https://hh.flexui.win/htm_data/16/0805/136474.html': 103 | print('pass') 104 | elif url == 'https://hh.flexui.win/htm_data/16/1109/594741.html': 105 | print('pass') 106 | elif url == 'https://hh.flexui.win/htm_data/16/1812/3351645.html': 107 | print('pass') 108 | else: 109 | get_article(url) 110 | except Exception as e: 111 | print('发生错误....跳过下载......' + str(e)) 112 | offset += 1 113 | -------------------------------------------------------------------------------- /proxy_pool/doc/introduce.md: -------------------------------------------------------------------------------- 1 | 2 | ## 代理池介绍 3 | 4 | 本项目通过爬虫方式持续抓取代理网站公布的免费代理IP,实时校验,维护部分可以使用的代理,并通过api的形式提供外部使用。 5 | 6 | ### 1、问题 7 | 8 | 构建一个代理IP池,可能有下面这些问题: 9 | 10 | * 代理IP从何而来? 11 | 12 |   许多刚接触爬虫的,都试过去西刺、快代理之类有免费代理的网站去抓些免费代理,还是有一些代理能用。 13 | 当然,如果你有更好的代理接口也可以自己接入。 14 | 15 |   免费代理的采集也很简单,无非就是:`访问页面`` —> `正则/xpath提取` —> `保存` 16 | 17 | * 如何保证代理质量? 18 | 19 |   可以肯定免费的代理IP大部分都是不能用的,不然别人还提供付费接口干嘛(不过事实上很多代理商的付费IP也不稳定,也有很多是不能用)。 20 | 所以采集回来的代理IP不能直接使用,检测的办法也很简单:可以写个程序不断的用代理访问一个稳定的网站,看是否可以正常访问即可。 21 | 这个过程可以使用多线/进程或异步的方式,因为检测代理是个很慢的过程。 22 | 23 | * 采集回来的代理如何存储? 
24 | 25 |   这里不得不推荐一个国人开发的高性能支持多种数据结构的NoSQL数据库[SSDB](http://ssdb.io/docs/zh_cn/),用于替代Redis。支持队列、hash、set、k-v对,支持T级别数据。是做分布式爬虫很好中间存储工具。 26 | 27 | * 如何让爬虫更方便的用到这些代理? 28 | 29 |   答案肯定是做成服务咯,Python有这么多的web框架,随便拿一个来写个api供爬虫调用。这样代理和爬虫架构分离有很多好处, 30 | 比如:当爬虫完全不用考虑如何校验代理,如何保证拿到的代理可用,这些都由代理池来完成。这样只需要安静的码爬虫代码就行啦。 31 | 32 | ### 2、代理池设计 33 | 34 |   代理池由四部分组成: 35 | 36 | * ProxyGetter: 37 | 38 |   代理获取接口,目前有5个免费代理源,每调用一次就会抓取这个5个网站的最新代理放入DB,支持自定义扩展额外的代理获取接口; 39 | 40 | * DB: 41 | 42 |   用于存放代理IP,目前支持SSDB和Redis(推荐SSDB)。至于为什么选择SSDB,大家可以参考这篇[文章](https://www.sdk.cn/news/2684),个人觉得SSDB是个不错的Redis替代方案,如果你没有用过SSDB,安装起来也很简单,可以参考[这里](https://github.com/jhao104/memory-notes/blob/master/SSDB/SSDB%E5%AE%89%E8%A3%85%E9%85%8D%E7%BD%AE%E8%AE%B0%E5%BD%95.md); 43 | 44 | * Schedule: 45 | 46 |   计划任务,定时去检测DB中的代理可用性,删除不可用的代理。同时也会主动通过ProxyGetter去获取最新代理放入DB; 47 | 48 | * ProxyApi: 49 | 50 |   代理池的外部接口,由[Flask](http://flask.pocoo.org/)实现,功能是给爬虫提供与代理池交互的接口。 51 | 52 | 53 | ![设计](https://pic2.zhimg.com/v2-f2756da2986aa8a8cab1f9562a115b55_b.png) 54 | 55 | ### 3、代码模块 56 | 57 |   Python中高层次的数据结构,动态类型和动态绑定,使得它非常适合于快速应用开发,也适合于作为胶水语言连接已有的软件部件。用Python来搞这个代理IP池也很简单,代码分为6个模块: 58 | 59 | * Api: 60 | 61 |   api接口相关代码,目前api是由Flask实现,代码也非常简单。客户端请求传给Flask,Flask调用`ProxyManager`中的实现,包括`get/delete/refresh/get_all`; 62 | 63 | * DB: 64 | 65 |   数据库相关代码,目前数据库是支持SSDB/Redis。代码用工厂模式实现,方便日后扩展其他类型数据库; 66 | 67 | * Manager: 68 | 69 |   `get/delete/refresh/get_all`等接口的具体实现类,目前代理池只负责管理proxy,日后可能会有更多功能,比如代理和爬虫的绑定,代理和账号的绑定等等; 70 | 71 | * ProxyGetter: 72 | 73 |   代理获取的相关代码,目前抓取了[快代理](http://www.kuaidaili.com)、[代理66](http://www.66ip.cn/)、[有代理](http://www.youdaili.net/Daili/http/)、[西刺代理](http://api.xicidaili.com/free2016.txt)、[guobanjia](http://www.goubanjia.com/free/gngn/index.shtml)这个五个网站的免费代理,经测试这个5个网站每天更新的可用代理只有六七十个,当然也支持自己扩展代理接口; 74 | 75 | * Schedule: 76 | 77 |   定时任务相关代码,现在只是实现定时去刷新代理,并验证可用代理,采用多进程方式; 78 | 79 | * Util: 80 | 81 |   存放一些公共的模块方法或函数,包含`GetConfig`:读取配置文件config.ini的类,`ConfigParse`: 扩展ConfigParser的类,使其对大小写敏感, `Singleton`:实现单例,`LazyProperty`:实现类属性惰性计算。等等; 82 | 83 | * 其他文件: 84 | 85 |   配置文件:`Config.ini``,数据库配置和代理获取接口配置,可以在GetFreeProxy中添加新的代理获取方法,并在Config.ini中注册即可使用; 86 | 87 | ### 4、安装 88 | 89 | 下载代码: 90 | ``` 91 | git clone git@github.com:jhao104/proxy_pool.git 92 | 93 | 或者直接到https://github.com/jhao104/proxy_pool 下载zip文件 94 | ``` 95 | 96 | 安装依赖: 97 | ``` 98 | pip install -r requirements.txt 99 | ``` 100 | 101 | 启动: 102 | 103 | ``` 104 | 如果你的依赖已经安全完成并且具备运行条件,可以直接在Run下运行main.py 105 | 到Run目录下: 106 | >>>python main.py 107 | 108 | 如果运行成功你应该可以看到有4个main.py进程在 109 | 110 | 111 | 你也可以分别运行他们,依次到Api下启动ProxyApi.py,Schedule下启动ProxyRefreshSchedule.py和ProxyValidSchedule.py即可 112 | ``` 113 | 114 | docker: 115 | ``` 116 | git clone git@github.com:jhao104/proxy_pool.git 117 | 118 | cd proxy_pool 119 | 120 | docker build -t proxy:latest -f Dockerfile . 
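# 说明: the API inside the container listens on port 5010 by default; DB settings can be
# overridden at run time via the db_type / db_host / db_port / db_password environment
# variables read in Config/setting.py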
121 | 122 | docker run -p 5010:5010 -d proxy:latest 123 | 124 | # Wait a few minutes 125 | curl localhost:5010/get/ 126 | # result: xxx.xxx.xxx.xxx:xxxx 127 | 128 | curl localhost:5010/get_all/ 129 | ``` 130 | 131 | ### 5、使用 132 |   定时任务启动后,会通过GetFreeProxy中的方法抓取代理存入数据库并验证。此后默认每10分钟会重复执行一次。定时任务启动大概一两分钟后,便可在[SSDB](https://github.com/jhao104/SSDBAdmin)中看到刷新出来的可用的代理: 133 | 134 | ![useful_proxy](https://pic2.zhimg.com/v2-12f9b7eb72f60663212f317535a113d1_b.png) 135 | 136 |   启动ProxyApi.py后即可在浏览器中使用接口获取代理,一下是浏览器中的截图: 137 | 138 |   index页面: 139 | 140 | ![index](https://pic3.zhimg.com/v2-a867aa3db1d413fea8aeeb4c693f004a_b.png) 141 | 142 |   get: 143 | 144 | ![get](https://pic1.zhimg.com/v2-f54b876b428893235533de20f2edbfe0_b.png) 145 | 146 |   get_all: 147 | 148 | ![get_all](https://pic3.zhimg.com/v2-5c79f8c07e04f9ef655b9bea406d0306_b.png) 149 | 150 | 151 |   爬虫中使用,如果要在爬虫代码中使用的话, 可以将此api封装成函数直接使用,例如: 152 | ``` 153 | import requests 154 | 155 | def get_proxy(): 156 | return requests.get("http://127.0.0.1:5010/get/").content 157 | 158 | def delete_proxy(proxy): 159 | requests.get("http://127.0.0.1:5010/delete/?proxy={}".format(proxy)) 160 | 161 | # your spider code 162 | 163 | def spider(): 164 | # .... 165 | requests.get('https://www.example.com', proxies={"http": "http://{}".format(get_proxy())}) 166 | # .... 167 | 168 | ``` 169 | 170 |   测试地址:http://123.207.35.36:5010 单机勿压测。谢谢 171 | 172 | ### 6、最后 173 |   时间仓促,功能和代码都比较简陋,以后有时间再改进。喜欢的在github上给个star。感谢! 174 | -------------------------------------------------------------------------------- /kuaishou/lib/crawler.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import json 3 | import os 4 | import re 5 | import requests 6 | from random import randint 7 | from time import sleep 8 | from bs4 import BeautifulSoup 9 | 10 | requests.packages.urllib3.disable_warnings() 11 | 12 | 13 | 14 | def get_proxy(): 15 | return requests.get("http://127.0.0.1:9910/get/").json() 16 | 17 | def delete_proxy(proxy): 18 | requests.get("http://127.0.0.1:9910/delete/?proxy={}".format(proxy)) 19 | 20 | 21 | class Kuaishou(): 22 | 23 | 24 | __headersWeb = { 25 | 'accept': '*/*', 26 | 'Accept-Encoding': 'gzip, deflate, br', 27 | 'Accept-Language': 'zh-CN,zh;q=0.9', 28 | 'Connection': 'keep-alive', 29 | 'Content-Type': 'application/json', 30 | 'Host': 'live.kuaishou.com', 31 | 'Origin': 'https://live.kuaishou.com', 32 | 'Sec-Fetch-Mode': 'cors', 33 | 'Sec-Fetch-Site': 'same-origin', 34 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36', 35 | #填上你的cookie 36 | 'Cookie': '' 37 | } 38 | 39 | __PROFILE_URL = "https://live.kuaishou.com/profile/" 40 | __DATA_URL = "https://live.kuaishou.com/m_graphql" 41 | __WORK_URL = "https://v.kuaishou.com/fw/photo/" 42 | 43 | __DATA_PATH = './data/' 44 | 45 | def __headersMobile(self): 46 | num = randint(1, 300) 47 | with open('./config/ua_mobile.txt', 'r') as f: 48 | ua = f.readlines()[num].replace('\n', '') 49 | headers_mobile = { 50 | 'Host': 'v.kuaishou.com', 51 | 'User-Agent':ua, 52 | # 'User-Agent':'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1', 53 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 54 | 'Accept-Language': 'zh-CN,zh;q=0.9', 55 | 'Accept-Encoding': 'gzip, deflate, br', 56 | 'Connection': 'keep-alive', 57 | #填上你的cookie 58 | 'Cookie': '', 59 | 
'Upgrade-Insecure-Requests': '1', 60 | } 61 | return headers_mobile 62 | 63 | def __parseVideo(self,videoID): 64 | 65 | proxy = get_proxy().get('proxy') 66 | url = self.__WORK_URL + videoID 67 | print('Current Task: %s' %url) 68 | try: 69 | req = requests.get(url, headers=self.__headersMobile(),proxies={"http": "http://{}".format(proxy)},timeout=(3,7)) 70 | req.raise_for_status() 71 | req.close() 72 | 73 | soup = BeautifulSoup(req.text,'lxml') 74 | noWaterMarkVideo = soup.find(attrs={'id': 'hide-pagedata'}).attrs['data-pagedata'] 75 | pattern = re.compile('\"srcNoMark\":"(.*?)"},', re.S) 76 | real_url = re.findall(pattern, noWaterMarkVideo)[0] 77 | print(real_url) 78 | 79 | if not os.path.exists(self.__DATA_PATH): 80 | os.makedirs(self.__DATA_PATH) 81 | 82 | 83 | with open(self.__DATA_PATH + 'data.txt','a+',encoding='utf-8') as f: 84 | f.write(real_url + '\n') 85 | f.close() 86 | # sleep(5) 87 | except Exception as e: 88 | num = 5 89 | while num < 1: 90 | delete_proxy(proxy) 91 | print('error: %s' %e) 92 | self.__parseVideo(videoID) 93 | sleep(3) 94 | num -= 1 95 | 96 | def setUid(self,uid): 97 | self.uid = uid 98 | self.user() 99 | 100 | def user(self): 101 | 102 | payload1 = {'operationName': "privateFeedsQuery", 103 | 'query': "query privateFeedsQuery($principalId: String, $pcursor: String, $count: Int) {\n privateFeeds(principalId: $principalId, pcursor: $pcursor, count: $count) {\n pcursor\n list {\n id\n thumbnailUrl\n poster\n workType\n type\n useVideoPlayer\n imgUrls\n imgSizes\n magicFace\n musicName\n caption\n location\n liked\n onlyFollowerCanComment\n relativeHeight\n timestamp\n width\n height\n counts {\n displayView\n displayLike\n displayComment\n __typename\n }\n user {\n id\n eid\n name\n avatar\n __typename\n }\n expTag\n __typename\n }\n __typename\n }\n }\n", 104 | 'variables': {'principalId': str(self.uid), 'pcursor': "", 'count': 512}} 105 | 106 | res = requests.post(self.__DATA_URL, headers=self.__headersWeb, json=payload1) 107 | 108 | # print(res.content) 109 | works = json.loads(res.content.decode(encoding='utf-8'))['data']['privateFeeds']['list'] 110 | 111 | # with open("./" + uid + "2.json", "w") as fp: 112 | # fp.write(json.dumps(works, indent=2)) 113 | 114 | if works != []: 115 | if works[0]['id'] is None: 116 | works.pop(0) 117 | 118 | 119 | print('Video Count:%s ' %len(works)) 120 | print(works) 121 | for work in works: 122 | type = work['workType'] 123 | if type == 'video': 124 | work_id = work['id'] 125 | sleep(3) 126 | self.__parseVideo(work_id) 127 | 128 | print('Parse Successful ^-^ \n') 129 | 130 | else: 131 | print(works) 132 | sleep(3) 133 | self.user() 134 | 135 | 136 | 137 | -------------------------------------------------------------------------------- /91user/user.py: -------------------------------------------------------------------------------- 1 | import requests, re 2 | import random 3 | import urllib.parse as up 4 | from time import sleep 5 | from bs4 import BeautifulSoup 6 | from fake_useragent import UserAgent 7 | import sqlite3 8 | 9 | ua = UserAgent() 10 | proxies = { 11 | 'http': 'http://127.0.0.1:1080', 12 | 'https': 'http://127.0.0.1:1080' 13 | } 14 | 15 | def random_headers(): 16 | ip = str(random.choice(list(range(255)))) + '.' + str(random.choice(list(range(255)))) + '.' + str( 17 | random.choice(list(range(255)))) + '.' 
--------------------------------------------------------------------------------
/91user/user.py:
--------------------------------------------------------------------------------
1 | import requests, re
2 | import random
3 | import urllib.parse as up
4 | from time import sleep
5 | from bs4 import BeautifulSoup
6 | from fake_useragent import UserAgent
7 | import sqlite3
8 | 
9 | ua = UserAgent()
10 | proxies = {
11 |     'http': 'http://127.0.0.1:1080',
12 |     'https': 'http://127.0.0.1:1080'
13 | }
14 | 
15 | def random_headers():
16 |     # fabricate a random IPv4-looking address for the spoofing headers below
17 |     ip = '.'.join(str(random.choice(range(255))) for _ in range(4))
18 | 
19 |     headers = {
20 |         'X-Client-IP': ip,
21 |         'X-Remote-IP': ip,
22 |         'X-Remote-Addr': ip,
23 |         'X-Originating-IP': ip,
24 |         'x-forwarded-for': ip,
25 |         'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
26 |         'Accept-Encoding': 'gzip, deflate',
27 |         'Accept-Language': 'zh-CN,zh;q=0.9',
28 |         'Cache-Control': 'max-age=0',
29 |         # 'Cookie': '__cfduid=dda7b976a0a240beb0968fd6673951c471618894642; CLIPSHARE=jkaaau6k1p151iqhto35fsgrnl; mode=d',
30 |         'Host': '91porn.com',
31 |         'Referer': 'http://91porn.com/', # fe7dCN6lNv5VirM8tSKWVndvRtHMSVyeHRRNQDEbKvUfjKzE
32 |         'User-Agent': ua.random
33 |     }
34 |     return headers
35 | 
36 | def get_page(url):
37 |     page = requests.get(url, headers=random_headers(), proxies=proxies)
38 |     page.encoding = page.apparent_encoding
39 |     html = BeautifulSoup(page.text, 'lxml')
40 |     if html is not None:
41 |         return html
42 |     else:
43 |         return None
44 | 
45 | 
46 | class User:
47 | 
48 |     def __init__(self, uid):
49 |         self.uid = uid
50 |         self.start_url = 'http://91porn.com/uvideos.php?UID={}'.format(self.uid)
51 |         self.public_datas = self.public_data()
52 | 
53 |     def public_data(self):
54 |         page = get_page(self.start_url)
55 |         page_num = page.select('ul.nav.navbar-nav.navbar-right > a')[-1].get_text()
56 |         public_video = re.findall(r'\d+', page_num)[0]
57 |         page_num = int(public_video) // 8
58 |         if page_num == 0:
59 |             page_num = 1
60 |             data = {
61 |                 'page_num': page_num,
62 |                 'public_video': int(public_video)
63 |             }
64 |             return data
65 |         else:
66 |             page_num += 1
67 |             data = {
68 |                 'page_num': page_num,
69 |                 'public_video': int(public_video)
70 |             }
71 |             return data
72 | 
73 |     def __parse_user(self):
74 |         end_num = self.public_datas['page_num']
75 |         urls = ['http://91porn.com/uvideos.php?UID={}&page={}'.format(str(self.uid), str(i)) for i in range(1, int(end_num + 1))]
76 |         # page = get_page(self.start_url)
77 |         for url in urls:
78 |             print(url)
79 |             page = get_page(url)
80 |             video_urls = page.select('div.well.well-sm > a')
81 |             video_ids = page.select('div.thumb-overlay > img')
82 |             video_names = page.select('span.video-title.title-truncate.m-t-5')
83 |             for video_url, vid, name in zip(video_urls, video_ids, video_names):
84 |                 data = {
85 |                     'url': video_url.get('href'),
86 |                     'id': vid.get('src').split('/')[-1].split('.')[0],
87 |                     'title': name.get_text()
88 |                 }
89 |                 yield data
90 | 
91 |     def parse_video(self):
92 |         video_data = []
93 |         for user_data in self.__parse_user():
94 |             print('Current task: %s' % user_data['url'])
95 |             page = get_page(user_data['url'])
96 |             m3u8 = page.find(text=re.compile('.*"%.*"'))
97 |             temp = m3u8.split('"')[-2]
98 |             m3u8_url = up.unquote(temp).split("'")[1]
99 |             new_data = {
100 |                 'url': user_data['url'],
101 |                 'id': user_data['id'],
102 |                 'title': user_data['title'],
103 |                 'm3u8': m3u8_url
104 |             }
105 |             video_data.append(new_data)
106 |             sleep(random.randint(1, 3))
107 |         up_users = page.select('span.title-yakov > a > span')[0].get_text()
108 |         all_data = {'uid': self.uid, 'name': up_users, 'data': video_data}
109 |         return all_data
110 | 
111 | class ClientSqlite:
112 | 
113 |     def __init__(self, dbName="./91user.db"):
114 |         self.conn = sqlite3.connect(dbName)
115 |         self.cur = self.conn.cursor()
116 |         self.create_table()
117 | 
118 |     def close_conn(self):
119 |         self.cur.close()
120 |         self.conn.close()
121 | 
122 |     def create_table(self):
123 |         sql = '''CREATE TABLE users(
124 |                         id INTEGER PRIMARY KEY AUTOINCREMENT,
125 |                         uid varchar(255) NOT NULL,
126 |                         name varchar(255) DEFAULT NULL,
127 |                         data text
128 |                         )'''
129 |         try:
130 |             self.cur.execute(sql)
131 |             self.conn.commit()
132 |             return True
133 |         except Exception as e:
134 |             # print('[ERROR]:%s' % e)
135 |             return False
136 | 
137 |     def fetchall_table(self, sql, limit_flag=True):
138 |         try:
139 |             self.cur.execute(sql)
140 |             if limit_flag:
141 |                 result = self.cur.fetchall()
142 |                 if len(result) > 0:
143 |                     return result
144 |                 else:
145 |                     return None
146 |             else:
147 |                 result = self.cur.fetchone()
148 |                 if len(result) > 0:
149 |                     return result
150 |                 else:
151 |                     return None
152 |         except Exception as e:
153 |             print('[SELECT TABLE ERROR]:%s' % e)
154 |             return None
155 | 
156 |     def insert_update_table(self, sql):
157 |         try:
158 |             self.cur.execute(sql)
159 |             self.conn.commit()
160 |             return True
161 |         except Exception as e:
162 |             print('[INSERT/UPDATE TABLE ERROR]:%s' % e)
163 |             return False
164 | 
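# --- Illustrative usage sketch (not part of the original user.py) ---
# 91user/run.py presumably wires User and ClientSqlite together roughly like
# this; the INSERT statement is only meant to show the intended flow.
#
#     import json
#     from config.uids import USERS_UID   # the list of target uids
#
#     db = ClientSqlite()
#     for uid in USERS_UID:
#         all_data = User(uid).parse_video()
#         db.insert_update_table(
#             "INSERT INTO users (uid, name, data) VALUES ('{}', '{}', '{}')".format(
#                 all_data['uid'], all_data['name'], json.dumps(all_data['data'])))
#     db.close_conn()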
--------------------------------------------------------------------------------
/proxy_pool/README.md:
--------------------------------------------------------------------------------
1 | 
2 | Crawler IP proxy pool
3 | =======
4 | [![Build Status](https://travis-ci.org/jhao104/proxy_pool.svg?branch=master)](https://travis-ci.org/jhao104/proxy_pool)
5 | [![](https://img.shields.io/badge/Powered%20by-@j_hao104-green.svg)](http://www.spiderpy.cn/blog/)
6 | [![Requirements Status](https://requires.io/github/jhao104/proxy_pool/requirements.svg?branch=master)](https://requires.io/github/jhao104/proxy_pool/requirements/?branch=master)
7 | [![Packagist](https://img.shields.io/packagist/l/doctrine/orm.svg)](https://github.com/jhao104/proxy_pool/blob/master/LICENSE)
8 | [![GitHub contributors](https://img.shields.io/github/contributors/jhao104/proxy_pool.svg)](https://github.com/jhao104/proxy_pool/graphs/contributors)
9 | [![](https://img.shields.io/badge/language-Python-green.svg)](https://github.com/jhao104/proxy_pool)
10 | 
11 |       ______  ______  _
12 |      | ___ \_ | ___ \ | |
13 |      | |_/ / \__ __ __ _  __ _ | |_/ /___  ___ | |
14 |      |  __/|  _// _ \ \ \/ /| | | || __// _ \ / _ \ | |
15 |      | |   | | | (_) |  >  < \ |_| || |  | (_) | (_) || |___
16 |      \_|   |_|  \___/ /_/\_\  \__ |\_|  \___/ \___/ \_____\
17 |                               __ / /
18 |                              /___ /
19 | 
20 | ##### [Introduction document](https://github.com/jhao104/proxy_pool/blob/master/doc/introduce.md)
21 | 
22 | * Supported versions: ![](https://img.shields.io/badge/Python-2.x-green.svg) ![](https://img.shields.io/badge/Python-3.x-blue.svg)
23 | 
24 | * Test deployment: http://118.24.52.95 (single machine, please don't stress-test it, thanks; malicious visitors end up on the [block list](https://github.com/jhao104/proxy_pool/blob/bff423dffe6e2881ee45d5b66d8a6ad682c8e4ab/doc/block_ips.md))
25 | 
26 | ### Download and install
27 | 
28 | * Get the source:
29 | 
30 | ```shell
31 | git clone git@github.com:jhao104/proxy_pool.git
32 | 
33 | # or download a zip file from https://github.com/jhao104/proxy_pool/releases
34 | ```
35 | 
36 | * Install the dependencies:
37 | 
38 | ```shell
39 | pip install -r requirements.txt
40 | ```
41 | 
42 | * Configure Config/setting.py:
43 | 
44 | ```python
45 | # Config/setting.py is the project configuration file
46 | 
47 | # database configuration
48 | DATABASES = {
49 |     "default": {
50 |         "TYPE": "SSDB",        # SSDB and REDIS are currently supported
51 |         "HOST": "127.0.0.1",   # db host
52 |         "PORT": 8888,          # db port, e.g. SSDB usually uses 8888, REDIS defaults to 6379
53 |         "NAME": "proxy",       # db name (default)
54 |         "PASSWORD": ""         # db password
55 | 
56 |     }
57 | }
58 | 
59 | 
60 | # ProxyGetter configuration
61 | 
62 | PROXY_GETTER = [
63 |     "freeProxy01",      # names of the enabled proxy-fetching functions; extend them in ProxyGetter/getFreeProxy.py
64 |     "freeProxy02",
65 |     ....
66 | ]
67 | 
68 | 
69 | # API server configuration
70 | 
71 | SERVER_API = {
72 |     "HOST": "0.0.0.0",  # listen address, 0.0.0.0 listens on all IPs
73 |     "PORT": 5010  # listen port
74 | }
75 | 
76 | # with the settings above, the proxy pool is reachable at http://127.0.0.1:5010 once started
77 | 
78 | ```
79 | 
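  Before starting anything it can be worth checking that the database configured in `DATABASES` is actually reachable. SSDB speaks a subset of the Redis protocol, so a quick `ping` through `redis-py` works for either backend; the sketch below simply reuses the host/port values from the example above, so adjust them to your own setting.py:

```python
import redis

# host/port taken from the DATABASES example above (SSDB default: 8888, Redis default: 6379)
client = redis.Redis(host="127.0.0.1", port=8888)

try:
    client.ping()
    print("proxy database is reachable")
except redis.ConnectionError as exc:
    print("cannot reach the proxy database: %s" % exc)
```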
80 | * Start it up:
81 | 
82 | ```shell
83 | # Once the dependencies are installed and everything is in place, start it from the cli directory via proxyPool.py.
84 | # The program has two parts: schedule (the scheduler) and webserver (the API service).
85 | 
86 | # start the scheduler first
87 | >>>python proxyPool.py schedule
88 | 
89 | # then start the web API service
90 | >>>python proxyPool.py webserver
91 | 
92 | 
93 | ```
94 | 
95 | ### Docker
96 | 
97 | ```bash
98 | docker pull jhao104/proxy_pool
99 | 
100 | # remote database
101 | docker run --env db_type=REDIS --env db_host=x.x.x.x --env db_port=6379 --env db_password=pwd_str -p 5010:5010 jhao104/proxy_pool
102 | 
103 | # database running on the Docker host
104 | docker run --env db_type=REDIS --env db_host=host.docker.internal --env db_port=6379 --env db_password=pwd_str -p 5010:5010 jhao104/proxy_pool
105 | 
106 | ```
107 | 
108 | 
109 | ### Usage
110 | 
111 |   A few minutes after start-up you should see the proxies that have been fetched. You can look at them directly in the database; the [SSDB admin tool](https://github.com/jhao104/SSDBAdmin) is recommended for that.
112 | 
113 |   You can also check them through the API at http://127.0.0.1:5010.
114 | 
115 | * Api
116 | 
117 | | api | method | description | arg |
118 | | ----| ---- | ---- | ----|
119 | | / | GET | API introduction | None |
120 | | /get | GET | get one proxy at random | None|
121 | | /get_all | GET | get all proxies |None|
122 | | /get_status | GET | show the number of proxies |None|
123 | | /delete | GET | delete a proxy |proxy=host:ip|
124 | 
125 | * Use in a spider
126 | 
127 |   To use the pool from spider code, wrap the API in small helper functions and call them directly, for example:
128 | 
129 | ```python
130 | import requests
131 | 
132 | def get_proxy():
133 |     return requests.get("http://127.0.0.1:5010/get/").json()
134 | 
135 | def delete_proxy(proxy):
136 |     requests.get("http://127.0.0.1:5010/delete/?proxy={}".format(proxy))
137 | 
138 | # your spider code
139 | 
140 | def getHtml():
141 |     # ....
142 |     retry_count = 5
143 |     proxy = get_proxy().get("proxy")
144 |     while retry_count > 0:
145 |         try:
146 |             html = requests.get('http://www.example.com', proxies={"http": "http://{}".format(proxy)})
147 |             # request made through the proxy
148 |             return html
149 |         except Exception:
150 |             retry_count -= 1
151 |     # failed 5 times: remove the proxy from the pool
152 |     delete_proxy(proxy)
153 |     return None
154 | ```
155 | 
156 | ### Extending the proxy getters
157 | 
158 |   The project ships with a few free proxy sources, but free proxies are not of great quality, so the proxies you get out of the box may be disappointing. For that reason the proxy-fetching methods are designed to be extended.
159 | 
160 |   To add a new proxy-fetching method:
161 | 
162 | * 1. First add your own static method to the [GetFreeProxy](https://github.com/jhao104/proxy_pool/blob/b9ccdfaada51b57cfb1bbd0c01d4258971bc8352/ProxyGetter/getFreeProxy.py#L32) class.
163 | The method must be a generator that yields proxies in `host:ip` format, for example:
164 | 
165 | ```python
166 | 
167 | class GetFreeProxy(object):
168 |     # ....
169 | 
170 |     # your own method
171 |     @staticmethod
172 |     def freeProxyCustom():  # any name that does not clash with an existing one
173 | 
174 |         # fetch proxies from some website, API or database, in whatever way you like
175 |         # suppose you end up with a list of proxies
176 |         proxies = ["139.129.166.68:3128", "139.129.166.61:3128", ...]
177 |         for proxy in proxies:
178 |             yield proxy
179 |         # just make sure every proxy is in the correct host:ip format
180 | ```
181 | 
182 | * 2. After adding the method, edit the `PROXY_GETTER` item in Config/setting.py:
183 | 
184 |   Add the name of your custom method under `PROXY_GETTER`:
185 | 
186 | ```python
187 | PROXY_GETTER = [
188 |     "freeProxy01",
189 |     "freeProxy02",
190 |     ....
191 |     "freeProxyCustom"  # make sure this matches the name of the method you added
192 | ]
193 | ```
194 | 
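  Putting the two steps together: a custom getter will usually fetch a page or an API and pull `host:port` pairs out of it. The sketch below is only illustrative; `free-proxy.example.com` is a made-up URL and the regular expression has to match whatever source you actually scrape:

```python
import re
import requests

class GetFreeProxy(object):
    # ....

    @staticmethod
    def freeProxyCustom():
        # hypothetical source page that lists proxies as host:port
        resp = requests.get("http://free-proxy.example.com/list", timeout=10)
        for host, port in re.findall(r"(\d+\.\d+\.\d+\.\d+):(\d+)", resp.text):
            yield "{}:{}".format(host, port)
```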
195 | 
196 |   `ProxySchedule` fetches proxies at a fixed interval; on its next run it automatically picks up and calls the method you defined.
197 | 
198 | ### Proxy sources
199 | 
200 | The free-proxy sites currently implemented are listed below (in no particular order; this only describes the free proxies they publish. For a review of paid proxies see [here](https://zhuanlan.zhihu.com/p/33576641)):
201 | 
202 | | Provider | Status | Update frequency | Usable rate | Blocked in China | Link |
203 | | ----- | ---- | -------- | ------ | --------- | ----- |
204 | | 无忧代理 | available | every few minutes | * | no | [link](http://www.data5u.com/free/index.html) |
205 | | 66代理 | available | very slow updates | * | no | [link](http://www.66ip.cn/) |
206 | | 西刺代理 | available | every few minutes | * | no | [link](http://www.xicidaili.com)|
207 | | 全网代理 | available | every few minutes | * | no | [link](http://www.goubanjia.com/)|
208 | | 训代理 | free proxies discontinued | * | * | no | [link](http://www.xdaili.cn/)|
209 | | 快代理 | available | every few minutes | * | no | [link](https://www.kuaidaili.com/)|
210 | | 云代理 | available | every few minutes | * | no | [link](http://www.ip3366.net/)|
211 | | IP海 | available | every few hours | * | no | [link](http://www.iphai.com/)|
212 | | 免费IP代理库 | available | fast | * | no | [link](http://ip.jiangxianli.com/)|
213 | | 中国IP地址 | available | every few minutes | * | yes | [link](http://cn-proxy.com/)|
214 | | Proxy List | available | every few minutes | * | yes | [link](https://proxy-list.org/chinese/index.php)|
215 | | ProxyList+ | available | every few minutes | * | yes | [link](https://list.proxylistplus.com/Fresh-HTTP-Proxy-List-1)|
216 | 
217 | If you know of other good free-proxy sites, please submit them in [issues](https://github.com/jhao104/proxy_pool/issues/71); support for them will be considered in a future update.
218 | 
219 | ### Feedback
220 | 
221 |   Feel free to report any problem in [Issues](https://github.com/jhao104/proxy_pool/issues). If you don't have a GitHub account you can leave a message on my [blog](http://www.spiderpy.cn/blog/message).
222 | 
223 |   Your feedback makes this project better.
224 | 
225 | ### Contributing
226 | 
227 |   This project is meant to stay a basic, general-purpose proxy-pool architecture, so it does not take on niche features (particularly good ideas are, of course, an exception).
228 | 
229 |   The project is still far from perfect. If you find a bug or want to add a feature, please describe it in [Issues](https://github.com/jhao104/proxy_pool/issues) first, and submit your code once it has been confirmed.
230 | 
231 |   Many thanks to the following contributors for their work:
232 | 
233 |   [@kangnwh](https://github.com/kangnwh)| [@bobobo80](https://github.com/bobobo80)| [@halleywj](https://github.com/halleywj)| [@newlyedward](https://github.com/newlyedward)| [@wang-ye](https://github.com/wang-ye)| [@gladmo](https://github.com/gladmo)| [@bernieyangmh](https://github.com/bernieyangmh)| [@PythonYXY](https://github.com/PythonYXY)| [@zuijiawoniu](https://github.com/zuijiawoniu)| [@netAir](https://github.com/netAir)| [@scil](https://github.com/scil)| [@tangrela](https://github.com/tangrela)| [@highroom](https://github.com/highroom)| [@luocaodan](https://github.com/luocaodan)| [@vc5](https://github.com/vc5)| [@1again](https://github.com/1again)| [@obaiyan](https://github.com/obaiyan)
234 | 
235 | 
236 | ### Release Notes
237 | 
238 | [release notes](https://github.com/jhao104/proxy_pool/blob/master/doc/release_notes.md)
239 | 
240 | 
--------------------------------------------------------------------------------
/proxy_pool/ProxyGetter/getFreeProxy.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # !/usr/bin/env python
3 | """
4 | -------------------------------------------------
5 |    File Name:     GetFreeProxy.py
6 |    Description :  fetch free proxies
7 |    Author :       JHao
8 |    date:          2016/11/25
9 | -------------------------------------------------
10 |    Change Activity:
11 |                    2016/11/25:
12 | -------------------------------------------------
13 | """
14 | import re
15 | import sys
16 | import requests
17 | from time import sleep
18 | 
19 | sys.path.append('..')
20 | 
21 | from Util.WebRequest import WebRequest
22 | from Util.utilFunction import getHtmlTree
23 | 
24 | # for debug to disable insecureWarning
25 | requests.packages.urllib3.disable_warnings()
26 | 
27 | 
28 | class 
GetFreeProxy(object): 29 | """ 30 | proxy getter 31 | """ 32 | 33 | @staticmethod 34 | def freeProxy01(): 35 | """ 36 | 无忧代理 http://www.data5u.com/ 37 | 几乎没有能用的 38 | :return: 39 | """ 40 | url_list = [ 41 | 'http://www.data5u.com/', 42 | 'http://www.data5u.com/free/gngn/index.shtml', 43 | 'http://www.data5u.com/free/gnpt/index.shtml' 44 | ] 45 | key = 'ABCDEFGHIZ' 46 | for url in url_list: 47 | html_tree = getHtmlTree(url) 48 | ul_list = html_tree.xpath('//ul[@class="l2"]') 49 | for ul in ul_list: 50 | try: 51 | ip = ul.xpath('./span[1]/li/text()')[0] 52 | classnames = ul.xpath('./span[2]/li/attribute::class')[0] 53 | classname = classnames.split(' ')[1] 54 | port_sum = 0 55 | for c in classname: 56 | port_sum *= 10 57 | port_sum += key.index(c) 58 | port = port_sum >> 3 59 | yield '{}:{}'.format(ip, port) 60 | except Exception as e: 61 | print(e) 62 | 63 | @staticmethod 64 | def freeProxy02(count=20): 65 | """ 66 | 代理66 http://www.66ip.cn/ 67 | :param count: 提取数量 68 | :return: 69 | """ 70 | urls = [ 71 | "http://www.66ip.cn/mo.php?sxb=&tqsl={}&port=&export=&ktip=&sxa=&submit=%CC%E1++%C8%A1&textarea=", 72 | "http://www.66ip.cn/nmtq.php?getnum={}&isp=0&anonymoustype=0&s" 73 | "tart=&ports=&export=&ipaddress=&area=0&proxytype=2&api=66ip" 74 | ] 75 | 76 | try: 77 | import execjs 78 | import requests 79 | 80 | headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0', 81 | 'Accept': '*/*', 82 | 'Connection': 'keep-alive', 83 | 'Accept-Language': 'zh-CN,zh;q=0.8'} 84 | session = requests.session() 85 | src = session.get("http://www.66ip.cn/", headers=headers).text 86 | src = src.split("")[0] + '}' 87 | src = src.replace("