├── README.rst ├── UPDATE ├── tagword_crawler ├── __init__.py ├── spider.py ├── crawler.py └── ua.py ├── LICENSE └── setup.py /README.rst: -------------------------------------------------------------------------------- 1 | 说明 2 | ======== 3 | 4 | 开发中项目,为打造一个能容纳各类平台爬虫的通用管理工具 5 | 6 | -------------------------------------------------------------------------------- /UPDATE: -------------------------------------------------------------------------------- 1 | 2020-06-04 2 | 更新v0.1.3 3 | - 解决从site-packages寻找"tagword_crawler_*"爬虫的bug 4 | - 增加多线程选项,修改fetch,支持多items 5 | 6 | 7 | 2020-06-11 8 | 更新v0.1.5 9 | - 修正register spider的逻辑,增加可以根据模块名称选择爬虫,通过传入参数spider 10 | 11 | 12 | 2020-06-12 13 | 更新v0.2.0 14 | - 更新控制每个输入的请求不再增加url参数,同时在输出中增加source参数 15 | - 这次是一个大的改动,之前所有的爬虫都得调整输入参数 16 | -------------------------------------------------------------------------------- /tagword_crawler/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Copyright 2018 The TagWord Authors. All Rights Reserved.
3 | # Name: TAGWORD CRAWLER爬虫软件 4 | # Contact: contact@tagword_crawler.cn 5 | # Description: TAGWORD CRAWLER软件 6 | # ======================================================== 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 JIN KUN 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /tagword_crawler/spider.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Name: 3 | # Description: 4 | # Contact: contact@tagword_crawler.cn 5 | # ========================================================= 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import random 11 | import http.cookiejar as cookielib 12 | from requests import Request, Session 13 | from urllib.parse import urlparse 14 | 15 | from tagword_crawler.ua import USER_AGENTS 16 | 17 | 18 | class SpiderProto(object): 19 | def __init__(self, proxies={}, timeout=5, headers=None, stream=False, verify=False, cert=()): 20 | """ 21 | 初始化爬虫 22 | :param proxies: 代理信息 23 | :param timeout: 超时时间 24 | :param headers: 头信息 25 | :param stream: 持续下载 26 | :param verify: 验证SSL 27 | :param cert: 验证证书 28 | """ 29 | self.proxies = proxies 30 | self.timeout = timeout 31 | self.stream = stream 32 | self.verify = verify 33 | self.cert = cert 34 | self.headers = { 35 | 'Host': None, 36 | 'Connection': 'keep-alive', 37 | 'Upgrade-Insecure-Requests': '1', 38 | 'User-Agent': random.choice(USER_AGENTS)['ua'], 39 | 'Accept': 'text/html,application/xhtml+xml,application/xml,application/json; */*;q=0.8', 40 | 'Accept-Encoding': 'gzip, deflate', 41 | 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8' 42 | } if headers is None else headers 43 | 44 | def set_random_user_agent(self): 45 | self.headers['User-Agent'] = random.choice(USER_AGENTS)['ua'] 46 | 47 | def set_user_agent(self, ua): 48 | self.headers['User-Agent'] = ua 49 | 50 | def fetch(self, url=None, params=None, data=None, cookies=None, method="GET"): 51 | parsed_uri = urlparse(url) 52 | host = parsed_uri.netloc 53 | self.headers['Host'] = host 54 | req = Request(method=method, 55 | url=url, 56 | params=params, 57 | data=data, 58 | cookies=cookies, 59 
| headers=self.headers) 60 | session = Session() 61 | prepped = session.prepare_request(req) 62 | resp = session.send( 63 | prepped, 64 | stream=self.stream, # 抓取流文件或者大文件使用 65 | verify=self.verify, # #verify是否验证服务器的SSL证书 66 | proxies=self.proxies, 67 | cert=self.cert, 68 | timeout=self.timeout 69 | ) 70 | return resp 71 | -------------------------------------------------------------------------------- /tagword_crawler/crawler.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Name: 3 | # Description: 4 | # Contact: contact@tagword_crawler.cn 5 | # ========================================================= 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import sys 11 | import random 12 | from queue import Queue 13 | import threading 14 | import time 15 | 16 | import pkgutil 17 | import importlib 18 | from urllib.parse import urlparse 19 | 20 | 21 | def create_crawler(): 22 | app = TGWCrawler() 23 | for pkg in pkgutil.walk_packages(sys.path): 24 | if pkg.ispkg: 25 | if pkg.name.startswith("tagword_crawler_"): 26 | p = importlib.import_module(pkg.name) 27 | app.register_spider(p.main[1:], url_host=p.main[0]) 28 | return app 29 | 30 | 31 | class TGWCrawler(object): 32 | def __init__(self): 33 | self.__spiders = {} 34 | self.__proxies = {} 35 | 36 | def register_spider(self, spiders, url_host): 37 | for spider in spiders: 38 | self.__spiders[url_host] = self.__spiders.get(url_host, []) + [spider] 39 | 40 | def register_proxy(self, proxies): 41 | self.__proxies = proxies 42 | 43 | def multi_fetch(self, items, threads_num=4): 44 | q = Queue() 45 | p = Queue() 46 | for item in items: 47 | q.put(item) 48 | threads = [] 49 | for i in range(0, threads_num): 50 | t = threading.Thread(target=self.__fetch, args=(q, p)) 51 | threads.append(t) 52 | for t in threads: 53 | t.setDaemon(True) 54 | t.start() 55 | for thread in threads: 56 | 
thread.join() 57 | 58 | output = [] 59 | while not p.empty(): 60 | item = p.get() 61 | output.append(item) 62 | return output 63 | 64 | def fetch(self, items): 65 | output = [] 66 | for item in items: 67 | source = item['source'] 68 | result = self._fetch(**item) 69 | if result is None: 70 | continue 71 | for item in result: 72 | item['source'] = source 73 | output.append(item) 74 | time.sleep(random.randint(1, 5)) 75 | return output 76 | 77 | def __fetch(self, q, p): 78 | while not q.empty(): 79 | item = q.get() 80 | source = item['source'] 81 | result = self._fetch(**item) 82 | if result is None: 83 | continue 84 | for item in result: 85 | item['source'] = source 86 | p.put(item) 87 | time.sleep(random.randint(1, 5)) 88 | 89 | def _fetch(self, **kwargs): 90 | parsed_uri = urlparse(kwargs.get("url")) 91 | host = parsed_uri.netloc 92 | schema = parsed_uri.scheme 93 | spiders = self.__spiders.get(host, None) 94 | if spiders is None: 95 | return None 96 | spider = None 97 | if len(spiders) == 1: 98 | spider = spiders[0] 99 | elif len(spiders) > 1: 100 | for spider in spiders: 101 | if spider.__name__ == kwargs.get("spider"): 102 | break 103 | if spider is None: 104 | return None 105 | 106 | spider = spider() 107 | # 设置ssl验证 108 | if schema == 'https': 109 | spider.verify = True 110 | if schema == 'http': 111 | spider.verify = False 112 | 113 | # 设置代理 114 | if self.__proxies: 115 | proxy = random.choice(self.__proxies[schema]) 116 | spider.proxies = {schema: proxy} 117 | 118 | # 轮训相关页面 119 | try: 120 | result = spider.request(**kwargs) 121 | except: 122 | result = self._fetch(**kwargs) 123 | else: 124 | if self.__proxies: 125 | print(proxy) 126 | return result 127 | 128 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """A setuptools based setup module. 
2 | See: 3 | https://packaging.python.org/en/latest/distributing.html 4 | https://github.com/pypa/sampleproject 5 | """ 6 | 7 | # Always prefer setuptools over distutils 8 | from setuptools import setup, find_packages 9 | from os import path 10 | 11 | here = path.abspath(path.dirname(__file__)) 12 | 13 | # Get the long description from the README file 14 | with open(path.join(here, 'README.rst')) as f: 15 | long_description = f.read() 16 | 17 | setup( 18 | name='tagword-crawler', 19 | 20 | # Versions should comply with PEP440. For a discussion on single-sourcing 21 | # the version across setup.py and the project code, see 22 | # https://packaging.python.org/en/latest/single_source_version.html 23 | version='0.2.0', 24 | 25 | description='a toolkit for cralwer Chinese OTA site data', 26 | long_description=long_description, 27 | 28 | # The project's main homepage. 29 | url='https://github.com/ushiao/tagword-crawler', 30 | 31 | # Author details 32 | author='Kun JIN', 33 | author_email='kun.jin@tagword.cn', 34 | 35 | # Choose your license 36 | license='MIT', 37 | 38 | # See https://pypi.python.org/pypi?%4Aaction=list_classifiers 39 | classifiers=[ 40 | # How mature is this project? Common values are 41 | # 3 - Alpha 42 | # 4 - Beta 43 | # 5 - Production/Stable, 44 | 'Development Status :: 3 - Alpha', 45 | 46 | # Indicate who your project is intended for 47 | 'Intended Audience :: Developers', 48 | 'Topic :: Software Development :: Build Tools', 49 | 50 | # Pick your license as you wish (should match "license" above) 51 | 'License :: OSI Approved :: MIT License', 52 | 53 | # Specify the Python versions you support here. In particular, ensure 54 | # that you indicate whether you support Python 2, Python 3 or both. 
55 | # 'Programming Language :: Python :: 2', 56 | # 'Programming Language :: Python :: 2.7', 57 | 'Programming Language :: Python :: 3.3', 58 | 'Programming Language :: Python :: 3.4', 59 | 'Programming Language :: Python :: 3.5', 60 | 'Programming Language :: Python :: 3.6', 61 | 'Programming Language :: Python :: 3.7', 62 | ], 63 | 64 | # What does your project relate to? 65 | keywords='Spider, Crawler', 66 | 67 | # You can just specify the packages manually here if your project is 68 | # simple. Or you can use find_packages(). 69 | packages=find_packages(exclude=['contrib', 'docs', 'tests', '__pycache__']), 70 | # packages=["tagword_crawler"], 71 | 72 | # List run-time dependencies here. These will be installed by pip when 73 | # your project is installed. For an analysis of "install_requires" vs pip's 74 | # requirements files see: 75 | # https://packaging.python.org/en/latest/requirements.html 76 | install_requires=[ 77 | "bs4==0.0.1", 78 | "requests==2.23.0" 79 | ], 80 | 81 | # List additional groups of dependencies here (e.g. development 82 | # dependencies). You can install these using the following syntax, 83 | # for example: 84 | # $ pip install -e .[dev,test] 85 | # extras_require={ 86 | # 'dev': ['check-manifest'], 87 | # 'test': ['coverage'], 88 | # }, 89 | 90 | # If there are data files included in your packages that need to be 91 | # installed, specify them here. If using Python 2.6 or less, then these 92 | # have to be included in MANIFEST.in as well. 93 | # package_data={ 94 | # 'tagword_crawler': [ 95 | # 'data/*' 96 | # ], 97 | # }, 98 | 99 | # Although 'package_data' is the preferred approach, in some case you may 100 | # need to place data files outside of your packages. 
See: 101 | # http://docs.python.org/3.4/distutils/setupscript.html#installing-additional-files # noqa 102 | # In this case, 'data_file' will be installed into '/my_data' 103 | # data_files=[('my_data', ['data/data_file'])], 104 | 105 | # To provide executable scripts, use entry points in preference to the 106 | # "scripts" keyword. Entry points provide cross-platform support and allow 107 | # pip to create the appropriate form of executable for the target platform. 108 | # entry_points={ 109 | # 'console_scripts': [ 110 | # '=sample:main', 111 | # ], 112 | # }, 113 | ) 114 | -------------------------------------------------------------------------------- /tagword_crawler/ua.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # Copyright 2017 The TAGWORD Authors. All Rights Reserved. 3 | # Name: TAGWORD爬虫User-Agents列表 4 | # Description: 数据通信交互部件,方便对User-Agent进行查询或替换 5 | # Contact: contact@tagword_crawler.cn 6 | # ============================================================================== 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | USER_AGENTS = [ 12 | {'os': 'Safari for iOS', 13 | 'ua': 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1', 14 | 'type': 'mobile browsers'}, 15 | {'os': 'Android Browser', 16 | 'ua': 'Mozilla/5.0 (Linux; U; Android 4.4.2; en-us; SCH-I535 Build/KOT49H) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30', 17 | 'type': 'mobile browsers'}, 18 | {'os': 'Chrome Mobile', 19 | 'ua': 'Mozilla/5.0 (Linux; Android 7.0; SM-G930V Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.125 Mobile Safari/537.36', 20 | 'type': 'mobile browsers'}, 21 | {'os': 'Opera Mini', 22 | 'ua': 'Opera/9.80 (J2ME/MIDP; Opera Mini/5.1.21214/28.2725; U; ru) Presto/2.8.119 Version/11.10', 23 | 
'type': 'mobile browsers'}, 24 | {'os': 'Opera Mini (iOS WebKit)', 25 | 'ua': 'Mozilla/5.0 (iPhone; CPU iPhone OS 7_1_2 like Mac OS X) AppleWebKit/537.51.2 (KHTML, like Gecko) OPiOS/10.2.0.93022 Mobile/11D257 Safari/9537.53', 26 | 'type': 'mobile browsers'}, 27 | {'os': 'Firefox for Android', 28 | 'ua': 'Mozilla/5.0 (Android 7.0; Mobile; rv:54.0) Gecko/54.0 Firefox/54.0', 29 | 'type': 'mobile browsers'}, 30 | {'os': 'Firefox for iOS', 31 | 'ua': 'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_2 like Mac OS X) AppleWebKit/603.2.4 (KHTML, like Gecko) FxiOS/7.5b3349 Mobile/14F89 Safari/603.2.4', 32 | 'type': 'mobile browsers'}, 33 | {'os': 'UC Browser', 34 | 'ua': 'Mozilla/5.0 (Linux; U; Android 7.0; en-US; SM-G935F Build/NRD90M) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 UCBrowser/11.3.8.976 U3/0.8.0 Mobile Safari/534.30"', 35 | 'type': 'mobile browsers'}, 36 | {'os': 'IE Mobile', 37 | 'ua': 'Mozilla/5.0 (compatible; MSIE 10.0; Windows Phone 8.0; Trident/6.0; IEMobile/10.0; ARM; Touch; Microsoft; Lumia 950)', 38 | 'type': 'mobile browsers'}, 39 | {'os': 'Edge Mobile', 40 | 'ua': 'Mozilla/5.0 (Windows Phone 10.0; Android 6.0.1; Microsoft; Lumia 950) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Mobile Safari/537.36 Edge/15.14977', 41 | 'type': 'mobile browsers'}, 42 | {'os': 'Apple iPhone XR (Safari)', 43 | 'ua': 'Mozilla/5.0 (iPhone; CPU iPhone OS 12_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/69.0.3497.105 Mobile/15E148 Safari/605.1', 44 | 'type': 'iPhone'}, 45 | {'os': 'Apple iPhone XS Max (Firefox)', 46 | 'ua': 'Mozilla/5.0 (iPhone; CPU iPhone OS 12_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) FxiOS/13.2b11866 Mobile/16A366 Safari/605.1.15', 47 | 'type': 'iPhone'}, 48 | {'os': 'Apple iPhone X', 49 | 'ua': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1', 50 | 'type': 'iPhone'}, 51 | {'os': 'Apple iPhone 8', 52 | 'ua': 
'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.34 (KHTML, like Gecko) Version/11.0 Mobile/15A5341f Safari/604.1', 53 | 'type': 'iPhone'}, 54 | {'os': 'Apple iPhone 8 Plus', 55 | 'ua': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A5370a Safari/604.1', 56 | 'type': 'iPhone'}, 57 | {'os': 'Apple iPhone 7', 58 | 'ua': 'Mozilla/5.0 (iPhone9,3; U; CPU iPhone OS 10_0_1 like Mac OS X) AppleWebKit/602.1.50 (KHTML, like Gecko) Version/10.0 Mobile/14A403 Safari/602.1', 59 | 'type': 'iPhone'}, 60 | {'os': 'Apple iPhone 7 Plus', 61 | 'ua': 'Mozilla/5.0 (iPhone9,4; U; CPU iPhone OS 10_0_1 like Mac OS X) AppleWebKit/602.1.50 (KHTML, like Gecko) Version/10.0 Mobile/14A403 Safari/602.1', 62 | 'type': 'iPhone'}, 63 | {'os': 'Apple iPhone 6', 64 | 'ua': 'Mozilla/5.0 (Apple-iPhone7C2/1202.466; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1A543 Safari/419.3', 65 | 'type': 'iPhone'}, 66 | {'os': 'Microsoft Lumia 650', 67 | 'ua': 'Mozilla/5.0 (Windows Phone 10.0; Android 6.0.1; Microsoft; RM-1152) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Mobile Safari/537.36 Edge/15.15254', 68 | 'type': 'MS Windows Phone'}, 69 | {'os': 'Microsoft Lumia 550', 70 | 'ua': 'Mozilla/5.0 (Windows Phone 10.0; Android 4.2.1; Microsoft; RM-1127_16056) AppleWebKit/537.36(KHTML, like Gecko) Chrome/42.0.2311.135 Mobile Safari/537.36 Edge/12.10536', 71 | 'type': 'MS Windows Phone'}, 72 | {'os': 'Microsoft Lumia 950', 73 | 'ua': 'Mozilla/5.0 (Windows Phone 10.0; Android 4.2.1; Microsoft; Lumia 950) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2486.0 Mobile Safari/537.36 Edge/13.1058', 74 | 'type': 'MS Windows Phone'}, 75 | {'os': 'Samsung Galaxy S9', 76 | 'ua': 'Mozilla/5.0 (Linux; Android 8.0.0; SM-G960F Build/R16NW) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36', 77 | 'type': 'Android Mobile'}, 78 | {'os': 'Samsung Galaxy 
S8', 79 | 'ua': 'Mozilla/5.0 (Linux; Android 7.0; SM-G892A Build/NRD90M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/60.0.3112.107 Mobile Safari/537.36', 80 | 'type': 'Android Mobile'}, 81 | {'os': 'Samsung Galaxy S7', 82 | 'ua': 'Mozilla/5.0 (Linux; Android 7.0; SM-G930VC Build/NRD90M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/58.0.3029.83 Mobile Safari/537.36', 83 | 'type': 'Android Mobile'}, 84 | {'os': 'Samsung Galaxy S7 Edge', 85 | 'ua': 'Mozilla/5.0 (Linux; Android 6.0.1; SM-G935S Build/MMB29K; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/55.0.2883.91 Mobile Safari/537.36', 86 | 'type': 'Android Mobile'}, 87 | {'os': 'Samsung Galaxy S6', 88 | 'ua': 'Mozilla/5.0 (Linux; Android 6.0.1; SM-G920V Build/MMB29K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.98 Mobile Safari/537.36', 89 | 'type': 'Android Mobile'}, 90 | {'os': 'Samsung Galaxy S6 Edge Plus', 91 | 'ua': 'Mozilla/5.0 (Linux; Android 5.1.1; SM-G928X Build/LMY47X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.83 Mobile Safari/537.36', 92 | 'type': 'Android Mobile'}, 93 | {'os': 'Nexus 6P', 94 | 'ua': 'Mozilla/5.0 (Linux; Android 6.0.1; Nexus 6P Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.83 Mobile Safari/537.36', 95 | 'type': 'Android Mobile'}, 96 | {'os': 'Sony Xperia XZ', 97 | 'ua': 'Mozilla/5.0 (Linux; Android 7.1.1; G8231 Build/41.2.A.0.219; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/59.0.3071.125 Mobile Safari/537.36', 98 | 'type': 'Android Mobile'}, 99 | {'os': 'Sony Xperia Z5', 100 | 'ua': 'Mozilla/5.0 (Linux; Android 6.0.1; E6653 Build/32.2.A.0.253) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.98 Mobile Safari/537.36', 101 | 'type': 'Android Mobile'}, 102 | {'os': 'HTC One X10', 103 | 'ua': 'Mozilla/5.0 (Linux; Android 6.0; HTC One X10 Build/MRA58K; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/61.0.3163.98 Mobile Safari/537.36', 104 | 'type': 'Android 
Mobile'}, 105 | {'os': 'HTC One M9', 106 | 'ua': 'Mozilla/5.0 (Linux; Android 6.0; HTC One M9 Build/MRA58K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.98 Mobile Safari/537.3', 107 | 'type': 'Android Mobile'}, 108 | {'os': 'Google Pixel C', 109 | 'ua': 'Mozilla/5.0 (Linux; Android 7.0; Pixel C Build/NRD90M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/52.0.2743.98 Safari/537.36', 110 | 'type': 'Tablet'}, 111 | {'os': 'Sony Xperia Z4 Tablet', 112 | 'ua': 'Mozilla/5.0 (Linux; Android 6.0.1; SGP771 Build/32.2.A.0.253; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/52.0.2743.98 Safari/537.36', 113 | 'type': 'Tablet'}, 114 | {'os': 'Nvidia Shield Tablet K1', 115 | 'ua': 'Mozilla/5.0 (Linux; Android 6.0.1; SHIELD Tablet K1 Build/MRA58K; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/55.0.2883.91 Safari/537.36', 116 | 'type': 'Tablet'}, 117 | {'os': 'Samsung Galaxy Tab S3', 118 | 'ua': 'Mozilla/5.0 (Linux; Android 7.0; SM-T827R4 Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.116 Safari/537.36', 119 | 'type': 'Tablet'}, 120 | {'os': 'Samsung Galaxy Tab A', 121 | 'ua': 'Mozilla/5.0 (Linux; Android 5.0.2; SAMSUNG SM-T550 Build/LRX22G) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/3.3 Chrome/38.0.2125.102 Safari/537.36', 122 | 'type': 'Tablet'}, 123 | {'os': 'Amazon Kindle Fire HDX 7', 124 | 'ua': 'Mozilla/5.0 (Linux; Android 4.4.3; KFTHWI Build/KTU84M) AppleWebKit/537.36 (KHTML, like Gecko) Silk/47.1.79 like Chrome/47.0.2526.80 Safari/537.36', 125 | 'type': 'Tablet'}, 126 | {'os': 'LG G Pad 7.0', 127 | 'ua': 'Mozilla/5.0 (Linux; Android 5.0.2; LG-V410/V41020c Build/LRX22G) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/34.0.1847.118 Safari/537.36', 128 | 'type': 'Tablet'}, 129 | {'os': 'Windows 10-based PC using Edge browser', 130 | 'ua': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246', 131 | 
'type': 'Desktop'}, 132 | {'os': 'Chrome OS-based laptop using Chrome browser (Chromebook)', 133 | 'ua': 'Mozilla/5.0 (X11; CrOS x86_64 8172.45.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.64 Safari/537.36', 134 | 'type': 'Desktop'}, 135 | {'os': 'Mac OS X-based computer using a Safari browser', 136 | 'ua': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/601.3.9 (KHTML, like Gecko) Version/9.0.2 Safari/601.3.9', 137 | 'type': 'Desktop'}, 138 | {'os': 'Windows 7-based PC using a Chrome browser', 139 | 'ua': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.111 Safari/537.36', 140 | 'type': 'Desktop'}, 141 | {'os': 'Linux-based PC using a Firefox browser', 142 | 'ua': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1', 143 | 'type': 'Desktop'}, 144 | {'os': 'Chromecast', 145 | 'ua': 'Mozilla/5.0 (CrKey armv7l 1.5.16041) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.0 Safari/537.36', 146 | 'type': 'Set Top Boxes'}, 147 | {'os': 'Roku Ultra', 148 | 'ua': 'Roku4640X/DVP-7.70 (297.70E04154A)', 149 | 'type': 'Set Top Boxes'}, 150 | {'os': 'Minix NEO X5', 151 | 'ua': 'Mozilla/5.0 (Linux; U; Android 4.2.2; he-il; NEO-X5-116A Build/JDQ39) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Safari/534.30', 152 | 'type': 'Set Top Boxes'}, 153 | {'os': 'Amazon 4K Fire TV', 154 | 'ua': 'Mozilla/5.0 (Linux; Android 5.1; AFTS Build/LMY47O) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/41.99900.2250.0242 Safari/537.36', 155 | 'type': 'Set Top Boxes'}, 156 | {'os': 'Google Nexus Player', 157 | 'ua': 'Dalvik/2.1.0 (Linux; U; Android 6.0.1; Nexus Player Build/MMB29T)', 158 | 'type': 'Set Top Boxes'}, 159 | {'os': 'Apple TV 5th Gen 4K', 160 | 'ua': 'AppleTV6,2/11.1', 161 | 'type': 'Set Top Boxes'}, 162 | {'os': 'Apple TV 4th Gen', 163 | 'ua': 'AppleTV5,3/9.1.1', 164 | 'type': 'Set Top Boxes'}, 165 | {'os': 'Nintendo Wii U', 166 | 'ua': 'Mozilla/5.0 (Nintendo WiiU) 
AppleWebKit/536.30 (KHTML, like Gecko) NX/3.0.4.2.12 NintendoBrowser/4.3.1.11264.US', 167 | 'type': 'Game Consoles'}, 168 | {'os': 'Xbox One S', 169 | 'ua': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; XBOX_ONE_ED) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393', 170 | 'type': 'Game Consoles'}, 171 | {'os': 'Xbox One', 172 | 'ua': 'Mozilla/5.0 (Windows Phone 10.0; Android 4.2.1; Xbox; Xbox One) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2486.0 Mobile Safari/537.36 Edge/13.10586', 173 | 'type': 'Game Consoles'}, 174 | {'os': 'Playstation 4', 175 | 'ua': 'Mozilla/5.0 (PlayStation 4 3.11) AppleWebKit/537.73 (KHTML, like Gecko)', 176 | 'type': 'Game Consoles'}, 177 | {'os': 'Playstation Vita', 178 | 'ua': 'Mozilla/5.0 (PlayStation Vita 3.61) AppleWebKit/537.73 (KHTML, like Gecko) Silk/3.2', 179 | 'type': 'Game Consoles'}, 180 | {'os': 'Nintendo 3DS', 181 | 'ua': 'Mozilla/5.0 (Nintendo 3DS; U; ; en) Version/1.7412.EU', 182 | 'type': 'Game Consoles'}, 183 | {'os': 'Google bot', 184 | 'ua': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)', 185 | 'type': 'Bots and Crawlers'}, 186 | {'os': 'Bing bot', 187 | 'ua': 'Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)', 188 | 'type': 'Bots and Crawlers'}, 189 | {'os': 'Yahoo! bot', 190 | 'ua': 'Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)', 191 | 'type': 'Bots and Crawlers'}, 192 | {'os': 'Amazon Kindle 4', 193 | 'ua': 'Mozilla/5.0 (X11; U; Linux armv7l like Android; en-us) AppleWebKit/531.2+ (KHTML, like Gecko) Version/5.0 Safari/533.2+ Kindle/3.0+', 194 | 'type': 'E Readers'}, 195 | {'os': 'Amazon Kindle 3', 196 | 'ua': 'Mozilla/5.0 (Linux; U; en-US) AppleWebKit/528.5+ (KHTML, like Gecko, Safari/528.5+) Version/4.0 Kindle/3.0 (screen 600x800; rotate)', 197 | 'type': 'E Readers'}, 198 | ] 199 | --------------------------------------------------------------------------------