├── tests ├── __init__.py └── test.py ├── lib └── woothee │ ├── py.typed │ ├── crawler.pyi │ ├── appliance.pyi │ ├── misc.pyi │ ├── mobilephone.pyi │ ├── util.pyi │ ├── os.pyi │ ├── browser.pyi │ ├── util.py │ ├── dataset.pyi │ ├── __init__.pyi │ ├── appliance.py │ ├── misc.py │ ├── mobilephone.py │ ├── __init__.py │ ├── browser.py │ ├── crawler.py │ ├── os.py │ └── dataset.py ├── .gitmodules ├── mypy.ini ├── MANIFEST.in ├── .gitignore ├── setup.cfg ├── Makefile ├── tox.ini ├── .github └── workflows │ └── python-package.yml ├── setup.py ├── scripts └── dataset_yaml2py.py └── README.rst /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/woothee/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "woothee"] 2 | path = woothee 3 | url = https://github.com/woothee/woothee.git 4 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | python_version = 3.7 3 | ignore_missing_imports = True 4 | incremental = True 5 | check_untyped_defs = True 6 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include tox.ini 2 | include mypy.ini 3 | include Makefile 4 | include README.rst 5 | recursive-include scripts *.py 6 | recursive-include tests *.py 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *~ 3 | 
*.tmproj 4 | tmtags 5 | \#* 6 | .\#* 7 | *.swp 8 | tmp/ 9 | build/ 10 | dist/ 11 | *.pyc 12 | *.egg 13 | lib/*.egg-info 14 | .tox 15 | .coverage 16 | .eggs 17 | .mypy_cache 18 | venv 19 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [sdist] 2 | formats = gztar 3 | 4 | [wheel] 5 | universal = 1 6 | 7 | [check] 8 | strict = 1 9 | 10 | [aliases] 11 | release = sdist bdist_wheel 12 | test=pytest 13 | 14 | [tool:pytest] 15 | addopts = -vv --cov lib/woothee --cov-report term-missing 16 | python_files = tests/*.py 17 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: test flake8 mypy autopep8 2 | all: autopep8 test mypy flake8 3 | 4 | TIMESTAMP=$(shell date +%Y%m%d-%H%M%S) 5 | 6 | lib/woothee/dataset.py: woothee/dataset.yaml 7 | python setup.py dataset 8 | sync; sync; sync 9 | 10 | test: lib/woothee/dataset.py 11 | python setup.py test 12 | 13 | flake8: 14 | tox -eflake8 15 | 16 | mypy: 17 | tox -emypy 18 | 19 | autopep8: 20 | tox -eautopep8 21 | -------------------------------------------------------------------------------- /lib/woothee/crawler.pyi: -------------------------------------------------------------------------------- 1 | # Stubs for woothee.crawler (Python 3) 2 | # 3 | # NOTE: This dynamically typed stub was automatically generated by stubgen. 4 | 5 | from typing import Dict, AnyStr 6 | 7 | def challenge_google(ua: AnyStr, result: Dict[AnyStr, AnyStr]) -> bool: ... 8 | def challenge_crawlers(ua: AnyStr, result: Dict[AnyStr, AnyStr]) -> bool: ... 9 | def challenge_maybe_crawler(ua: AnyStr, result: Dict[AnyStr, AnyStr]) -> bool: ... 
10 | -------------------------------------------------------------------------------- /lib/woothee/appliance.pyi: -------------------------------------------------------------------------------- 1 | # Stubs for woothee.appliance (Python 3) 2 | # 3 | # NOTE: This dynamically typed stub was automatically generated by stubgen. 4 | 5 | from typing import Dict, AnyStr 6 | 7 | def challenge_playstation(ua: AnyStr, result: Dict[AnyStr, AnyStr]) -> bool: ... 8 | def challenge_nintendo(ua: AnyStr, result: Dict[AnyStr, AnyStr]) -> bool: ... 9 | def challenge_digitaltv(ua: AnyStr, result: Dict[AnyStr, AnyStr]) -> bool: ... 10 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist=py35,py36,py37,pypy,flake8,mypy,autopep8 3 | 4 | [testenv] 5 | commands= 6 | python setup.py test 7 | 8 | [testenv:flake8] 9 | deps = flake8 10 | commands= 11 | flake8 lib tests scripts 12 | 13 | [testenv:mypy] 14 | basepython = python3 15 | deps = 16 | mypy 17 | commands = mypy --install-types --non-interactive lib tests 18 | 19 | [testenv:autopep8] 20 | basepython = python3 21 | deps = 22 | autopep8 23 | commands = autopep8 --in-place --aggressive --aggressive --recursive lib/ tests/ 24 | -------------------------------------------------------------------------------- /lib/woothee/misc.pyi: -------------------------------------------------------------------------------- 1 | # Stubs for woothee.misc (Python 3) 2 | # 3 | # NOTE: This dynamically typed stub was automatically generated by stubgen. 4 | 5 | from typing import Dict, AnyStr 6 | 7 | def challenge_desktoptools(ua: AnyStr, result: Dict[AnyStr, AnyStr]) -> bool: ... 8 | def challenge_smartphone_patterns(ua: AnyStr, result: Dict[AnyStr, AnyStr]) -> bool: ... 9 | def challenge_http_library(ua: AnyStr, result: Dict[AnyStr, AnyStr]) -> bool: ... 
10 | def challenge_maybe_rss_reader(ua: AnyStr, result: Dict[AnyStr, AnyStr]) -> bool: ... 11 | -------------------------------------------------------------------------------- /lib/woothee/mobilephone.pyi: -------------------------------------------------------------------------------- 1 | # Stubs for woothee.mobilephone (Python 3) 2 | # 3 | # NOTE: This dynamically typed stub was automatically generated by stubgen. 4 | 5 | from typing import Dict, AnyStr 6 | 7 | def challenge_docomo(ua: AnyStr, result: Dict[AnyStr, AnyStr]) -> bool: ... 8 | def challenge_au(ua: AnyStr, result: Dict[AnyStr, AnyStr]) -> bool: ... 9 | def challenge_softbank(ua: AnyStr, result: Dict[AnyStr, AnyStr]) -> bool: ... 10 | def challenge_willcom(ua: AnyStr, result: Dict[AnyStr, AnyStr]) -> bool: ... 11 | def challenge_misc(ua: AnyStr, result: Dict[AnyStr, AnyStr]) -> bool: ... 12 | -------------------------------------------------------------------------------- /lib/woothee/util.pyi: -------------------------------------------------------------------------------- 1 | # Stubs for woothee.util (Python 3) 2 | # 3 | # NOTE: This dynamically typed stub was automatically generated by stubgen. 4 | 5 | from typing import Dict, AnyStr 6 | 7 | def update_map(target: Dict[AnyStr, AnyStr], source: Dict[AnyStr, AnyStr]) -> None: ... 8 | def update_category(target: Dict[AnyStr, AnyStr], category: AnyStr) -> None: ... 9 | def update_version(target: Dict[AnyStr, AnyStr], version: AnyStr) -> None: ... 10 | def update_os(target: Dict[AnyStr, AnyStr], os: AnyStr) -> None: ... 11 | def update_os_version(target: Dict[AnyStr, AnyStr], version: AnyStr) -> None: ... 12 | -------------------------------------------------------------------------------- /lib/woothee/os.pyi: -------------------------------------------------------------------------------- 1 | # Stubs for woothee.os (Python 3) 2 | # 3 | # NOTE: This dynamically typed stub was automatically generated by stubgen. 
4 | 5 | from typing import Dict, AnyStr 6 | 7 | def challenge_windows(ua: AnyStr, result: Dict[AnyStr, AnyStr]) -> bool: ... 8 | def challenge_osx(ua: AnyStr, result: Dict[AnyStr, AnyStr]) -> bool: ... 9 | def challenge_linux(ua: AnyStr, result: Dict[AnyStr, AnyStr]) -> bool: ... 10 | def challenge_smartphone(ua: AnyStr, result: Dict[AnyStr, AnyStr]) -> bool: ... 11 | def challenge_mobilephone(ua: AnyStr, result: Dict[AnyStr, AnyStr]) -> bool: ... 12 | def challenge_appliance(ua: AnyStr, result: Dict[AnyStr, AnyStr]) -> bool: ... 13 | def challenge_misc(ua: AnyStr, result: Dict[AnyStr, AnyStr]) -> bool: ... 14 | -------------------------------------------------------------------------------- /lib/woothee/browser.pyi: -------------------------------------------------------------------------------- 1 | # Stubs for woothee.browser (Python 3) 2 | # 3 | # NOTE: This dynamically typed stub was automatically generated by stubgen. 4 | 5 | from typing import Dict, AnyStr 6 | 7 | def challenge_msie(ua: AnyStr, result: Dict[AnyStr, AnyStr]) -> bool: ... 8 | def challenge_yandexbrowser(ua: AnyStr, result: Dict[AnyStr, AnyStr]) -> bool: ... 9 | def challenge_safari_chrome(ua: AnyStr, result: Dict[AnyStr, AnyStr]) -> bool: ... 10 | def challenge_firefox(ua: AnyStr, result: Dict[AnyStr, AnyStr]) -> bool: ... 11 | def challenge_opera(ua: AnyStr, result: Dict[AnyStr, AnyStr]) -> bool: ... 12 | def challenge_webview(ua: AnyStr, result: Dict[AnyStr, AnyStr]) -> bool: ... 13 | def challenge_sleipnir(ua: AnyStr, result: Dict[AnyStr, AnyStr]) -> bool: ... 14 | def challenge_vivaldi(ua: AnyStr, result: Dict[AnyStr, AnyStr]) -> bool: ... 15 | -------------------------------------------------------------------------------- /lib/woothee/util.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import (division, print_function, 3 | absolute_import, unicode_literals) 4 | 5 | from . 
import dataset 6 | 7 | 8 | def update_map(target, source): 9 | for key in source.keys(): 10 | if key == dataset.KEY_LABEL or key == dataset.KEY_TYPE: 11 | continue 12 | if len(source[key]) > 0: 13 | target[key] = source[key] 14 | 15 | 16 | def update_category(target, category): 17 | target[dataset.ATTRIBUTE_CATEGORY] = category 18 | 19 | 20 | def update_version(target, version): 21 | target[dataset.ATTRIBUTE_VERSION] = version 22 | 23 | 24 | def update_os(target, os): 25 | target[dataset.ATTRIBUTE_OS] = os 26 | 27 | 28 | def update_os_version(target, version): 29 | target[dataset.ATTRIBUTE_OS_VERSION] = version 30 | -------------------------------------------------------------------------------- /lib/woothee/dataset.pyi: -------------------------------------------------------------------------------- 1 | # Stubs for woothee.dataset (Python 3) 2 | # 3 | # NOTE: This dynamically typed stub was automatically generated by stubgen. 4 | 5 | from typing import Text, List, Dict 6 | 7 | KEY_LABEL: Text 8 | KEY_NAME: Text 9 | KEY_TYPE: Text 10 | KEY_CATEGORY: Text 11 | KEY_OS: Text 12 | KEY_OS_VERSION: Text 13 | KEY_VENDOR: Text 14 | KEY_VERSION: Text 15 | TYPE_BROWSER: Text 16 | TYPE_OS: Text 17 | TYPE_FULL: Text 18 | CATEGORY_PC: Text 19 | CATEGORY_SMARTPHONE: Text 20 | CATEGORY_MOBILEPHONE: Text 21 | CATEGORY_CRAWLER: Text 22 | CATEGORY_APPLIANCE: Text 23 | CATEGORY_MISC: Text 24 | ATTRIBUTE_NAME: Text 25 | ATTRIBUTE_CATEGORY: Text 26 | ATTRIBUTE_OS: Text 27 | ATTRIBUTE_OS_VERSION: Text 28 | ATTRIBUTE_VENDOR: Text 29 | ATTRIBUTE_VERSION: Text 30 | VALUE_UNKNOWN: Text 31 | CATEGORY_LIST: List[Text] 32 | ATTRIBUTE_LIST: List[Text] 33 | DATASET: Dict[Text, Text] 34 | 35 | def get(label: Text) -> Dict[Text, Text]: ... 
36 | -------------------------------------------------------------------------------- /lib/woothee/__init__.pyi: -------------------------------------------------------------------------------- 1 | # Stubs for woothee (Python 3) 2 | # 3 | # NOTE: This dynamically typed stub was automatically generated by stubgen. 4 | 5 | from typing import Text, AnyStr, Tuple, Dict 6 | 7 | VERSION: Tuple 8 | FILLED: Dict[Text, Text] 9 | 10 | def parse(useragent: AnyStr) -> Dict[AnyStr, AnyStr]: ... 11 | def is_crawler(useragent: AnyStr) -> bool: ... 12 | def exec_parse(useragent: AnyStr) -> Dict[AnyStr, AnyStr]: ... 13 | def try_crawler(useragent: AnyStr, result: Dict[AnyStr, AnyStr]) -> bool: ... 14 | def try_browser(useragent: AnyStr, result: Dict[AnyStr, AnyStr]) -> bool: ... 15 | def try_os(useragent: AnyStr, result: Dict[AnyStr, AnyStr]) -> bool: ... 16 | def try_mobilephone(useragent: AnyStr, result: Dict[AnyStr, AnyStr]) -> bool: ... 17 | def try_appliance(useragent: AnyStr, result: Dict[AnyStr, AnyStr]) -> bool: ... 18 | def try_misc(useragent: AnyStr, result: Dict[AnyStr, AnyStr]) -> bool: ... 19 | def try_rare_cases(useragent: AnyStr, result: Dict[AnyStr, AnyStr]) -> bool: ... 20 | def fill_result(result: Dict[AnyStr, AnyStr]) -> Dict[AnyStr, AnyStr]: ... 
21 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Python package 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | schedule: 12 | - cron: 0 15 * * * 13 | 14 | jobs: 15 | build: 16 | 17 | runs-on: ubuntu-latest 18 | strategy: 19 | matrix: 20 | python-version: ['3.8', '3.9', '3.10', '3.11', 'pypy3.9'] 21 | 22 | steps: 23 | - uses: actions/checkout@v3 24 | - name: Set up Python ${{ matrix.python-version }} 25 | uses: actions/setup-python@v4 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | - name: Install dependencies 29 | run: | 30 | python -m pip install --upgrade pip 31 | python -m pip install flake8 coveralls wheel 32 | python -m pip install .[test] .[setup] 33 | python setup.py install 34 | - name: setup project 35 | run: | 36 | git submodule update --init 37 | python setup.py dataset 38 | - name: Lint with flake8 39 | run: | 40 | flake8 lib tests scripts 41 | - name: mypy 42 | if: matrix.python-version != 'pypy3' 43 | run: | 44 | python -m pip install mypy 45 | mypy --install-types --non-interactive lib tests 46 | - name: Test with pytest 47 | run: | 48 | pytest -vv --cov lib/woothee --cov-report term-missing tests/*.py 49 | - name: Coveralls 50 | env: 51 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 52 | run: | 53 | coveralls --service=github 54 | -------------------------------------------------------------------------------- /lib/woothee/appliance.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import (division, 
print_function, 3 | absolute_import, unicode_literals) 4 | import re 5 | 6 | from . import dataset 7 | from . import util 8 | 9 | 10 | def challenge_playstation(ua, result): 11 | 12 | data = None 13 | os_version = None 14 | if 'PSP (PlayStation Portable);' in ua: 15 | data = dataset.get('PSP') 16 | regex = re.compile(r"PSP \(PlayStation Portable\); ([.0-9]+)\)") 17 | m = regex.search(ua) 18 | if m: 19 | os_version = m.group(1) 20 | elif 'PlayStation Vita' in ua: 21 | data = dataset.get('PSVita') 22 | regex = re.compile(r"PlayStation Vita ([.0-9]+)\)") 23 | m = regex.search(ua) 24 | if m: 25 | os_version = m.group(1) 26 | elif 'PLAYSTATION 3 ' in ua or 'PLAYSTATION 3;' in ua: 27 | data = dataset.get('PS3') 28 | regex = re.compile(r"PLAYSTATION 3;? ([.0-9]+)\)") 29 | m = regex.search(ua) 30 | if m: 31 | os_version = m.group(1) 32 | elif 'PlayStation 4 ' in ua: 33 | data = dataset.get('PS4') 34 | regex = re.compile(r"PlayStation 4 ([.0-9]+)\)") 35 | m = regex.search(ua) 36 | if m: 37 | os_version = m.group(1) 38 | else: 39 | return False 40 | util.update_map(result, data) 41 | if os_version: 42 | util.update_os_version(result, os_version) 43 | return True 44 | 45 | 46 | def challenge_nintendo(ua, result): 47 | if 'Nintendo 3DS;' in ua: 48 | data = dataset.get('Nintendo3DS') 49 | elif 'Nintendo DSi;' in ua: 50 | data = dataset.get('NintendoDSi') 51 | elif 'Nintendo Wii;' in ua: 52 | data = dataset.get('NintendoWii') 53 | elif '(Nintendo WiiU)' in ua: 54 | data = dataset.get('NintendoWiiU') 55 | else: 56 | return False 57 | util.update_map(result, data) 58 | return True 59 | 60 | 61 | def challenge_digitaltv(ua, result): 62 | if 'InettvBrowser/' in ua: 63 | data = dataset.get('DigitalTV') 64 | else: 65 | return False 66 | util.update_map(result, data) 67 | return True 68 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 
2 | 3 | import os 4 | import re 5 | import sys 6 | from setuptools import setup, Command, find_packages 7 | 8 | 9 | with open(os.path.join('lib', 'woothee', '__init__.py'), 'r') as f: 10 | version = re.compile( 11 | r".*__version__ = '(.*?)'", re.S).match(f.read()).group(1) 12 | 13 | with open('README.rst', 'r') as f: 14 | long_description = f.read() 15 | 16 | classifiers = [ 17 | "Development Status :: 5 - Production/Stable", 18 | "Environment :: Web Environment", 19 | "Intended Audience :: Developers", 20 | "License :: OSI Approved :: Apache Software License", 21 | "Operating System :: OS Independent", 22 | "Programming Language :: Python", 23 | "Programming Language :: Python :: 3", 24 | "Programming Language :: Python :: 3.6", 25 | "Programming Language :: Python :: 3.7", 26 | "Programming Language :: Python :: 3.8", 27 | "Programming Language :: Python :: 3.9", 28 | "Topic :: Internet :: WWW/HTTP", 29 | "Topic :: Software Development :: Libraries :: Python Modules", 30 | ] 31 | 32 | setup_pkgs = ['PyYAML', 'pytest-runner'] 33 | test_pkgs = ['pytest', 'pytest-cov', 'pytest-mock', 'zipp==1.1.0'] 34 | 35 | 36 | class DatasetCommand(Command): 37 | 38 | description = 'generate dataset.py' 39 | user_options = [] 40 | 41 | def initialize_options(self): 42 | pass 43 | 44 | def finalize_options(self): 45 | pass 46 | 47 | def run(self): 48 | import os 49 | import sys 50 | root_dir = os.path.dirname(os.path.abspath(__file__)) 51 | scripts_dir = os.path.join(root_dir, 'scripts') 52 | sys.path.insert(0, scripts_dir) 53 | import dataset_yaml2py # NOQA 54 | 55 | 56 | setup( 57 | name='woothee', 58 | version=version, 59 | description='Cross-language UserAgent classifier library, python implementation', # NOQA 60 | author='tell-k', 61 | author_email='ffk2005@gmail.com', 62 | url='https://github.com/woothee/woothee-python', 63 | license='Apache License 2.0', 64 | packages=find_packages('lib'), 65 | package_dir={'': 'lib'}, 66 | package_data={ 67 | 'woothee': ['py.typed', 
'*.pyi'], 68 | }, 69 | platforms='any', 70 | setup_requires=setup_pkgs, 71 | tests_require=test_pkgs, 72 | extras_require={ 73 | "test": test_pkgs, 74 | "setup": setup_pkgs, 75 | }, 76 | long_description=long_description, 77 | classifiers=classifiers, 78 | keywords=['web', 'user-agent', 'parser'], 79 | cmdclass={'dataset': DatasetCommand}, 80 | ) 81 | -------------------------------------------------------------------------------- /lib/woothee/misc.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import (division, print_function, 3 | absolute_import, unicode_literals) 4 | 5 | import re 6 | from . import dataset 7 | from . import util 8 | 9 | 10 | def challenge_desktoptools(ua, result): 11 | if 'AppleSyndication/' in ua: 12 | data = dataset.get('SafariRSSReader') 13 | elif 'compatible; Google Desktop/' in ua: 14 | data = dataset.get('GoogleDesktop') 15 | elif 'Windows-RSS-Platform' in ua: 16 | data = dataset.get('WindowsRSSReader') 17 | else: 18 | return False 19 | util.update_map(result, data) 20 | return True 21 | 22 | 23 | def challenge_smartphone_patterns(ua, result): 24 | if 'CFNetwork/' in ua: 25 | data = dataset.get('iOS') 26 | util.update_category(result, data[dataset.KEY_CATEGORY]) 27 | util.update_os(result, data[dataset.KEY_NAME]) 28 | return True 29 | return False 30 | 31 | 32 | def challenge_http_library(ua, result): 33 | if re.search('^(?:Apache-HttpClient/|Jakarta Commons-HttpClient/|Java/)', 34 | ua) or re.search('[- ]HttpClient(/|$)', ua): 35 | 36 | data, version = dataset.get('HTTPLibrary'), 'Java' 37 | elif 'Java(TM) 2 Runtime Environment,' in ua: 38 | data, version = dataset.get('HTTPLibrary'), 'Java' 39 | elif re.search('^Wget', ua): 40 | data, version = dataset.get('HTTPLibrary'), 'wget' 41 | elif re.search( 42 | '^(?:libwww-perl|WWW-Mechanize|LWP::Simple|LWP |lwp-trivial)', ua): 43 | 44 | data, version = dataset.get('HTTPLibrary'), 'perl' 45 | elif 
re.search('^(?:Ruby|feedzirra|Typhoeus)', ua): 46 | data, version = dataset.get('HTTPLibrary'), 'ruby' 47 | elif re.search('^(?:Python-urllib/|Twisted )', ua): 48 | data, version = dataset.get('HTTPLibrary'), 'python' 49 | elif re.search('^(?:PHP|WordPress|CakePHP|PukiWiki|PECL::HTTP)(?:/| |$)', 50 | ua) or re.search('(?:PEAR |)HTTP_Request(?: class|2)', ua): 51 | 52 | data, version = dataset.get('HTTPLibrary'), 'php' 53 | elif ua.startswith('curl/'): 54 | data, version = dataset.get('HTTPLibrary'), 'curl' 55 | else: 56 | return False 57 | util.update_map(result, data) 58 | util.update_version(result, version) 59 | return True 60 | 61 | 62 | def challenge_maybe_rss_reader(ua, result): 63 | if re.search('rss(?:reader|bar|[-_ /;()]|[ +]*/)', 64 | ua, re.I) or re.search('headline-reader', ua, re.I): 65 | data = dataset.get('VariousRSSReader') 66 | elif 'cococ/' in ua: 67 | data = dataset.get('VariousRSSReader') 68 | else: 69 | return False 70 | util.update_map(result, data) 71 | return True 72 | -------------------------------------------------------------------------------- /lib/woothee/mobilephone.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import (division, print_function, 3 | absolute_import, unicode_literals) 4 | 5 | import re 6 | from . import dataset 7 | from . 
import util 8 | 9 | 10 | def challenge_docomo(ua, result): 11 | if 'DoCoMo' not in ua and ';FOMA;' not in ua: 12 | return False 13 | 14 | version = dataset.VALUE_UNKNOWN 15 | obj = re.search('DoCoMo/[.0-9]+[ /]([^- /;()"\']+)', ua) 16 | if obj: 17 | version = obj.group(1) 18 | else: 19 | obj = re.search(r'\(([^;)]+);FOMA;', ua) 20 | if obj: 21 | version = obj.group(1) 22 | 23 | util.update_map(result, dataset.get('docomo')) 24 | util.update_version(result, version) 25 | return True 26 | 27 | 28 | def challenge_au(ua, result): 29 | if 'KDDI-' not in ua: 30 | return False 31 | version = dataset.VALUE_UNKNOWN 32 | obj = re.search('KDDI-([^- /;()"\']+)', ua) 33 | if obj: 34 | version = obj.group(1) 35 | 36 | util.update_map(result, dataset.get('au')) 37 | util.update_version(result, version) 38 | return True 39 | 40 | 41 | def challenge_softbank(ua, result): 42 | if 'SoftBank' not in ua and 'Vodafone' not in ua and 'J-PHONE' not in ua: 43 | return False 44 | 45 | version = dataset.VALUE_UNKNOWN 46 | obj = re.search('(?:SoftBank|Vodafone|J-PHONE)/[.0-9]+/([^ /;()]+)', ua) 47 | if obj: 48 | version = obj.group(1) 49 | 50 | util.update_map(result, dataset.get('SoftBank')) 51 | util.update_version(result, version) 52 | return True 53 | 54 | 55 | def challenge_willcom(ua, result): 56 | if 'WILLCOM' not in ua and 'DDIPOCKET' not in ua: 57 | return False 58 | 59 | version = dataset.VALUE_UNKNOWN 60 | obj = re.search('(?:WILLCOM|DDIPOCKET);[^/]+/([^ /;()]+)', ua) 61 | if obj: 62 | version = obj.group(1) 63 | 64 | util.update_map(result, dataset.get('willcom')) 65 | util.update_version(result, version) 66 | return True 67 | 68 | 69 | def challenge_misc(ua, result): 70 | if 'jig browser' in ua: 71 | util.update_map(result, dataset.get('jig')) 72 | obj = re.search('jig browser[^;]+; ([^);]+)', ua) 73 | if obj: 74 | util.update_version(result, obj.group(1)) 75 | return True 76 | 77 | if 'emobile/' in ua or 'OpenBrowser' in ua\ 78 | or 'Browser/Obigo-Browser' in ua: 79 | 
util.update_map(result, dataset.get('emobile')) 80 | return True 81 | 82 | if 'SymbianOS' in ua: 83 | util.update_map(result, dataset.get('SymbianOS')) 84 | return True 85 | 86 | if 'Hatena-Mobile-Gateway/' in ua: 87 | util.update_map(result, dataset.get('MobileTranscoder')) 88 | util.update_version(result, 'Hatena') 89 | return True 90 | 91 | if 'livedoor-Mobile-Gateway/' in ua: 92 | util.update_map(result, dataset.get('MobileTranscoder')) 93 | util.update_version(result, 'livedoor') 94 | return True 95 | 96 | return False 97 | -------------------------------------------------------------------------------- /scripts/dataset_yaml2py.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import (division, print_function, 3 | absolute_import, unicode_literals) 4 | 5 | import os 6 | import yaml 7 | import datetime 8 | 9 | base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 10 | dataset_file = os.path.join(base_dir, 'woothee', 'dataset.yaml') 11 | py_file = os.path.join(base_dir, 'lib', 'woothee', 'dataset.py') 12 | 13 | generated_timestamp = datetime.datetime.now() 14 | try: 15 | import pwd 16 | generated_username = pwd.getpwuid(os.getuid())[0] 17 | except Exception: 18 | generated_username = os.environ.get('USERNAME') 19 | 20 | timestamp = '# GENERATED from dataset.yaml at %s by %s' % ( 21 | generated_timestamp.ctime(), generated_username) 22 | 23 | lines = [timestamp] 24 | 25 | fp = open(dataset_file, 'rb') 26 | try: 27 | for datasets in yaml.safe_load_all(fp): 28 | for dataset in datasets: 29 | label = dataset['label'] 30 | name = dataset['name'] 31 | type = dataset['type'] 32 | 33 | # obj = { 34 | # "label": "MSIE", 35 | # "name": "Internet Explorer", 36 | # "type": "browser" 37 | # } 38 | # obj[vendor] = 'Microsoft' 39 | # DATASET['MSIE'] = obj 40 | 41 | lines.append("obj = {'label': '%s', 'name': '%s', 'type': '%s'} # NOQA" 42 | % (label, name, type)) 43 | if type == 
'browser': 44 | lines.append("obj['vendor'] = '%s'" % dataset['vendor']) 45 | elif type == 'os': 46 | lines.append("obj['category'] = '%s'" % dataset['category']) 47 | elif type == 'full': 48 | if dataset.get('vendor'): 49 | lines.append("obj['vendor'] = '%s'" % dataset['vendor']) 50 | lines.append("obj['category'] = '%s'" % dataset['category']) 51 | if dataset.get('os'): 52 | lines.append("obj['os'] = '%s'" % dataset['os']) 53 | else: 54 | raise Exception('unknown type ' + type) 55 | lines.append("DATASET[obj['label']] = obj") 56 | finally: 57 | fp.close() 58 | 59 | dynamic_lines = '\n '.join(lines) 60 | 61 | module_text = """\ 62 | # -*- coding: utf-8 -*- 63 | 64 | KEY_LABEL = 'label' 65 | KEY_NAME = 'name' 66 | KEY_TYPE = 'type' 67 | KEY_CATEGORY = 'category' 68 | KEY_OS = 'os' 69 | KEY_OS_VERSION = 'os_version' 70 | KEY_VENDOR = 'vendor' 71 | KEY_VERSION = 'version' 72 | 73 | TYPE_BROWSER = 'browser' 74 | TYPE_OS = 'os' 75 | TYPE_FULL = 'full' 76 | 77 | CATEGORY_PC = 'pc' 78 | CATEGORY_SMARTPHONE = 'smartphone' 79 | CATEGORY_MOBILEPHONE = 'mobilephone' 80 | CATEGORY_CRAWLER = 'crawler' 81 | CATEGORY_APPLIANCE = 'appliance' 82 | CATEGORY_MISC = 'misc' 83 | 84 | ATTRIBUTE_NAME = 'name' 85 | ATTRIBUTE_CATEGORY = 'category' 86 | ATTRIBUTE_OS = 'os' 87 | ATTRIBUTE_OS_VERSION = 'os_version' 88 | ATTRIBUTE_VENDOR = 'vendor' 89 | ATTRIBUTE_VERSION = 'version' 90 | 91 | VALUE_UNKNOWN = 'UNKNOWN' 92 | 93 | CATEGORY_LIST = [ 94 | CATEGORY_PC, CATEGORY_SMARTPHONE, CATEGORY_MOBILEPHONE, 95 | CATEGORY_CRAWLER, CATEGORY_APPLIANCE, CATEGORY_MISC, VALUE_UNKNOWN 96 | ] 97 | ATTRIBUTE_LIST = [ 98 | ATTRIBUTE_NAME, ATTRIBUTE_CATEGORY, ATTRIBUTE_OS, 99 | ATTRIBUTE_VENDOR, ATTRIBUTE_VERSION, ATTRIBUTE_OS_VERSION 100 | ] 101 | 102 | DATASET = {} 103 | 104 | 105 | def _init(): 106 | %s 107 | 108 | 109 | _init() 110 | 111 | 112 | def get(label): 113 | return DATASET[label] 114 | """ % dynamic_lines 115 | 116 | fp = open(py_file, 'w') 117 | try: 118 | fp.write(module_text) 119 | 
finally: 120 | fp.close() 121 | -------------------------------------------------------------------------------- /lib/woothee/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import (division, print_function, 3 | absolute_import, unicode_literals) 4 | from typing import Dict # noqa 5 | 6 | from . import dataset 7 | from . import browser 8 | from . import os 9 | from . import mobilephone 10 | from . import crawler 11 | from . import appliance 12 | from . import misc 13 | 14 | VERSION = (1, 12, 1) 15 | __version__ = '1.12.1' 16 | 17 | FILLED = { 18 | dataset.ATTRIBUTE_NAME: dataset.VALUE_UNKNOWN, 19 | dataset.ATTRIBUTE_CATEGORY: dataset.VALUE_UNKNOWN, 20 | dataset.ATTRIBUTE_OS: dataset.VALUE_UNKNOWN, 21 | dataset.ATTRIBUTE_OS_VERSION: dataset.VALUE_UNKNOWN, 22 | dataset.ATTRIBUTE_VERSION: dataset.VALUE_UNKNOWN, 23 | dataset.ATTRIBUTE_VENDOR: dataset.VALUE_UNKNOWN, 24 | } 25 | 26 | 27 | def parse(useragent): 28 | return fill_result(exec_parse(useragent)) 29 | 30 | 31 | def is_crawler(useragent): 32 | return useragent and useragent != '-' and try_crawler(useragent, {}) 33 | 34 | 35 | def exec_parse(useragent): 36 | result = {} # type: Dict[str, str] 37 | 38 | if not useragent or useragent == '-': 39 | return result 40 | 41 | if try_crawler(useragent, result): 42 | return result 43 | 44 | if try_browser(useragent, result): 45 | try_os(useragent, result) 46 | return result 47 | 48 | if try_mobilephone(useragent, result): 49 | return result 50 | 51 | if try_appliance(useragent, result): 52 | return result 53 | 54 | if try_misc(useragent, result): 55 | return result 56 | 57 | # browser unknown. 
check os only 58 | if try_os(useragent, result): 59 | return result 60 | 61 | try_rare_cases(useragent, result) 62 | 63 | return result 64 | 65 | 66 | def try_crawler(useragent, result): 67 | if crawler.challenge_google(useragent, result): 68 | return True 69 | if crawler.challenge_crawlers(useragent, result): 70 | return True 71 | return False 72 | 73 | 74 | def try_browser(useragent, result): 75 | if browser.challenge_msie(useragent, result): 76 | return True 77 | if browser.challenge_vivaldi(useragent, result): 78 | return True 79 | if browser.challenge_yandexbrowser(useragent, result): 80 | return True 81 | if browser.challenge_samsung(useragent, result): 82 | return True 83 | if browser.challenge_safari_chrome(useragent, result): 84 | return True 85 | if browser.challenge_firefox(useragent, result): 86 | return True 87 | if browser.challenge_opera(useragent, result): 88 | return True 89 | if browser.challenge_webview(useragent, result): 90 | return True 91 | return False 92 | 93 | 94 | def try_os(useragent, result): 95 | if os.challenge_windows(useragent, result): 96 | return True 97 | # OSX PC and iOS devices (strict check) 98 | if os.challenge_osx(useragent, result): 99 | return True 100 | # Linux PC and Android 101 | if os.challenge_linux(useragent, result): 102 | return True 103 | # all useragents matches /(iPhone|iPad|iPod|Android|BlackBerry)/ 104 | if os.challenge_smartphone(useragent, result): 105 | return True 106 | # mobile phones like KDDI-.* 107 | if os.challenge_mobilephone(useragent, result): 108 | return True 109 | # Nintendo DSi/Wii with Opera 110 | if os.challenge_appliance(useragent, result): 111 | return True 112 | # Win98, BSD, classic MacOS, ... 
113 | if os.challenge_misc(useragent, result): 114 | return True 115 | return False 116 | 117 | 118 | def try_mobilephone(useragent, result): 119 | if mobilephone.challenge_docomo(useragent, result): 120 | return True 121 | if mobilephone.challenge_au(useragent, result): 122 | return True 123 | if mobilephone.challenge_softbank(useragent, result): 124 | return True 125 | if mobilephone.challenge_willcom(useragent, result): 126 | return True 127 | if mobilephone.challenge_misc(useragent, result): 128 | return True 129 | return False 130 | 131 | 132 | def try_appliance(useragent, result): 133 | if appliance.challenge_playstation(useragent, result): 134 | return True 135 | if appliance.challenge_nintendo(useragent, result): 136 | return True 137 | if appliance.challenge_digitaltv(useragent, result): 138 | return True 139 | return False 140 | 141 | 142 | def try_misc(useragent, result): 143 | if misc.challenge_desktoptools(useragent, result): 144 | return True 145 | return False 146 | 147 | 148 | def try_rare_cases(useragent, result): 149 | if misc.challenge_smartphone_patterns(useragent, result): 150 | return True 151 | if browser.challenge_sleipnir(useragent, result): 152 | return True 153 | if misc.challenge_http_library(useragent, result): 154 | return True 155 | if misc.challenge_maybe_rss_reader(useragent, result): 156 | return True 157 | if crawler.challenge_maybe_crawler(useragent, result): 158 | return True 159 | return False 160 | 161 | 162 | def fill_result(result): 163 | merged = FILLED.copy() 164 | merged.update(result) 165 | return merged 166 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Woothee python 2 | ============== 3 | 4 | |github actions| |coveralls| |version| |license| 5 | 6 | The Python implementation of Project Woothee, which is multi-language 7 | user-agent strings parsers. 
8 | 9 | https://github.com/woothee/woothee 10 | 11 | Installation 12 | ------------ 13 | 14 | :: 15 | 16 | $ pip install woothee 17 | 18 | Usage 19 | ----- 20 | 21 | Parsing user-agent 22 | ~~~~~~~~~~~~~~~~~~ 23 | 24 | .. code:: python 25 | 26 | import woothee 27 | woothee.parse("Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)") 28 | # => {'name': 'Internet Explorer', 'category': 'pc', 'os': 'Windows 7', 'version': '8.0', 'vendor': 'Microsoft', 'os_version': 'NT 6.1'} 29 | 30 | Parses a user-agent string and returns an object with keys ``name``, ``category``, ``os``, ``version``, ``vendor`` and ``os_version``. 31 | 32 | For unknown user-agent (or partially failed to parse), result objects 33 | may have value 'UNKNOWN'. 34 | 35 | * ``category`` 36 | 37 | * labels of user terminal type, one of 'pc', 'smartphone', 'mobilephone', 'appliance', 'crawler' or 'misc' (or 'UNKNOWN') 38 | 39 | * ``name`` 40 | 41 | * the name of browser, like 'Internet Explorer', 'Firefox', 'GoogleBot' 42 | 43 | * ``version`` 44 | 45 | * version string, like '8.0' for IE, '9.0.1' for Firefox, '0.2.149.27' for Chrome, and so on 46 | 47 | * ``os`` 48 | 49 | * ex: 'Windows 7', 'Mac OSX', 'iPhone', 'iPad', 'Android' 50 | * This field is used to indicate the cellular phone carrier for category 'mobilephone' 51 | 52 | * ``vendor`` 53 | 54 | * optional field, shows browser vendor 55 | 56 | * ``os_version`` 57 | 58 | * optional field, shows version of operating systems 59 | 60 | Finding crawlers (almost all, not all) in fast 61 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 62 | 63 | .. code:: python 64 | 65 | woothee.is_crawler('Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)') 66 | # => False 67 | 68 | Try to see useragent's category is 'crawler' or not, by casual(fast) 69 | method. Minor case of crawlers is not tested in this method. To check 70 | crawler strictly, use ``woothee.parse(str)['category'] == 'crawler'``. 
71 | 72 | Authors 73 | ------- 74 | 75 | * UEDA Tetsuhiro (najeira) 76 | * TAGOMORI Satoshi tagomoris@gmail.com 77 | * tell-k ffk2005@gmail.com 78 | 79 | License 80 | ------- 81 | 82 | Copyright 2012- TAGOMORI Satoshi (tagomoris) 83 | 84 | Licensed under the Apache License, Version 2.0 (the "License"); you may 85 | not use this file except in compliance with the License. You may obtain 86 | a copy of the License at 87 | 88 | http://www.apache.org/licenses/LICENSE-2.0 89 | 90 | Unless required by applicable law or agreed to in writing, software 91 | distributed under the License is distributed on an "AS IS" BASIS, 92 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 93 | See the License for the specific language governing permissions and 94 | limitations under the License. 95 | 96 | History 97 | ------- 98 | 99 | 1.13.0(not publish in PyPI) 100 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 101 | 102 | * `#27 support woothee v1.13.0 `_. 103 | 104 | 1.12.0, 1.12.1(not publish in PyPI) 105 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 106 | 107 | * **drop support Python2 and Python3.5** 108 | * `#23 support woothee v1.12 `_. 109 | 110 | * Add support for Samsung browser 111 | * Add support Google bot 112 | 113 | 1.11.0(not publish in PyPI) 114 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 115 | 116 | * `#18 support woothee v1.11 `_. 117 | 118 | * Add support GSA 119 | 120 | 1.10.1(Aug 8, 2019) 121 | ~~~~~~~~~~~~~~~~~~~~ 122 | 123 | * `#16 drop support Python3.4 `_. 124 | 125 | 1.10.0(Apr 14, 2019) 126 | ~~~~~~~~~~~~~~~~~~~~ 127 | 128 | * `#15 Support for v1.10.0 `_ 129 | 130 | 1.8.0(Jul 5, 2018) 131 | ~~~~~~~~~~~~~~~~~~~~ 132 | 133 | * `#14 Drop support Python 2.6 and Python 3.2 `_. 134 | * `#13 Add support for Yandex Browser `_. Thanks to hhatto . 135 | 136 | 1.7.0(May 7, 2017) 137 | ~~~~~~~~~~~~~~~~~~~~ 138 | 139 | * `#12 Release v1.7.0 `_ 140 | * Add support for WebView on Android. 141 | * Add support for curl. 142 | * Add support for trendictionbot crawler. 
143 | * Add support for Yeti 1.1. 144 | * Compatible with Python 3.6. 145 | * **Caution. We'll drop Python2.6 and python3.2 support in the next version.** 146 | 147 | 1.5.0(Aug 16, 2016) 148 | ~~~~~~~~~~~~~~~~~~~~ 149 | 150 | * `#11 Support BingPreview `_ Thanks to taise. 151 | 152 | 1.4.0(May 17, 2016) 153 | ~~~~~~~~~~~~~~~~~~~~ 154 | 155 | * Add support for Vivaldi 156 | 157 | 1.3.0(Jan 7, 2016) 158 | ~~~~~~~~~~~~~~~~~~~~ 159 | 160 | * Add support for Firefox for iOS 161 | 162 | 1.2.0(Aug 16, 2015) 163 | ~~~~~~~~~~~~~~~~~~~~ 164 | 165 | * Add support for Twitterbot 166 | * Add support for webviews of mobile devices 167 | * Add support for Windows 10 and Edge browser 168 | * Add support for BlackBerry10 169 | 170 | 1.1.0(Mar 1, 2015) 171 | ~~~~~~~~~~~~~~~~~~~~ 172 | * `#9 Test blank cases `_ Thanks to yuya-takeyama. 173 | 174 | 1.0.0(Jan 20, 2015) 175 | ~~~~~~~~~~~~~~~~~~~~ 176 | * First release 177 | 178 | 179 | .. |github actions| image:: https://github.com/woothee/woothee-python/workflows/Python%20package/badge.svg 180 | :target: https://github.com/woothee/woothee-python/actions 181 | :alt: GitHub Actions build status 182 | 183 | .. |coveralls| image:: https://coveralls.io/repos/woothee/woothee-python/badge.png 184 | :target: https://coveralls.io/r/woothee/woothee-python 185 | :alt: coveralls.io 186 | 187 | .. |version| image:: https://img.shields.io/pypi/v/woothee.svg 188 | :target: http://pypi.python.org/pypi/woothee/ 189 | :alt: latest version 190 | 191 | .. |license| image:: https://img.shields.io/pypi/l/woothee.svg 192 | :target: http://pypi.python.org/pypi/woothee/ 193 | :alt: license 194 | -------------------------------------------------------------------------------- /lib/woothee/browser.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import (division, print_function, 3 | absolute_import, unicode_literals) 4 | 5 | import re 6 | from . import dataset 7 | from . 
def _first_group(pattern, ua):
    """Return group 1 of the first match of *pattern* in *ua*.

    Falls back to ``dataset.VALUE_UNKNOWN`` when *pattern* does not match.
    """
    found = re.search(pattern, ua)
    if found:
        return found.group(1)
    return dataset.VALUE_UNKNOWN


def _identify(result, label, version):
    """Copy the dataset entry *label* into *result*, then set *version*.

    Always returns ``True`` so that a challenge can simply
    ``return _identify(...)`` once it has recognised the browser.
    """
    util.update_map(result, dataset.get(label))
    util.update_version(result, version)
    return True


def challenge_msie(ua, result):
    """Recognise user agents carrying an MSIE, Trident or IEMobile token.

    The version is taken from ``MSIE x;`` first, then from `` rv:x`` (only
    when a ``Trident/x;`` token is present as well), then from
    ``IEMobile/x;``; it stays UNKNOWN when none of them is found.
    Returns ``False`` without touching *result* when no token is present.
    """
    has_token = ('compatible; MSIE' in ua or 'Trident/' in ua
                 or 'IEMobile' in ua)
    if not has_token:
        return False

    msie = re.search(r'MSIE ([.0-9]+);', ua)
    trident = re.search(r'Trident\/([.0-9]+);', ua)
    revision = re.search(r' rv:([.0-9]+)', ua)
    iemobile = re.search(r'IEMobile\/([.0-9]+);', ua)

    if msie:
        version = msie.group(1)
    elif trident and revision:
        version = revision.group(1)
    elif iemobile:
        version = iemobile.group(1)
    else:
        version = dataset.VALUE_UNKNOWN
    return _identify(result, 'MSIE', version)


def challenge_yandexbrowser(ua, result):
    """Recognise ``YaBrowser/`` agents; the version needs four numeric parts."""
    if 'YaBrowser/' not in ua:
        return False
    return _identify(
        result, 'YaBrowser',
        _first_group(r'YaBrowser/(\d+\.\d+\.\d+\.\d+)', ua))


def challenge_safari_chrome(ua, result):
    """Sort out the browsers that all advertise a ``Safari/`` token.

    Checked in this order: Edge, Firefox for iOS (``FxiOS``), Opera
    (``OPR`` next to a Chrome token), Chrome, GSA, and finally Safari
    itself.  Agents without ``Safari/`` and agents containing both
    ``Chrome`` and ``wv`` are rejected with ``False`` (the latter are
    handled by ``challenge_webview``).
    """
    if 'Safari/' not in ua:
        return False
    if 'Chrome' in ua and 'wv' in ua:
        return False

    # Edge
    edge = re.search(r'(?:Edge|Edg|EdgiOS|EdgA)\/([.0-9]+)', ua)
    if edge:
        return _identify(result, 'Edge', edge.group(1))

    fxios = re.search(r'FxiOS\/([.0-9]+)', ua)
    if fxios:
        return _identify(result, 'Firefox', fxios.group(1))

    chrome = re.search('(?:Chrome|CrMo|CriOS)/([.0-9]+)', ua)
    if chrome:
        # Opera (blink) carries a Chrome token plus its own OPR/ version.
        opera = re.search('OPR/([.0-9]+)', ua)
        if opera:
            return _identify(result, 'Opera', opera.group(1))
        # Chrome
        return _identify(result, 'Chrome', chrome.group(1))

    # GSA
    gsa = re.search('GSA/([.0-9]+)', ua)
    if gsa:
        return _identify(result, 'GSA', gsa.group(1))

    # Safari: reported even without a Version/ token (version UNKNOWN).
    return _identify(result, 'Safari', _first_group('Version/([.0-9]+)', ua))


def challenge_firefox(ua, result):
    """Recognise ``Firefox/`` agents and extract the version after the slash."""
    if 'Firefox/' not in ua:
        return False
    return _identify(result, 'Firefox',
                     _first_group('Firefox/([.0-9]+)', ua))


def challenge_opera(ua, result):
    """Recognise agents containing ``Opera``.

    A ``Version/x`` token takes precedence over the number that follows
    ``Opera/`` or ``Opera ``.
    """
    if 'Opera' not in ua:
        return False
    found = (re.search('Version/([.0-9]+)', ua)
             or re.search('Opera[/ ]([.0-9]+)', ua))
    version = found.group(1) if found else dataset.VALUE_UNKNOWN
    return _identify(result, 'Opera', version)


def challenge_webview(ua, result):
    """Recognise embedded web views on Android and iOS.

    Android: both ``Chrome`` and ``wv`` appear in the agent.
    iOS: an iPhone/iPad/iPod ``like Mac OS X`` agent *without* a
    ``Safari/`` token.  The version comes from ``Version/x`` when present.
    """
    # Android(Lollipop and Above)
    if 'Chrome' in ua and 'wv' in ua:
        return _identify(result, 'Webview',
                         _first_group('Version/([.0-9]+)', ua))

    # iOS
    is_ios = re.search('iP(?:hone;|ad;|od) .*like Mac OS X', ua)
    if not is_ios or 'Safari/' in ua:
        return False
    return _identify(result, 'Webview',
                     _first_group(r'Version\/([.0-9]+)', ua))


def challenge_sleipnir(ua, result):
    """Recognise ``Sleipnir/`` agents and force a Windows OS entry."""
    if 'Sleipnir/' not in ua:
        return False
    _identify(result, 'Sleipnir', _first_group('Sleipnir/([.0-9]+)', ua))
    # Sleipnir's user-agent doesn't contain Windows version,
    # so put 'Windows UNKNOWN Ver'.
    # Sleipnir is IE component browser, so for Windows only.
    windows = dataset.get('Win')
    util.update_category(result, windows[dataset.KEY_CATEGORY])
    util.update_os(result, windows[dataset.KEY_NAME])
    return True


def challenge_vivaldi(ua, result):
    """Recognise ``Vivaldi/`` agents and extract the version after the slash."""
    if 'Vivaldi/' not in ua:
        return False
    return _identify(result, 'Vivaldi',
                     _first_group('Vivaldi/([.0-9]+)', ua))


def challenge_samsung(ua, result):
    """Recognise ``SamsungBrowser/`` agents and extract their version."""
    if 'SamsungBrowser/' not in ua:
        return False
    return _identify(result, 'SamsungBrowser',
                     _first_group('SamsungBrowser/([.0-9]+)', ua))
def _google_label(ua):
    """Return the dataset label of the Google crawler in *ua*, or ``None``."""
    if 'compatible; Googlebot' in ua:
        if 'compatible; Googlebot-Mobile' in ua:
            return 'GoogleBotMobile'
        return 'GoogleBot'
    if 'compatible; AdsBot-Google-Mobile;' in ua:
        return 'AdsBotGoogleMobile'
    if ua.startswith('AdsBot-Google'):
        return 'AdsBotGoogle'
    if 'Googlebot-Image/' in ua:
        return 'GoogleBot'
    # A bare 'Mediapartners-Google' substring is not enough: it has to be
    # the whole agent or appear in its 'compatible; ' form.
    if 'Mediapartners-Google' in ua and (
            'compatible; Mediapartners-Google' in ua
            or ua == 'Mediapartners-Google'):
        return 'GoogleMediaPartners'
    if 'Feedfetcher-Google;' in ua:
        return 'GoogleFeedFetcher'
    if 'AppEngine-Google' in ua:
        return 'GoogleAppEngine'
    if 'Google Web Preview' in ua:
        return 'GoogleWebPreview'
    return None


def challenge_google(ua, result):
    """Recognise Google's crawlers and related fetchers.

    Returns ``False`` immediately when *ua* does not contain ``Google``.
    On a match the corresponding dataset entry is copied into *result*
    and ``True`` is returned.
    """
    if 'Google' not in ua:
        return False
    label = _google_label(ua)
    if label is None:
        return False
    util.update_map(result, dataset.get(label))
    return True


# Ordered rules: (token that must be present or None,
#                 tokens of which any one suffices, dataset label).
# The first rule that applies wins, so the order is significant.
_CRAWLER_RULES = (
    (None, ('msnbot',), 'msnbot'),
    (None, ('compatible; bingbot',), 'bingbot'),
    (None, ('BingPreview',), 'BingPreview'),
    (None,
     ('compatible; Baiduspider', 'Baiduspider+', 'Baiduspider-image+'),
     'Baiduspider'),
    ('Yeti',
     ('http://help.naver.com/robots',
      'http://naver.me/bot',
      'http://help.naver.com/support/robots.html'),
     'Yeti'),
    (None, ('FeedBurner/',), 'FeedBurner'),
    (None, ('facebookexternalhit',), 'facebook'),
    (None, ('Twitterbot/',), 'twitter'),
    ('ichiro',
     ('http://help.goo.ne.jp/door/crawler.html',
      'compatible; ichiro/mobile goo;'),
     'goo'),
    (None, ('gooblogsearch/',), 'goo'),
    (None, ('Apple-PubSub',), 'ApplePubSub'),
    (None, ('(www.radian6.com/crawler)',), 'radian6'),
    (None, ('Genieo/',), 'Genieo'),
    (None, ('labs.topsy.com/butterfly/',), 'topsyButterfly'),
    (None,
     ('rogerbot/1.0 (http://www.seomoz.org/dp/rogerbot',),
     'rogerbot'),
    (None, ('compatible; AhrefsBot/',), 'AhrefsBot'),
    (None,
     ('livedoor FeedFetcher', 'Fastladder FeedFetcher'),
     'livedoorFeedFetcher'),
    (None,
     ('Hatena Antenna', 'Hatena Pagetitle Agent', 'Hatena Diary RSS'),
     'Hatena'),
    (None, ('mixi-check', 'mixi-crawler', 'mixi-news-crawler'), 'mixi'),
    (None, ('compatible; Indy Library',), 'IndyLibrary'),
    (None, ('trendictionbot',), 'trendictionbot'),
)

# Any of these marks a Yahoo! Japan crawler once the Yahoo gate has passed.
_YAHOO_JP_TOKENS = (
    'YahooFeedSeekerJp',
    'YahooFeedSeekerBetaJp',
    'crawler (http://listing.yahoo.co.jp/support/faq/',
    'crawler (http://help.yahoo.co.jp/help/jp/',
    'Y!J-BRZ/YATSHA crawler',
    'Y!J-BRY/YATSH crawler',
)


def _yahoo_label(ua):
    """Return the dataset label of the Yahoo crawler in *ua*, or ``None``.

    Nothing is tested unless *ua* mentions ``Yahoo`` or one of the two
    yahoo.co.jp help URLs.
    """
    gate_open = ('Yahoo' in ua
                 or 'help.yahoo.co.jp/help/jp/' in ua
                 or 'listing.yahoo.co.jp/support/faq/' in ua)
    if not gate_open:
        return None
    if 'compatible; Yahoo! Slurp' in ua:
        return 'YahooSlurp'
    if any(token in ua for token in _YAHOO_JP_TOKENS):
        return 'YahooJP'
    if 'Yahoo Pipes' in ua:
        return 'YahooPipes'
    return None


def challenge_crawlers(ua, result):
    """Recognise the well-known non-Google crawlers.

    Yahoo's crawlers are checked first, then the entries of
    ``_CRAWLER_RULES`` in order.  On a match the dataset entry is copied
    into *result* and ``True`` is returned; otherwise ``False``.
    """
    label = _yahoo_label(ua)
    if label is None:
        for required, tokens, candidate in _CRAWLER_RULES:
            if required is not None and required not in ua:
                continue
            if any(token in ua for token in tokens):
                label = candidate
                break
    if label is None:
        return False
    util.update_map(result, dataset.get(label))
    return True


def challenge_maybe_crawler(ua, result):
    """Heuristically flag agents that merely look like a crawler.

    Any one of the patterns below is enough; every hit is reported with
    the same 'VariousCrawler' dataset entry.
    """
    regex = '(?:Rome Client |UnwindFetchor/|ia_archiver |Summify |PostRank/)'
    looks_like_crawler = (
        re.search('(bot|crawler|spider)(?:[-_ ./;@()]|$)', ua, re.I)
        or re.search(regex, ua)
        or 'ASP-Ranker Feed Crawler' in ua
        or re.search('(feed|web) ?parser', ua, re.I)
        or re.search('watch ?dog', ua, re.I)
    )
    if not looks_like_crawler:
        return False
    util.update_map(result, dataset.get('VariousCrawler'))
    return True
# Version pattern shared by the Linux and smartphone challenges.
_ANDROID_VERSION = r"Android[- ](\d+(?:\.\d+(?:\.\d+)?)?)"

# Exact text captured after "Windows " -> dataset label.
_WINDOWS_LABELS = {
    'NT 10.0': 'Win10',
    'NT 6.3': 'Win8.1',
    'NT 6.2': 'Win8',
    'NT 6.1': 'Win7',
    'NT 6.0': 'WinVista',
    'NT 5.1': 'WinXP',
    'NT 5.0': 'Win2000',
    'NT 4.0': 'WinNT4',
    # wow, WinMe is shown as 'Windows 98; Win9x 4.90', fxxxk
    '98': 'Win98',
    '95': 'Win95',
    'CE': 'WinCE',
}

# Ordered (token, dataset label, version pattern or None); first hit wins.
_SMARTPHONE_RULES = (
    ('iPhone', 'iPhone', None),
    ('iPad', 'iPad', None),
    ('iPod', 'iPod', None),
    ('Android', 'Android', _ANDROID_VERSION),
    ('CFNetwork', 'iOS', None),
    ('BB10', 'BlackBerry10', r"BB10(?:.+)Version\/([.0-9]+)"),
    ('BlackBerry', 'BlackBerry', r"BlackBerry(?:\d+)\/([.0-9]+) "),
)


def _apply_os(result, data, os_version=None):
    """Write category and OS name of *data* into *result*.

    The OS version is written as well, but only when *os_version* is truthy.
    """
    util.update_category(result, data[dataset.KEY_CATEGORY])
    util.update_os(result, data[dataset.KEY_NAME])
    if os_version:
        util.update_os_version(result, os_version)


def challenge_windows(ua, result):
    """Recognise Windows, Windows Phone and Xbox user agents.

    Xbox agents replace the whole *result* with the console's dataset
    entry.  Otherwise the text after ``Windows `` selects the dataset
    entry; unrecognised text keeps the generic 'Win' entry but is still
    stored as the OS version.
    """
    if 'Windows' not in ua:
        return False

    # Xbox Series
    if 'Xbox' in ua:
        console = "XboxOne" if 'Xbox; Xbox One)' in ua else "Xbox360"
        # overwrite browser detections as appliance
        util.update_map(result, dataset.get(console))
        return True

    found = re.search('Windows ([ .a-zA-Z0-9]+)[;\\)]', ua)
    if not found:
        # Windows, but version unknown
        _apply_os(result, dataset.get('Win'))
        return True

    version = found.group(1)
    label = _WINDOWS_LABELS.get(version)
    if label is None:
        phone = re.search(r"^Phone(?: OS)? ([.0-9]+)", version)
        if phone:
            version = phone.group(1)
            label = 'WinPhone'
        else:
            label = 'Win'

    data = dataset.get(label)
    util.update_category(result, data[dataset.KEY_CATEGORY])
    util.update_os(result, data[dataset.KEY_NAME])
    util.update_os_version(result, version)
    return True


def challenge_osx(ua, result):
    """Recognise Mac OS X and the iOS devices that claim ``like Mac OS X``.

    Underscores in the captured version are turned into dots.
    """
    if 'Mac OS X' not in ua:
        return False

    data = dataset.get('OSX')
    if 'like Mac OS X' in ua:
        devices = (('iPhone;', 'iPhone'), ('iPad;', 'iPad'), ('iPod', 'iPod'))
        for token, label in devices:
            if token in ua:
                data = dataset.get(label)
                break
        pattern = r"; CPU(?: iPhone)? OS (\d+_\d+(?:_\d+)?) like Mac OS X"
    else:
        pattern = r"Mac OS X (10[._]\d+(?:[._]\d+)?)(?:\)|;)"

    found = re.search(pattern, ua)
    version = found.group(1).replace('_', '.') if found else None
    _apply_os(result, data, version)
    return True


def challenge_linux(ua, result):
    """Recognise Linux agents, reporting Android when that token is present."""
    if 'Linux' not in ua:
        return False

    os_version = None
    if 'Android' in ua:
        data = dataset.get('Android')
        found = re.search(_ANDROID_VERSION, ua)
        if found:
            os_version = found.group(1)
    else:
        data = dataset.get('Linux')
    _apply_os(result, data, os_version)
    return True


def challenge_smartphone(ua, result):
    """Recognise smartphone platforms by a characteristic token.

    The first token of ``_SMARTPHONE_RULES`` found in *ua* picks the
    dataset entry.  Independently of that, an agent already identified as
    Firefox is re-labelled Firefox OS when it matches the Firefox OS
    pattern.  Returns ``False`` when nothing applied.
    """
    data = None
    os_version = None
    for token, label, version_pattern in _SMARTPHONE_RULES:
        if token not in ua:
            continue
        data = dataset.get(label)
        if version_pattern is not None:
            found = re.search(version_pattern, ua)
            if found:
                os_version = found.group(1)
        break

    firefox_name = dataset.get('Firefox')[dataset.KEY_NAME]
    if result.get(dataset.KEY_NAME) == firefox_name:
        # Firefox OS specific pattern
        # http://lawrencemandel.com/2012/07/27/decision-made-firefox-os-user-agent-string/
        # https://github.com/woothee/woothee/issues/2
        found = re.search(
            r"^Mozilla\/[.0-9]+ \((?:Mobile|Tablet);(?:.*;)?"
            r" rv:([.0-9]+)\) Gecko\/[.0-9]+ Firefox\/[.0-9]+$",
            ua)
        if found:
            data = dataset.get('FirefoxOS')
            os_version = found.group(1)

    if not data:
        return False
    _apply_os(result, data, os_version)
    return True


def challenge_mobilephone(ua, result):
    """Recognise feature phones (KDDI, WILLCOM, SymbianOS) and transcoders.

    For KDDI and WILLCOM/DDIPOCKET the captured terminal id is stored as
    the version; a carrier token without a parsable id falls through to
    the later checks.
    """
    carriers = (
        (('KDDI-',), 'KDDI-([^- /;()"\']+)', 'au'),
        (('WILLCOM', 'DDIPOCKET'),
         '(?:WILLCOM|DDIPOCKET);[^/]+/([^ /;()]+)', 'willcom'),
    )
    for tokens, pattern, label in carriers:
        if not any(token in ua for token in tokens):
            continue
        found = re.search(pattern, ua)
        if found:
            terminal = found.group(1)
            data = dataset.get(label)
            util.update_category(result, data[dataset.KEY_CATEGORY])
            util.update_os(result, data[dataset.KEY_OS])
            util.update_version(result, terminal)
            return True

    if 'SymbianOS' in ua:
        data = dataset.get('SymbianOS')
        util.update_category(result, data[dataset.KEY_CATEGORY])
        util.update_os(result, data[dataset.KEY_OS])
        return True

    transcoders = (
        ('Google Wireless Transcoder', 'Google'),
        ('Naver Transcoder', 'Naver'),
    )
    for token, provider in transcoders:
        if token in ua:
            util.update_map(result, dataset.get('MobileTranscoder'))
            util.update_version(result, provider)
            return True
    return False


def challenge_appliance(ua, result):
    """Recognise the Nintendo DSi and Wii consoles."""
    consoles = (
        ('Nintendo DSi;', 'NintendoDSi'),
        ('Nintendo Wii;', 'NintendoWii'),
    )
    for token, label in consoles:
        if token in ua:
            data = dataset.get(label)
            util.update_category(result, data[dataset.KEY_CATEGORY])
            util.update_os(result, data[dataset.KEY_OS])
            return True
    return False


def challenge_misc(ua, result):
    """Recognise the remaining systems: Win98, classic MacOS, FreeBSD, ChromeOS."""
    os_version = None
    version_pattern = None
    if '(Win98;' in ua:
        label = 'Win98'
        os_version = '98'
    elif 'Macintosh; U; PPC;' in ua:
        label = 'MacOS'
        version_pattern = r"rv:(\d+\.\d+\.\d+)"
    elif 'Mac_PowerPC' in ua:
        label = 'MacOS'
    elif 'X11; FreeBSD ' in ua:
        label = 'BSD'
        version_pattern = r"FreeBSD ([^;\)]+);"
    elif 'X11; CrOS ' in ua:
        label = 'ChromeOS'
        version_pattern = r"CrOS ([^\)]+)\)"
    else:
        return False

    data = dataset.get(label)
    if version_pattern is not None:
        found = re.search(version_pattern, ua)
        if found:
            os_version = found.group(1)
    _apply_os(result, data, os_version)
    return True
BASE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, os.path.join(BASE_PATH, 'lib'))  # type: ignore
TESTSET_DIR = os.path.join(BASE_PATH, 'woothee', 'testsets')

# [testset file name, group name used in assertion messages]
TARGETS = [
    ['appliance.yaml', 'Appliance'],
    ['blank.yaml', 'Blank'],
    ['crawler.yaml', 'Crawler'],
    ['crawler_google.yaml', 'Crawler/Google'],
    ['crawler_nonmajor.yaml', 'Crawler/NonMajor'],
    ['misc.yaml', 'Misc'],
    ['mobilephone_au.yaml', 'MobilePhone/au'],
    ['mobilephone_docomo.yaml', 'MobilePhone/docomo'],
    ['mobilephone_misc.yaml', 'MobilePhone/misc'],
    ['mobilephone_softbank.yaml', 'MobilePhone/softbank'],
    ['mobilephone_willcom.yaml', 'MobilePhone/willcom'],
    ['pc_lowpriority.yaml', 'PC/LowPriority'],
    ['pc_misc.yaml', 'PC/Misc'],
    ['pc_windows.yaml', 'PC/Windows'],
    ['smartphone_android.yaml', 'SmartPhone/android'],
    ['smartphone_ios.yaml', 'SmartPhone/ios'],
    ['smartphone_misc.yaml', 'SmartPhone/misc'],
]


def gen_test_cases():
    """Yield ``(groupname, test_case)`` for every case of every testset file.

    Each file listed in ``TARGETS`` is read from ``TESTSET_DIR``; every
    YAML document in it is expected to be a list of test cases.
    """
    for filename, groupname in TARGETS:
        path = os.path.join(TESTSET_DIR, filename)
        with open(path, 'rb') as stream:
            for document in yaml.safe_load_all(stream):
                for case in document:
                    yield groupname, case


def _attrs(name, version, os_name, os_version, category, vendor):
    """Build the full attribute dict that ``woothee.parse`` is expected to return."""
    return {
        "name": name,
        "version": version,
        "os": os_name,
        "os_version": os_version,
        "category": category,
        "vendor": vendor,
    }


# (expected, ua_string) pairs covering branches the shared testsets miss.
_EXTRA_CASES = [
    # 48 line in lib/woothee/appliance.py
    (_attrs("Nintendo DSi", "UNKNOWN", "Nintendo DSi", "UNKNOWN",
            "appliance", "Nintendo"),
     "(Nintendo DSi; U; ja)"),
    # 50 line in lib/woothee/appliance.py
    (_attrs("Nintendo Wii", "UNKNOWN", "Nintendo Wii", "UNKNOWN",
            "appliance", "Nintendo"),
     "(Nintendo Wii; U; ; 3642; ja)"),
    # 26 line lib/woothee/browser.py
    (_attrs("Internet Explorer", "11.0", "Windows Phone OS", "8.1",
            "smartphone", "Microsoft"),
     ("Mozilla/5.0 (Windows Phone 8.1; ARM; Trident/7.0;"
      " Touch; IEMobile/11.0; NOKIA; Lumia 930) like Gecko")),
    # 159 line lib/woothee/crawler.py
    (_attrs("UNKNOWN", "UNKNOWN", "UNKNOWN", "UNKNOWN",
            "UNKNOWN", "UNKNOWN"),
     "Data-Hotel-Cat/1.1"),
    # 74-75 line lib/woothee/mobilephone.py
    (_attrs("SymbianOS", "UNKNOWN", "SymbianOS", "UNKNOWN",
            "mobilephone", "UNKNOWN"),
     "SymbianOS/9.2;"),
    # 78-80 line lib/woothee/mobilephone.py
    (_attrs("Mobile Transcoder", "Hatena", "Mobile Transcoder", "UNKNOWN",
            "mobilephone", "UNKNOWN"),
     ("(compatible; Hatena-Mobile-Gateway/1.2;"
      " +http://mgw.hatena.ne.jp/help)")),
    # 25-27 line lib/woothee/os.py
    (_attrs("UNKNOWN", "UNKNOWN", "Windows UNKNOWN Ver", "UNKNOWN",
            "pc", "UNKNOWN"),
     "Mozilla/5.0 (Windows ; rv:8.0) Gecko/20111105 Thunderbird/8.0"),
    # 49 line lib/woothee/os.py
    (_attrs("Internet Explorer", "UNKNOWN", "Windows NT 4.0", "NT 4.0",
            "pc", "Microsoft"),
     "Mozilla/4.0 (compatible; MSIE 6.0b; Windows NT 4.0)"),
    # 51 line lib/woothee/os.py
    (_attrs("Internet Explorer", "6.0", "Windows 98", "98",
            "pc", "Microsoft"),
     "Mozilla/4.0 (compatible; MSIE 6.0; Windows 98)"),
    # 53 line lib/woothee/os.py
    (_attrs("Internet Explorer", "5.50", "Windows 95", "95",
            "pc", "Microsoft"),
     "Mozilla/4.0 (compatible; MSIE 5.50; Windows 95; SiteKiosk 4.8)"),
    # 121 line lib/woothee/os.py
    (_attrs("UNKNOWN", "UNKNOWN", "iPad", "UNKNOWN",
            "smartphone", "UNKNOWN"),
     "Mozilla/5.0 (iPad; "),
    # 123 line lib/woothee/os.py
    (_attrs("UNKNOWN", "UNKNOWN", "iPod", "UNKNOWN",
            "smartphone", "UNKNOWN"),
     "Mozilla/5.0 (iPod; "),
    # 183-185 line lib/woothee/os.py
    (_attrs("Mobile Transcoder", "Naver", "Mobile Transcoder", "UNKNOWN",
            "mobilephone", "UNKNOWN"),
     "Naver Transcoder"),
]


class TestDataset:
    """Sanity checks on the constants exported by ``woothee.dataset``."""

    def test_contains_constants(self):
        from woothee import dataset
        assert dataset.ATTRIBUTE_NAME == 'name'

    def test_contains_attribute_list(self):
        from woothee import dataset
        expected = [
            dataset.ATTRIBUTE_NAME,
            dataset.ATTRIBUTE_CATEGORY,
            dataset.ATTRIBUTE_OS,
            dataset.ATTRIBUTE_VENDOR,
            dataset.ATTRIBUTE_VERSION,
            dataset.ATTRIBUTE_OS_VERSION,
        ]
        assert dataset.ATTRIBUTE_LIST == expected

    def test_contains_category_list(self):
        from woothee import dataset
        expected = [
            dataset.CATEGORY_PC,
            dataset.CATEGORY_SMARTPHONE,
            dataset.CATEGORY_MOBILEPHONE,
            dataset.CATEGORY_CRAWLER,
            dataset.CATEGORY_APPLIANCE,
            dataset.CATEGORY_MISC,
            dataset.VALUE_UNKNOWN,
        ]
        assert dataset.CATEGORY_LIST == expected


class TestParse:
    """Tests for ``woothee.parse``."""

    @pytest.fixture()
    def target(self):
        from woothee import parse
        return parse

    @pytest.mark.parametrize(('groupname', 'test_case'), gen_test_cases())
    def test_testsets(self, target, groupname, test_case):
        # 'target' is the user-agent string; whatever remains in the case
        # is the set of expected attributes.
        ua_string = test_case.pop('target')
        parsed = target(ua_string)

        # Check only the attrs exists in the expected(=test_case).
        relevant = {key: parsed[key] for key in parsed if key in test_case}
        assert relevant == test_case, \
            '{0} test({1})'.format(groupname, ua_string)

    @pytest.mark.parametrize(('expected', 'ua_string'), _EXTRA_CASES)
    def test_non_provide_testsets(self, target, expected, ua_string):
        # These patterns do not exist in the shared testsets.
        # Their main purpose is to exercise each branch of the logic.
        # The user agents are dummies that do not exist in the wild.
        assert expected == target(ua_string)


class TestIsCrawler:
    """Tests for ``woothee.is_crawler``."""

    @pytest.fixture
    def target(self):
        from woothee import is_crawler
        return is_crawler

    @pytest.mark.parametrize('ua_string', [
        "",
        "-",
        None,
        "Mozilla/5.0 (Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko"
    ])
    def test_false(self, target, ua_string):
        assert not target(ua_string)

    @pytest.mark.parametrize('ua_string', [
        ("Mozilla/5.0 (compatible; Yahoo! Slurp;"
         " http://help.yahoo.com/help/us/ysearch/slurp)"),
    ])
    def test_true(self, target, ua_string):
        assert target(ua_string)


class TestTryRareCases:
    """Direct test of ``challenge_smartphone_patterns`` via ``try_rare_cases``.

    Through ``parse`` this challenge never returns True, because
    "CFNetwork" is already caught by ``challenge_smartphone`` in
    ``try_os``.  It is therefore exercised here by calling
    ``try_rare_cases`` directly.

    The function (challenge_smartphone_patterns) may not be needed at all.
    """

    @pytest.fixture()
    def target(self):
        from woothee import try_rare_cases
        return try_rare_cases

    def test_challenge_smartphone_patterns(self, target, mocker):
        sleipnir = mocker.patch("woothee.browser.challenge_sleipnir")
        result = {}  # type: Dict[str, str]

        matched = target("CFNetwork/", result)

        observed = {
            'return_value': matched,
            'result': result,
            'called_check': sleipnir.called,
        }
        assert observed == {
            'return_value': True,
            'result': {'category': 'smartphone', 'os': 'iOS'},
            # the sleipnir challenge comes later and must not be reached
            'called_check': False,
        }
296 | """ 297 | 298 | @pytest.fixture() 299 | def target(self): 300 | from woothee import try_rare_cases 301 | return try_rare_cases 302 | 303 | def test_challenge_smartphone_patterns(self, target, mocker): 304 | m = mocker.patch("woothee.browser.challenge_sleipnir") 305 | result = {} # type: Dict[str, str] 306 | 307 | ret = target("CFNetwork/", result) 308 | 309 | expected = { 310 | 'return_value': True, 311 | 'result': {'category': 'smartphone', 'os': 'iOS'}, 312 | 'called_check': False, 313 | } 314 | actual = { 315 | 'return_value': ret, 316 | 'result': result, 317 | 'called_check': m.called, 318 | } 319 | assert expected == actual 320 | -------------------------------------------------------------------------------- /lib/woothee/dataset.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | KEY_LABEL = 'label' 4 | KEY_NAME = 'name' 5 | KEY_TYPE = 'type' 6 | KEY_CATEGORY = 'category' 7 | KEY_OS = 'os' 8 | KEY_OS_VERSION = 'os_version' 9 | KEY_VENDOR = 'vendor' 10 | KEY_VERSION = 'version' 11 | 12 | TYPE_BROWSER = 'browser' 13 | TYPE_OS = 'os' 14 | TYPE_FULL = 'full' 15 | 16 | CATEGORY_PC = 'pc' 17 | CATEGORY_SMARTPHONE = 'smartphone' 18 | CATEGORY_MOBILEPHONE = 'mobilephone' 19 | CATEGORY_CRAWLER = 'crawler' 20 | CATEGORY_APPLIANCE = 'appliance' 21 | CATEGORY_MISC = 'misc' 22 | 23 | ATTRIBUTE_NAME = 'name' 24 | ATTRIBUTE_CATEGORY = 'category' 25 | ATTRIBUTE_OS = 'os' 26 | ATTRIBUTE_OS_VERSION = 'os_version' 27 | ATTRIBUTE_VENDOR = 'vendor' 28 | ATTRIBUTE_VERSION = 'version' 29 | 30 | VALUE_UNKNOWN = 'UNKNOWN' 31 | 32 | CATEGORY_LIST = [ 33 | CATEGORY_PC, CATEGORY_SMARTPHONE, CATEGORY_MOBILEPHONE, 34 | CATEGORY_CRAWLER, CATEGORY_APPLIANCE, CATEGORY_MISC, VALUE_UNKNOWN 35 | ] 36 | ATTRIBUTE_LIST = [ 37 | ATTRIBUTE_NAME, ATTRIBUTE_CATEGORY, ATTRIBUTE_OS, 38 | ATTRIBUTE_VENDOR, ATTRIBUTE_VERSION, ATTRIBUTE_OS_VERSION 39 | ] 40 | 41 | DATASET = {} 42 | 43 | 44 | def _init(): 45 | # GENERATED from 
dataset.yaml at Fri Oct 1 10:08:57 2021 by hattori 46 | obj = {'label': 'MSIE', 'name': 'Internet Explorer', 'type': 'browser'} # NOQA 47 | obj['vendor'] = 'Microsoft' 48 | DATASET[obj['label']] = obj 49 | obj = {'label': 'Edge', 'name': 'Edge', 'type': 'browser'} # NOQA 50 | obj['vendor'] = 'Microsoft' 51 | DATASET[obj['label']] = obj 52 | obj = {'label': 'Chrome', 'name': 'Chrome', 'type': 'browser'} # NOQA 53 | obj['vendor'] = 'Google' 54 | DATASET[obj['label']] = obj 55 | obj = {'label': 'Safari', 'name': 'Safari', 'type': 'browser'} # NOQA 56 | obj['vendor'] = 'Apple' 57 | DATASET[obj['label']] = obj 58 | obj = {'label': 'Firefox', 'name': 'Firefox', 'type': 'browser'} # NOQA 59 | obj['vendor'] = 'Mozilla' 60 | DATASET[obj['label']] = obj 61 | obj = {'label': 'Opera', 'name': 'Opera', 'type': 'browser'} # NOQA 62 | obj['vendor'] = 'Opera' 63 | DATASET[obj['label']] = obj 64 | obj = {'label': 'Vivaldi', 'name': 'Vivaldi', 'type': 'browser'} # NOQA 65 | obj['vendor'] = 'Vivaldi Technologies' 66 | DATASET[obj['label']] = obj 67 | obj = {'label': 'Sleipnir', 'name': 'Sleipnir', 'type': 'browser'} # NOQA 68 | obj['vendor'] = 'Fenrir Inc.' 
69 | DATASET[obj['label']] = obj 70 | obj = {'label': 'GSA', 'name': 'Google Search App', 'type': 'browser'} # NOQA 71 | obj['vendor'] = 'Google' 72 | DATASET[obj['label']] = obj 73 | obj = {'label': 'Webview', 'name': 'Webview', 'type': 'browser'} # NOQA 74 | obj['vendor'] = 'OS vendor' 75 | DATASET[obj['label']] = obj 76 | obj = {'label': 'YaBrowser', 'name': 'Yandex Browser', 'type': 'browser'} # NOQA 77 | obj['vendor'] = 'Yandex' 78 | DATASET[obj['label']] = obj 79 | obj = {'label': 'SamsungBrowser', 'name': 'SamsungBrowser', 'type': 'browser'} # NOQA 80 | obj['vendor'] = 'Samsung' 81 | DATASET[obj['label']] = obj 82 | obj = {'label': 'Win', 'name': 'Windows UNKNOWN Ver', 'type': 'os'} # NOQA 83 | obj['category'] = 'pc' 84 | DATASET[obj['label']] = obj 85 | obj = {'label': 'Win10', 'name': 'Windows 10', 'type': 'os'} # NOQA 86 | obj['category'] = 'pc' 87 | DATASET[obj['label']] = obj 88 | obj = {'label': 'Win8.1', 'name': 'Windows 8.1', 'type': 'os'} # NOQA 89 | obj['category'] = 'pc' 90 | DATASET[obj['label']] = obj 91 | obj = {'label': 'Win8', 'name': 'Windows 8', 'type': 'os'} # NOQA 92 | obj['category'] = 'pc' 93 | DATASET[obj['label']] = obj 94 | obj = {'label': 'Win7', 'name': 'Windows 7', 'type': 'os'} # NOQA 95 | obj['category'] = 'pc' 96 | DATASET[obj['label']] = obj 97 | obj = {'label': 'WinVista', 'name': 'Windows Vista', 'type': 'os'} # NOQA 98 | obj['category'] = 'pc' 99 | DATASET[obj['label']] = obj 100 | obj = {'label': 'WinXP', 'name': 'Windows XP', 'type': 'os'} # NOQA 101 | obj['category'] = 'pc' 102 | DATASET[obj['label']] = obj 103 | obj = {'label': 'Win2000', 'name': 'Windows 2000', 'type': 'os'} # NOQA 104 | obj['category'] = 'pc' 105 | DATASET[obj['label']] = obj 106 | obj = {'label': 'WinNT4', 'name': 'Windows NT 4.0', 'type': 'os'} # NOQA 107 | obj['category'] = 'pc' 108 | DATASET[obj['label']] = obj 109 | obj = {'label': 'WinMe', 'name': 'Windows Me', 'type': 'os'} # NOQA 110 | obj['category'] = 'pc' 111 | DATASET[obj['label']] = obj 
112 | obj = {'label': 'Win98', 'name': 'Windows 98', 'type': 'os'} # NOQA 113 | obj['category'] = 'pc' 114 | DATASET[obj['label']] = obj 115 | obj = {'label': 'Win95', 'name': 'Windows 95', 'type': 'os'} # NOQA 116 | obj['category'] = 'pc' 117 | DATASET[obj['label']] = obj 118 | obj = {'label': 'WinPhone', 'name': 'Windows Phone OS', 'type': 'os'} # NOQA 119 | obj['category'] = 'smartphone' 120 | DATASET[obj['label']] = obj 121 | obj = {'label': 'WinCE', 'name': 'Windows CE', 'type': 'os'} # NOQA 122 | obj['category'] = 'smartphone' 123 | DATASET[obj['label']] = obj 124 | obj = {'label': 'OSX', 'name': 'Mac OSX', 'type': 'os'} # NOQA 125 | obj['category'] = 'pc' 126 | DATASET[obj['label']] = obj 127 | obj = {'label': 'MacOS', 'name': 'Mac OS Classic', 'type': 'os'} # NOQA 128 | obj['category'] = 'pc' 129 | DATASET[obj['label']] = obj 130 | obj = {'label': 'Linux', 'name': 'Linux', 'type': 'os'} # NOQA 131 | obj['category'] = 'pc' 132 | DATASET[obj['label']] = obj 133 | obj = {'label': 'BSD', 'name': 'BSD', 'type': 'os'} # NOQA 134 | obj['category'] = 'pc' 135 | DATASET[obj['label']] = obj 136 | obj = {'label': 'ChromeOS', 'name': 'ChromeOS', 'type': 'os'} # NOQA 137 | obj['category'] = 'pc' 138 | DATASET[obj['label']] = obj 139 | obj = {'label': 'Android', 'name': 'Android', 'type': 'os'} # NOQA 140 | obj['category'] = 'smartphone' 141 | DATASET[obj['label']] = obj 142 | obj = {'label': 'iPhone', 'name': 'iPhone', 'type': 'os'} # NOQA 143 | obj['category'] = 'smartphone' 144 | DATASET[obj['label']] = obj 145 | obj = {'label': 'iPad', 'name': 'iPad', 'type': 'os'} # NOQA 146 | obj['category'] = 'smartphone' 147 | DATASET[obj['label']] = obj 148 | obj = {'label': 'iPod', 'name': 'iPod', 'type': 'os'} # NOQA 149 | obj['category'] = 'smartphone' 150 | DATASET[obj['label']] = obj 151 | obj = {'label': 'iOS', 'name': 'iOS', 'type': 'os'} # NOQA 152 | obj['category'] = 'smartphone' 153 | DATASET[obj['label']] = obj 154 | obj = {'label': 'FirefoxOS', 'name': 'Firefox OS', 
'type': 'os'} # NOQA 155 | obj['category'] = 'smartphone' 156 | DATASET[obj['label']] = obj 157 | obj = {'label': 'BlackBerry', 'name': 'BlackBerry', 'type': 'os'} # NOQA 158 | obj['category'] = 'smartphone' 159 | DATASET[obj['label']] = obj 160 | obj = {'label': 'BlackBerry10', 'name': 'BlackBerry 10', 'type': 'os'} # NOQA 161 | obj['category'] = 'smartphone' 162 | DATASET[obj['label']] = obj 163 | obj = {'label': 'docomo', 'name': 'docomo', 'type': 'full'} # NOQA 164 | obj['vendor'] = 'docomo' 165 | obj['category'] = 'mobilephone' 166 | obj['os'] = 'docomo' 167 | DATASET[obj['label']] = obj 168 | obj = {'label': 'au', 'name': 'au by KDDI', 'type': 'full'} # NOQA 169 | obj['vendor'] = 'au' 170 | obj['category'] = 'mobilephone' 171 | obj['os'] = 'au' 172 | DATASET[obj['label']] = obj 173 | obj = {'label': 'SoftBank', 'name': 'SoftBank Mobile', 'type': 'full'} # NOQA 174 | obj['vendor'] = 'SoftBank' 175 | obj['category'] = 'mobilephone' 176 | obj['os'] = 'SoftBank' 177 | DATASET[obj['label']] = obj 178 | obj = {'label': 'willcom', 'name': 'WILLCOM', 'type': 'full'} # NOQA 179 | obj['vendor'] = 'WILLCOM' 180 | obj['category'] = 'mobilephone' 181 | obj['os'] = 'WILLCOM' 182 | DATASET[obj['label']] = obj 183 | obj = {'label': 'jig', 'name': 'jig browser', 'type': 'full'} # NOQA 184 | obj['category'] = 'mobilephone' 185 | obj['os'] = 'jig' 186 | DATASET[obj['label']] = obj 187 | obj = {'label': 'emobile', 'name': 'emobile', 'type': 'full'} # NOQA 188 | obj['category'] = 'mobilephone' 189 | obj['os'] = 'emobile' 190 | DATASET[obj['label']] = obj 191 | obj = {'label': 'SymbianOS', 'name': 'SymbianOS', 'type': 'full'} # NOQA 192 | obj['category'] = 'mobilephone' 193 | obj['os'] = 'SymbianOS' 194 | DATASET[obj['label']] = obj 195 | obj = {'label': 'MobileTranscoder', 'name': 'Mobile Transcoder', 'type': 'full'} # NOQA 196 | obj['category'] = 'mobilephone' 197 | obj['os'] = 'Mobile Transcoder' 198 | DATASET[obj['label']] = obj 199 | obj = {'label': 'Nintendo3DS', 'name': 
'Nintendo 3DS', 'type': 'full'} # NOQA 200 | obj['vendor'] = 'Nintendo' 201 | obj['category'] = 'appliance' 202 | obj['os'] = 'Nintendo 3DS' 203 | DATASET[obj['label']] = obj 204 | obj = {'label': 'NintendoDSi', 'name': 'Nintendo DSi', 'type': 'full'} # NOQA 205 | obj['vendor'] = 'Nintendo' 206 | obj['category'] = 'appliance' 207 | obj['os'] = 'Nintendo DSi' 208 | DATASET[obj['label']] = obj 209 | obj = {'label': 'NintendoWii', 'name': 'Nintendo Wii', 'type': 'full'} # NOQA 210 | obj['vendor'] = 'Nintendo' 211 | obj['category'] = 'appliance' 212 | obj['os'] = 'Nintendo Wii' 213 | DATASET[obj['label']] = obj 214 | obj = {'label': 'NintendoWiiU', 'name': 'Nintendo Wii U', 'type': 'full'} # NOQA 215 | obj['vendor'] = 'Nintendo' 216 | obj['category'] = 'appliance' 217 | obj['os'] = 'Nintendo Wii U' 218 | DATASET[obj['label']] = obj 219 | obj = {'label': 'PSP', 'name': 'PlayStation Portable', 'type': 'full'} # NOQA 220 | obj['vendor'] = 'Sony' 221 | obj['category'] = 'appliance' 222 | obj['os'] = 'PlayStation Portable' 223 | DATASET[obj['label']] = obj 224 | obj = {'label': 'PSVita', 'name': 'PlayStation Vita', 'type': 'full'} # NOQA 225 | obj['vendor'] = 'Sony' 226 | obj['category'] = 'appliance' 227 | obj['os'] = 'PlayStation Vita' 228 | DATASET[obj['label']] = obj 229 | obj = {'label': 'PS3', 'name': 'PlayStation 3', 'type': 'full'} # NOQA 230 | obj['vendor'] = 'Sony' 231 | obj['category'] = 'appliance' 232 | obj['os'] = 'PlayStation 3' 233 | DATASET[obj['label']] = obj 234 | obj = {'label': 'PS4', 'name': 'PlayStation 4', 'type': 'full'} # NOQA 235 | obj['vendor'] = 'Sony' 236 | obj['category'] = 'appliance' 237 | obj['os'] = 'PlayStation 4' 238 | DATASET[obj['label']] = obj 239 | obj = {'label': 'Xbox360', 'name': 'Xbox 360', 'type': 'full'} # NOQA 240 | obj['vendor'] = 'Microsoft' 241 | obj['category'] = 'appliance' 242 | obj['os'] = 'Xbox 360' 243 | DATASET[obj['label']] = obj 244 | obj = {'label': 'XboxOne', 'name': 'Xbox One', 'type': 'full'} # NOQA 245 | 
obj['vendor'] = 'Microsoft' 246 | obj['category'] = 'appliance' 247 | obj['os'] = 'Xbox One' 248 | DATASET[obj['label']] = obj 249 | obj = {'label': 'DigitalTV', 'name': 'InternetTVBrowser', 'type': 'full'} # NOQA 250 | obj['category'] = 'appliance' 251 | obj['os'] = 'DigitalTV' 252 | DATASET[obj['label']] = obj 253 | obj = {'label': 'SafariRSSReader', 'name': 'Safari RSSReader', 'type': 'full'} # NOQA 254 | obj['vendor'] = 'Apple' 255 | obj['category'] = 'misc' 256 | DATASET[obj['label']] = obj 257 | obj = {'label': 'GoogleDesktop', 'name': 'Google Desktop', 'type': 'full'} # NOQA 258 | obj['vendor'] = 'Google' 259 | obj['category'] = 'misc' 260 | DATASET[obj['label']] = obj 261 | obj = {'label': 'WindowsRSSReader', 'name': 'Windows RSSReader', 'type': 'full'} # NOQA 262 | obj['vendor'] = 'Microsoft' 263 | obj['category'] = 'misc' 264 | DATASET[obj['label']] = obj 265 | obj = {'label': 'VariousRSSReader', 'name': 'RSSReader', 'type': 'full'} # NOQA 266 | obj['category'] = 'misc' 267 | DATASET[obj['label']] = obj 268 | obj = {'label': 'HTTPLibrary', 'name': 'HTTP Library', 'type': 'full'} # NOQA 269 | obj['category'] = 'misc' 270 | DATASET[obj['label']] = obj 271 | obj = {'label': 'GoogleBot', 'name': 'Googlebot', 'type': 'full'} # NOQA 272 | obj['category'] = 'crawler' 273 | DATASET[obj['label']] = obj 274 | obj = {'label': 'GoogleBotMobile', 'name': 'Googlebot Mobile', 'type': 'full'} # NOQA 275 | obj['category'] = 'crawler' 276 | DATASET[obj['label']] = obj 277 | obj = {'label': 'GoogleMediaPartners', 'name': 'Google Mediapartners', 'type': 'full'} # NOQA 278 | obj['category'] = 'crawler' 279 | DATASET[obj['label']] = obj 280 | obj = {'label': 'GoogleFeedFetcher', 'name': 'Google Feedfetcher', 'type': 'full'} # NOQA 281 | obj['category'] = 'crawler' 282 | DATASET[obj['label']] = obj 283 | obj = {'label': 'GoogleAppEngine', 'name': 'Google AppEngine', 'type': 'full'} # NOQA 284 | obj['category'] = 'crawler' 285 | DATASET[obj['label']] = obj 286 | obj = {'label': 
'GoogleWebPreview', 'name': 'Google Web Preview', 'type': 'full'} # NOQA 287 | obj['category'] = 'crawler' 288 | DATASET[obj['label']] = obj 289 | obj = {'label': 'YahooSlurp', 'name': 'Yahoo! Slurp', 'type': 'full'} # NOQA 290 | obj['category'] = 'crawler' 291 | DATASET[obj['label']] = obj 292 | obj = {'label': 'YahooJP', 'name': 'Yahoo! Japan', 'type': 'full'} # NOQA 293 | obj['category'] = 'crawler' 294 | DATASET[obj['label']] = obj 295 | obj = {'label': 'YahooPipes', 'name': 'Yahoo! Pipes', 'type': 'full'} # NOQA 296 | obj['category'] = 'crawler' 297 | DATASET[obj['label']] = obj 298 | obj = {'label': 'Baiduspider', 'name': 'Baiduspider', 'type': 'full'} # NOQA 299 | obj['category'] = 'crawler' 300 | DATASET[obj['label']] = obj 301 | obj = {'label': 'msnbot', 'name': 'msnbot', 'type': 'full'} # NOQA 302 | obj['category'] = 'crawler' 303 | DATASET[obj['label']] = obj 304 | obj = {'label': 'bingbot', 'name': 'bingbot', 'type': 'full'} # NOQA 305 | obj['category'] = 'crawler' 306 | DATASET[obj['label']] = obj 307 | obj = {'label': 'BingPreview', 'name': 'BingPreview', 'type': 'full'} # NOQA 308 | obj['category'] = 'crawler' 309 | DATASET[obj['label']] = obj 310 | obj = {'label': 'Yeti', 'name': 'Naver Yeti', 'type': 'full'} # NOQA 311 | obj['category'] = 'crawler' 312 | DATASET[obj['label']] = obj 313 | obj = {'label': 'FeedBurner', 'name': 'Google FeedBurner', 'type': 'full'} # NOQA 314 | obj['category'] = 'crawler' 315 | DATASET[obj['label']] = obj 316 | obj = {'label': 'facebook', 'name': 'facebook', 'type': 'full'} # NOQA 317 | obj['category'] = 'crawler' 318 | DATASET[obj['label']] = obj 319 | obj = {'label': 'twitter', 'name': 'twitter', 'type': 'full'} # NOQA 320 | obj['category'] = 'crawler' 321 | DATASET[obj['label']] = obj 322 | obj = {'label': 'trendictionbot', 'name': 'trendiction', 'type': 'full'} # NOQA 323 | obj['category'] = 'crawler' 324 | DATASET[obj['label']] = obj 325 | obj = {'label': 'mixi', 'name': 'mixi', 'type': 'full'} # NOQA 326 | 
obj['category'] = 'crawler' 327 | DATASET[obj['label']] = obj 328 | obj = {'label': 'IndyLibrary', 'name': 'Indy Library', 'type': 'full'} # NOQA 329 | obj['category'] = 'crawler' 330 | DATASET[obj['label']] = obj 331 | obj = {'label': 'ApplePubSub', 'name': 'Apple iCloud', 'type': 'full'} # NOQA 332 | obj['category'] = 'crawler' 333 | DATASET[obj['label']] = obj 334 | obj = {'label': 'Genieo', 'name': 'Genieo Web Filter', 'type': 'full'} # NOQA 335 | obj['category'] = 'crawler' 336 | DATASET[obj['label']] = obj 337 | obj = {'label': 'topsyButterfly', 'name': 'topsy Butterfly', 'type': 'full'} # NOQA 338 | obj['category'] = 'crawler' 339 | DATASET[obj['label']] = obj 340 | obj = {'label': 'rogerbot', 'name': 'SeoMoz rogerbot', 'type': 'full'} # NOQA 341 | obj['category'] = 'crawler' 342 | DATASET[obj['label']] = obj 343 | obj = {'label': 'AhrefsBot', 'name': 'ahref AhrefsBot', 'type': 'full'} # NOQA 344 | obj['category'] = 'crawler' 345 | DATASET[obj['label']] = obj 346 | obj = {'label': 'radian6', 'name': 'salesforce radian6', 'type': 'full'} # NOQA 347 | obj['category'] = 'crawler' 348 | DATASET[obj['label']] = obj 349 | obj = {'label': 'Hatena', 'name': 'Hatena', 'type': 'full'} # NOQA 350 | obj['category'] = 'crawler' 351 | DATASET[obj['label']] = obj 352 | obj = {'label': 'goo', 'name': 'goo', 'type': 'full'} # NOQA 353 | obj['category'] = 'crawler' 354 | DATASET[obj['label']] = obj 355 | obj = {'label': 'livedoorFeedFetcher', 'name': 'livedoor FeedFetcher', 'type': 'full'} # NOQA 356 | obj['category'] = 'crawler' 357 | DATASET[obj['label']] = obj 358 | obj = {'label': 'VariousCrawler', 'name': 'misc crawler', 'type': 'full'} # NOQA 359 | obj['category'] = 'crawler' 360 | DATASET[obj['label']] = obj 361 | obj = {'label': 'AdsBotGoogleMobile', 'name': 'AdsBot-Google-Mobile', 'type': 'full'} # NOQA 362 | obj['category'] = 'crawler' 363 | DATASET[obj['label']] = obj 364 | obj = {'label': 'AdsBotGoogle', 'name': 'AdsBot-Google', 'type': 'full'} # NOQA 365 | 
obj['category'] = 'crawler' 366 | DATASET[obj['label']] = obj 367 | 368 | 369 | _init() 370 | 371 | 372 | def get(label): 373 | return DATASET[label] 374 | --------------------------------------------------------------------------------